Skip to content

models

models

Machine learning models for FPL prediction.

BaselineModel

BaselineModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaseModel

Baseline model using simple heuristics.

Methods: - Rolling mean of points ('rolling_mean') - Exponentially weighted moving average ('ewma') - Last observed value ('last_value')

Initialize baseline model.

PARAMETER DESCRIPTION
method

Prediction method: 'rolling_mean', 'ewma', 'last_value'

TYPE: str DEFAULT: 'rolling_mean'

window

Window size for rolling calculations

TYPE: int DEFAULT: 5

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """
    Initialize baseline model.

    Parameters
    ----------
    method : str
        Prediction method: 'rolling_mean', 'ewma', 'last_value'
    window : int
        Window size for rolling calculations
    """
    self.method = method
    self.window = window
    self.predictions = {}

fit

fit(X, y=None)

Fit the model (no-op for baseline).

Source code in fplx/models/baseline.py
def fit(self, X, y=None):
    """No training is needed for the baseline; return self unchanged."""
    return self

predict

predict(X: DataFrame) -> float

Predict next gameweek points for a player.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Predict next gameweek points for a player.

    Parameters
    ----------
    X : pd.DataFrame
        Player historical data; must contain a "points" column.

    Returns
    -------
    float
        Predicted points (0.0 when no usable history is available).
    """
    # No history or no points column -> nothing to predict from.
    if "points" not in X.columns or X.empty:
        return 0.0

    history = X["points"]

    dispatch = {
        "rolling_mean": self._rolling_mean,
        "ewma": self._ewma,
        "last_value": lambda series: series.iloc[-1],
    }
    handler = dispatch.get(self.method)
    if handler is not None:
        return handler(history)

    # Unrecognized method: fall back to the rolling mean.
    logger.warning(f"Unknown method {self.method}, using rolling_mean")
    return self._rolling_mean(history)

batch_predict

batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Predict for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of predictions

Source code in fplx/models/baseline.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Run predict() for every player and cache the results.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Mapping of player ID to that player's historical data.

    Returns
    -------
    dict[str, float]
        Mapping of player ID to predicted points; also stored on
        ``self.predictions``.
    """
    self.predictions = {
        player_id: self.predict(frame)
        for player_id, frame in players_data.items()
    }
    return self.predictions

EnsembleModel

EnsembleModel(
    models: list, weights: Optional[list[float]] = None
)

Ensemble combining multiple models with weighted averaging.

PARAMETER DESCRIPTION
models

List of model instances

TYPE: list

weights

Weights for each model (must sum to 1)

TYPE: Optional[list[float]] DEFAULT: None

Source code in fplx/models/ensemble.py
def __init__(self, models: list, weights: Optional[list[float]] = None):
    self.models = models

    if weights is None:
        # Equal weights
        self.weights = [1.0 / len(models)] * len(models)
    else:
        if len(weights) != len(models):
            raise ValueError("Number of weights must match number of models")
        if not np.isclose(sum(weights), 1.0):
            raise ValueError("Weights must sum to 1")
        self.weights = weights

predict

predict(player_data: DataFrame) -> float

Ensemble prediction for a single player.

PARAMETER DESCRIPTION
player_data

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Ensemble prediction

Source code in fplx/models/ensemble.py
def predict(self, player_data: pd.DataFrame) -> float:
    """
    Ensemble prediction for a single player.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data.

    Returns
    -------
    float
        Weighted average of the member predictions, floored at 0.
    """
    member_preds = []
    for model in self.models:
        try:
            member_preds.append(model.predict(player_data))
        except Exception as e:
            # A failing member contributes 0 rather than sinking the ensemble.
            logger.warning(f"Model {type(model).__name__} failed: {e}")
            member_preds.append(0.0)

    # Weighted average, clamped so the ensemble never predicts negative points.
    weighted = sum(w * p for p, w in zip(member_preds, self.weights))
    return max(0, weighted)

batch_predict

batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Ensemble predictions for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of ensemble predictions

Source code in fplx/models/ensemble.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Ensemble predictions for multiple players.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Mapping of player ID to that player's historical data.

    Returns
    -------
    dict[str, float]
        Mapping of player ID to the ensemble prediction.
    """
    return {
        player_id: self.predict(frame)
        for player_id, frame in players_data.items()
    }

RegressionModel

RegressionModel(
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs
)

Bases: BaseModel

Machine learning regression model for FPL predictions.

Adapted from the MLSP project's regressor patterns.

PARAMETER DESCRIPTION
model_type

Type of model: 'ridge', 'xgboost', 'lightgbm'

TYPE: str DEFAULT: 'ridge'

initial_train_size

Size of initial training window

TYPE: int DEFAULT: 10

test_size

Forecast horizon

TYPE: int DEFAULT: 1

step

Rolling window step size

TYPE: int DEFAULT: 1

Source code in fplx/models/regression.py
def __init__(
    self,
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs,
):
    """
    Initialize the regression model.

    Parameters
    ----------
    model_type : str
        Type of model: 'ridge', 'xgboost', 'lightgbm'.
    initial_train_size : int
        Size of the initial training window.
    test_size : int
        Forecast horizon.
    step : int
        Rolling window step size.
    **model_kwargs
        Extra keyword arguments forwarded to the underlying estimator.

    Raises
    ------
    ImportError
        If the optional ML dependencies are not installed.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError(
            "sklearn, xgboost, or lightgbm not available. Install with: pip install fplx[ml]"
        )

    self.model_type = model_type
    self.cv = RollingCV(initial_train_size, test_size, step)
    self.model = self._create_model(model_type, **model_kwargs)
    # Rolling-CV outputs, populated by fit_predict().
    self.predictions = []
    self.true_values = []
    self.feature_importance = None
    # Training column order, recorded by fit() and used by predict().
    self.feature_names_ = None

fit

fit(X, y=None)

Fit the model.

Source code in fplx/models/regression.py
def fit(self, X, y=None):
    """Record the feature columns and fit the underlying estimator."""
    # Column order is remembered so predict() can realign future inputs.
    self.feature_names_ = list(X.columns)
    self.model.fit(X, y)
    return self

predict

predict(X)

Generate predictions.

Source code in fplx/models/regression.py
def predict(self, X):
    """Generate predictions, realigning X to the columns seen at fit time."""
    if not self.feature_names_:
        # No recorded training columns: pass the data through untouched.
        return self.model.predict(X)
    # Missing training columns are added as zeros; extra columns are dropped.
    aligned = X.reindex(columns=self.feature_names_, fill_value=0)
    return self.model.predict(aligned)

fit_predict

fit_predict(
    y: Series, X: DataFrame, verbose: bool = False
) -> Series

Fit model and generate predictions using rolling CV.

PARAMETER DESCRIPTION
y

Target time series (points to predict)

TYPE: Series

X

Feature matrix

TYPE: DataFrame

verbose

Print progress

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Series

Predictions aligned with test indices

Source code in fplx/models/regression.py
def fit_predict(self, y: pd.Series, X: pd.DataFrame, verbose: bool = False) -> pd.Series:
    """
    Fit model and generate predictions using rolling CV.

    Parameters
    ----------
    y : pd.Series
        Target time series (points to predict).
    X : pd.DataFrame
        Feature matrix.
    verbose : bool
        Log per-fold progress.

    Returns
    -------
    pd.Series
        Predictions aligned with test indices.
    """
    features = X.values
    targets = y.values

    # Reset accumulators so repeated calls don't mix folds.
    self.predictions = []
    self.true_values = []
    pred_indices = []

    for fold, (train_idx, test_idx) in enumerate(self.cv.split(features)):
        X_train = features[train_idx]
        y_train = targets[train_idx]
        X_test = features[test_idx]
        y_test = targets[test_idx]

        # Keep only rows with no NaNs in the features or the target.
        train_mask = ~np.isnan(X_train).any(axis=1) & ~np.isnan(y_train)
        if train_mask.sum() < 5:
            if verbose:
                logger.warning(f"Fold {fold}: insufficient valid training data")
            continue

        self.model.fit(X_train[train_mask], y_train[train_mask])

        # Only predict on test rows with complete features.
        test_mask = ~np.isnan(X_test).any(axis=1)
        if not test_mask.any():
            continue

        y_pred = self.model.predict(X_test[test_mask])

        self.predictions.extend(y_pred)
        self.true_values.extend(y_test[test_mask])
        pred_indices.extend(test_idx[test_mask])

        if verbose:
            rmse = np.sqrt(mean_squared_error(y_test[test_mask], y_pred))
            logger.info(f"Fold {fold}: RMSE = {rmse:.3f}")

    return pd.Series(self.predictions, index=pred_indices, name="predicted_points")

predict_next

predict_next(X: DataFrame) -> float

Predict next value given features.

PARAMETER DESCRIPTION
X

Feature matrix (single row for next gameweek)

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/regression.py
def predict_next(self, X: pd.DataFrame) -> float:
    """
    Predict next value given features.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix (single row for next gameweek).

    Returns
    -------
    float
        Predicted points; never negative, and 0.0 when there is no input
        or no fitted model.
    """
    if X.empty or self.model is None:
        return 0.0

    X_vals = X.values
    if np.isnan(X_vals).any():
        # Zero-fill missing features (note: this is NOT mean imputation,
        # despite what an earlier comment claimed).
        X_vals = np.nan_to_num(X_vals, nan=0.0)

    pred = self.model.predict(X_vals)
    # Clamp at zero and cast so the declared `-> float` contract holds
    # even when the estimator returns a numpy scalar.
    return float(max(0, pred[0]))

get_feature_importance

get_feature_importance(
    feature_names: list[str],
) -> DataFrame

Get feature importance (for tree-based models).

PARAMETER DESCRIPTION
feature_names

Names of features

TYPE: list[str]

RETURNS DESCRIPTION
DataFrame

Feature importance scores

Source code in fplx/models/regression.py
def get_feature_importance(self, feature_names: list[str]) -> pd.DataFrame:
    """
    Get feature importance (for tree-based models).

    Parameters
    ----------
    feature_names : list[str]
        Names of features.

    Returns
    -------
    pd.DataFrame
        Importance per feature, sorted descending; empty frame for
        non-tree models.
    """
    if self.model_type not in ("xgboost", "lightgbm"):
        logger.warning("Feature importance only available for tree-based models")
        return pd.DataFrame()

    table = pd.DataFrame({
        "feature": feature_names,
        "importance": self.model.feature_importances_,
    })
    return table.sort_values("importance", ascending=False)

evaluate

evaluate() -> dict[str, float]

Evaluate model performance.

RETURNS DESCRIPTION
dict[str, float]

Dictionary of metrics

Source code in fplx/models/regression.py
def evaluate(self) -> dict[str, float]:
    """
    Evaluate model performance.

    Returns
    -------
    dict[str, float]
        Dictionary of metrics
    """
    if not self.predictions:
        return {}

    predictions = np.array(self.predictions)
    true_values = np.array(self.true_values)

    rmse = np.sqrt(mean_squared_error(true_values, predictions))
    mae = np.mean(np.abs(true_values - predictions))

    return {
        "rmse": rmse,
        "mae": mae,
        "n_predictions": len(predictions),
    }

RollingCV

RollingCV(
    initial_train_size: int, test_size: int, step: int = 1
)

Generates indices for rolling cross-validation splits.

This is adapted from the MLSP project for time-series validation.

PARAMETER DESCRIPTION
initial_train_size

Size of the initial training set.

TYPE: int

test_size

Size of the test set (forecast horizon).

TYPE: int

step

Step size to move the training window forward.

TYPE: int DEFAULT: 1

Source code in fplx/models/rolling_cv.py
def __init__(self, initial_train_size: int, test_size: int, step: int = 1):
    if initial_train_size <= 0 or test_size <= 0 or step <= 0:
        raise ValueError(
            "initial_train_size, test_size, and step must be positive integers."
        )
    self.initial_train_size = initial_train_size
    self.test_size = test_size
    self.step = step

split

split(X) -> Generator[tuple[ndarray, ndarray], None, None]

Generate indices to split data into training and test sets.

PARAMETER DESCRIPTION
X

Time series data.

TYPE: array-like

YIELDS DESCRIPTION
train_indices

The training set indices for that split.

TYPE: ndarray

test_indices

The testing set indices for that split.

TYPE: ndarray

Source code in fplx/models/rolling_cv.py
def split(self, X) -> Generator[tuple[np.ndarray, np.ndarray], None, None]:
    """
    Generate indices to split data into training and test sets.

    Parameters
    ----------
    X : array-like
        Time series data.

    Yields
    ------
    train_indices : np.ndarray
        The training set indices for that split.
    test_indices : np.ndarray
        The testing set indices for that split.
    """
    n_samples = len(X)
    window = self.initial_train_size + self.test_size
    if window > n_samples:
        raise ValueError(
            "initial_train_size + test_size is larger than the number of samples."
        )

    # Slide the [train | test] window forward by `step` until it no
    # longer fits inside the series.
    for start in range(0, n_samples - window + 1, self.step):
        split_point = start + self.initial_train_size
        yield (
            np.arange(start, split_point),
            np.arange(split_point, split_point + self.test_size),
        )