Skip to content

fplx

fplx

FPLX - Fantasy Premier League Time-Series Analysis & Squad Optimization

A production-ready Python library for: - FPL player time-series data analysis - News & injury signal integration - Expected performance scoring - Optimal 15-player squad and 11-player lineup selection

FPLModel

FPLModel(
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
)

High-level interface for FPL analysis and squad optimization.

This is the main user-facing API. It orchestrates data loading, feature engineering, model fitting, and squad optimization.

PARAMETER DESCRIPTION
budget

Maximum squad budget (default 100.0)

TYPE: float DEFAULT: 100.0

horizon

Prediction horizon in gameweeks (default 1)

TYPE: int DEFAULT: 1

formation

Desired formation, or "auto" for optimization

TYPE: str DEFAULT: 'auto'

config

Custom configuration

TYPE: Optional[Dict] DEFAULT: None

Examples:

>>> from fplx import FPLModel
>>> model = FPLModel(budget=100, horizon=1)
>>> model.load_data()
>>> model.fit()
>>> squad = model.select_best_11()
>>> squad.summary()
Source code in fplx/api/interface.py
def __init__(
    self,
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
):
    """Set up the model facade; heavy components are created lazily.

    Parameters
    ----------
    budget : float
        Maximum squad budget in £m.
    horizon : int
        Prediction horizon in gameweeks.
    formation : str
        Desired formation, or "auto" to let the optimizer decide.
    config : Optional[dict]
        Custom configuration overrides.
    """
    self.budget = budget
    self.horizon = horizon
    self.formation = formation
    self.config = Config(config)

    # Pipeline components are instantiated on first use, not here.
    lazy_components = (
        "_data_loader",
        "_feature_engineer",
        "_news_collector",
        "_stats_signal",
        "_news_signal",
        "_fixture_signal",
        "_model",
        "_optimizer",
    )
    for attr in lazy_components:
        setattr(self, attr, None)

    # Mutable state, populated by load_data() / fit().
    self.players: list[Player] = []
    self.players_data: dict[int, pd.DataFrame] = {}
    self.expected_points: dict[int, float] = {}
    self.expected_variance: dict[int, float] = {}
    self.current_gameweek: int = 1

load_data

load_data(
    source: str = "api",
    path: Optional[Union[str, Path]] = None,
) -> None

Load player and fixture data.

PARAMETER DESCRIPTION
source

Data source: 'api' or 'local'

TYPE: str DEFAULT: 'api'

path

Path to local data (if source is 'local')

TYPE: Optional[Union[str, Path]] DEFAULT: None

Source code in fplx/api/interface.py
def load_data(self, source: str = "api", path: Optional[Union[str, Path]] = None) -> None:
    """
    Load player and fixture data.

    Parameters
    ----------
    source : str
        Data source: 'api' or 'local'
    path : Optional[Union[str, Path]]
        Path to local data (if source is 'local')

    Raises
    ------
    ValueError
        If ``source`` is unknown, or 'local' is requested without ``path``.
    """
    logger.info(f"Loading data from {source}...")
    if source == "api":
        bootstrap_data = self.data_loader.fetch_bootstrap_data()
        # BUG FIX: load_players(force_refresh: bool = False) does not take the
        # bootstrap payload; passing the dict made force_refresh truthy and
        # triggered a second, cache-bypassing API fetch. Calling it without
        # arguments lets it reuse the bootstrap data cached just above.
        self.players = self.data_loader.load_players()

        # Determine current gameweek from bootstrap events
        for event in bootstrap_data.get("events", []):
            if event.get("is_current"):
                self.current_gameweek = event["id"]
                break

        # Collect per-gameweek news snapshots for inference
        self.news_collector.collect_from_bootstrap(bootstrap_data, self.current_gameweek)

    elif source == "local":
        if path is None:
            raise ValueError("Path must be provided for local data source.")
        # NOTE(review): load_from_csv returns a DataFrame while self.players is
        # annotated list[Player] — confirm downstream code handles this shape.
        self.players = self.data_loader.load_from_csv(path)

    else:
        # Previously an unknown source fell through silently and logged
        # "Loaded 0 players."; fail fast instead.
        raise ValueError(f"Unknown data source: {source!r} (expected 'api' or 'local').")

    logger.info(f"Loaded {len(self.players)} players.")

    # Keep a per-player time-series lookup. This is a simplification; a full
    # implementation would fetch detailed history here.
    for player in self.players:
        self.players_data[player.id] = player.timeseries

fit

fit() -> None

Fit the prediction model.

Uses the probabilistic inference pipeline (HMM + Kalman + Fusion) when model_type is 'inference'. Falls back to the original feature engineering pipeline for baseline/ML models.

Source code in fplx/api/interface.py
def fit(self) -> None:
    """
    Fit the prediction model.

    Dispatches to the probabilistic inference pipeline (HMM + Kalman +
    Fusion) when the configured model_type is 'inference'; any other
    value falls through to the legacy feature-engineering pipeline.

    Raises
    ------
    RuntimeError
        If load_data() has not been called yet.
    """
    if not self.players:
        raise RuntimeError("Data not loaded. Call load_data() first.")

    model_type = self.config.get("model_type", "baseline")
    logger.info(f"Fitting model '{model_type}'...")

    fit_impl = (
        self._fit_inference
        if model_type == "inference"
        else lambda: self._fit_legacy(model_type)
    )
    fit_impl()

    logger.info("Model fitting complete.")

select_best_11

select_best_11() -> FullSquad

Select the optimal 15-player squad and 11-player starting lineup.

RETURNS DESCRIPTION
FullSquad

The optimized squad with lineup.

Source code in fplx/api/interface.py
def select_best_11(self) -> FullSquad:
    """
    Select the optimal 15-player squad and 11-player starting lineup.

    Returns
    -------
    FullSquad
        The optimized squad with lineup.

    Raises
    ------
    RuntimeError
        If fit() has not been called yet.
    """
    if not self.expected_points:
        raise RuntimeError("Model not fitted. Call fit() first.")

    optimizer_name = self.config.get("optimizer", "greedy")
    logger.info("Optimizing squad with %s optimizer...", optimizer_name)

    # An empty variance dict is passed through as None.
    variance = self.expected_variance if self.expected_variance else None
    squad = self.optimizer.solve(
        players=self.players,
        expected_points=self.expected_points,
        expected_variance=variance,
    )

    logger.info("Squad optimization complete.")
    return squad

Matchweek dataclass

Matchweek(
    gameweek: int,
    date: datetime,
    fixtures: list[dict],
    team_difficulty: dict[str, float],
)

Represents a matchweek with global context.

ATTRIBUTE DESCRIPTION
gameweek

Gameweek number

TYPE: int

date

Date of the gameweek

TYPE: datetime

fixtures

List of fixtures

TYPE: list[dict]

team_difficulty

Team-level difficulty ratings

TYPE: dict[str, float]

Player dataclass

Player(
    id: int,
    name: str,
    team: str,
    position: str,
    price: float,
    timeseries: DataFrame,
    news: Optional[dict] = None,
)

Represents a Fantasy Premier League player.

ATTRIBUTE DESCRIPTION
id

Unique player identifier

TYPE: int

name

Player full name

TYPE: str

team

Current team

TYPE: str

position

Position (GK, DEF, MID, FWD)

TYPE: str

price

Current price in FPL

TYPE: float

timeseries

Historical stats (points, xG, minutes, etc.)

TYPE: DataFrame

news

Latest news/injury information

TYPE: Optional[dict]

last_5_points property

last_5_points: float

Average points over last 5 gameweeks.

availability property

availability: float

Availability score (0-1) based on news.

FullSquad dataclass

FullSquad(
    squad_players: list[Player],
    lineup: Squad,
    bench: list[Player] = list(),
    squad_cost: float = 0.0,
    expected_points: float = 0.0,
)

Represents a 15-player FPL squad with a selected 11-player lineup.

The two-level FPL structure: Level 1: 15-player squad (2 GK, 5 DEF, 5 MID, 3 FWD) under budget. Level 2: 11-player starting lineup chosen from the squad each gameweek.

ATTRIBUTE DESCRIPTION
squad_players

All 15 squad members.

TYPE: list[Player]

lineup

The 11-player starting lineup (subset of squad_players).

TYPE: Squad

bench

The 4 bench players.

TYPE: list[Player]

squad_cost

Total cost of all 15 players.

TYPE: float

expected_points

Expected points for the starting 11.

TYPE: float

summary

summary() -> str

Returns a formatted string summary of the full squad.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the full 15-man squad.

    Includes total and remaining budget (against the fixed £100.0m cap),
    the starting-XI summary, and the bench, one player per line.
    """
    report = [
        f"Squad Cost: £{self.squad_cost:.1f}m / £100.0m",
        f"Remaining Budget: £{100.0 - self.squad_cost:.1f}m",
        "",
        self.lineup.summary(),
        "",
        "--- Bench ---",
    ]
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in self.bench
    )
    return "\n".join(report)

Squad dataclass

Squad(
    players: list[Player],
    formation: str,
    total_cost: float,
    expected_points: float,
    captain: Optional[Player] = None,
)

Represents an 11-player starting lineup.

ATTRIBUTE DESCRIPTION
players

Selected starters (exactly 11).

TYPE: list[Player]

formation

Formation string (e.g., "3-4-3").

TYPE: str

total_cost

Total cost of the starting 11.

TYPE: float

expected_points

Expected total points for the starting 11.

TYPE: float

captain

Captain selection (earns double points).

TYPE: Optional[Player]

summary

summary() -> str

Returns a formatted string summary of the lineup.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the starting lineup.

    Shows formation, cost, expected points and captain, followed by the
    starters sorted GK -> DEF -> MID -> FWD (unknown positions last).
    """
    captain_name = self.captain.name if self.captain else "None"
    report = [
        f"Formation: {self.formation}",
        f"Total Cost: £{self.total_cost:.1f}m",
        f"Expected Points: {self.expected_points:.2f}",
        f"Captain: {captain_name}",
        "",
        "--- Starting XI ---",
    ]
    rank = {"GK": 0, "DEF": 1, "MID": 2, "FWD": 3}
    ordered = sorted(self.players, key=lambda member: rank.get(member.position, 9))
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in ordered
    )
    return "\n".join(report)

FPLDataLoader

FPLDataLoader(cache_dir: Optional[Path] = None)

Load and manage FPL data from various sources (API, CSV, cache).

PARAMETER DESCRIPTION
cache_dir

Directory to cache downloaded data

TYPE: Optional[Path] DEFAULT: None

Source code in fplx/data/loaders.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Create a loader, ensuring the on-disk cache directory exists.

    Parameters
    ----------
    cache_dir : Optional[Path]
        Directory for cached downloads; defaults to ~/.fplx/cache.
    """
    fallback = Path.home() / ".fplx" / "cache"
    self.cache_dir = cache_dir or fallback
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    # In-memory copy of the last bootstrap payload (set by fetch_bootstrap_data).
    self._bootstrap_data = None

fetch_bootstrap_data

fetch_bootstrap_data(force_refresh: bool = False) -> dict

Fetch main FPL data (players, teams, gameweeks).

PARAMETER DESCRIPTION
force_refresh

Force refresh even if cached

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Dict

Bootstrap data containing players, teams, events

Source code in fplx/data/loaders.py
def fetch_bootstrap_data(self, force_refresh: bool = False) -> dict:
    """
    Fetch main FPL data (players, teams, gameweeks).

    Results are cached on disk and mirrored in ``self._bootstrap_data``
    so repeated calls avoid the network.

    Parameters
    ----------
    force_refresh : bool
        Force refresh even if cached

    Returns
    -------
    Dict
        Bootstrap data containing players, teams, events
    """
    # Single local import (the original imported json twice, once per branch).
    import json

    cache_file = self.cache_dir / "bootstrap.json"

    if not force_refresh and cache_file.exists():
        logger.info("Loading bootstrap data from cache")
        with open(cache_file) as f:
            data = json.load(f)
        # CONSISTENCY FIX: the cache-hit path previously returned without
        # setting self._bootstrap_data, unlike the network path below.
        self._bootstrap_data = data
        return data

    logger.info("Fetching bootstrap data from FPL API")
    # ROBUSTNESS: a timeout prevents the request from hanging indefinitely
    # on a stalled connection (requests has no default timeout).
    response = requests.get(self.BOOTSTRAP_URL, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Cache the data for subsequent calls
    with open(cache_file, "w") as f:
        json.dump(data, f)

    self._bootstrap_data = data
    return data

load_players

load_players(force_refresh: bool = False) -> list[Player]

Load all players with basic info.

PARAMETER DESCRIPTION
force_refresh

Force refresh from API

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Player]

List of Player objects

Source code in fplx/data/loaders.py
def load_players(self, force_refresh: bool = False) -> list[Player]:
    """
    Load all players with basic info.

    Parameters
    ----------
    force_refresh : bool
        Force refresh from API. For backward compatibility, a bootstrap
        payload (dict) may also be passed here and is used directly
        instead of re-fetching.

    Returns
    -------
    list[Player]
        List of Player objects
    """
    # COMPAT FIX: callers have been observed passing the bootstrap dict
    # positionally (load_players(bootstrap_data)). Treat a dict as
    # pre-fetched data rather than as a truthy force_refresh flag, which
    # previously triggered a needless cache-bypassing refetch.
    if isinstance(force_refresh, dict):
        data = force_refresh
    else:
        data = self.fetch_bootstrap_data(force_refresh)

    # Map API team ids and element types to display values
    teams = {t["id"]: t["name"] for t in data["teams"]}
    positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}

    players = []
    for element in data["elements"]:
        # Minimal single-row timeseries (enriched later via enrich_player_history)
        ts_data = {
            "gameweek": [0],
            "points": [element.get("total_points", 0)],
            "minutes": [element.get("minutes", 0)],
            # ROBUSTNESS: the API serves "form" as a string and may serve
            # None/"" — float(None) crashed before; coerce falsy values to 0.
            "form": [float(element.get("form", 0) or 0)],
        }

        chance = element.get("chance_of_playing_next_round")
        player = Player(
            id=element["id"],
            name=element["web_name"],
            team=teams[element["team"]],
            position=positions[element["element_type"]],
            price=element["now_cost"] / 10.0,  # API price is in tenths of £m
            timeseries=pd.DataFrame(ts_data),
            news={
                "text": element.get("news", ""),
                # None means "no doubt reported" -> fully available
                "availability": 1.0 if chance is None else chance / 100.0,
            },
        )
        players.append(player)

    logger.info(f"Loaded {len(players)} players")
    return players

load_player_history

load_player_history(player_id: int) -> DataFrame

Load detailed historical data for a specific player.

PARAMETER DESCRIPTION
player_id

Player ID

TYPE: int

RETURNS DESCRIPTION
DataFrame

Historical gameweek stats

Source code in fplx/data/loaders.py
def load_player_history(self, player_id: int) -> pd.DataFrame:
    """
    Load detailed historical data for a specific player.

    Parameters
    ----------
    player_id : int
        Player ID

    Returns
    -------
    pd.DataFrame
        Historical gameweek stats
    """
    url = self.PLAYER_DETAIL_URL.format(player_id=player_id)
    # ROBUSTNESS: timeout so a stalled API call cannot hang the caller
    # (requests has no default timeout).
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    data = response.json()
    history = pd.DataFrame(data["history"])

    # Normalise API column names to the library's conventions.
    # (The original's identity mappings "minutes"->"minutes" and
    # "assists"->"assists" were no-ops and have been dropped.)
    if not history.empty:
        history = history.rename(
            columns={
                "round": "gameweek",
                "total_points": "points",
                "goals_scored": "goals",
                "expected_goals": "xG",
                "expected_assists": "xA",
            }
        )

    return history

load_fixtures

load_fixtures() -> DataFrame

Load all fixtures.

RETURNS DESCRIPTION
DataFrame

Fixtures data

Source code in fplx/data/loaders.py
def load_fixtures(self) -> pd.DataFrame:
    """
    Load all fixtures.

    Returns
    -------
    pd.DataFrame
        Fixtures data
    """
    # ROBUSTNESS: timeout so the request cannot hang indefinitely
    # (requests has no default timeout).
    response = requests.get(self.FIXTURES_URL, timeout=30)
    response.raise_for_status()

    return pd.DataFrame(response.json())

load_from_csv

load_from_csv(filepath: Path) -> DataFrame

Load data from CSV file.

PARAMETER DESCRIPTION
filepath

Path to CSV file

TYPE: Path

RETURNS DESCRIPTION
DataFrame

Loaded data

Source code in fplx/data/loaders.py
def load_from_csv(self, filepath: Path) -> pd.DataFrame:
    """
    Load data from CSV file.

    Parameters
    ----------
    filepath : Path
        Path to CSV file

    Returns
    -------
    pd.DataFrame
        Loaded data
    """
    logger.info("Loading data from %s", filepath)
    return pd.read_csv(filepath)

enrich_player_history

enrich_player_history(
    players: list[Player],
) -> list[Player]

Enrich players with full historical data.

PARAMETER DESCRIPTION
players

List of players to enrich

TYPE: list[Player]

RETURNS DESCRIPTION
list[Player]

Players with enriched timeseries

Source code in fplx/data/loaders.py
def enrich_player_history(self, players: list[Player]) -> list[Player]:
    """
    Enrich players with full historical data.

    Players whose history cannot be fetched keep their existing
    timeseries and are still included in the result.

    Parameters
    ----------
    players : list[Player]
        List of players to enrich

    Returns
    -------
    list[Player]
        Players with enriched timeseries
    """
    enriched = []
    for player in players:
        try:
            history = self.load_player_history(player.id)
            if not history.empty:
                player.timeseries = history
        except Exception as e:
            # BUG FIX: the message was an f-string AND used %-style lazy
            # args; the spurious f-prefix is removed so logging formats
            # the placeholders lazily as intended.
            logger.warning("Could not load history for %s : %s", player.name, e)
        enriched.append(player)

    return enriched

api

API module.

FPLModel

FPLModel(
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
)

High-level interface for FPL analysis and squad optimization.

This is the main user-facing API. It orchestrates data loading, feature engineering, model fitting, and squad optimization.

PARAMETER DESCRIPTION
budget

Maximum squad budget (default 100.0)

TYPE: float DEFAULT: 100.0

horizon

Prediction horizon in gameweeks (default 1)

TYPE: int DEFAULT: 1

formation

Desired formation, or "auto" for optimization

TYPE: str DEFAULT: 'auto'

config

Custom configuration

TYPE: Optional[Dict] DEFAULT: None

Examples:

>>> from fplx import FPLModel
>>> model = FPLModel(budget=100, horizon=1)
>>> model.load_data()
>>> model.fit()
>>> squad = model.select_best_11()
>>> squad.summary()
Source code in fplx/api/interface.py
def __init__(
    self,
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
):
    """Set up the model facade; heavy components are created lazily.

    Parameters
    ----------
    budget : float
        Maximum squad budget in £m.
    horizon : int
        Prediction horizon in gameweeks.
    formation : str
        Desired formation, or "auto" to let the optimizer decide.
    config : Optional[dict]
        Custom configuration overrides.
    """
    self.budget = budget
    self.horizon = horizon
    self.formation = formation
    self.config = Config(config)

    # Pipeline components are instantiated on first use, not here.
    lazy_components = (
        "_data_loader",
        "_feature_engineer",
        "_news_collector",
        "_stats_signal",
        "_news_signal",
        "_fixture_signal",
        "_model",
        "_optimizer",
    )
    for attr in lazy_components:
        setattr(self, attr, None)

    # Mutable state, populated by load_data() / fit().
    self.players: list[Player] = []
    self.players_data: dict[int, pd.DataFrame] = {}
    self.expected_points: dict[int, float] = {}
    self.expected_variance: dict[int, float] = {}
    self.current_gameweek: int = 1
load_data
load_data(
    source: str = "api",
    path: Optional[Union[str, Path]] = None,
) -> None

Load player and fixture data.

PARAMETER DESCRIPTION
source

Data source: 'api' or 'local'

TYPE: str DEFAULT: 'api'

path

Path to local data (if source is 'local')

TYPE: Optional[Union[str, Path]] DEFAULT: None

Source code in fplx/api/interface.py
def load_data(self, source: str = "api", path: Optional[Union[str, Path]] = None) -> None:
    """
    Load player and fixture data.

    Parameters
    ----------
    source : str
        Data source: 'api' or 'local'
    path : Optional[Union[str, Path]]
        Path to local data (if source is 'local')

    Raises
    ------
    ValueError
        If ``source`` is unknown, or 'local' is requested without ``path``.
    """
    logger.info(f"Loading data from {source}...")
    if source == "api":
        bootstrap_data = self.data_loader.fetch_bootstrap_data()
        # BUG FIX: load_players(force_refresh: bool = False) does not take the
        # bootstrap payload; passing the dict made force_refresh truthy and
        # triggered a second, cache-bypassing API fetch. Calling it without
        # arguments lets it reuse the bootstrap data cached just above.
        self.players = self.data_loader.load_players()

        # Determine current gameweek from bootstrap events
        for event in bootstrap_data.get("events", []):
            if event.get("is_current"):
                self.current_gameweek = event["id"]
                break

        # Collect per-gameweek news snapshots for inference
        self.news_collector.collect_from_bootstrap(bootstrap_data, self.current_gameweek)

    elif source == "local":
        if path is None:
            raise ValueError("Path must be provided for local data source.")
        # NOTE(review): load_from_csv returns a DataFrame while self.players is
        # annotated list[Player] — confirm downstream code handles this shape.
        self.players = self.data_loader.load_from_csv(path)

    else:
        # Previously an unknown source fell through silently and logged
        # "Loaded 0 players."; fail fast instead.
        raise ValueError(f"Unknown data source: {source!r} (expected 'api' or 'local').")

    logger.info(f"Loaded {len(self.players)} players.")

    # Keep a per-player time-series lookup. This is a simplification; a full
    # implementation would fetch detailed history here.
    for player in self.players:
        self.players_data[player.id] = player.timeseries
fit
fit() -> None

Fit the prediction model.

Uses the probabilistic inference pipeline (HMM + Kalman + Fusion) when model_type is 'inference'. Falls back to the original feature engineering pipeline for baseline/ML models.

Source code in fplx/api/interface.py
def fit(self) -> None:
    """
    Fit the prediction model.

    Dispatches to the probabilistic inference pipeline (HMM + Kalman +
    Fusion) when the configured model_type is 'inference'; any other
    value falls through to the legacy feature-engineering pipeline.

    Raises
    ------
    RuntimeError
        If load_data() has not been called yet.
    """
    if not self.players:
        raise RuntimeError("Data not loaded. Call load_data() first.")

    model_type = self.config.get("model_type", "baseline")
    logger.info(f"Fitting model '{model_type}'...")

    fit_impl = (
        self._fit_inference
        if model_type == "inference"
        else lambda: self._fit_legacy(model_type)
    )
    fit_impl()

    logger.info("Model fitting complete.")
select_best_11
select_best_11() -> FullSquad

Select the optimal 15-player squad and 11-player starting lineup.

RETURNS DESCRIPTION
FullSquad

The optimized squad with lineup.

Source code in fplx/api/interface.py
def select_best_11(self) -> FullSquad:
    """
    Select the optimal 15-player squad and 11-player starting lineup.

    Returns
    -------
    FullSquad
        The optimized squad with lineup.

    Raises
    ------
    RuntimeError
        If fit() has not been called yet.
    """
    if not self.expected_points:
        raise RuntimeError("Model not fitted. Call fit() first.")

    optimizer_name = self.config.get("optimizer", "greedy")
    logger.info("Optimizing squad with %s optimizer...", optimizer_name)

    # An empty variance dict is passed through as None.
    variance = self.expected_variance if self.expected_variance else None
    squad = self.optimizer.solve(
        players=self.players,
        expected_points=self.expected_points,
        expected_variance=variance,
    )

    logger.info("Squad optimization complete.")
    return squad

interface

High-level API interface for FPLX.

FPLModel
FPLModel(
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
)

High-level interface for FPL analysis and squad optimization.

This is the main user-facing API. It orchestrates data loading, feature engineering, model fitting, and squad optimization.

PARAMETER DESCRIPTION
budget

Maximum squad budget (default 100.0)

TYPE: float DEFAULT: 100.0

horizon

Prediction horizon in gameweeks (default 1)

TYPE: int DEFAULT: 1

formation

Desired formation, or "auto" for optimization

TYPE: str DEFAULT: 'auto'

config

Custom configuration

TYPE: Optional[Dict] DEFAULT: None

Examples:

>>> from fplx import FPLModel
>>> model = FPLModel(budget=100, horizon=1)
>>> model.load_data()
>>> model.fit()
>>> squad = model.select_best_11()
>>> squad.summary()
Source code in fplx/api/interface.py
def __init__(
    self,
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
):
    """Set up the model facade; heavy components are created lazily.

    Parameters
    ----------
    budget : float
        Maximum squad budget in £m.
    horizon : int
        Prediction horizon in gameweeks.
    formation : str
        Desired formation, or "auto" to let the optimizer decide.
    config : Optional[dict]
        Custom configuration overrides.
    """
    self.budget = budget
    self.horizon = horizon
    self.formation = formation
    self.config = Config(config)

    # Pipeline components are instantiated on first use, not here.
    lazy_components = (
        "_data_loader",
        "_feature_engineer",
        "_news_collector",
        "_stats_signal",
        "_news_signal",
        "_fixture_signal",
        "_model",
        "_optimizer",
    )
    for attr in lazy_components:
        setattr(self, attr, None)

    # Mutable state, populated by load_data() / fit().
    self.players: list[Player] = []
    self.players_data: dict[int, pd.DataFrame] = {}
    self.expected_points: dict[int, float] = {}
    self.expected_variance: dict[int, float] = {}
    self.current_gameweek: int = 1
load_data
load_data(
    source: str = "api",
    path: Optional[Union[str, Path]] = None,
) -> None

Load player and fixture data.

PARAMETER DESCRIPTION
source

Data source: 'api' or 'local'

TYPE: str DEFAULT: 'api'

path

Path to local data (if source is 'local')

TYPE: Optional[Union[str, Path]] DEFAULT: None

Source code in fplx/api/interface.py
def load_data(self, source: str = "api", path: Optional[Union[str, Path]] = None) -> None:
    """
    Load player and fixture data.

    Parameters
    ----------
    source : str
        Data source: 'api' or 'local'
    path : Optional[Union[str, Path]]
        Path to local data (if source is 'local')

    Raises
    ------
    ValueError
        If ``source`` is unknown, or 'local' is requested without ``path``.
    """
    logger.info(f"Loading data from {source}...")
    if source == "api":
        bootstrap_data = self.data_loader.fetch_bootstrap_data()
        # BUG FIX: load_players(force_refresh: bool = False) does not take the
        # bootstrap payload; passing the dict made force_refresh truthy and
        # triggered a second, cache-bypassing API fetch. Calling it without
        # arguments lets it reuse the bootstrap data cached just above.
        self.players = self.data_loader.load_players()

        # Determine current gameweek from bootstrap events
        for event in bootstrap_data.get("events", []):
            if event.get("is_current"):
                self.current_gameweek = event["id"]
                break

        # Collect per-gameweek news snapshots for inference
        self.news_collector.collect_from_bootstrap(bootstrap_data, self.current_gameweek)

    elif source == "local":
        if path is None:
            raise ValueError("Path must be provided for local data source.")
        # NOTE(review): load_from_csv returns a DataFrame while self.players is
        # annotated list[Player] — confirm downstream code handles this shape.
        self.players = self.data_loader.load_from_csv(path)

    else:
        # Previously an unknown source fell through silently and logged
        # "Loaded 0 players."; fail fast instead.
        raise ValueError(f"Unknown data source: {source!r} (expected 'api' or 'local').")

    logger.info(f"Loaded {len(self.players)} players.")

    # Keep a per-player time-series lookup. This is a simplification; a full
    # implementation would fetch detailed history here.
    for player in self.players:
        self.players_data[player.id] = player.timeseries
fit
fit() -> None

Fit the prediction model.

Uses the probabilistic inference pipeline (HMM + Kalman + Fusion) when model_type is 'inference'. Falls back to the original feature engineering pipeline for baseline/ML models.

Source code in fplx/api/interface.py
def fit(self) -> None:
    """
    Fit the prediction model.

    Dispatches to the probabilistic inference pipeline (HMM + Kalman +
    Fusion) when the configured model_type is 'inference'; any other
    value falls through to the legacy feature-engineering pipeline.

    Raises
    ------
    RuntimeError
        If load_data() has not been called yet.
    """
    if not self.players:
        raise RuntimeError("Data not loaded. Call load_data() first.")

    model_type = self.config.get("model_type", "baseline")
    logger.info(f"Fitting model '{model_type}'...")

    fit_impl = (
        self._fit_inference
        if model_type == "inference"
        else lambda: self._fit_legacy(model_type)
    )
    fit_impl()

    logger.info("Model fitting complete.")
select_best_11
select_best_11() -> FullSquad

Select the optimal 15-player squad and 11-player starting lineup.

RETURNS DESCRIPTION
FullSquad

The optimized squad with lineup.

Source code in fplx/api/interface.py
def select_best_11(self) -> FullSquad:
    """
    Select the optimal 15-player squad and 11-player starting lineup.

    Returns
    -------
    FullSquad
        The optimized squad with lineup.

    Raises
    ------
    RuntimeError
        If fit() has not been called yet.
    """
    if not self.expected_points:
        raise RuntimeError("Model not fitted. Call fit() first.")

    optimizer_name = self.config.get("optimizer", "greedy")
    logger.info("Optimizing squad with %s optimizer...", optimizer_name)

    # An empty variance dict is passed through as None.
    variance = self.expected_variance if self.expected_variance else None
    squad = self.optimizer.solve(
        players=self.players,
        expected_points=self.expected_points,
        expected_variance=variance,
    )

    logger.info("Squad optimization complete.")
    return squad

core

Matchweek dataclass

Matchweek(
    gameweek: int,
    date: datetime,
    fixtures: list[dict],
    team_difficulty: dict[str, float],
)

Represents a matchweek with global context.

ATTRIBUTE DESCRIPTION
gameweek

Gameweek number

TYPE: int

date

Date of the gameweek

TYPE: datetime

fixtures

List of fixtures

TYPE: list[dict]

team_difficulty

Team-level difficulty ratings

TYPE: dict[str, float]

Player dataclass

Player(
    id: int,
    name: str,
    team: str,
    position: str,
    price: float,
    timeseries: DataFrame,
    news: Optional[dict] = None,
)

Represents a Fantasy Premier League player.

ATTRIBUTE DESCRIPTION
id

Unique player identifier

TYPE: int

name

Player full name

TYPE: str

team

Current team

TYPE: str

position

Position (GK, DEF, MID, FWD)

TYPE: str

price

Current price in FPL

TYPE: float

timeseries

Historical stats (points, xG, minutes, etc.)

TYPE: DataFrame

news

Latest news/injury information

TYPE: Optional[dict]

last_5_points property
last_5_points: float

Average points over last 5 gameweeks.

availability property
availability: float

Availability score (0-1) based on news.

FullSquad dataclass

FullSquad(
    squad_players: list[Player],
    lineup: Squad,
    bench: list[Player] = list(),
    squad_cost: float = 0.0,
    expected_points: float = 0.0,
)

Represents a 15-player FPL squad with a selected 11-player lineup.

The two-level FPL structure: Level 1: 15-player squad (2 GK, 5 DEF, 5 MID, 3 FWD) under budget. Level 2: 11-player starting lineup chosen from the squad each gameweek.

ATTRIBUTE DESCRIPTION
squad_players

All 15 squad members.

TYPE: list[Player]

lineup

The 11-player starting lineup (subset of squad_players).

TYPE: Squad

bench

The 4 bench players.

TYPE: list[Player]

squad_cost

Total cost of all 15 players.

TYPE: float

expected_points

Expected points for the starting 11.

TYPE: float

summary
summary() -> str

Returns a formatted string summary of the full squad.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the full 15-man squad.

    Includes total and remaining budget (against the fixed £100.0m cap),
    the starting-XI summary, and the bench, one player per line.
    """
    report = [
        f"Squad Cost: £{self.squad_cost:.1f}m / £100.0m",
        f"Remaining Budget: £{100.0 - self.squad_cost:.1f}m",
        "",
        self.lineup.summary(),
        "",
        "--- Bench ---",
    ]
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in self.bench
    )
    return "\n".join(report)

Squad dataclass

Squad(
    players: list[Player],
    formation: str,
    total_cost: float,
    expected_points: float,
    captain: Optional[Player] = None,
)

Represents an 11-player starting lineup.

ATTRIBUTE DESCRIPTION
players

Selected starters (exactly 11).

TYPE: list[Player]

formation

Formation string (e.g., "3-4-3").

TYPE: str

total_cost

Total cost of the starting 11.

TYPE: float

expected_points

Expected total points for the starting 11.

TYPE: float

captain

Captain selection (earns double points).

TYPE: Optional[Player]

summary
summary() -> str

Returns a formatted string summary of the lineup.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the starting lineup.

    Shows formation, cost, expected points and captain, followed by the
    starters sorted GK -> DEF -> MID -> FWD (unknown positions last).
    """
    captain_name = self.captain.name if self.captain else "None"
    report = [
        f"Formation: {self.formation}",
        f"Total Cost: £{self.total_cost:.1f}m",
        f"Expected Points: {self.expected_points:.2f}",
        f"Captain: {captain_name}",
        "",
        "--- Starting XI ---",
    ]
    rank = {"GK": 0, "DEF": 1, "MID": 2, "FWD": 3}
    ordered = sorted(self.players, key=lambda member: rank.get(member.position, 9))
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in ordered
    )
    return "\n".join(report)

matchweek

Matchweek domain object.

Matchweek dataclass
Matchweek(
    gameweek: int,
    date: datetime,
    fixtures: list[dict],
    team_difficulty: dict[str, float],
)

Represents a matchweek with global context.

ATTRIBUTE DESCRIPTION
gameweek

Gameweek number

TYPE: int

date

Date of the gameweek

TYPE: datetime

fixtures

List of fixtures

TYPE: list[dict]

team_difficulty

Team-level difficulty ratings

TYPE: dict[str, float]

player

Player domain object.

Player dataclass
Player(
    id: int,
    name: str,
    team: str,
    position: str,
    price: float,
    timeseries: DataFrame,
    news: Optional[dict] = None,
)

Represents a Fantasy Premier League player.

ATTRIBUTE DESCRIPTION
id

Unique player identifier

TYPE: int

name

Player full name

TYPE: str

team

Current team

TYPE: str

position

Position (GK, DEF, MID, FWD)

TYPE: str

price

Current price in FPL

TYPE: float

timeseries

Historical stats (points, xG, minutes, etc.)

TYPE: DataFrame

news

Latest news/injury information

TYPE: Optional[dict]

last_5_points property
last_5_points: float

Average points over last 5 gameweeks.

availability property
availability: float

Availability score (0-1) based on news.

squad

Squad and FullSquad domain objects.

Squad dataclass
Squad(
    players: list[Player],
    formation: str,
    total_cost: float,
    expected_points: float,
    captain: Optional[Player] = None,
)

Represents an 11-player starting lineup.

ATTRIBUTE DESCRIPTION
players

Selected starters (exactly 11).

TYPE: list[Player]

formation

Formation string (e.g., "3-4-3").

TYPE: str

total_cost

Total cost of the starting 11.

TYPE: float

expected_points

Expected total points for the starting 11.

TYPE: float

captain

Captain selection (earns double points).

TYPE: Optional[Player]

summary
summary() -> str

Returns a formatted string summary of the lineup.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable, multi-line summary of the starting XI.

    The header lists formation, cost, expected points and captain; the
    body lists the eleven starters grouped GK → DEF → MID → FWD.
    """
    order = {"GK": 0, "DEF": 1, "MID": 2, "FWD": 3}
    captain_name = self.captain.name if self.captain else "None"
    header = [
        f"Formation: {self.formation}",
        f"Total Cost: £{self.total_cost:.1f}m",
        f"Expected Points: {self.expected_points:.2f}",
        f"Captain: {captain_name}",
        "",
        "--- Starting XI ---",
    ]
    # Unknown positions sort last (key 9); sort is stable within a position.
    starters = sorted(self.players, key=lambda pl: order.get(pl.position, 9))
    body = [f"  {pl.name} ({pl.position}, {pl.team}, £{pl.price}m)" for pl in starters]
    return "\n".join(header + body)
FullSquad dataclass
FullSquad(
    squad_players: list[Player],
    lineup: Squad,
    bench: list[Player] = list(),
    squad_cost: float = 0.0,
    expected_points: float = 0.0,
)

Represents a 15-player FPL squad with a selected 11-player lineup.

The two-level FPL structure: Level 1: 15-player squad (2 GK, 5 DEF, 5 MID, 3 FWD) under budget. Level 2: 11-player starting lineup chosen from the squad each gameweek.

ATTRIBUTE DESCRIPTION
squad_players

All 15 squad members.

TYPE: list[Player]

lineup

The 11-player starting lineup (subset of squad_players).

TYPE: Squad

bench

The 4 bench players.

TYPE: list[Player]

squad_cost

Total cost of all 15 players.

TYPE: float

expected_points

Expected points for the starting 11.

TYPE: float

summary
summary() -> str

Returns a formatted string summary of the full squad.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable summary of the full 15-player squad.

    Shows total squad cost versus the budget, the starting-XI summary,
    and the bench players.

    NOTE(review): the £100.0m budget shown here is hard-coded — confirm
    whether it should track a configurable budget instead.
    """
    parts = [
        f"Squad Cost: £{self.squad_cost:.1f}m / £100.0m",
        f"Remaining Budget: £{100.0 - self.squad_cost:.1f}m",
        "",
        self.lineup.summary(),
        "",
        "--- Bench ---",
    ]
    parts.extend(
        f"  {bp.name} ({bp.position}, {bp.team}, £{bp.price}m)" for bp in self.bench
    )
    return "\n".join(parts)

data

Data loading and schema definitions.

FPLDataLoader

FPLDataLoader(cache_dir: Optional[Path] = None)

Load and manage FPL data from various sources (API, CSV, cache).

PARAMETER DESCRIPTION
cache_dir

Directory to cache downloaded data

TYPE: Optional[Path] DEFAULT: None

Source code in fplx/data/loaders.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Initialise the loader and ensure the cache directory exists."""
    default_dir = Path.home() / ".fplx" / "cache"
    self.cache_dir = cache_dir or default_dir
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    # Bootstrap payload is fetched lazily and memoised here.
    self._bootstrap_data = None
fetch_bootstrap_data
fetch_bootstrap_data(force_refresh: bool = False) -> dict

Fetch main FPL data (players, teams, gameweeks).

PARAMETER DESCRIPTION
force_refresh

Force refresh even if cached

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Dict

Bootstrap data containing players, teams, events

Source code in fplx/data/loaders.py
def fetch_bootstrap_data(self, force_refresh: bool = False) -> dict:
    """
    Fetch main FPL data (players, teams, gameweeks).

    The parsed payload is cached on disk (``bootstrap.json`` under
    ``cache_dir``) and memoised on ``self._bootstrap_data`` regardless of
    whether it came from the cache or the API.

    Parameters
    ----------
    force_refresh : bool
        Force refresh even if cached

    Returns
    -------
    Dict
        Bootstrap data containing players, teams, events
    """
    # Single local import (the original imported json twice, once per path).
    import json

    cache_file = self.cache_dir / "bootstrap.json"

    if not force_refresh and cache_file.exists():
        logger.info("Loading bootstrap data from cache")
        with open(cache_file) as f:
            data = json.load(f)
        # Fix: the cache-hit path previously returned without memoising,
        # leaving self._bootstrap_data stale — keep both paths consistent.
        self._bootstrap_data = data
        return data

    logger.info("Fetching bootstrap data from FPL API")
    response = requests.get(self.BOOTSTRAP_URL)
    response.raise_for_status()

    data = response.json()

    # Cache the payload for subsequent runs.
    with open(cache_file, "w") as f:
        json.dump(data, f)

    self._bootstrap_data = data
    return data
load_players
load_players(force_refresh: bool = False) -> list[Player]

Load all players with basic info.

PARAMETER DESCRIPTION
force_refresh

Force refresh from API

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Player]

List of Player objects

Source code in fplx/data/loaders.py
def load_players(self, force_refresh: bool = False) -> list[Player]:
    """
    Load all players with basic info.

    Parameters
    ----------
    force_refresh : bool
        Force refresh from API

    Returns
    -------
    list[Player]
        List of Player objects
    """
    data = self.fetch_bootstrap_data(force_refresh)

    # Build lookup tables: team id -> name, element_type -> position code.
    teams = {t["id"]: t["name"] for t in data["teams"]}
    positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}

    players = []
    for element in data["elements"]:
        # Create minimal timeseries (can be enriched later)
        ts_data = {
            "gameweek": [0],
            "points": [element.get("total_points", 0)],
            "minutes": [element.get("minutes", 0)],
            "form": [float(element.get("form", 0))],
        }

        # The API reports None when no fitness doubt has been flagged,
        # which means the player is fully available.
        chance = element.get("chance_of_playing_next_round")
        availability = 1.0 if chance is None else chance / 100.0

        player = Player(
            id=element["id"],
            name=element["web_name"],
            team=teams[element["team"]],
            position=positions[element["element_type"]],
            price=element["now_cost"] / 10.0,  # Convert to £m
            timeseries=pd.DataFrame(ts_data),
            news={
                "text": element.get("news", ""),
                "availability": availability,
            },
        )
        players.append(player)

    logger.info(f"Loaded {len(players)} players")
    return players
load_player_history
load_player_history(player_id: int) -> DataFrame

Load detailed historical data for a specific player.

PARAMETER DESCRIPTION
player_id

Player ID

TYPE: int

RETURNS DESCRIPTION
DataFrame

Historical gameweek stats

Source code in fplx/data/loaders.py
def load_player_history(self, player_id: int) -> pd.DataFrame:
    """
    Load detailed historical data for a specific player.

    Parameters
    ----------
    player_id : int
        Player ID

    Returns
    -------
    pd.DataFrame
        Historical gameweek stats
    """
    url = self.PLAYER_DETAIL_URL.format(player_id=player_id)
    response = requests.get(url)
    response.raise_for_status()

    payload = response.json()
    history = pd.DataFrame(payload["history"])

    # Map FPL API field names onto the canonical column names used
    # throughout the library.
    rename_map = {
        "round": "gameweek",
        "total_points": "points",
        "minutes": "minutes",
        "goals_scored": "goals",
        "assists": "assists",
        "expected_goals": "xG",
        "expected_assists": "xA",
    }
    if not history.empty:
        history = history.rename(columns=rename_map)

    return history
load_fixtures
load_fixtures() -> DataFrame

Load all fixtures.

RETURNS DESCRIPTION
DataFrame

Fixtures data

Source code in fplx/data/loaders.py
def load_fixtures(self) -> pd.DataFrame:
    """
    Load all fixtures.

    Returns
    -------
    pd.DataFrame
        Fixtures data
    """
    resp = requests.get(self.FIXTURES_URL)
    resp.raise_for_status()
    return pd.DataFrame(resp.json())
load_from_csv
load_from_csv(filepath: Path) -> DataFrame

Load data from CSV file.

PARAMETER DESCRIPTION
filepath

Path to CSV file

TYPE: Path

RETURNS DESCRIPTION
DataFrame

Loaded data

Source code in fplx/data/loaders.py
def load_from_csv(self, filepath: Path) -> pd.DataFrame:
    """
    Load data from CSV file.

    Parameters
    ----------
    filepath : Path
        Path to CSV file

    Returns
    -------
    pd.DataFrame
        Loaded data
    """
    logger.info("Loading data from %s", filepath)
    return pd.read_csv(filepath)
enrich_player_history
enrich_player_history(
    players: list[Player],
) -> list[Player]

Enrich players with full historical data.

PARAMETER DESCRIPTION
players

List of players to enrich

TYPE: list[Player]

RETURNS DESCRIPTION
list[Player]

Players with enriched timeseries

Source code in fplx/data/loaders.py
def enrich_player_history(self, players: list[Player]) -> list[Player]:
    """
    Enrich players with full historical data.

    Best-effort: a player whose history cannot be fetched is kept with
    their existing (minimal) timeseries rather than dropped.

    Parameters
    ----------
    players : list[Player]
        List of players to enrich

    Returns
    -------
    list[Player]
        Players with enriched timeseries
    """
    enriched = []
    for player in players:
        try:
            history = self.load_player_history(player.id)
            if not history.empty:
                player.timeseries = history
            enriched.append(player)
        except Exception as e:
            # Fix: dropped the stray f-prefix — the message uses lazy
            # %-style arguments, so an f-string prefix was misleading
            # (and would break if braces were ever added).
            logger.warning("Could not load history for %s : %s", player.name, e)
            enriched.append(player)

    return enriched

VaastavLoader

VaastavLoader(
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
)

Load historical FPL data from the vaastav dataset.

PARAMETER DESCRIPTION
season

Season string, e.g. "2023-24".

TYPE: str DEFAULT: '2023-24'

data_dir

Path to a local clone. If None, fetches from GitHub.

TYPE: str or Path DEFAULT: None

cache_dir

Where to cache downloaded CSVs. Defaults to ~/.fplx/vaastav/.

TYPE: str or Path DEFAULT: None

Source code in fplx/data/vaastav_loader.py
def __init__(
    self,
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
):
    """Validate the season, resolve directories, and prepare the cache."""
    self.season = self._validate_season(season)
    self.data_dir = Path(data_dir) if data_dir else None

    # Default cache is project-local to keep artifacts within the workspace.
    if cache_dir:
        self.cache_dir = Path(cache_dir)
    else:
        repo_root = Path(__file__).resolve().parents[2]
        self.cache_dir = repo_root / ".fplx" / "vaastav"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Lazily-populated caches for the two CSV-backed tables.
    self._merged_gw: Optional[pd.DataFrame] = None
    self._player_raw: Optional[pd.DataFrame] = None
load_merged_gw
load_merged_gw() -> DataFrame

Load the merged gameweek file (all GWs, all players, one CSV).

RETURNS DESCRIPTION
DataFrame

One row per player-gameweek appearance.

Source code in fplx/data/vaastav_loader.py
def load_merged_gw(self) -> pd.DataFrame:
    """
    Load the merged gameweek file (all GWs, all players, one CSV).

    The result is memoised on ``self._merged_gw`` after the first read.

    Returns
    -------
    pd.DataFrame
        One row per player-gameweek appearance.
    """
    if self._merged_gw is not None:
        return self._merged_gw

    df = self._read_csv("gws/merged_gw.csv")
    # Map raw vaastav column names onto the library's canonical names.
    df = df.rename(columns={c: COLUMN_MAP.get(c, c) for c in df.columns})
    df = self._coalesce_duplicate_columns(df)

    if "gameweek" in df.columns:
        df["gameweek"] = pd.to_numeric(df["gameweek"], errors="coerce")

    self._merged_gw = df

    # Fix: guard the summary log. Previously a malformed CSV with a
    # missing "element"/"gameweek" column raised KeyError here, and an
    # all-NaN gameweek column raised ValueError via "%d" % nan — a log
    # statement could abort the whole load.
    n_players = df["element"].nunique() if "element" in df.columns else 0
    if "gameweek" in df.columns and df["gameweek"].notna().any():
        gw_lo = int(df["gameweek"].min())
        gw_hi = int(df["gameweek"].max())
    else:
        gw_lo = gw_hi = 0
    logger.info(
        "Loaded merged_gw: %d rows, %d players, GW %d-%d",
        len(df),
        n_players,
        gw_lo,
        gw_hi,
    )
    return df
load_player_raw
load_player_raw() -> DataFrame

Load season-level player metadata.

Source code in fplx/data/vaastav_loader.py
def load_player_raw(self) -> pd.DataFrame:
    """Load season-level player metadata (cached after the first read)."""
    if self._player_raw is None:
        self._player_raw = self._read_csv("players_raw.csv")
    return self._player_raw
load_gameweek
load_gameweek(gw: int) -> DataFrame

Load a single gameweek from merged data.

Source code in fplx/data/vaastav_loader.py
def load_gameweek(self, gw: int) -> pd.DataFrame:
    """Return a copy of the merged-GW rows belonging to gameweek ``gw``."""
    merged = self.load_merged_gw()
    mask = merged["gameweek"] == gw
    return merged[mask].copy()
build_player_objects
build_player_objects(
    up_to_gw: Optional[int] = None,
) -> list[Player]

Build Player objects with timeseries up to a given gameweek.

PARAMETER DESCRIPTION
up_to_gw

Only include gameweeks 1..up_to_gw. If None, include all.

TYPE: int DEFAULT: None

RETURNS DESCRIPTION
list[Player]
Source code in fplx/data/vaastav_loader.py
def build_player_objects(
    self,
    up_to_gw: Optional[int] = None,
) -> list[Player]:
    """
    Build Player objects with timeseries up to a given gameweek.

    Parameters
    ----------
    up_to_gw : int, optional
        Only include gameweeks 1..up_to_gw. If None, include all.

    Returns
    -------
    list[Player]
        One Player per distinct ``element`` id, each carrying a
        DGW-aggregated timeseries (exactly one row per gameweek).
    """
    all_gw = self.load_merged_gw()

    # Truncate history so no future gameweeks leak into the timeseries.
    if up_to_gw is not None:
        all_gw = all_gw[all_gw["gameweek"] <= up_to_gw]

    if all_gw.empty:
        return []

    players = []
    # "element" is the FPL player id in the vaastav merged data.
    grouped = all_gw.groupby("element")

    for pid, grp in grouped:
        pid = int(pid)
        grp = grp.sort_values("gameweek").reset_index(drop=True)

        # Player metadata from the row itself
        # (first row for identity fields; last row for the current price,
        # which is stored in tenths of £m in the raw data).
        name = str(grp["name"].iloc[0]) if "name" in grp.columns else f"Player_{pid}"
        team = str(grp["team"].iloc[0]) if "team" in grp.columns else "Unknown"
        pos_raw = grp["position"].iloc[0] if "position" in grp.columns else "MID"
        price = grp["value"].iloc[-1] / 10.0 if "value" in grp.columns else 5.0

        # Map the raw position key first, then its string form; fall back
        # to "MID" when the value is unknown to POSITION_MAP.
        position = POSITION_MAP.get(pos_raw, POSITION_MAP.get(str(pos_raw), "MID"))

        # Build timeseries with available columns
        # (only keep columns actually present in this season's CSV).
        keep = [
            c
            for c in [
                "gameweek",
                "points",
                "minutes",
                "starts",
                "goals",
                "assists",
                "xG",
                "xA",
                "bonus",
                "bps",
                "clean_sheets",
                "goals_conceded",
                "saves",
                "yellow_cards",
                "red_cards",
                "own_goals",
                "penalties_missed",
                "penalties_saved",
                "influence",
                "creativity",
                "threat",
                "ict_index",
                "was_home",
                "opponent_team",
                "expected_goals_conceded",
                "xP",
                "value",
                "selected",
                "transfers_in",
                "transfers_out",
            ]
            if c in grp.columns
        ]
        timeseries = grp[keep].copy()
        # Coerce everything numeric; unparsable entries become NaN.
        for col in timeseries.columns:
            timeseries[col] = pd.to_numeric(timeseries[col], errors="coerce")

        # ── DGW aggregation ───────────────────────────────────────────
        # Always collapse to one row per GW decision period.
        # DGW gameweeks receive per-fixture normalised scores so that the
        # inference pipeline (HMM, enriched, KF) operates on single-game-
        # equivalent observations. See double_gameweek.py for details.
        timeseries = aggregate_dgw_timeseries(timeseries)

        player = Player(
            id=pid,
            name=name,
            team=team,
            position=position,
            price=float(price),
            timeseries=timeseries,
        )
        players.append(player)

    logger.info("Built %d Player objects (up_to_gw=%s).", len(players), up_to_gw)
    return players
get_actual_points
get_actual_points(gw: int) -> dict[int, float]

Get actual points scored by each player in a specific gameweek.

For Double Gameweek players (two fixtures in the same round) the points from both fixtures are summed, which is the correct FPL score for that gameweek. The previous implementation used dict(zip(…)) which silently discarded the first fixture row when a player appeared twice, underreporting DGW scores.

RETURNS DESCRIPTION
dict[int, float]

{player_id: actual_points} (summed across fixtures for DGW players)

Source code in fplx/data/vaastav_loader.py
def get_actual_points(self, gw: int) -> dict[int, float]:
    """
    Get actual points scored by each player in a specific gameweek.

    For Double Gameweek players (two fixtures in the same round) the
    points from both fixtures are **summed**, which is the correct FPL
    score for that gameweek. The previous implementation used ``dict(zip(…))``
    which silently discarded the first fixture row when a player appeared
    twice, underreporting DGW scores.

    Returns
    -------
    dict[int, float]
        {player_id: actual_points}  (summed across fixtures for DGW players)
    """
    frame = self.load_gameweek(gw)
    points_col = "points" if "points" in frame.columns else "total_points"
    # Summing per element handles SGW (one row) and DGW (two rows) alike.
    totals = frame.groupby("element")[points_col].sum()
    return {int(pid): float(pts) for pid, pts in totals.items()}
get_fixture_info
get_fixture_info(gw: int) -> dict[int, dict]

Get fixture context (opponent, home/away, xP) per player for a GW.

Source code in fplx/data/vaastav_loader.py
def get_fixture_info(self, gw: int) -> dict[int, dict]:
    """Get fixture context (opponent, home/away, xP) per player for a GW."""
    frame = self.load_gameweek(gw)
    # Column presence is invariant across rows; check once, not per row.
    has_opponent = "opponent_team" in frame.columns
    has_xp = "xP" in frame.columns

    context: dict[int, dict] = {}
    for _, rec in frame.iterrows():
        pid = int(rec.get("element", 0))
        context[pid] = {
            "was_home": bool(rec.get("was_home", False)),
            "opponent_team": int(rec.get("opponent_team", 0)) if has_opponent else 0,
            "xP": float(rec.get("xP", 0.0)) if has_xp else 0.0,
        }
    return context

double_gameweek

Double Gameweek (DGW) detection, timeseries aggregation, and prediction scaling.

A Double Gameweek occurs when a team plays two Premier League fixtures in the same FPL gameweek. From the perspective of the inference pipeline and optimizer, this has two distinct effects:

  1. Historical timeseries (training/inference input) The vaastav dataset stores each fixture as a separate row. A DGW player therefore has two rows sharing the same gameweek value. If not aggregated, the HMM will see them as two sequential timesteps with single-game-calibrated emissions, causing the model to misinterpret a large total (e.g. 14 pts from two good games) as a single "Star" observation when it is actually two "Good" observations.

  2. Forward prediction (next-GW forecast for ILP) When the upcoming gameweek is a DGW, a player plays twice. Their expected FPL points should be approximately 2× the single-game prediction (under independence), and their variance should also scale accordingly.

Usage

>>> from fplx.data.double_gameweek import (
...     detect_dgw_gameweeks,
...     aggregate_dgw_timeseries,
...     scale_predictions_for_dgw,
...     get_fixture_counts_from_bootstrap,
... )

detect_dgw_gameweeks
detect_dgw_gameweeks(
    timeseries: DataFrame,
) -> dict[int, int]

Return a mapping of {gameweek: n_fixtures} for a single player's timeseries.

A gameweek with n_fixtures > 1 is a Double (or Triple) Gameweek.

PARAMETER DESCRIPTION
timeseries

Per-fixture timeseries as returned by VaastavLoader.build_player_objects. Must contain a gameweek column.

TYPE: DataFrame

RETURNS DESCRIPTION
dict[int, int]

{gameweek_number: fixture_count} for all gameweeks in the data. Gameweeks with a single fixture have value 1.

Examples:

>>> counts = detect_dgw_gameweeks(player.timeseries)
>>> dgw_gws = [gw for gw, n in counts.items() if n > 1]
Source code in fplx/data/double_gameweek.py
def detect_dgw_gameweeks(timeseries: pd.DataFrame) -> dict[int, int]:
    """Return ``{gameweek: n_fixtures}`` for a single player's timeseries.

    Any gameweek whose fixture count exceeds 1 is a Double (or Triple)
    Gameweek.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Per-fixture timeseries as returned by ``VaastavLoader.build_player_objects``.
        Must contain a ``gameweek`` column.

    Returns
    -------
    dict[int, int]
        ``{gameweek_number: fixture_count}`` for all gameweeks in the data.
        Gameweeks with a single fixture have value 1.

    Examples
    --------
    >>> counts = detect_dgw_gameweeks(player.timeseries)
    >>> dgw_gws = [gw for gw, n in counts.items() if n > 1]
    """
    missing = timeseries.empty or "gameweek" not in timeseries.columns
    if missing:
        return {}
    # value_counts gives the same row-count-per-gameweek mapping as
    # groupby(...).size().
    return timeseries["gameweek"].value_counts().to_dict()
aggregate_dgw_timeseries
aggregate_dgw_timeseries(
    timeseries: DataFrame,
) -> DataFrame

Collapse per-fixture rows into one normalised row per gameweek.

This is the single place where Double Gameweek handling lives. All downstream consumers (inference pipeline, enriched predictor, MV-HMM, Kalman Filter) always receive exactly one row per FPL decision period and never need to be aware of DGWs.

For a DGW gameweek (n_fixtures == 2):

  • Additive stats (goals, minutes, bonus, …) are summed to reflect the total accumulated across both matches.
  • Per-fixture normalisation is applied to points and to every additive stat that forms an inference feature. The normalised column is stored alongside the raw total:

.. code-block:: text

  points         # raw total (used for scoring / oracle)
  points_norm    # per-fixture average (used by inference / HMM)

The HMM emission distributions are calibrated on points_norm, so a DGW observation of 10 total points (points_norm = 5) is correctly interpreted as an "Average" game rather than misidentified as a "Star" event (8.5 pts single-game emission mean).

  • Rate / expected stats (xG, xA, …) are averaged — they already represent per-match rates.

  • Context columns (price, opponent) take the last-fixture value.

For a single-fixture gameweek (n_fixtures == 1) the row is returned unchanged and points_norm == points.

PARAMETER DESCRIPTION
timeseries

Raw per-fixture timeseries (may contain duplicate gameweek values for DGW players).

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

One row per gameweek, sorted ascending by gameweek. New columns added: - n_fixtures : int, number of fixtures played that round - points_norm : float, per-fixture normalised points

Source code in fplx/data/double_gameweek.py
def aggregate_dgw_timeseries(timeseries: pd.DataFrame) -> pd.DataFrame:
    """Collapse per-fixture rows into one normalised row per gameweek.

    This is the **single place** where Double Gameweek handling lives.  All
    downstream consumers (inference pipeline, enriched predictor, MV-HMM,
    Kalman Filter) always receive exactly one row per FPL decision period and
    never need to be aware of DGWs.

    For a DGW gameweek (``n_fixtures == 2``):

    * **Additive stats** (goals, minutes, bonus, …) are **summed** to reflect
      the total accumulated across both matches.
    * **Per-fixture normalisation** is applied to ``points`` and to every
      additive stat that forms an inference feature.  The normalised column is
      stored alongside the raw total:

      .. code-block:: text

          points         # raw total (used for scoring / oracle)
          points_norm    # per-fixture average (used by inference / HMM)

      The HMM emission distributions are calibrated on ``points_norm``, so a
      DGW observation of 10 total points (``points_norm = 5``) is correctly
      interpreted as an "Average" game rather than misidentified as a "Star"
      event (8.5 pts single-game emission mean).

    * **Rate / expected stats** (xG, xA, …) are averaged — they already
      represent per-match rates.

    * **Context columns** (price, opponent) take the last-fixture value.

    For a single-fixture gameweek (``n_fixtures == 1``) the row is returned
    unchanged and ``points_norm == points``.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Raw per-fixture timeseries (may contain duplicate ``gameweek`` values
        for DGW players).

    Returns
    -------
    pd.DataFrame
        One row per gameweek, sorted ascending by ``gameweek``.
        New columns added:
        - ``n_fixtures``  : int, number of fixtures played that round
        - ``points_norm`` : float, per-fixture normalised points
    """
    # Nothing to aggregate without a gameweek column; return an untouched copy.
    if timeseries.empty or "gameweek" not in timeseries.columns:
        return timeseries.copy()

    # Rows per gameweek; any count > 1 indicates a DGW to collapse.
    gw_counts = timeseries.groupby("gameweek").size()
    has_multi = (gw_counts > 1).any()

    # Fast path: no DGW rows — just tag the metadata columns and return.
    if not has_multi:
        ts = timeseries.copy()
        ts["n_fixtures"] = 1
        ts["points_norm"] = ts["points"] if "points" in ts.columns else 0.0
        return ts.sort_values("gameweek").reset_index(drop=True)

    agg_rows = []
    for gw, grp in timeseries.groupby("gameweek"):
        n = len(grp)
        row: dict = {"gameweek": gw, "n_fixtures": n}

        # ── Additive stats: sum across fixtures ───────────────────────────
        for col in _ADDITIVE_COLS:
            if col in grp.columns:
                row[col] = pd.to_numeric(grp[col], errors="coerce").fillna(0.0).sum()

        # ── Per-fixture normalisation of inference-facing columns ─────────
        # points_norm is what the HMM / enriched predictor trains on.
        # All other additive stat norms follow the same pattern.
        pts_total = row.get("points", 0.0)
        row["points_norm"] = pts_total / n if n > 0 else 0.0

        for col in _ADDITIVE_COLS:
            if col in row and col != "points":
                row[f"{col}_norm"] = row[col] / n if n > 0 else 0.0

        # ── Rate / expected stats: average across fixtures ────────────────
        for col in _RATE_COLS:
            if col in grp.columns:
                row[col] = pd.to_numeric(grp[col], errors="coerce").mean()

        # ── Context: last fixture value ───────────────────────────────────
        for col in _LAST_COLS:
            if col in grp.columns:
                row[col] = grp[col].iloc[-1]

        # Remaining columns: last value
        # (suppress rather than fail — stray non-scalar columns must not
        # abort the aggregation of the whole timeseries).
        handled = set(_ADDITIVE_COLS + _RATE_COLS + _LAST_COLS + ["gameweek"])
        for col in grp.columns:
            if col not in handled:
                with contextlib.suppress(Exception):
                    row[col] = grp[col].iloc[-1]
        agg_rows.append(row)

    result = pd.DataFrame(agg_rows).sort_values("gameweek").reset_index(drop=True)

    # Best-effort numeric coercion of every non-key column.
    for col in result.columns:
        if col != "gameweek":
            with contextlib.suppress(Exception):
                result[col] = pd.to_numeric(result[col], errors="coerce")

    return result
scale_predictions_for_dgw
scale_predictions_for_dgw(
    expected_points: dict[int, float],
    variances: dict[int, float],
    downside_risks: dict[int, float],
    fixture_counts: dict[int, int],
    variance_mode: str = "additive",
) -> tuple[
    dict[int, float], dict[int, float], dict[int, float]
]

Scale single-game predictions to account for a Double Gameweek.

For a player with n fixtures in the upcoming gameweek:

  • Expected points: E[P_total] = n * E[P_single]
  • Variance (additive, under independence): Var[P_total] = n * Var[P_single]
  • Downside risk: DR_total = sqrt(n) * DR_single

This is exact under independence of the two match performances. The independence assumption is acceptable because FPL points in different matches are only weakly correlated (shared clean sheet probability for the same game counts for both defenders, but that is captured in the single-game variance estimate).

PARAMETER DESCRIPTION
expected_points

Single-game expected points per player id.

TYPE: dict[int, float]

variances

Single-game predictive variance per player id.

TYPE: dict[int, float]

downside_risks

Single-game semi-deviation per player id.

TYPE: dict[int, float]

fixture_counts

Number of upcoming fixtures per player id (1 for SGW, 2 for DGW). Players absent from this dict are assumed to have 1 fixture.

TYPE: dict[int, int]

variance_mode

"additive" (default): Var[P_total] = n * Var[P_single] — correct under independence. "conservative": multiply variance by n^2 to account for possible correlation (e.g. both games against the same strong opponent).

TYPE: str DEFAULT: 'additive'

RETURNS DESCRIPTION
ep_scaled, var_scaled, dr_scaled : tuple of dicts

Scaled prediction dicts with the same keys as the inputs.

Notes

Blank gameweek (BGW) players (n = 0) receive E[P] = 0, Var[P] = 0.1, DR = 0. The optimizer will naturally exclude them since their expected points are zero.

Examples:

>>> ep_scaled, var_scaled, dr_scaled = scale_predictions_for_dgw(
...     expected_points, variances, downside_risks, fixture_counts
... )
Source code in fplx/data/double_gameweek.py
def scale_predictions_for_dgw(
    expected_points: dict[int, float],
    variances: dict[int, float],
    downside_risks: dict[int, float],
    fixture_counts: dict[int, int],
    variance_mode: str = "additive",
) -> tuple[dict[int, float], dict[int, float], dict[int, float]]:
    """Scale single-game predictions to account for a Double Gameweek.

    For a player with ``n`` fixtures in the upcoming gameweek:

    - Expected points: ``E[P_total] = n * E[P_single]``
    - Variance (additive, under independence): ``Var[P_total] = n * Var[P_single]``
    - Downside risk: ``DR_total = sqrt(n) * DR_single``

    These scalings are exact under independence of the match performances,
    which is an acceptable approximation since FPL points in different
    matches are only weakly correlated.

    Parameters
    ----------
    expected_points : dict[int, float]
        Single-game expected points per player id.
    variances : dict[int, float]
        Single-game predictive variance per player id.
    downside_risks : dict[int, float]
        Single-game semi-deviation per player id.
    fixture_counts : dict[int, int]
        Number of upcoming fixtures per player id (1 for SGW, 2 for DGW).
        Players absent from this dict are assumed to have 1 fixture.
    variance_mode : str
        ``"additive"`` (default): ``Var[P_total] = n * Var[P_single]`` — correct
        under independence.
        ``"conservative"``: multiply variance by ``n^2`` to account for possible
        correlation (e.g. both games against the same strong opponent).

    Returns
    -------
    ep_scaled, var_scaled, dr_scaled : tuple of dicts
        Scaled prediction dicts with the same keys as the inputs.

    Notes
    -----
    Blank gameweek (BGW) players (``n = 0``) receive ``E[P] = 0``,
    ``Var[P] = 0.1``, ``DR = 0``; the optimizer naturally excludes them.
    """
    ep_scaled: dict[int, float] = {}
    var_scaled: dict[int, float] = {}
    dr_scaled: dict[int, float] = {}

    for pid, single_ep in expected_points.items():
        n_fixtures = fixture_counts.get(pid, 1)

        # Blank gameweek: no fixture means zero points, token variance.
        if n_fixtures == 0:
            ep_scaled[pid], var_scaled[pid], dr_scaled[pid] = 0.0, 0.1, 0.0
            continue

        single_var = variances.get(pid, 4.0)
        single_dr = downside_risks.get(pid, single_var**0.5 / 2**0.5)

        ep_scaled[pid] = single_ep * n_fixtures

        # Additive: Var scales linearly under independence; conservative
        # uses n^2 as an upper bound for fully-correlated fixtures.
        if variance_mode == "additive":
            var_factor = n_fixtures
        else:
            var_factor = n_fixtures * n_fixtures
        var_scaled[pid] = single_var * var_factor

        # Semi-deviation scales with sqrt(n) under independence.
        dr_scaled[pid] = single_dr * n_fixtures**0.5

    return ep_scaled, var_scaled, dr_scaled
get_fixture_counts_from_bootstrap
get_fixture_counts_from_bootstrap(
    bootstrap: dict, target_gw: int
) -> dict[int, int]

Derive per-player fixture counts for a gameweek from FPL bootstrap data.

Parses the fixtures list in the bootstrap-static response to count how many fixtures each team plays in target_gw. Returns a player-level mapping derived from each player's team id.

PARAMETER DESCRIPTION
bootstrap

Full bootstrap-static API response containing "fixtures" and "elements" lists.

TYPE: dict

target_gw

The gameweek to inspect.

TYPE: int

RETURNS DESCRIPTION
dict[int, int]

{player_id: n_fixtures} for all players. Players whose team has no fixture in target_gw (BGW) receive 0.

Source code in fplx/data/double_gameweek.py
def get_fixture_counts_from_bootstrap(
    bootstrap: dict,
    target_gw: int,
) -> dict[int, int]:
    """Derive per-player fixture counts for a gameweek from FPL bootstrap data.

    Parses the ``fixtures`` list in the bootstrap-static response to count how
    many fixtures each team plays in ``target_gw``. Returns a player-level
    mapping derived from each player's ``team`` id.

    Parameters
    ----------
    bootstrap : dict
        Full bootstrap-static API response containing ``"fixtures"`` and
        ``"elements"`` lists.
    target_gw : int
        The gameweek to inspect.

    Returns
    -------
    dict[int, int]
        ``{player_id: n_fixtures}`` for all players. Players whose team has no
        fixture in ``target_gw`` (BGW) receive 0. If the response contains no
        fixture data for ``target_gw`` at all, every player defaults to 1 so
        downstream scaling degrades gracefully.
    """
    fixtures = bootstrap.get("fixtures", [])
    elements = bootstrap.get("elements", [])

    # Count fixtures per team in target_gw
    team_fixture_counts: dict[int, int] = {}
    for fix in fixtures:
        if fix.get("event") != target_gw:
            continue
        for team_id in (fix.get("team_h"), fix.get("team_a")):
            if team_id is not None:
                team_fixture_counts[team_id] = team_fixture_counts.get(team_id, 0) + 1

    # BUG FIX: a team absent from team_fixture_counts has no fixture in
    # target_gw, so its players must receive 0 (blank gameweek) as the
    # docstring promises — the old default of 1 silently treated BGW
    # players as having a normal fixture. When there is no fixture data
    # at all, keep the old graceful default of 1 per player.
    default_count = 0 if team_fixture_counts else 1

    # Map player -> team -> fixture count
    player_counts: dict[int, int] = {}
    for elem in elements:
        pid = elem["id"]
        team = elem.get("team")
        player_counts[pid] = team_fixture_counts.get(team, default_count)

    n_dgw = sum(1 for n in team_fixture_counts.values() if n > 1)
    # BGW teams: teams that field players but have no fixture this gameweek.
    # (The old computation scanned team_fixture_counts, which by construction
    # never contains zero entries, so it always reported 0.)
    element_teams = {e.get("team") for e in elements if e.get("team") is not None}
    n_bgw = len(element_teams - set(team_fixture_counts)) if team_fixture_counts else 0
    if n_dgw:
        logger.info("GW%d: %d teams with DGW, %d teams with BGW.", target_gw, n_dgw, n_bgw)

    return player_counts
get_fixture_counts_from_vaastav
get_fixture_counts_from_vaastav(
    loader, target_gw: int
) -> dict[int, int]

Derive per-player fixture counts for a historical gameweek from vaastav data.

Uses the merged_gw CSV to count how many rows each player has for target_gw. This is the ground-truth fixture count for backtesting.

PARAMETER DESCRIPTION
loader

An initialised loader instance.

TYPE: VaastavLoader

target_gw

Gameweek to inspect.

TYPE: int

RETURNS DESCRIPTION
dict[int, int]

{player_id: n_fixtures} — 1 for SGW, 2 for DGW. Players with no row for the gameweek (no fixture) are omitted from the mapping.

Source code in fplx/data/double_gameweek.py
def get_fixture_counts_from_vaastav(
    loader,
    target_gw: int,
) -> dict[int, int]:
    """Derive per-player fixture counts for a historical gameweek from vaastav data.

    Counts how many rows each player has in the merged_gw CSV for
    ``target_gw``; this is the ground-truth fixture count for backtesting.

    Parameters
    ----------
    loader : VaastavLoader
        An initialised loader instance.
    target_gw : int
        Gameweek to inspect.

    Returns
    -------
    dict[int, int]
        ``{player_id: n_fixtures}`` — 1 for SGW, 2 for DGW. Players with no
        row for ``target_gw`` do not appear in the mapping.
    """
    gw_rows = loader.load_gameweek(target_gw)
    if gw_rows.empty:
        return {}
    # One row per fixture appearance, so the per-player row count is the
    # fixture count.
    return gw_rows["element"].value_counts().to_dict()

loaders

Data loaders for FPL data sources.

FPLDataLoader
FPLDataLoader(cache_dir: Optional[Path] = None)

Load and manage FPL data from various sources (API, CSV, cache).

PARAMETER DESCRIPTION
cache_dir

Directory to cache downloaded data

TYPE: Optional[Path] DEFAULT: None

Source code in fplx/data/loaders.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Initialise the loader.

    Parameters
    ----------
    cache_dir : Optional[Path]
        Directory to cache downloaded data; defaults to ``~/.fplx/cache``.
        Created eagerly so later cache writes cannot fail on a missing dir.
    """
    self.cache_dir = cache_dir or Path.home() / ".fplx" / "cache"
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    # Memoized bootstrap-static payload (populated by fetch_bootstrap_data).
    self._bootstrap_data = None
fetch_bootstrap_data
fetch_bootstrap_data(force_refresh: bool = False) -> dict

Fetch main FPL data (players, teams, gameweeks).

PARAMETER DESCRIPTION
force_refresh

Force refresh even if cached

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Dict

Bootstrap data containing players, teams, events

Source code in fplx/data/loaders.py
def fetch_bootstrap_data(self, force_refresh: bool = False) -> dict:
    """
    Fetch main FPL data (players, teams, gameweeks).

    Responses are cached on disk (``bootstrap.json`` under ``cache_dir``)
    and memoized on ``self._bootstrap_data``.

    Parameters
    ----------
    force_refresh : bool
        Force refresh even if cached

    Returns
    -------
    Dict
        Bootstrap data containing players, teams, events
    """
    # Single function-local import (the old version imported json twice).
    import json

    cache_file = self.cache_dir / "bootstrap.json"

    if not force_refresh and cache_file.exists():
        logger.info("Loading bootstrap data from cache")
        with open(cache_file) as f:
            data = json.load(f)
        # Keep the in-memory memo consistent with the on-disk cache
        # (the old version only set it on the API path).
        self._bootstrap_data = data
        return data

    logger.info("Fetching bootstrap data from FPL API")
    # A timeout prevents the request from hanging indefinitely on a
    # stalled connection (requests has no default timeout).
    response = requests.get(self.BOOTSTRAP_URL, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Cache the data
    with open(cache_file, "w") as f:
        json.dump(data, f)

    self._bootstrap_data = data
    return data
load_players
load_players(force_refresh: bool = False) -> list[Player]

Load all players with basic info.

PARAMETER DESCRIPTION
force_refresh

Force refresh from API

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Player]

List of Player objects

Source code in fplx/data/loaders.py
def load_players(self, force_refresh: bool = False) -> list[Player]:
    """
    Load all players with basic info.

    Parameters
    ----------
    force_refresh : bool
        Force refresh from API

    Returns
    -------
    list[Player]
        List of Player objects
    """
    data = self.fetch_bootstrap_data(force_refresh)

    # Lookup tables: team id -> team name, element_type -> position code.
    teams = {t["id"]: t["name"] for t in data["teams"]}
    positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}

    players = []
    for element in data["elements"]:
        # Create minimal timeseries (can be enriched later).
        # "form" arrives as a string and may be empty or None for a
        # present key, which the old float(get(..., 0)) call crashed on.
        ts_data = {
            "gameweek": [0],
            "points": [element.get("total_points", 0)],
            "minutes": [element.get("minutes", 0)],
            "form": [float(element.get("form") or 0)],
        }

        # None from the API means "no doubt flagged" -> fully available.
        chance = element.get("chance_of_playing_next_round")

        player = Player(
            id=element["id"],
            name=element["web_name"],
            team=teams[element["team"]],
            position=positions[element["element_type"]],
            price=element["now_cost"] / 10.0,  # API stores price in tenths of £m
            timeseries=pd.DataFrame(ts_data),
            news={
                "text": element.get("news", ""),
                "availability": 1.0 if chance is None else chance / 100.0,
            },
        )
        players.append(player)

    # Lazy %-style args to match the logging convention used elsewhere here.
    logger.info("Loaded %d players", len(players))
    return players
load_player_history
load_player_history(player_id: int) -> DataFrame

Load detailed historical data for a specific player.

PARAMETER DESCRIPTION
player_id

Player ID

TYPE: int

RETURNS DESCRIPTION
DataFrame

Historical gameweek stats

Source code in fplx/data/loaders.py
def load_player_history(self, player_id: int) -> pd.DataFrame:
    """
    Load detailed historical data for a specific player.

    Parameters
    ----------
    player_id : int
        Player ID

    Returns
    -------
    pd.DataFrame
        Historical gameweek stats (empty frame when the API reports none)
    """
    url = self.PLAYER_DETAIL_URL.format(player_id=player_id)
    # A timeout prevents the request from hanging indefinitely on a
    # stalled connection (requests has no default timeout).
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    data = response.json()
    history = pd.DataFrame(data["history"])

    # Rename API columns to the library's canonical names.
    # (Identity mappings like "minutes" -> "minutes" were no-ops and removed.)
    if not history.empty:
        history = history.rename(
            columns={
                "round": "gameweek",
                "total_points": "points",
                "goals_scored": "goals",
                "expected_goals": "xG",
                "expected_assists": "xA",
            }
        )

    return history
load_fixtures
load_fixtures() -> DataFrame

Load all fixtures.

RETURNS DESCRIPTION
DataFrame

Fixtures data

Source code in fplx/data/loaders.py
def load_fixtures(self) -> pd.DataFrame:
    """
    Load all fixtures from the FPL API.

    Returns
    -------
    pd.DataFrame
        Fixtures data
    """
    # A timeout prevents the request from hanging indefinitely on a
    # stalled connection (requests has no default timeout).
    response = requests.get(self.FIXTURES_URL, timeout=30)
    response.raise_for_status()

    return pd.DataFrame(response.json())
load_from_csv
load_from_csv(filepath: Path) -> DataFrame

Load data from CSV file.

PARAMETER DESCRIPTION
filepath

Path to CSV file

TYPE: Path

RETURNS DESCRIPTION
DataFrame

Loaded data

Source code in fplx/data/loaders.py
def load_from_csv(self, filepath: Path) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame.

    Parameters
    ----------
    filepath : Path
        Path to CSV file

    Returns
    -------
    pd.DataFrame
        Loaded data
    """
    logger.info("Loading data from %s", filepath)
    return pd.read_csv(filepath)
enrich_player_history
enrich_player_history(
    players: list[Player],
) -> list[Player]

Enrich players with full historical data.

PARAMETER DESCRIPTION
players

List of players to enrich

TYPE: list[Player]

RETURNS DESCRIPTION
list[Player]

Players with enriched timeseries

Source code in fplx/data/loaders.py
def enrich_player_history(self, players: list[Player]) -> list[Player]:
    """
    Enrich players with full historical data.

    Every input player is returned (in order); players whose history could
    not be fetched keep their existing timeseries and the failure is logged.

    Parameters
    ----------
    players : list[Player]
        List of players to enrich

    Returns
    -------
    list[Player]
        Players with enriched timeseries
    """
    enriched = []
    for player in players:
        try:
            history = self.load_player_history(player.id)
            if not history.empty:
                player.timeseries = history
        except Exception as e:
            # Best-effort enrichment: keep the player with its original
            # timeseries. (Fixed: the logging call had a spurious f-prefix
            # on a lazy %-style format string.)
            logger.warning("Could not load history for %s : %s", player.name, e)
        enriched.append(player)

    return enriched

news_collector

News collection and per-gameweek persistence.

NewsSnapshot
NewsSnapshot(
    player_id: int,
    gameweek: int,
    news_text: str = "",
    status: str = "a",
    chance_this_round: Optional[float] = None,
    chance_next_round: Optional[float] = None,
    timestamp: str = "",
)

A single player's news state at a specific gameweek.

ATTRIBUTE DESCRIPTION
player_id

TYPE: int

gameweek

TYPE: int

news_text

Raw news string from FPL API.

TYPE: str

status

FPL status code: "a", "d", "i", "s", "u", "n".

TYPE: str

chance_this_round

Probability of playing this round (0-100 scale from API, stored as 0-1).

TYPE: float or None

chance_next_round

Probability of playing next round (0-1).

TYPE: float or None

timestamp

When the news was added (ISO format from API).

TYPE: str

Source code in fplx/data/news_collector.py
def __init__(
    self,
    player_id: int,
    gameweek: int,
    news_text: str = "",
    status: str = "a",
    chance_this_round: Optional[float] = None,
    chance_next_round: Optional[float] = None,
    timestamp: str = "",
):
    """Store one player's news state at a specific gameweek.

    Parameters
    ----------
    player_id : int
        FPL element id.
    gameweek : int
        Gameweek this snapshot belongs to.
    news_text : str
        Raw news string from the FPL API.
    status : str
        FPL status code: "a", "d", "i", "s", "u", "n".
    chance_this_round, chance_next_round : float or None
        Probability of playing (0-1 scale; None when the API gives none).
    timestamp : str
        When the news was added (ISO format from API).
    """
    self.player_id = player_id
    self.gameweek = gameweek
    self.news_text = news_text
    self.status = status
    self.chance_this_round = chance_this_round
    self.chance_next_round = chance_next_round
    self.timestamp = timestamp
to_news_signal_input
to_news_signal_input() -> str

Convert to the text format that NewsSignal.generate_signal() expects.

Combines the raw news text with status information to give the existing NewsParser richer input.

Source code in fplx/data/news_collector.py
def to_news_signal_input(self) -> str:
    """
    Build the text input expected by NewsSignal.generate_signal().

    The raw news text is augmented with a readable status label and the
    chance-of-playing percentage so the existing NewsParser gets richer
    input than the bare news string.
    """
    segments: list[str] = []

    raw_text = self.news_text.strip() if self.news_text else ""
    if raw_text:
        segments.append(raw_text)

    # Readable labels for FPL status codes; only appended when the text
    # gathered so far does not already imply the label.
    labels = {
        "i": "injured",
        "s": "suspended",
        "u": "unavailable",
        "d": "doubtful",
        "n": "not in squad",
    }
    label = labels.get(self.status)
    if label is not None and label not in " ".join(segments).lower():
        segments.append(f"Status: {label}")

    # Append an explicit playing-chance percentage when one is known and < 100%.
    chance = self.chance_next_round
    if chance is not None and chance < 1.0:
        segments.append(f"{int(chance * 100)}% chance of playing")

    # ". ".join of an empty list is already "", matching the no-news case.
    return ". ".join(segments)
NewsCollector
NewsCollector(cache_dir: Optional[Path] = None)

Collects and persists player news snapshots per gameweek.

Usage (live): collector = NewsCollector(cache_dir="~/.fplx/news") collector.collect_from_bootstrap(bootstrap_data, gameweek=25) # Later, feed into inference: snapshots = collector.get_player_history(player_id=123)

Usage (backtest): collector = NewsCollector(cache_dir="~/.fplx/news") # Load all pre-collected snapshots for gw in range(1, 39): snapshots = collector.get_gameweek(gw) # inject into pipeline per player

PARAMETER DESCRIPTION
cache_dir

Directory to persist snapshots as JSON.

TYPE: Path or str DEFAULT: None

Source code in fplx/data/news_collector.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Initialise the collector.

    Parameters
    ----------
    cache_dir : Path or str, optional
        Directory to persist snapshots as JSON; defaults to ``~/.fplx/news``.
        Created eagerly so later writes cannot fail on a missing directory.
    """
    self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / ".fplx" / "news"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # In-memory store: {gameweek: {player_id: NewsSnapshot}}
    self._store: dict[int, dict[int, NewsSnapshot]] = {}
collect_from_bootstrap
collect_from_bootstrap(
    bootstrap_data: dict, gameweek: int
) -> int

Extract news from a bootstrap-static API response.

This is the key method. Call it each gameweek with fresh API data.

PARAMETER DESCRIPTION
bootstrap_data

Response from https://fantasy.premierleague.com/api/bootstrap-static/

TYPE: dict

gameweek

Current gameweek number.

TYPE: int

RETURNS DESCRIPTION
int

Number of players with active news.

Source code in fplx/data/news_collector.py
def collect_from_bootstrap(self, bootstrap_data: dict, gameweek: int) -> int:
    """
    Extract news snapshots from a bootstrap-static API response.

    This is the key method. Call it each gameweek with fresh API data;
    the snapshots are stored in memory and persisted to disk.

    Parameters
    ----------
    bootstrap_data : dict
        Response from https://fantasy.premierleague.com/api/bootstrap-static/
    gameweek : int
        Current gameweek number.

    Returns
    -------
    int
        Number of players with active news.
    """
    snapshots: dict[int, NewsSnapshot] = {}
    active = 0

    for element in bootstrap_data.get("elements", []):
        pid = element["id"]
        text = element.get("news", "") or ""
        status = element.get("status", "a") or "a"

        # The API reports chances on a 0-100 scale (or None); store 0-1.
        chance_this = element.get("chance_of_playing_this_round")
        if chance_this is not None:
            chance_this /= 100.0
        chance_next = element.get("chance_of_playing_next_round")
        if chance_next is not None:
            chance_next /= 100.0

        snapshots[pid] = NewsSnapshot(
            player_id=pid,
            gameweek=gameweek,
            news_text=text,
            status=status,
            chance_this_round=chance_this,
            chance_next_round=chance_next,
            timestamp=element.get("news_added", ""),
        )

        # "Active news" = any news text, or any status other than available.
        if text.strip() or status != "a":
            active += 1

    self._store[gameweek] = snapshots
    self._persist_gameweek(gameweek)

    logger.info(
        "GW %s: collected news for %d players (%d with active news)",
        gameweek,
        len(snapshots),
        active,
    )
    return active
get_player_news
get_player_news(
    player_id: int, gameweek: int
) -> Optional[NewsSnapshot]

Get a specific player's news at a specific gameweek.

Source code in fplx/data/news_collector.py
def get_player_news(self, player_id: int, gameweek: int) -> Optional[NewsSnapshot]:
    """Return one player's snapshot at one gameweek, or None if absent."""
    self._ensure_loaded(gameweek)
    return self._store.get(gameweek, {}).get(player_id)
get_player_history
get_player_history(player_id: int) -> list[NewsSnapshot]

Get all news snapshots for a player across all collected gameweeks.

Returns list sorted by gameweek.

Source code in fplx/data/news_collector.py
def get_player_history(self, player_id: int) -> list[NewsSnapshot]:
    """
    Return every collected snapshot for a player, ordered by gameweek.
    """
    self._load_all()
    per_gw = (self._store[gw].get(player_id) for gw in sorted(self._store))
    return [snap for snap in per_gw if snap is not None]
get_gameweek
get_gameweek(gameweek: int) -> dict[int, NewsSnapshot]

Get all player news for a specific gameweek.

Source code in fplx/data/news_collector.py
def get_gameweek(self, gameweek: int) -> dict[int, NewsSnapshot]:
    """Return {player_id: NewsSnapshot} for one gameweek (empty if none)."""
    self._ensure_loaded(gameweek)
    snapshots = self._store.get(gameweek)
    return snapshots if snapshots is not None else {}
get_players_with_news
get_players_with_news(gameweek: int) -> list[NewsSnapshot]

Get only players with non-trivial news at a gameweek.

Source code in fplx/data/news_collector.py
def get_players_with_news(self, gameweek: int) -> list[NewsSnapshot]:
    """Return snapshots carrying real news (text, or a non-available status)."""
    noteworthy = []
    for snap in self.get_gameweek(gameweek).values():
        if snap.news_text.strip() or snap.status != "a":
            noteworthy.append(snap)
    return noteworthy
collect_season_from_api
collect_season_from_api(data_loader) -> int

Collect news for all gameweeks in a season.

Requires calling the FPL API once per gameweek (the bootstrap-static endpoint only gives current-week news). For backtesting, you'd need to have cached the bootstrap data weekly during the season.

For a single-shot collection (current state only), just call collect_from_bootstrap() once with the current bootstrap data and the current gameweek number.

PARAMETER DESCRIPTION
data_loader

Your existing data loader.

TYPE: FPLDataLoader

RETURNS DESCRIPTION
int

Number of gameweeks collected.

Source code in fplx/data/news_collector.py
def collect_season_from_api(self, data_loader) -> int:
    """
    Collect news for the current gameweek via a single API call.

    The bootstrap-static endpoint only exposes current-week news, so a
    single call can only ever cover one gameweek. For backtesting you
    would need bootstrap data that was cached weekly during the season.

    For a single-shot collection (current state only), just call
    collect_from_bootstrap() once with the current bootstrap data and
    the current gameweek number.

    Parameters
    ----------
    data_loader : FPLDataLoader
        Your existing data loader.

    Returns
    -------
    int
        Number of gameweeks collected.
    """
    bootstrap = data_loader.fetch_bootstrap_data(force_refresh=True)

    # The current gameweek is the event flagged is_current; default to 1.
    current_gw = next(
        (event["id"] for event in bootstrap.get("events", []) if event.get("is_current")),
        1,
    )

    self.collect_from_bootstrap(bootstrap, current_gw)
    return 1  # Only current GW available from a single API call

schemas

Data validation schemas for FPL data sources.

BootstrapStatic

Bases: BaseModel

Schema for the main FPL bootstrap-static endpoint.

Fixture

Bases: BaseModel

Schema for a single fixture.

PlayerHistory

Bases: BaseModel

Schema for a player's historical performance data.

PlayerSummary

Bases: BaseModel

Schema for a player's summary data.

tft_dataset

Dataset utilities for Temporal Fusion Transformer (TFT).

This module converts vaastav merged gameweek data into a global panel format compatible with pytorch_forecasting.TimeSeriesDataSet.

build_tft_panel
build_tft_panel(merged_gw: DataFrame) -> DataFrame

Build TFT panel dataframe from merged gameweek data.

Output schema includes: - group_id: player identifier - time_idx: gameweek index - static categoricals: position, team - known covariates: fixture_difficulty, is_home - unknown covariates: xPts, mins_frac, news_sentiment, actual_points

Source code in fplx/data/tft_dataset.py
def build_tft_panel(merged_gw: pd.DataFrame) -> pd.DataFrame:
    """Build TFT panel dataframe from merged gameweek data.

    Output schema includes:
    - group_id: player identifier
    - time_idx: gameweek index
    - static categoricals: position, team
    - known covariates: fixture_difficulty, is_home
    - unknown covariates: xPts, mins_frac, news_sentiment, actual_points

    Parameters
    ----------
    merged_gw : pd.DataFrame
        One row per player-gameweek appearance (vaastav merged_gw format).

    Returns
    -------
    pd.DataFrame
        Panel with exactly the schema above, sorted by (group_id, time_idx).
    """
    # BUG FIX: the xPts buffer below is written via df.index values used as
    # positions, which silently assumed merged_gw always carried a default
    # RangeIndex. Reset the index up front so label- and position-based
    # indexing coincide even for filtered/re-ordered input frames.
    df = merged_gw.copy().reset_index(drop=True)

    rename_map = {
        "element": "group_id",
        "gameweek": "time_idx",
        "points": "actual_points",
    }
    df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

    # Defensive fallbacks; .to_numpy() avoids index alignment against the
    # caller's (possibly non-default) index now that df has a fresh one.
    if "group_id" not in df.columns and "element" in merged_gw.columns:
        df["group_id"] = merged_gw["element"].to_numpy()
    if "time_idx" not in df.columns and "gameweek" in merged_gw.columns:
        df["time_idx"] = merged_gw["gameweek"].to_numpy()
    if "actual_points" not in df.columns and "points" in merged_gw.columns:
        df["actual_points"] = merged_gw["points"].to_numpy()

    df["group_id"] = pd.to_numeric(df["group_id"], errors="coerce").astype("Int64")
    df["time_idx"] = pd.to_numeric(df["time_idx"], errors="coerce")
    df["actual_points"] = pd.to_numeric(df["actual_points"], errors="coerce").fillna(0.0)

    # Static categoricals with safe defaults.
    if "position" not in df.columns:
        df["position"] = "MID"
    if "team" not in df.columns:
        df["team"] = "Unknown"
    df["position"] = df["position"].astype(str)
    df["team"] = df["team"].astype(str)

    # Known future covariates.
    if "was_home" in df.columns:
        df["is_home"] = pd.to_numeric(df["was_home"], errors="coerce").fillna(0.0)
    else:
        df["is_home"] = 0.0

    if "fixture_difficulty" in df.columns:
        df["fixture_difficulty"] = pd.to_numeric(df["fixture_difficulty"], errors="coerce").fillna(3.0)
    else:
        df["fixture_difficulty"] = 3.0

    if "minutes" in df.columns:
        mins = pd.to_numeric(df["minutes"], errors="coerce").fillna(0.0)
        df["mins_frac"] = np.clip(mins / 90.0, 0.0, 1.0)
    else:
        df["mins_frac"] = 0.0

    # Placeholder until historical NLP news pipeline is fully integrated.
    df["news_sentiment"] = 0.0

    # Structural xPts projection per player-position trajectory. With the
    # RangeIndex guaranteed above, grp_sorted.index doubles as row positions.
    xpts = np.zeros(len(df), dtype=float)
    for _, grp in df.groupby("group_id", dropna=True):
        grp_sorted = grp.sort_values("time_idx")
        pos = str(grp_sorted["position"].iloc[0])
        x_vals = compute_xpoints(grp_sorted, pos)
        xpts[grp_sorted.index.to_numpy()] = x_vals
    df["xPts"] = xpts

    keep_cols = [
        "group_id",
        "time_idx",
        "position",
        "team",
        "fixture_difficulty",
        "is_home",
        "xPts",
        "mins_frac",
        "news_sentiment",
        "actual_points",
    ]
    df = df[keep_cols].dropna(subset=["group_id", "time_idx"]).copy()
    df["group_id"] = df["group_id"].astype(int)
    df["time_idx"] = df["time_idx"].astype(int)
    return df.sort_values(["group_id", "time_idx"]).reset_index(drop=True)
make_tft_datasets
make_tft_datasets(
    panel_df: DataFrame,
    training_cutoff: int,
    encoder_length: int = 15,
    prediction_length: int = 1,
)

Create TFT training and prediction datasets.

Requires optional dependency pytorch-forecasting.

Source code in fplx/data/tft_dataset.py
def make_tft_datasets(
    panel_df: pd.DataFrame,
    training_cutoff: int,
    encoder_length: int = 15,
    prediction_length: int = 1,
):
    """Create TFT training and prediction datasets.

    Requires optional dependency `pytorch-forecasting`.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel in the build_tft_panel schema (group_id/time_idx plus
        covariate and target columns).
    training_cutoff : int
        Last time_idx (inclusive) to use for training.
    encoder_length : int
        Desired encoder window; automatically shrunk when history is shorter.
    prediction_length : int
        Forecast horizon in gameweeks.

    Returns
    -------
    tuple
        (training, prediction) TimeSeriesDataSet instances; the prediction
        dataset reuses the training dataset's encoders/normalisation.

    Raises
    ------
    ImportError
        If pytorch-forecasting is not installed.
    ValueError
        If no group has enough history for one encoder+decoder window.
    """
    try:
        from pytorch_forecasting import TimeSeriesDataSet
        from pytorch_forecasting.data.encoders import NaNLabelEncoder
    except ImportError as e:
        raise ImportError(
            "TFT dataset creation requires pytorch-forecasting. "
            "Install with: pip install pytorch-forecasting lightning"
        ) from e

    # Training split: everything up to and including the cutoff, with a
    # numeric, NaN-free target.
    train_df = panel_df[panel_df["time_idx"] <= training_cutoff].copy()
    train_df["actual_points"] = (
        pd.to_numeric(train_df["actual_points"], errors="coerce").fillna(0.0).astype(float)
    )

    # Ensure encoder length is feasible for available history.
    hist_len = train_df.groupby("group_id")["time_idx"].nunique()
    if hist_len.empty:
        raise ValueError("No training data available for TFT dataset creation.")

    # The longest encoder any group could support, after reserving the horizon.
    max_possible_encoder = int(hist_len.max() - prediction_length)
    if max_possible_encoder < 1:
        raise ValueError(
            "Insufficient history to build TFT windows. Increase training cutoff or reduce prediction length."
        )

    eff_encoder_length = min(int(encoder_length), max_possible_encoder)
    min_required_len = eff_encoder_length + prediction_length

    # Keep only groups with at least one full encoder+decoder window.
    valid_ids = hist_len[hist_len >= min_required_len].index
    train_df = train_df[train_df["group_id"].isin(valid_ids)].copy()
    if train_df.empty:
        raise ValueError(
            "No groups have enough history after encoder-length adjustment. "
            f"Required per-group length: {min_required_len}."
        )

    training = TimeSeriesDataSet(
        train_df,
        time_idx="time_idx",
        target="actual_points",
        group_ids=["group_id"],
        # Fixed-size windows: min == max for both encoder and decoder.
        min_encoder_length=eff_encoder_length,
        max_encoder_length=eff_encoder_length,
        min_prediction_length=prediction_length,
        max_prediction_length=prediction_length,
        static_categoricals=["position", "team"],
        time_varying_known_reals=["time_idx", "fixture_difficulty", "is_home"],
        time_varying_unknown_reals=["xPts", "mins_frac", "news_sentiment", "actual_points"],
        categorical_encoders={
            # add_nan=True gives unseen categories a NaN bucket at predict time.
            "position": NaNLabelEncoder(add_nan=True),
            "team": NaNLabelEncoder(add_nan=True),
        },
        allow_missing_timesteps=True,
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
    )

    # Prediction dataset sees the full panel (same valid groups) so the
    # decoder can run past the training cutoff.
    pred_df = panel_df[panel_df["group_id"].isin(valid_ids)].copy()
    pred_df["actual_points"] = (
        pd.to_numeric(pred_df["actual_points"], errors="coerce").fillna(0.0).astype(float)
    )

    prediction = TimeSeriesDataSet.from_dataset(
        training,
        pred_df,
        predict=True,
        stop_randomization=True,
    )

    return training, prediction

vaastav_loader

Loader for the vaastav/Fantasy-Premier-League dataset.

Supports two modes: 1. Remote: fetch CSVs directly from GitHub (no clone needed). 2. Local: read from a cloned repo directory.

Usage (remote): loader = VaastavLoader(season="2023-24") players = loader.build_player_objects(up_to_gw=20)

Usage (local): loader = VaastavLoader(season="2023-24", data_dir="./Fantasy-Premier-League") players = loader.build_player_objects(up_to_gw=20)

Dataset: https://github.com/vaastav/Fantasy-Premier-League

Double Gameweek handling

build_player_objects automatically calls aggregate_dgw_timeseries on every player's raw timeseries before constructing the Player object. This means all downstream consumers (inference pipeline, MV-HMM, enriched predictor, Kalman Filter) always receive exactly one row per FPL decision period.

For DGW gameweeks, the resulting row contains: points – raw total (both fixtures summed, used for scoring / oracle) points_norm – per-fixture average (used by inference components) n_fixtures – number of fixtures played (1 for SGW, 2 for DGW)

The inference pipeline uses points_norm so that HMM emission distributions remain calibrated on single-game-equivalent observations. The ILP objective then scales back via scale_predictions_for_dgw to reflect the full DGW opportunity.

VaastavLoader
VaastavLoader(
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
)

Load historical FPL data from the vaastav dataset.

PARAMETER DESCRIPTION
season

Season string, e.g. "2023-24".

TYPE: str DEFAULT: '2023-24'

data_dir

Path to a local clone. If None, fetches from GitHub.

TYPE: str or Path DEFAULT: None

cache_dir

Where to cache downloaded CSVs. Defaults to ~/.fplx/vaastav/.

TYPE: str or Path DEFAULT: None

Source code in fplx/data/vaastav_loader.py
def __init__(
    self,
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
):
    """Set up season, source directory, and cache location."""
    self.season = self._validate_season(season)
    self.data_dir = Path(data_dir) if data_dir else None

    if cache_dir:
        self.cache_dir = Path(cache_dir)
    else:
        # Default cache is project-local to keep artifacts within the workspace.
        project_root = Path(__file__).resolve().parents[2]
        self.cache_dir = project_root / ".fplx" / "vaastav"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Memoized dataframes, filled lazily by the load_* methods.
    self._merged_gw: Optional[pd.DataFrame] = None
    self._player_raw: Optional[pd.DataFrame] = None
load_merged_gw
load_merged_gw() -> DataFrame

Load the merged gameweek file (all GWs, all players, one CSV).

RETURNS DESCRIPTION
DataFrame

One row per player-gameweek appearance.

Source code in fplx/data/vaastav_loader.py
def load_merged_gw(self) -> pd.DataFrame:
    """
    Load the merged gameweek file (all GWs, all players, one CSV).

    The result is memoized on the instance, so repeated calls are cheap
    and the load/log happens at most once.

    Returns
    -------
    pd.DataFrame
        One row per player-gameweek appearance.
    """
    if self._merged_gw is None:
        df = self._read_csv("gws/merged_gw.csv")
        # Normalise raw column names to the library's canonical names.
        df = df.rename(columns={col: COLUMN_MAP.get(col, col) for col in df.columns})
        df = self._coalesce_duplicate_columns(df)

        if "gameweek" in df.columns:
            df["gameweek"] = pd.to_numeric(df["gameweek"], errors="coerce")

        self._merged_gw = df
        logger.info(
            "Loaded merged_gw: %d rows, %d players, GW %d-%d",
            len(df),
            df["element"].nunique(),
            df["gameweek"].min(),
            df["gameweek"].max(),
        )
    return self._merged_gw
load_player_raw
load_player_raw() -> DataFrame

Load season-level player metadata.

Source code in fplx/data/vaastav_loader.py
def load_player_raw(self) -> pd.DataFrame:
    """Load season-level player metadata (memoized on the instance)."""
    if self._player_raw is None:
        self._player_raw = self._read_csv("players_raw.csv")
    return self._player_raw
load_gameweek
load_gameweek(gw: int) -> DataFrame

Load a single gameweek from merged data.

Source code in fplx/data/vaastav_loader.py
def load_gameweek(self, gw: int) -> pd.DataFrame:
    """Return a copy of the merged-GW rows belonging to gameweek ``gw``."""
    merged = self.load_merged_gw()
    return merged.loc[merged["gameweek"] == gw].copy()
build_player_objects
build_player_objects(
    up_to_gw: Optional[int] = None,
) -> list[Player]

Build Player objects with timeseries up to a given gameweek.

PARAMETER DESCRIPTION
up_to_gw

Only include gameweeks 1..up_to_gw. If None, include all.

TYPE: int DEFAULT: None

RETURNS DESCRIPTION
list[Player]
Source code in fplx/data/vaastav_loader.py
def build_player_objects(
    self,
    up_to_gw: Optional[int] = None,
) -> list[Player]:
    """
    Build Player objects with timeseries up to a given gameweek.

    Parameters
    ----------
    up_to_gw : int, optional
        Only include gameweeks 1..up_to_gw. If None, include all.

    Returns
    -------
    list[Player]
    """
    all_gw = self.load_merged_gw()

    # Restrict history to the requested decision point (inclusive).
    if up_to_gw is not None:
        all_gw = all_gw[all_gw["gameweek"] <= up_to_gw]

    if all_gw.empty:
        return []

    players = []
    grouped = all_gw.groupby("element")  # one group per FPL player id

    for pid, grp in grouped:
        pid = int(pid)
        # Chronological order so iloc[0]/iloc[-1] mean "first seen"/"latest".
        grp = grp.sort_values("gameweek").reset_index(drop=True)

        # Player metadata from the row itself
        name = str(grp["name"].iloc[0]) if "name" in grp.columns else f"Player_{pid}"
        team = str(grp["team"].iloc[0]) if "team" in grp.columns else "Unknown"
        pos_raw = grp["position"].iloc[0] if "position" in grp.columns else "MID"
        # "value" appears to be price in tenths (hence /10.0) — TODO confirm
        price = grp["value"].iloc[-1] / 10.0 if "value" in grp.columns else 5.0

        # Map the raw position label; retry with its str() form, default "MID".
        position = POSITION_MAP.get(pos_raw, POSITION_MAP.get(str(pos_raw), "MID"))

        # Build timeseries with available columns
        keep = [
            c
            for c in [
                "gameweek",
                "points",
                "minutes",
                "starts",
                "goals",
                "assists",
                "xG",
                "xA",
                "bonus",
                "bps",
                "clean_sheets",
                "goals_conceded",
                "saves",
                "yellow_cards",
                "red_cards",
                "own_goals",
                "penalties_missed",
                "penalties_saved",
                "influence",
                "creativity",
                "threat",
                "ict_index",
                "was_home",
                "opponent_team",
                "expected_goals_conceded",
                "xP",
                "value",
                "selected",
                "transfers_in",
                "transfers_out",
            ]
            if c in grp.columns
        ]
        timeseries = grp[keep].copy()
        # Coerce everything numeric; unparseable entries become NaN.
        for col in timeseries.columns:
            timeseries[col] = pd.to_numeric(timeseries[col], errors="coerce")

        # ── DGW aggregation ───────────────────────────────────────────
        # Always collapse to one row per GW decision period.
        # DGW gameweeks receive per-fixture normalised scores so that the
        # inference pipeline (HMM, enriched, KF) operates on single-game-
        # equivalent observations. See double_gameweek.py for details.
        timeseries = aggregate_dgw_timeseries(timeseries)

        player = Player(
            id=pid,
            name=name,
            team=team,
            position=position,
            price=float(price),
            timeseries=timeseries,
        )
        players.append(player)

    logger.info("Built %d Player objects (up_to_gw=%s).", len(players), up_to_gw)
    return players
get_actual_points
get_actual_points(gw: int) -> dict[int, float]

Get actual points scored by each player in a specific gameweek.

For Double Gameweek players (two fixtures in the same round) the points from both fixtures are summed, which is the correct FPL score for that gameweek. The previous implementation used dict(zip(…)) which silently discarded the first fixture row when a player appeared twice, underreporting DGW scores.

RETURNS DESCRIPTION
dict[int, float]

{player_id: actual_points} (summed across fixtures for DGW players)

Source code in fplx/data/vaastav_loader.py
def get_actual_points(self, gw: int) -> dict[int, float]:
    """
    Get actual points scored by each player in a specific gameweek.

    Double Gameweek players (two fixtures in one round) contribute the
    sum of both fixtures' points, which matches the official FPL score
    for the round. A naive ``dict(zip(...))`` over the raw rows would
    keep only the last fixture row per player.

    Returns
    -------
    dict[int, float]
        {player_id: actual_points}  (summed across fixtures for DGW players)
    """
    frame = self.load_gameweek(gw)
    points_column = "points" if "points" in frame.columns else "total_points"
    # Summing per player id covers both single- and double-GW cases.
    per_player = frame.groupby("element")[points_column].sum()
    return {int(pid): float(total) for pid, total in per_player.items()}
get_fixture_info
get_fixture_info(gw: int) -> dict[int, dict]

Get fixture context (opponent, home/away, xP) per player for a GW.

Source code in fplx/data/vaastav_loader.py
def get_fixture_info(self, gw: int) -> dict[int, dict]:
    """
    Get fixture context (opponent, home/away, xP) per player for a GW.

    Returns
    -------
    dict[int, dict]
        {player_id: {"was_home": bool, "opponent_team": int, "xP": float}}
        Missing columns and NaN values fall back to False / 0 / 0.0.
    """
    df = self.load_gameweek(gw)

    # Hoist loop-invariant column checks out of the per-row loop.
    has_opponent = "opponent_team" in df.columns
    has_xp = "xP" in df.columns

    def _to_int(value, default: int = 0) -> int:
        # int(NaN) raises ValueError; blank GW rows can carry NaN here.
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def _to_float(value, default: float = 0.0) -> float:
        try:
            f = float(value)
        except (TypeError, ValueError):
            return default
        return default if f != f else f  # NaN != NaN

    info: dict[int, dict] = {}
    for _, row in df.iterrows():
        pid = _to_int(row.get("element", 0))
        info[pid] = {
            "was_home": bool(row.get("was_home", False)),
            "opponent_team": _to_int(row.get("opponent_team", 0)) if has_opponent else 0,
            "xP": _to_float(row.get("xP", 0.0)) if has_xp else 0.0,
        }
    return info

evaluation

Evaluation metrics for inference and optimization.

InferenceMetrics dataclass

InferenceMetrics(
    predicted_means: list[float] = list(),
    predicted_vars: list[float] = list(),
    actuals: list[float] = list(),
    model_predictions: dict[str, list[float]] = dict(),
)

Collects and computes inference evaluation metrics.

Usage:

    metrics = InferenceMetrics()
    # for each player-gameweek:
    metrics.add(predicted_mean, predicted_var, actual_points)
    report = metrics.compute()

add
add(
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
)

Record a single prediction-actual pair.

Source code in fplx/evaluation/metrics.py
def add(
    self,
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
):
    """Record a single prediction-actual pair."""
    self.predicted_means.append(predicted_mean)
    self.predicted_vars.append(predicted_var)
    self.actuals.append(actual)

    if model_preds:
        for name, pred in model_preds.items():
            if name not in self.model_predictions:
                self.model_predictions[name] = []
            self.model_predictions[name].append(pred)
compute
compute() -> dict

Compute all inference metrics.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute all inference metrics.

    Returns a dict with accuracy (mse/rmse/mae/bias), empirical coverage
    of the 95% and 50% Gaussian intervals, mean predictive std, mean
    Gaussian log-likelihood, and per-model ablation errors when present.
    Returns {} when nothing has been recorded.
    """
    mu = np.asarray(self.predicted_means, dtype=float)
    var = np.asarray(self.predicted_vars, dtype=float)
    y = np.asarray(self.actuals, dtype=float)

    if mu.size == 0:
        return {}

    resid = mu - y
    report = {
        "n_predictions": int(mu.size),
        "mse": float(np.mean(resid ** 2)),
        "rmse": float(np.sqrt(np.mean(resid ** 2))),
        "mae": float(np.mean(np.abs(resid))),
        "mean_bias": float(np.mean(resid)),
    }

    # Floor variances before sqrt/division to avoid zero-variance blowups.
    safe_var = np.maximum(var, 1e-8)
    sigma = np.sqrt(safe_var)

    # Empirical coverage of the central 95% and 50% Gaussian intervals.
    for label, z in (("calibration_95", 1.96), ("calibration_50", 0.674)):
        covered = (y >= mu - z * sigma) & (y <= mu + z * sigma)
        report[label] = float(np.mean(covered))

    # Average predictive uncertainty.
    report["mean_predicted_std"] = float(np.mean(sigma))

    # Gaussian predictive log-likelihood:
    # log p(y | mu, s^2) = -0.5 * (log(2*pi*s^2) + (y - mu)^2 / s^2)
    log_lik = -0.5 * (np.log(2 * np.pi * safe_var) + resid ** 2 / safe_var)
    report["mean_log_likelihood"] = float(np.mean(log_lik))

    # Per-model ablation errors (only when lengths line up with actuals).
    ablation = {
        name: {
            "mse": float(np.mean((np.asarray(preds) - y) ** 2)),
            "mae": float(np.mean(np.abs(np.asarray(preds) - y))),
        }
        for name, preds in self.model_predictions.items()
        if len(preds) == len(y)
    }
    if ablation:
        report["ablation"] = ablation

    return report

OptimizationMetrics dataclass

OptimizationMetrics(
    strategy_points: dict[str, list[float]] = dict(),
    oracle_points: list[float] = list(),
    gameweeks: list[int] = list(),
)

Collects and computes optimization evaluation metrics.

Tracks actual points earned per gameweek under different strategies, and compares against oracle (hindsight-optimal).

Usage:

    metrics = OptimizationMetrics()
    # for each gameweek:
    metrics.add_gameweek(gw, actual_points, oracle_points)
    report = metrics.compute()

add_gameweek
add_gameweek(
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
)

Record actual points for one gameweek across strategies.

PARAMETER DESCRIPTION
gw

Gameweek number.

TYPE: int

strategy_results

{strategy_name: actual_points_earned}

TYPE: dict[str, float]

oracle

Best possible points with hindsight.

TYPE: float

Source code in fplx/evaluation/metrics.py
def add_gameweek(
    self,
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
):
    """
    Record actual points for one gameweek across strategies.

    Parameters
    ----------
    gw : int
        Gameweek number.
    strategy_results : dict[str, float]
        {strategy_name: actual_points_earned}
    oracle : float
        Best possible points with hindsight.
    """
    self.gameweeks.append(gw)
    self.oracle_points.append(oracle)

    for strategy_name, earned in strategy_results.items():
        self.strategy_points.setdefault(strategy_name, []).append(earned)
compute
compute() -> dict

Compute optimization metrics for all strategies.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute optimization metrics for all strategies.

    Summarizes per-strategy totals, per-GW variability, worst-case GW,
    and the optimality gap relative to the hindsight-optimal (oracle)
    score.
    """
    oracle_arr = np.asarray(self.oracle_points, dtype=float)
    oracle_total = float(oracle_arr.sum())
    report = {
        "n_gameweeks": len(self.gameweeks),
        "oracle_total": oracle_total,
        "oracle_mean_per_gw": float(oracle_arr.mean()) if oracle_arr.size > 0 else 0.0,
        "strategies": {},
    }

    for strategy_name, earned_list in self.strategy_points.items():
        earned = np.asarray(earned_list, dtype=float)
        n = earned.size
        total = float(earned.sum())
        mean_gw = float(earned.mean()) if n > 0 else 0.0
        std_gw = float(earned.std()) if n > 0 else 0.0

        # Per-GW optimality gap: (oracle - strategy) / oracle, guarded
        # against division by zero.
        truncated_oracle = oracle_arr[:n]
        gap_arr = (truncated_oracle - earned) / np.maximum(truncated_oracle, 1e-6)
        mean_gap = float(gap_arr.mean()) if n > 0 else 0.0

        report["strategies"][strategy_name] = {
            "total_points": total,
            "mean_per_gw": mean_gw,
            "std_per_gw": std_gw,
            # Coefficient of variation as a consistency measure.
            "cv": std_gw / mean_gw if mean_gw > 0 else 0.0,
            # Worst single-gameweek haul.
            "worst_gw_points": float(earned.min()) if n > 0 else 0.0,
            "mean_optimality_gap": mean_gap,
            "pct_of_oracle": total / oracle_total * 100 if oracle_total > 0 else 0,
        }

    return report

metrics

Metrics for evaluating inference accuracy and optimization quality.

Part I (18-662) metrics: prediction accuracy, calibration, ablation. Part II (18-660) metrics: actual points, optimality gap, consistency.

InferenceMetrics dataclass
InferenceMetrics(
    predicted_means: list[float] = list(),
    predicted_vars: list[float] = list(),
    actuals: list[float] = list(),
    model_predictions: dict[str, list[float]] = dict(),
)

Collects and computes inference evaluation metrics.

Usage:

    metrics = InferenceMetrics()
    # for each player-gameweek:
    metrics.add(predicted_mean, predicted_var, actual_points)
    report = metrics.compute()

add
add(
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
)

Record a single prediction-actual pair.

Source code in fplx/evaluation/metrics.py
def add(
    self,
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
):
    """Record a single prediction-actual pair."""
    self.predicted_means.append(predicted_mean)
    self.predicted_vars.append(predicted_var)
    self.actuals.append(actual)

    if model_preds:
        for name, pred in model_preds.items():
            if name not in self.model_predictions:
                self.model_predictions[name] = []
            self.model_predictions[name].append(pred)
compute
compute() -> dict

Compute all inference metrics.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute all inference metrics.

    Returns a dict with accuracy (mse/rmse/mae/bias), empirical coverage
    of the 95% and 50% Gaussian intervals, mean predictive std, mean
    Gaussian log-likelihood, and per-model ablation errors when present.
    Returns {} when nothing has been recorded.
    """
    mu = np.asarray(self.predicted_means, dtype=float)
    var = np.asarray(self.predicted_vars, dtype=float)
    y = np.asarray(self.actuals, dtype=float)

    if mu.size == 0:
        return {}

    resid = mu - y
    report = {
        "n_predictions": int(mu.size),
        "mse": float(np.mean(resid ** 2)),
        "rmse": float(np.sqrt(np.mean(resid ** 2))),
        "mae": float(np.mean(np.abs(resid))),
        "mean_bias": float(np.mean(resid)),
    }

    # Floor variances before sqrt/division to avoid zero-variance blowups.
    safe_var = np.maximum(var, 1e-8)
    sigma = np.sqrt(safe_var)

    # Empirical coverage of the central 95% and 50% Gaussian intervals.
    for label, z in (("calibration_95", 1.96), ("calibration_50", 0.674)):
        covered = (y >= mu - z * sigma) & (y <= mu + z * sigma)
        report[label] = float(np.mean(covered))

    # Average predictive uncertainty.
    report["mean_predicted_std"] = float(np.mean(sigma))

    # Gaussian predictive log-likelihood:
    # log p(y | mu, s^2) = -0.5 * (log(2*pi*s^2) + (y - mu)^2 / s^2)
    log_lik = -0.5 * (np.log(2 * np.pi * safe_var) + resid ** 2 / safe_var)
    report["mean_log_likelihood"] = float(np.mean(log_lik))

    # Per-model ablation errors (only when lengths line up with actuals).
    ablation = {
        name: {
            "mse": float(np.mean((np.asarray(preds) - y) ** 2)),
            "mae": float(np.mean(np.abs(np.asarray(preds) - y))),
        }
        for name, preds in self.model_predictions.items()
        if len(preds) == len(y)
    }
    if ablation:
        report["ablation"] = ablation

    return report
OptimizationMetrics dataclass
OptimizationMetrics(
    strategy_points: dict[str, list[float]] = dict(),
    oracle_points: list[float] = list(),
    gameweeks: list[int] = list(),
)

Collects and computes optimization evaluation metrics.

Tracks actual points earned per gameweek under different strategies, and compares against oracle (hindsight-optimal).

Usage:

    metrics = OptimizationMetrics()
    # for each gameweek:
    metrics.add_gameweek(gw, actual_points, oracle_points)
    report = metrics.compute()

add_gameweek
add_gameweek(
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
)

Record actual points for one gameweek across strategies.

PARAMETER DESCRIPTION
gw

Gameweek number.

TYPE: int

strategy_results

{strategy_name: actual_points_earned}

TYPE: dict[str, float]

oracle

Best possible points with hindsight.

TYPE: float

Source code in fplx/evaluation/metrics.py
def add_gameweek(
    self,
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
):
    """
    Record actual points for one gameweek across strategies.

    Parameters
    ----------
    gw : int
        Gameweek number.
    strategy_results : dict[str, float]
        {strategy_name: actual_points_earned}
    oracle : float
        Best possible points with hindsight.
    """
    self.gameweeks.append(gw)
    self.oracle_points.append(oracle)

    for strategy_name, earned in strategy_results.items():
        self.strategy_points.setdefault(strategy_name, []).append(earned)
compute
compute() -> dict

Compute optimization metrics for all strategies.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute optimization metrics for all strategies.

    Summarizes per-strategy totals, per-GW variability, worst-case GW,
    and the optimality gap relative to the hindsight-optimal (oracle)
    score.
    """
    oracle_arr = np.asarray(self.oracle_points, dtype=float)
    oracle_total = float(oracle_arr.sum())
    report = {
        "n_gameweeks": len(self.gameweeks),
        "oracle_total": oracle_total,
        "oracle_mean_per_gw": float(oracle_arr.mean()) if oracle_arr.size > 0 else 0.0,
        "strategies": {},
    }

    for strategy_name, earned_list in self.strategy_points.items():
        earned = np.asarray(earned_list, dtype=float)
        n = earned.size
        total = float(earned.sum())
        mean_gw = float(earned.mean()) if n > 0 else 0.0
        std_gw = float(earned.std()) if n > 0 else 0.0

        # Per-GW optimality gap: (oracle - strategy) / oracle, guarded
        # against division by zero.
        truncated_oracle = oracle_arr[:n]
        gap_arr = (truncated_oracle - earned) / np.maximum(truncated_oracle, 1e-6)
        mean_gap = float(gap_arr.mean()) if n > 0 else 0.0

        report["strategies"][strategy_name] = {
            "total_points": total,
            "mean_per_gw": mean_gw,
            "std_per_gw": std_gw,
            # Coefficient of variation as a consistency measure.
            "cv": std_gw / mean_gw if mean_gw > 0 else 0.0,
            # Worst single-gameweek haul.
            "worst_gw_points": float(earned.min()) if n > 0 else 0.0,
            "mean_optimality_gap": mean_gap,
            "pct_of_oracle": total / oracle_total * 100 if oracle_total > 0 else 0,
        }

    return report

inference

Probabilistic inference modules for FPLX.

HMMInference

HMMInference(
    transition_matrix: Optional[ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[ndarray] = None,
)

Hidden Markov Model for discrete player form states.

Supports dynamic transition matrix perturbation so that external signals (news, injuries) can shift state probabilities mid-sequence.

PARAMETER DESCRIPTION
transition_matrix

transition_matrix[i,j] = P(S_{t+1}=j | S_t=i). Rows must sum to 1.

TYPE: (ndarray, shape(N, N)) DEFAULT: None

emission_params

{state_index: (mean, std)} for Gaussian emissions.

TYPE: dict DEFAULT: None

initial_dist

Prior over initial state.

TYPE: (ndarray, shape(N)) DEFAULT: None

Source code in fplx/inference/hmm.py
def __init__(
    self,
    transition_matrix: Optional[np.ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Set up the HMM, copying user-supplied parameters or falling back to defaults."""
    if transition_matrix is None:
        self.transition_matrix = DEFAULT_TRANSITION_MATRIX.copy()
    else:
        self.transition_matrix = transition_matrix.copy()

    # NOTE: a falsy (empty) emission dict also falls back to the defaults,
    # matching the original short-circuit behavior.
    self.emission_params = emission_params if emission_params else dict(DEFAULT_EMISSION_PARAMS)

    if initial_dist is None:
        self.pi = DEFAULT_INITIAL_DIST.copy()
    else:
        self.pi = initial_dist.copy()

    self.n_states = len(self.pi)

    # per-timestep transition overrides (for news injection)
    # key: timestep t, value: modified transition matrix for that step
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
)

Perturb transition matrix at a specific timestep based on news.

For each source state, the transition probability toward boosted target states is multiplied by the boost factor (scaled by confidence), then the row is renormalized.

PARAMETER DESCRIPTION
timestep

The gameweek at which the perturbation applies.

TYPE: int

state_boost

{target_state: multiplicative_boost}. E.g., {0: 10.0} means "10x more likely to transition to Injured."

TYPE: dict[int, float]

confidence

Scales the perturbation. 0 = no effect, 1 = full effect.

TYPE: float DEFAULT: 1.0

Source code in fplx/inference/hmm.py
def inject_news_perturbation(
    self,
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
):
    """
    Perturb transition matrix at a specific timestep based on news.

    For each source state, the transition probability toward boosted
    target states is multiplied by the boost factor (scaled by confidence),
    then the row is renormalized.

    Parameters
    ----------
    timestep : int
        The gameweek at which the perturbation applies.
    state_boost : dict[int, float]
        {target_state: multiplicative_boost}. E.g., {0: 10.0} means
        "10x more likely to transition to Injured."
    confidence : float
        Scales the perturbation. 0 = no effect, 1 = full effect.
    """
    # Work on a copy so the base matrix stays untouched.
    modified = self.transition_matrix.copy()

    for row_idx in range(self.n_states):
        for boosted_state, raw_boost in state_boost.items():
            # Confidence interpolates between no change (1.0) and full boost.
            scaled_boost = 1.0 + confidence * (raw_boost - 1.0)
            modified[row_idx, boosted_state] *= scaled_boost

        # Keep the row a valid probability distribution.
        row_total = modified[row_idx].sum()
        if row_total > 0:
            modified[row_idx] /= row_total

    self._transition_overrides[timestep] = modified
clear_perturbations
clear_perturbations()

Remove all per-timestep transition overrides.

Source code in fplx/inference/hmm.py
def clear_perturbations(self):
    """Remove all per-timestep transition overrides, restoring the base matrix at every step."""
    # In-place clear so any external references to the dict observe the reset.
    self._transition_overrides.clear()
forward
forward(observations: ndarray)

Forward algorithm with dynamic transition matrices.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
forward_messages

Normalized forward messages. forward_messages[t] = P(S_t | y_1:t)

TYPE: (ndarray, shape(num_timesteps, N))

scale

Per-timestep normalization constants.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/hmm.py
def forward(self, observations: np.ndarray):
    """
    Forward algorithm with dynamic transition matrices.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    forward_messages : np.ndarray, shape (num_timesteps, N)
        Normalized forward messages, row t = P(S_t | y_1:t).
    scale : np.ndarray, shape (num_timesteps,)
        Per-timestep normalization constants.
    """
    n_obs = len(observations)
    alpha = np.zeros((n_obs, self.n_states))
    norm = np.zeros(n_obs)

    # Initialisation: prior times emission likelihood of the first observation.
    alpha[0] = self.pi * self._emission_vector(observations[0])
    norm[0] = alpha[0].sum()
    if norm[0] > 0:
        alpha[0] /= norm[0]

    # Recursion: propagate through the (possibly overridden) transition
    # matrix for each step, re-weight by the emission, then normalize.
    for t in range(1, n_obs):
        step_matrix = self._get_transition_matrix(t)
        likelihood = self._emission_vector(observations[t])
        alpha[t] = (alpha[t - 1] @ step_matrix) * likelihood
        norm[t] = alpha[t].sum()
        if norm[t] > 0:
            alpha[t] /= norm[t]

    return alpha, norm
forward_backward
forward_backward(observations: ndarray) -> ndarray

Compute smoothed posteriors P(S_t | y_1:num_timesteps).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_posteriors

smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)

TYPE: (ndarray, shape(num_timesteps, N))

Source code in fplx/inference/hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """
    Compute smoothed posteriors P(S_t | y_1:num_timesteps).

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_posteriors : np.ndarray, shape (num_timesteps, N)
        smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)
    """
    num_timesteps = len(observations)
    # Forward pass also yields the per-step normalizers reused for the
    # backward scaling below.
    forward_messages, scale = self.forward(observations)

    backward_messages = np.zeros((num_timesteps, self.n_states))
    # beta_T = 1 by convention.
    backward_messages[num_timesteps - 1] = 1.0

    for t in range(num_timesteps - 2, -1, -1):
        # Transition/emission for step t -> t+1 (per-timestep overrides respected).
        transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
        b_next = self._emission_vector(observations[t + 1])
        backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
        # Rescale with the forward normalizer to keep magnitudes bounded.
        if scale[t + 1] > 0:
            backward_messages[t] /= scale[t + 1]

    # gamma_t is proportional to alpha_t * beta_t; normalize each row,
    # defensively treating all-zero rows as uniform-safe (divide by 1).
    smoothed_posteriors = forward_messages * backward_messages
    row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    smoothed_posteriors /= row_sums

    return smoothed_posteriors
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence via Viterbi decoding.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
best_path

TYPE: np.ndarray of int, shape (num_timesteps,)

Source code in fplx/inference/hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """
    Most likely state sequence via Viterbi decoding.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    best_path : np.ndarray of int, shape (num_timesteps,)
    """
    n_obs = len(observations)
    n = self.n_states

    log_delta = np.zeros((n_obs, n))
    psi = np.zeros((n_obs, n), dtype=int)

    first_emission = self._emission_vector(observations[0])
    log_delta[0] = np.log(self.pi + 1e-300) + np.log(first_emission + 1e-300)

    for t in range(1, n_obs):
        log_trans = np.log(self._get_transition_matrix(t) + 1e-300)
        log_emit = np.log(self._emission_vector(observations[t]) + 1e-300)
        # scores[i, j]: best log-prob of being in i at t-1 then moving to j.
        scores = log_delta[t - 1][:, None] + log_trans
        # np.argmax keeps the first maximizer, matching a per-state argmax loop.
        psi[t] = np.argmax(scores, axis=0)
        log_delta[t] = scores[psi[t], np.arange(n)] + log_emit

    # Backtrack from the best terminal state.
    best_path = np.zeros(n_obs, dtype=int)
    best_path[n_obs - 1] = np.argmax(log_delta[n_obs - 1])
    for t in range(n_obs - 2, -1, -1):
        best_path[t] = psi[t + 1, best_path[t + 1]]

    return best_path
predict_next
predict_next(
    observations: ndarray,
) -> tuple[float, float, ndarray]

Predict next timestep's points distribution.

Runs forward algorithm, then propagates one step ahead via the transition matrix.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
expected_points

E[Y_{num_timesteps+1} | y_1:num_timesteps]

TYPE: float

variance

Var[Y_{num_timesteps+1} | y_1:num_timesteps] (from law of total variance)

TYPE: float

next_state_dist

P(S_{num_timesteps+1} | y_1:num_timesteps)

TYPE: (ndarray, shape(N))

Source code in fplx/inference/hmm.py
def predict_next(self, observations: np.ndarray) -> tuple[float, float, np.ndarray]:
    """
    Predict next timestep's points distribution.

    Runs the forward algorithm, then propagates the final belief one
    step ahead through the transition matrix.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    expected_points : float
        E[Y_{T+1} | y_1:T]
    variance : float
        Var[Y_{T+1} | y_1:T] (law of total variance), floored at 0.
    next_state_dist : np.ndarray, shape (N,)
        P(S_{T+1} | y_1:T)
    """
    alpha, _ = self.forward(observations)
    current_belief = alpha[-1]  # filtering distribution at the last step

    step_matrix = self._get_transition_matrix(len(observations))
    next_state_dist = current_belief @ step_matrix

    means = np.array([self.emission_params[s][0] for s in range(self.n_states)])
    variances = np.array([self.emission_params[s][1] ** 2 for s in range(self.n_states)])

    expected_points = next_state_dist @ means

    # Law of total variance: Var = E[Var|S] + Var[E|S].
    second_moment = next_state_dist @ (means ** 2)
    variance = next_state_dist @ variances + second_moment - expected_points ** 2

    return expected_points, max(0.0, variance), next_state_dist
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    verbose: bool = False,
)

Learn transition matrix and emission parameters via Baum-Welch EM.

PARAMETER DESCRIPTION
observations

Training sequence.

TYPE: (ndarray, shape(num_timesteps))

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

verbose

Print progress.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
self
Source code in fplx/inference/hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    verbose: bool = False,
):
    """
    Learn transition matrix and emission parameters via Baum-Welch EM.

    Mutates ``self.pi``, ``self.transition_matrix`` and
    ``self.emission_params`` in place across iterations.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)
        Training sequence.
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    verbose : bool
        Print progress.

    Returns
    -------
    self
    """
    num_timesteps = len(observations)
    prev_log_likelihood = -np.inf

    for iteration in range(n_iter):
        # E-step
        forward_messages, scale = self.forward(observations)

        # Backward pass using the same scaling factors as forward()
        backward_messages = np.zeros((num_timesteps, self.n_states))
        backward_messages[num_timesteps - 1] = 1.0
        for t in range(num_timesteps - 2, -1, -1):
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])
            backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
            if scale[t + 1] > 0:
                backward_messages[t] /= scale[t + 1]

        # gamma_t(i) = P(S_t=i | y_1:T)
        smoothed_posteriors = forward_messages * backward_messages
        row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0  # guard against 0/0 on degenerate rows
        smoothed_posteriors /= row_sums

        # transition_posteriors: P(S_t=i, S_{t+1}=j | y_1:num_timesteps) for transition re-estimation
        transition_posteriors = np.zeros((num_timesteps - 1, self.n_states, self.n_states))
        for t in range(num_timesteps - 1):
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])

            # xi_t(i,j) = P(S_t=i, S_{t+1}=j | y_1:T)
            for i in range(self.n_states):
                for j in range(self.n_states):
                    transition_posteriors[t, i, j] = (
                        forward_messages[t, i]
                        * transition_matrix_t_plus_1[i, j]
                        * b_next[j]
                        * backward_messages[t + 1, j]
                    )

            xi_sum = transition_posteriors[t].sum()
            if xi_sum > 0:
                transition_posteriors[t] /= xi_sum

        # M-step
        # Re-estimate initial distribution
        # NOTE(review): this aliases the first gamma row rather than copying;
        # self.pi would track later mutation of smoothed_posteriors — confirm intended.
        self.pi = smoothed_posteriors[0]

        # Re-estimate transition matrix
        for i in range(self.n_states):
            denom = smoothed_posteriors[:-1, i].sum()
            if denom > 0:
                for j in range(self.n_states):
                    self.transition_matrix[i, j] = transition_posteriors[:, i, j].sum() / denom
            # Renormalize
            row_sum = self.transition_matrix[i].sum()
            if row_sum > 0:
                self.transition_matrix[i] /= row_sum

        # re-estimate emission parameters (weighted Gaussian MLE per state)
        for s in range(self.n_states):
            weights = smoothed_posteriors[:, s]
            w_sum = weights.sum()
            if w_sum > 1e-10:
                mu = np.average(observations, weights=weights)
                var = np.average((observations - mu) ** 2, weights=weights)
                sigma = max(np.sqrt(var), 0.1)  # floor to prevent collapse
                self.emission_params[s] = (mu, sigma)

        # log-likelihood (sum of log normalizers from the scaled forward pass)
        log_likelihood = np.sum(np.log(scale + 1e-300))
        if verbose:
            logger.info("EM iteration %d: LL = %.4f", iteration, log_likelihood)

        if abs(log_likelihood - prev_log_likelihood) < tol:
            if verbose:
                logger.info("Converged at iteration %d", iteration)
            break
        prev_log_likelihood = log_likelihood

    return self

KalmanFilter

KalmanFilter(
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
)

1D Kalman Filter for tracking latent point potential.

PARAMETER DESCRIPTION
process_noise

Default process noise variance (form drift rate).

TYPE: float DEFAULT: 1.0

observation_noise

Default observation noise variance (weekly point noise).

TYPE: float DEFAULT: 4.0

initial_state_mean

Initial state estimate.

TYPE: float DEFAULT: 4.0

initial_state_covariance

Initial state uncertainty (variance).

TYPE: float DEFAULT: 2.0

Source code in fplx/inference/kalman.py
def __init__(
    self,
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
):
    """Configure a 1D Kalman filter for tracking latent point potential.

    Parameters
    ----------
    process_noise : float
        Default process noise variance Q (form drift rate).
    observation_noise : float
        Default observation noise variance R (weekly point noise).
    initial_state_mean : float
        Prior mean of the latent state.
    initial_state_covariance : float
        Prior variance of the latent state.
    """
    self.default_process_noise = process_noise
    self.default_observation_noise = observation_noise
    self.initial_state_mean = initial_state_mean
    self.initial_state_covariance = initial_state_covariance

    # Timestep-specific noise overrides, populated via the inject_* methods.
    self._process_noise_overrides: dict[int, float] = {}
    self._observation_noise_overrides: dict[int, float] = {}

    # Filled in by filter(): posterior moments and per-step Kalman gains.
    self.filtered_state_means: Optional[np.ndarray] = None
    self.filtered_state_covariances: Optional[np.ndarray] = None
    self.kalman_gains: Optional[np.ndarray] = None
inject_process_shock
inject_process_shock(timestep: int, multiplier: float)

Inflate process noise at a specific timestep.

Use when news indicates a sudden form change (injury, transfer). process_noise_t = default_process_noise * multiplier.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

multiplier

Process noise multiplier (>1 = more uncertainty about form drift).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_process_shock(self, timestep: int, multiplier: float):
    """Inflate process noise variance at one timestep.

    Intended for news-driven form shocks (injury, transfer): the
    effective process noise becomes default_process_noise * multiplier.

    Parameters
    ----------
    timestep : int
        Gameweek index.
    multiplier : float
        Process noise multiplier (>1 = more uncertainty about form drift).
    """
    shocked_noise = self.default_process_noise * multiplier
    self._process_noise_overrides[timestep] = shocked_noise
inject_observation_noise
inject_observation_noise(timestep: int, factor: float)

Adjust observation noise at a specific timestep.

Use for fixture difficulty: harder opponents → less predictable points. observation_noise_t = default_observation_noise * factor.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

factor

Observation noise factor (>1 = harder fixture, noisier observation).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_observation_noise(self, timestep: int, factor: float):
    """Scale observation noise at one timestep.

    Intended for fixture difficulty: harder opponents make weekly points
    less predictable. The effective observation noise becomes
    default_observation_noise * factor.

    Parameters
    ----------
    timestep : int
        Gameweek index.
    factor : float
        Observation noise factor (>1 = harder fixture, noisier observation).
    """
    scaled_noise = self.default_observation_noise * factor
    self._observation_noise_overrides[timestep] = scaled_noise
clear_overrides
clear_overrides()

Remove all per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def clear_overrides(self):
    """Drop every per-timestep process/observation noise override."""
    for overrides in (self._process_noise_overrides, self._observation_noise_overrides):
        overrides.clear()
get_process_noise_override
get_process_noise_override(
    timestep: int,
) -> Optional[float]

Return explicit process noise override at timestep, if any.

Source code in fplx/inference/kalman.py
def get_process_noise_override(self, timestep: int) -> Optional[float]:
    """Return the process noise override at *timestep*, or None if unset."""
    try:
        return self._process_noise_overrides[timestep]
    except KeyError:
        return None
set_noise_overrides
set_noise_overrides(
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
)

Replace per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def set_noise_overrides(
    self,
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
):
    """Replace both override maps with shallow copies of the given dicts."""
    self._process_noise_overrides = {**process_noise_overrides}
    self._observation_noise_overrides = {**observation_noise_overrides}
copy_with_overrides
copy_with_overrides(
    max_timestep: Optional[int] = None,
) -> KalmanFilter

Create a parameter-identical filter with copied noise overrides.

PARAMETER DESCRIPTION
max_timestep

If provided, only overrides for timesteps <= max_timestep are copied.

TYPE: int DEFAULT: None

Source code in fplx/inference/kalman.py
def copy_with_overrides(self, max_timestep: Optional[int] = None) -> "KalmanFilter":
    """Clone this filter's parameters together with its noise overrides.

    Parameters
    ----------
    max_timestep : int, optional
        If provided, only overrides at timesteps <= max_timestep are
        carried over to the clone.
    """
    clone = KalmanFilter(
        process_noise=self.default_process_noise,
        observation_noise=self.default_observation_noise,
        initial_state_mean=self.initial_state_mean,
        initial_state_covariance=self.initial_state_covariance,
    )

    def _kept(overrides: dict) -> dict:
        # Copy everything, or only the prefix up to max_timestep.
        if max_timestep is None:
            return dict(overrides)
        return {t: v for t, v in overrides.items() if t <= max_timestep}

    clone.set_noise_overrides(
        _kept(self._process_noise_overrides),
        _kept(self._observation_noise_overrides),
    )
    return clone
filter
filter(observations: ndarray)

Run Kalman filter on observations with per-timestep noise.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
filtered_state_means

Filtered state estimates (posterior mean).

TYPE: (ndarray, shape(num_timesteps))

filtered_state_covariances

Filtered state uncertainties (posterior variance).

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def filter(self, observations: np.ndarray):
    """Run the forward Kalman filter with per-timestep noise levels.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)
        Observed weekly points.

    Returns
    -------
    filtered_state_means : np.ndarray, shape (num_timesteps,)
        Posterior state mean after each update.
    filtered_state_covariances : np.ndarray, shape (num_timesteps,)
        Posterior state variance after each update.
    """
    n = len(observations)
    means = np.zeros(n)
    covariances = np.zeros(n)
    gains = np.zeros(n)

    # The t=0 prior is the configured initial state.
    prior_mean = self.initial_state_mean
    prior_cov = self.initial_state_covariance

    for t, y in enumerate(observations):
        q_t = self._get_process_noise(t)
        r_t = self._get_observation_noise(t)

        # Predict step: propagate the previous posterior (skipped at t=0).
        if t:
            prior_mean = means[t - 1]
            prior_cov = covariances[t - 1] + q_t

        # Update step: standard scalar Kalman correction.
        residual = y - prior_mean
        residual_cov = prior_cov + r_t
        gain = prior_cov / residual_cov

        means[t] = prior_mean + gain * residual
        covariances[t] = (1 - gain) * prior_cov
        gains[t] = gain

    self.filtered_state_means = means
    self.filtered_state_covariances = covariances
    self.kalman_gains = gains

    return means, covariances
predict_next
predict_next() -> tuple[float, float]

Predict next observation with uncertainty.

Returns the predictive distribution for Y_{t+1} (the observation), not X_{t+1} (the latent state). This ensures consistency with the HMM predict_next which also returns observation-level variance.

Var[Y_{t+1}] = Var[X_{t+1}|y_{1:t}] + R = (P_t + Q) + R

Must call filter() first.

RETURNS DESCRIPTION
predicted_mean

E[Y_{t+1} | y_{1:t}].

TYPE: float

predicted_var

Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

TYPE: float

Source code in fplx/inference/kalman.py
def predict_next(self) -> tuple[float, float]:
    """One-step-ahead predictive distribution for the next observation.

    Returns the moments of Y_{t+1} (the observation), not the latent
    state X_{t+1}: Var[Y_{t+1}] = (P_t + Q) + R. This matches the HMM
    predict_next, which also reports observation-level variance.

    filter() must have been run first.

    Returns
    -------
    predicted_mean : float
        E[Y_{t+1} | y_{1:t}].
    predicted_var : float
        Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

    Raises
    ------
    RuntimeError
        If filter() has not been called.
    """
    if self.filtered_state_means is None or self.filtered_state_covariances is None:
        raise RuntimeError("Must call filter() before predict_next().")

    next_t = len(self.filtered_state_means)
    q_next = self._get_process_noise(next_t)
    r_next = self._get_observation_noise(next_t)

    predicted_mean = self.filtered_state_means[-1]
    # State-level predicted variance (P + Q) plus observation noise R.
    predicted_var = self.filtered_state_covariances[-1] + q_next + r_next

    return predicted_mean, predicted_var
smooth
smooth(observations: ndarray)

Run RTS smoother (backward pass after forward Kalman filter).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_state_means

Smoothed state estimates.

TYPE: (ndarray, shape(num_timesteps))

smoothed_state_covariances

Smoothed state uncertainties.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def smooth(self, observations: np.ndarray):
    """Rauch-Tung-Striebel smoother: forward filter, then backward pass.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_state_means : np.ndarray, shape (num_timesteps,)
        Smoothed state estimates.
    smoothed_state_covariances : np.ndarray, shape (num_timesteps,)
        Smoothed state uncertainties.
    """
    filt_means, filt_covs = self.filter(observations)
    n = len(observations)

    sm_means = np.zeros(n)
    sm_covs = np.zeros(n)

    # Last smoothed estimate coincides with the last filtered one.
    sm_means[-1] = filt_means[-1]
    sm_covs[-1] = filt_covs[-1]

    for t in reversed(range(n - 1)):
        q_next = self._get_process_noise(t + 1)
        pred_cov = filt_covs[t] + q_next

        # Smoother gain; guard against a degenerate zero predicted variance.
        gain = filt_covs[t] / pred_cov if pred_cov > 0 else 0.0

        sm_means[t] = filt_means[t] + gain * (sm_means[t + 1] - filt_means[t])
        sm_covs[t] = filt_covs[t] + gain * gain * (sm_covs[t + 1] - pred_cov)

    return sm_means, sm_covs

MultivariateHMM

MultivariateHMM(
    position: str = "MID",
    transition_matrix: Optional[ndarray] = None,
    initial_dist: Optional[ndarray] = None,
)

Position-aware HMM with multivariate diagonal Gaussian emissions.

PARAMETER DESCRIPTION
position

GK, DEF, MID, FWD. Determines feature set and default emissions.

TYPE: str DEFAULT: 'MID'

Source code in fplx/inference/multivariate_hmm.py
def __init__(
    self,
    position: str = "MID",
    transition_matrix: Optional[np.ndarray] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Set up a position-aware HMM with diagonal-Gaussian emissions.

    Parameters
    ----------
    position : str
        One of GK, DEF, MID, FWD; selects the feature set and the
        default emission parameters.
    transition_matrix : np.ndarray, optional
        Prior transition matrix; DEFAULT_TRANSITION when omitted.
    initial_dist : np.ndarray, optional
        Initial state distribution; DEFAULT_INITIAL when omitted.
    """
    self.position = position
    self.means, self.vars = _default_emissions(position)

    # Keep copies of the defaults as priors for the MAP-style
    # regularization used inside Baum-Welch (see fit()).
    self.prior_means = self.means.copy()
    self.prior_vars = self.vars.copy()
    if transition_matrix is not None:
        self.prior_A = transition_matrix.copy()
    else:
        self.prior_A = DEFAULT_TRANSITION.copy()

    self.A = self.prior_A.copy()
    if initial_dist is not None:
        self.pi = initial_dist.copy()
    else:
        self.pi = DEFAULT_INITIAL.copy()
    self.n_states = N_STATES
    self.n_features = self.means.shape[1]

    # Per-timestep transition-matrix overrides from news perturbations.
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict,
    confidence: float = 1.0,
)

Perturb transition matrix at timestep (same API as scalar HMM).

Source code in fplx/inference/multivariate_hmm.py
def inject_news_perturbation(self, timestep: int, state_boost: dict, confidence: float = 1.0):
    """Perturb the transition matrix at one timestep (scalar-HMM-compatible API).

    Each boosted target column is scaled by 1 + confidence * (boost - 1),
    then every row is renormalized to sum to one.
    """
    perturbed = self.A.copy()
    for row in range(self.n_states):
        for target, boost in state_boost.items():
            perturbed[row, target] *= 1.0 + confidence * (boost - 1.0)
        total = perturbed[row].sum()
        if total > 0:
            perturbed[row] = perturbed[row] / total
    self._transition_overrides[timestep] = perturbed
forward
forward(observations: ndarray)

Forward algorithm. observations: (T, D).

Source code in fplx/inference/multivariate_hmm.py
def forward(self, observations: np.ndarray):
    """Scaled forward algorithm over a (T, D) observation matrix.

    Returns
    -------
    alpha : np.ndarray, shape (T, n_states)
        Normalized forward messages.
    scale : np.ndarray, shape (T,)
        Per-timestep normalization constants.
    """
    T = len(observations)
    alpha = np.zeros((T, self.n_states))
    scale = np.zeros(T)

    # Initialization from the prior state distribution.
    alpha[0] = self.pi * self._emission_prob_vector(observations[0])
    scale[0] = alpha[0].sum()
    if scale[0] > 0:
        alpha[0] /= scale[0]

    # Recursion through (possibly timestep-overridden) transitions.
    for t in range(1, T):
        emission = self._emission_prob_vector(observations[t])
        alpha[t] = (alpha[t - 1] @ self._get_A(t)) * emission
        scale[t] = alpha[t].sum()
        if scale[t] > 0:
            alpha[t] /= scale[t]
    return alpha, scale
forward_backward
forward_backward(observations: ndarray) -> ndarray

Smoothed posteriors P(S_t | y_{1:T}).

Source code in fplx/inference/multivariate_hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """Smoothed state posteriors P(S_t | y_{1:T}) for each timestep."""
    T = len(observations)
    alpha, scale = self.forward(observations)

    # Backward messages, scaled consistently with forward().
    beta = np.zeros((T, self.n_states))
    beta[T - 1] = 1.0
    for t in reversed(range(T - 1)):
        emission_next = self._emission_prob_vector(observations[t + 1])
        beta[t] = self._get_A(t + 1) @ (emission_next * beta[t + 1])
        if scale[t + 1] > 0:
            beta[t] /= scale[t + 1]

    # Combine and row-normalize (guarding all-zero rows).
    gamma = alpha * beta
    totals = gamma.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0
    return gamma / totals
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence.

Source code in fplx/inference/multivariate_hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """Most likely hidden-state sequence (log-space Viterbi decoding)."""
    T = len(observations)
    log_delta = np.zeros((T, self.n_states))
    backptr = np.zeros((T, self.n_states), dtype=int)

    # Initialization: prior plus first emission, in log space.
    init_log_b = np.array(
        [self._emission_log_prob(observations[0], s) for s in range(self.n_states)]
    )
    log_delta[0] = np.log(self.pi + 1e-300) + init_log_b

    # Recursion: record the best predecessor for each state.
    for t in range(1, T):
        log_A = np.log(self._get_A(t) + 1e-300)
        log_b = np.array(
            [self._emission_log_prob(observations[t], s) for s in range(self.n_states)]
        )
        for s in range(self.n_states):
            candidates = log_delta[t - 1] + log_A[:, s]
            best = int(np.argmax(candidates))
            backptr[t, s] = best
            log_delta[t, s] = candidates[best] + log_b[s]

    # Backtrack from the best final state.
    path = np.zeros(T, dtype=int)
    path[T - 1] = np.argmax(log_delta[T - 1])
    for t in reversed(range(T - 1)):
        path[t] = backptr[t + 1, path[t + 1]]
    return path
predict_next_features
predict_next_features(observations: ndarray)

Predict next gameweek's feature vector.

Returns mean, var (per feature), and state distribution.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_features(self, observations: np.ndarray):
    """Predict the next gameweek's feature vector.

    Returns the per-feature mean and variance of the one-step-ahead
    state mixture, plus the predicted state distribution.
    """
    alpha, _ = self.forward(observations)
    state_dist = alpha[-1] @ self._get_A(len(observations))

    # Mixture moments: E[X], then Var[X] = E[X^2] - E[X]^2 with
    # E[X^2] = sum_s p(s) * (var_s + mean_s^2).
    mean = state_dist @ self.means
    second_moment = state_dist @ self.vars + state_dist @ (self.means**2)
    var = second_moment - mean**2
    return mean, np.maximum(var, 1e-8), state_dist
one_step_point_predictions
one_step_point_predictions(
    observations: ndarray,
) -> ndarray

One-step-ahead point predictions for each historical timestep.

Returns array preds where preds[t] predicts points at timestep t, using information up to t-1 (preds[0] is NaN).

Source code in fplx/inference/multivariate_hmm.py
def one_step_point_predictions(self, observations: np.ndarray) -> np.ndarray:
    """One-step-ahead point predictions for each historical timestep.

    preds[t] is the expected points at timestep t given data up to t-1;
    preds[0] is NaN since there is no history to predict from.
    """
    T = len(observations)
    preds = np.full(T, np.nan)
    if T < 2:
        return preds

    alpha, _ = self.forward(observations)
    for t in range(1, T):
        # Propagate the filtered belief at t-1 one step forward.
        state_dist = alpha[t - 1] @ self._get_A(t)
        preds[t] = self._expected_points_from_state_dist(state_dist)
    return preds
predict_next_points
predict_next_points(
    observations: ndarray,
) -> tuple[float, float]

Convert predicted features → expected FPL points.

Uses FPL scoring rules applied to predicted feature rates.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_points(self, observations: np.ndarray) -> tuple[float, float]:
    """Map the predicted feature vector to expected FPL points.

    Reads the position's "xPts" feature from the one-step feature
    forecast; the expectation is clipped at zero and the variance gets
    a +1 residual floor so downstream fusion never sees near-zero
    uncertainty.
    """
    feat_mean, feat_var, _ = self.predict_next_features(observations)
    xpts_idx = POSITION_FEATURES[self.position].index("xPts")

    expected = max(0.0, float(feat_mean[xpts_idx]))
    variance = float(max(feat_var[xpts_idx], 1e-6) + 1.0)
    return expected, variance
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    prior_weight: float = 0.85,
)

Baum-Welch EM with MAP-style prior interpolation.

PARAMETER DESCRIPTION
observations

Feature matrix with shape (T, D).

TYPE: ndarray

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

prior_weight

Weight on prior parameters in [0, 1]. Higher values increase regularization toward position-level default emissions/transitions.

TYPE: float DEFAULT: 0.85

Source code in fplx/inference/multivariate_hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    prior_weight: float = 0.85,
):
    """Baum-Welch EM with MAP-style prior interpolation.

    Each M-step blends the maximum-likelihood update with the prior
    parameters captured at construction, so short histories cannot drag
    the model far from the position-level defaults.

    Parameters
    ----------
    observations : np.ndarray
        Feature matrix with shape (T, D).
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    prior_weight : float
        Weight on prior parameters in [0, 1]. Higher values increase
        regularization toward position-level default emissions/transitions.
    """
    T = observations.shape[0]
    prev_ll = -np.inf
    prior_weight = float(np.clip(prior_weight, 0.0, 1.0))

    for _ in range(n_iter):
        # E-step: scaled forward pass.
        alpha, scale = self.forward(observations)

        # Backward pass with scaling aligned to forward()
        beta = np.zeros((T, self.n_states))
        beta[T - 1] = 1.0
        for t in range(T - 2, -1, -1):
            b_next = self._emission_prob_vector(observations[t + 1])
            beta[t] = self._get_A(t + 1) @ (b_next * beta[t + 1])
            if scale[t + 1] > 0:
                beta[t] /= scale[t + 1]

        # State posteriors: gamma[t, s] = P(S_t = s | y_{1:T}).
        gamma = alpha * beta
        rs = gamma.sum(axis=1, keepdims=True)
        rs[rs == 0] = 1.0
        gamma /= rs

        # M-step: initial distribution, floored to stay strictly positive.
        self.pi = np.maximum(gamma[0], 1e-10)
        self.pi /= self.pi.sum()

        # M-step: transitions via pairwise posteriors xi[t, i, j],
        # each slice normalized before accumulation.
        xi = np.zeros((T - 1, self.n_states, self.n_states))
        for t in range(T - 1):
            b_next = self._emission_prob_vector(observations[t + 1])
            for i in range(self.n_states):
                for j in range(self.n_states):
                    xi[t, i, j] = alpha[t, i] * self._get_A(t + 1)[i, j] * b_next[j] * beta[t + 1, j]
            xs = xi[t].sum()
            if xs > 0:
                xi[t] /= xs
        for i in range(self.n_states):
            d = gamma[:-1, i].sum()
            if d > 1e-10:
                # Blend the MLE row with the prior row (MAP-style shrinkage),
                # then renormalize the row.
                mle_A = xi[:, i, :].sum(axis=0) / d
                self.A[i] = prior_weight * self.prior_A[i] + (1.0 - prior_weight) * mle_A
            rs = self.A[i].sum()
            if rs > 0:
                self.A[i] /= rs

        # M-step: diagonal-Gaussian emissions, shrunk toward priors with a
        # variance floor to avoid degenerate (collapsed) components.
        for s in range(self.n_states):
            w = gamma[:, s]
            ws = w.sum()
            if ws > 1e-10:
                mle_mu = np.average(observations, axis=0, weights=w)
                diff = observations - mle_mu
                mle_var = np.average(diff**2, axis=0, weights=w)
                self.means[s] = prior_weight * self.prior_means[s] + (1.0 - prior_weight) * mle_mu
                self.vars[s] = np.maximum(
                    prior_weight * self.prior_vars[s] + (1.0 - prior_weight) * mle_var,
                    1e-4,
                )

        # Log-likelihood from the forward scaling constants; stop once the
        # improvement falls below tol.
        ll = np.sum(np.log(scale + 1e-300))
        if abs(ll - prev_ll) < tol:
            break
        prev_ll = ll
    return self

InferenceResult dataclass

InferenceResult(
    filtered_beliefs: ndarray,
    smoothed_beliefs: ndarray,
    viterbi_path: ndarray,
    hmm_predicted_mean: float = 0.0,
    hmm_predicted_var: float = 0.0,
    kalman_filtered: ndarray = (lambda: array([]))(),
    kalman_uncertainty: ndarray = (lambda: array([]))(),
    kf_predicted_mean: float = 0.0,
    kf_predicted_var: float = 0.0,
    fused_mean: ndarray = (lambda: array([]))(),
    fused_var: ndarray = (lambda: array([]))(),
    fusion_alpha: Optional[float] = None,
    predicted_mean: float = 0.0,
    predicted_var: float = 0.0,
)

Container for inference pipeline outputs.

PlayerInferencePipeline

PlayerInferencePipeline(
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
)

Orchestrates HMM + Kalman inference for a single player.

PARAMETER DESCRIPTION
hmm_params

Override HMM parameters: transition_matrix, emission_params, initial_dist.

TYPE: dict DEFAULT: None

kf_params

Override Kalman parameters: process_noise (Q), observation_noise (R), initial_state_mean (x0), initial_state_covariance (P0).

TYPE: dict DEFAULT: None

Source code in fplx/inference/pipeline.py
def __init__(
    self,
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
):
    """Build the per-player HMM + Kalman inference pipeline.

    Parameters
    ----------
    hmm_params : dict, optional
        HMM overrides: transition_matrix, emission_params, initial_dist.
    kf_params : dict, optional
        Kalman overrides: process_noise, observation_noise,
        initial_state_mean, initial_state_covariance.
    hmm_variance_floor : float
        Lower bound applied to HMM variances during fusion.
    news_params : dict, optional
        Overrides merged over DEFAULT_NEWS_PARAMS.
    fusion_mode : str
        Either "precision" or "calibrated_alpha".
    fusion_params : dict, optional
        Overrides merged over DEFAULT_FUSION_PARAMS.

    Raises
    ------
    ValueError
        If fusion_mode is not a recognized mode.
    """
    hmm_overrides = hmm_params or {}
    kf_overrides = kf_params or {}

    self.hmm = HMMInference(
        transition_matrix=hmm_overrides.get("transition_matrix"),
        emission_params=hmm_overrides.get("emission_params"),
        initial_dist=hmm_overrides.get("initial_dist"),
    )
    self.kf = KalmanFilter(
        process_noise=kf_overrides.get("process_noise", 1.0),
        observation_noise=kf_overrides.get("observation_noise", 4.0),
        initial_state_mean=kf_overrides.get("initial_state_mean", 4.0),
        initial_state_covariance=kf_overrides.get("initial_state_covariance", 2.0),
    )
    self.hmm_variance_floor = max(float(hmm_variance_floor), 1e-6)
    self.news_params = _merge_nested_dicts(DEFAULT_NEWS_PARAMS, news_params or {})
    self.fusion_mode = fusion_mode
    self.fusion_params = _merge_nested_dicts(DEFAULT_FUSION_PARAMS, fusion_params or {})
    if self.fusion_mode not in {"precision", "calibrated_alpha"}:
        raise ValueError(
            f"Unknown fusion_mode '{self.fusion_mode}'. Expected one of: 'precision', 'calibrated_alpha'."
        )

    # Set by ingest_observations() / run() respectively.
    self.observations: Optional[np.ndarray] = None
    self._result: Optional[InferenceResult] = None
ingest_observations
ingest_observations(points: ndarray)

Set the player's historical points sequence.

PARAMETER DESCRIPTION
points

Weekly points history.

TYPE: (ndarray, shape(T))

Source code in fplx/inference/pipeline.py
def ingest_observations(self, points: np.ndarray):
    """Store the player's weekly points history as a float array.

    Any cached inference result is discarded, since it no longer
    corresponds to the new observations.

    Parameters
    ----------
    points : np.ndarray, shape (T,)
        Weekly points history.
    """
    self.observations = np.asarray(points, dtype=float)
    self._result = None
inject_news
inject_news(news_signal: dict, timestep: int)

Inject a news signal into the inference at a specific gameweek.

Bridges from existing NewsSignal.generate_signal() output format.

PARAMETER DESCRIPTION
news_signal

Output from NewsSignal.generate_signal(). Must contain: 'availability', 'minutes_risk', 'confidence'.

TYPE: dict

timestep

The gameweek index to apply the perturbation.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_news(
    self,
    news_signal: dict,
    timestep: int,
):
    """Apply a news signal to both models at a given gameweek.

    Bridges from the NewsSignal.generate_signal() output format: the
    signal is classified into a category, the category is mapped to a
    perturbation, and the perturbation is injected as an HMM transition
    boost and/or a Kalman process shock.

    Parameters
    ----------
    news_signal : dict
        Output from NewsSignal.generate_signal(). Must contain:
        'availability', 'minutes_risk', 'confidence'.
    timestep : int
        The gameweek index to apply the perturbation.
    """
    category = _classify_news(
        news_signal.get("availability", 1.0),
        news_signal.get("minutes_risk", 0.0),
        self.news_params.get("classification_thresholds"),
    )
    default_conf = float(self.news_params.get("default_confidence", 0.6))
    confidence = news_signal.get("confidence", default_conf)

    perturbation_map = self.news_params.get("perturbation_map", DEFAULT_NEWS_PERTURBATION_MAP)
    neutral = perturbation_map.get("neutral", {"state_boost": {}, "kalman_shock": 1.0})
    perturbation = perturbation_map.get(category, neutral)

    # HMM side: boost transition probabilities into the indicated states.
    state_boost = perturbation.get("state_boost", {})
    if state_boost:
        self.hmm.inject_news_perturbation(
            timestep=timestep,
            state_boost=state_boost,
            confidence=confidence,
        )

    # Kalman side: widen process noise when the shock is non-trivial.
    kalman_shock = float(perturbation.get("kalman_shock", 1.0))
    if kalman_shock != 1.0:
        self.kf.inject_process_shock(
            timestep=timestep,
            multiplier=kalman_shock,
        )
inject_fixture_difficulty
inject_fixture_difficulty(difficulty: float, timestep: int)

Inject fixture difficulty into Kalman observation noise.

PARAMETER DESCRIPTION
difficulty

Fixture difficulty score (1-5, from FixtureSignal).

TYPE: float

timestep

The gameweek index.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_fixture_difficulty(self, difficulty: float, timestep: int):
    """Translate fixture difficulty into Kalman observation noise.

    Parameters
    ----------
    difficulty : float
        Fixture difficulty score (1-5, from FixtureSignal); harder
        fixtures inflate the observation noise.
    timestep : int
        The gameweek index.
    """
    factor = _difficulty_to_noise_factor(difficulty)
    self.kf.inject_observation_noise(timestep=timestep, factor=factor)
run
run() -> InferenceResult

Run full inference pipeline: HMM + Kalman + Fusion.

RETURNS DESCRIPTION
InferenceResult

All inference outputs.

Source code in fplx/inference/pipeline.py
def run(self) -> InferenceResult:
    """
    Run full inference pipeline: HMM + Kalman + Fusion.

    Runs the HMM track (forward, forward-backward, Viterbi, one-step
    prediction) and the Kalman track over the ingested observations,
    fuses the two according to self.fusion_mode, and caches the result.

    Returns
    -------
    InferenceResult
        All inference outputs.

    Raises
    ------
    RuntimeError
        If no observations were ingested.
    """
    if self.observations is None or len(self.observations) == 0:
        raise RuntimeError("No observations ingested. Call ingest_observations().")

    obs = self.observations

    # HMM track: filtered/smoothed beliefs, best path, one-step forecast.
    alpha, _ = self.hmm.forward(obs)
    gamma = self.hmm.forward_backward(obs)
    viterbi_path = self.hmm.viterbi(obs)
    hmm_pred_mean, hmm_pred_var, _ = self.hmm.predict_next(obs)

    # Kalman track: filtered means/variances and one-step forecast.
    kf_x, kf_P = self.kf.filter(obs)
    kf_pred_mean, kf_pred_var = self.kf.predict_next()

    fusion_alpha = None
    if self.fusion_mode == "calibrated_alpha":
        # Convex combination with a data-calibrated weight:
        # mean = a*KF + (1-a)*HMM; variances combine with a^2 / (1-a)^2,
        # both floored to avoid overconfident components.
        fusion_alpha = self._estimate_fusion_alpha(obs)
        hmm_seq_mean, hmm_seq_var = self._hmm_sequence_moments(gamma)

        fused_mean = fusion_alpha * kf_x + (1.0 - fusion_alpha) * hmm_seq_mean
        fused_var = fusion_alpha**2 * np.maximum(kf_P, 1e-6) + (1.0 - fusion_alpha) ** 2 * np.maximum(
            hmm_seq_var, self.hmm_variance_floor
        )

        pred_mean = fusion_alpha * kf_pred_mean + (1.0 - fusion_alpha) * hmm_pred_mean
        pred_var = fusion_alpha**2 * max(kf_pred_var, 1e-6) + (1.0 - fusion_alpha) ** 2 * max(
            hmm_pred_var, self.hmm_variance_floor
        )
    else:
        # Fusion (full sequence, smoothed)
        # Apply an HMM variance floor so HMM does not become unrealistically
        # overconfident and dominate precision-weighted fusion.
        emission_params_for_fusion = {
            s: (mu, max(std, np.sqrt(self.hmm_variance_floor)))
            for s, (mu, std) in self.hmm.emission_params.items()
        }
        fused_mean, fused_var = fuse_sequences(gamma, kf_x, kf_P, emission_params_for_fusion)

        # Fused one-step-ahead prediction
        pred_mean, pred_var = fuse_estimates(
            hmm_pred_mean,
            max(hmm_pred_var, self.hmm_variance_floor),
            kf_pred_mean,
            kf_pred_var,
        )

    # Cache so predict_next() can reuse the result without recomputing.
    self._result = InferenceResult(
        filtered_beliefs=alpha,
        smoothed_beliefs=gamma,
        viterbi_path=viterbi_path,
        hmm_predicted_mean=hmm_pred_mean,
        hmm_predicted_var=hmm_pred_var,
        kalman_filtered=kf_x,
        kalman_uncertainty=kf_P,
        kf_predicted_mean=kf_pred_mean,
        kf_predicted_var=kf_pred_var,
        fused_mean=fused_mean,
        fused_var=fused_var,
        fusion_alpha=fusion_alpha,
        predicted_mean=pred_mean,
        predicted_var=pred_var,
    )

    return self._result
predict_next
predict_next() -> tuple[float, float]

Get the fused one-step-ahead forecast.

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

Source code in fplx/inference/pipeline.py
def predict_next(self) -> tuple[float, float]:
    """Return the fused one-step-ahead forecast, running run() on demand.

    Returns
    -------
    expected_points : float
    variance : float
    """
    if self._result is None:
        self.run()
    result = self._result
    return result.predicted_mean, result.predicted_var
learn_parameters
learn_parameters(n_iter: int = 20)

Run Baum-Welch to learn HMM parameters from current observations.

Call this before run() if you want data-driven parameters.

Source code in fplx/inference/pipeline.py
def learn_parameters(self, n_iter: int = 20):
    """Fit HMM parameters to the ingested observations via Baum-Welch.

    Call before run() when data-driven parameters are desired.

    Raises
    ------
    RuntimeError
        If no observations have been ingested yet.
    """
    if self.observations is None:
        raise RuntimeError("No observations. Call ingest_observations() first.")
    self.hmm.fit(self.observations, n_iter=n_iter)

batch_enriched_predict

batch_enriched_predict(
    players, alpha=0.3, fixture_info=None
)

Run enriched prediction for all players. Returns ep, var, downside_risk dicts.

Source code in fplx/inference/enriched.py
def batch_enriched_predict(players, alpha=0.3, fixture_info=None):
    """Run enriched prediction for every player; return (ep, var, downside_risk) dicts keyed by player id."""
    expected, variance, downside = {}, {}, {}
    for player in players:
        # Look up this player's upcoming fixture when fixture data was supplied.
        upcoming = None
        if fixture_info:
            upcoming = fixture_info.get(player.id)
        mean, var, risk = enriched_predict(
            player.timeseries, player.position, alpha=alpha, upcoming_fixture=upcoming
        )
        expected[player.id] = mean
        variance[player.id] = var
        downside[player.id] = risk
    return expected, variance, downside

compute_xpoints

compute_xpoints(timeseries, position)

Compute per-GW expected points from ALL underlying components.

Source code in fplx/inference/enriched.py
def compute_xpoints(timeseries, position):
    """Compute per-GW expected points from ALL underlying scoring components."""
    num_gameweeks = len(timeseries)
    if num_gameweeks == 0:
        return np.array([])

    minutes = _safe_col(timeseries, "minutes")
    played = minutes > 0
    # 2 pts for a 60+ minute appearance, 1 pt for any minutes at all, else 0.
    total = np.where(minutes >= 60, 2.0, np.where(played, 1.0, 0.0))

    # Prefer expected goals/assists; fall back to actuals when xG/xA are absent.
    exp_goals = _safe_col(timeseries, "xG")
    if np.all(exp_goals == 0):
        exp_goals = _safe_col(timeseries, "goals").astype(float)
    exp_assists = _safe_col(timeseries, "xA")
    if np.all(exp_assists == 0):
        exp_assists = _safe_col(timeseries, "assists").astype(float)

    # Positive components (position-dependent values via the *_PTS lookups).
    total = total + exp_goals * GOAL_PTS.get(position, 4)
    total = total + exp_assists * ASSIST_PTS
    total = total + _safe_col(timeseries, "clean_sheets") * CS_PTS.get(position, 0)
    total = total + np.floor(_safe_col(timeseries, "goals_conceded") / 2.0) * GC_PTS.get(position, 0)
    total = total + _safe_col(timeseries, "bonus")
    if position == "GK":
        # Goalkeepers earn a point per three saves.
        total = total + np.floor(_safe_col(timeseries, "saves") / 3.0)

    # Negative components: cards, own goals, missed penalties.
    total = total - _safe_col(timeseries, "yellow_cards")
    total = total - _safe_col(timeseries, "red_cards") * 3
    total = total - _safe_col(timeseries, "own_goals") * 2
    total = total - _safe_col(timeseries, "penalties_missed") * 2
    if position == "GK":
        total = total + _safe_col(timeseries, "penalties_saved") * 5

    # Zero out gameweeks with no minutes played.
    return total * played

enriched_predict

enriched_predict(
    timeseries,
    position,
    alpha=0.3,
    lookback=15,
    upcoming_fixture=None,
)

Predict expected points with fixture awareness and semi-variance.

PARAMETER DESCRIPTION
timeseries

TYPE: DataFrame

position

TYPE: str

alpha

EWMA decay.

TYPE: float DEFAULT: 0.3

lookback

Max recent GWs (increased from 10 to 15 for more data).

TYPE: int DEFAULT: 15

upcoming_fixture

{"was_home": bool, "opponent_team": int, "xP": float}

TYPE: dict DEFAULT: None

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

downside_risk

TYPE: float (semi-deviation below E[P])

Source code in fplx/inference/enriched.py
def enriched_predict(timeseries, position, alpha=0.3, lookback=15, upcoming_fixture=None):
    """
    Predict expected points with fixture awareness and semi-variance.

    Parameters
    ----------
    timeseries : pd.DataFrame
    position : str
    alpha : float
        EWMA decay.
    lookback : int
        Max recent GWs (increased from 10 to 15 for more data).
    upcoming_fixture : dict, optional
        {"was_home": bool, "opponent_team": int, "xP": float}

    Returns
    -------
    expected_points : float
    variance : float
    downside_risk : float  (semi-deviation below E[P])
    """
    # No usable history: return a neutral prior (0 pts, generic variance).
    if timeseries.empty or "minutes" not in timeseries.columns:
        return 0.0, 4.0, 0.0

    # Restrict to the most recent `lookback` gameweeks.
    ts = timeseries.tail(lookback).copy()
    mins = _safe_col(ts, "minutes")
    played_mask = mins > 0
    n_played = int(played_mask.sum())

    # Too few appearances to estimate anything reliably.
    if n_played < 2:
        return 0.0, 4.0, 0.0

    # Availability proxy: share of the last (up to) 3 GWs actually played.
    avail = float(played_mask[-min(3, len(played_mask)) :].mean())
    if avail < 0.1:
        # Effectively unavailable: near-zero expectation, tight variance.
        return 0.0, 1.0, 0.0

    # Structural expected points per GW from all scoring components.
    xpts = compute_xpoints(ts, position)
    played_xpts = xpts[played_mask]

    # Recency-weighted form estimate over played GWs, floored at zero.
    conditional_ep = max(0.0, _ewma(played_xpts, alpha))

    # Fixture adjustments: scale by home/away and opponent-specific factors
    # derived from the player's own history.
    fixture_mult = 1.0
    if upcoming_fixture:
        hf, af = _home_away_factor(timeseries)
        fixture_mult = hf if upcoming_fixture.get("was_home", False) else af
        opp_id = upcoming_fixture.get("opponent_team", 0)
        if opp_id > 0:
            fixture_mult *= _opponent_mult(timeseries, opp_id)
    conditional_ep *= fixture_mult

    # Ensemble with FPL's own xP projection when supplied (70/30 blend).
    if upcoming_fixture and upcoming_fixture.get("xP", 0) > 0:
        conditional_ep = 0.7 * conditional_ep + 0.3 * upcoming_fixture["xP"]

    # Variance and semi-variance from residuals (actual minus expected points).
    downside_risk = 0.0
    if "points" in ts.columns:
        pts = _safe_col(ts, "points")
        played_pts = pts[played_mask]
        residuals = played_pts - played_xpts
        # +1.0 keeps the variance strictly positive even with a perfect fit.
        var_estimate = float(np.var(residuals)) + 1.0

        # Semi-variance: only negative residuals (actual < expected)
        neg_residuals = residuals[residuals < 0]
        if len(neg_residuals) >= 2:
            downside_risk = float(np.sqrt(np.mean(neg_residuals**2)))
        else:
            # Too few downside samples: fall back to half the full deviation.
            downside_risk = float(np.sqrt(var_estimate)) * 0.5
    else:
        var_estimate = 4.0
        downside_risk = 1.0

    # Marginalize over availability: E[P] = P(plays) * E[P | plays]; the
    # variance picks up a Bernoulli term for the play/no-play uncertainty.
    ep = conditional_ep * avail
    var_out = avail * var_estimate + avail * (1 - avail) * conditional_ep**2
    dr_out = downside_risk * avail

    return ep, var_out, dr_out

fuse_estimates

fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]

Fuse a single HMM estimate with a single Kalman estimate.

Uses inverse-variance weighting:

    fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
    fused_var  = 1 / (1/hmm_var + 1/kf_var)

PARAMETER DESCRIPTION
hmm_mean

HMM expected points (from state posterior weighted emission means).

TYPE: float

hmm_var

HMM variance (law of total variance over state posterior).

TYPE: float

kf_mean

Kalman filtered point estimate.

TYPE: float

kf_var

Kalman filtered uncertainty (posterior variance).

TYPE: float

RETURNS DESCRIPTION
fused_mean

TYPE: float

fused_var

TYPE: float

Source code in fplx/inference/fusion.py
def fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]:
    """
    Fuse one HMM estimate with one Kalman estimate via inverse-variance weighting.

        fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
        fused_var  = 1 / (1/hmm_var + 1/kf_var)

    Parameters
    ----------
    hmm_mean : float
        HMM expected points (from state posterior weighted emission means).
    hmm_var : float
        HMM variance (law of total variance over state posterior).
    kf_mean : float
        Kalman filtered point estimate.
    kf_var : float
        Kalman filtered uncertainty (posterior variance).

    Returns
    -------
    fused_mean : float
    fused_var : float
    """
    # Floor variances so the precisions below stay finite.
    safe_hmm_var = max(hmm_var, 1e-6)
    safe_kf_var = max(kf_var, 1e-6)

    # Precision (= inverse variance) weights each source by its certainty.
    weight_hmm = 1.0 / safe_hmm_var
    weight_kf = 1.0 / safe_kf_var
    weight_total = weight_hmm + weight_kf

    fused = (weight_hmm * hmm_mean + weight_kf * kf_mean) / weight_total
    return fused, 1.0 / weight_total

fuse_sequences

fuse_sequences(
    hmm_gamma: ndarray,
    kalman_x: ndarray,
    kalman_P: ndarray,
    emission_params: dict,
) -> tuple[ndarray, ndarray]

Fuse full sequences of HMM posteriors and Kalman estimates.

PARAMETER DESCRIPTION
hmm_gamma

Smoothed state posteriors from HMM.

TYPE: (ndarray, shape(T, N))

kalman_x

Kalman filtered estimates.

TYPE: (ndarray, shape(T))

kalman_P

Kalman filtered uncertainties.

TYPE: (ndarray, shape(T))

emission_params

{state_index: (mean, std)} from HMM.

TYPE: dict

RETURNS DESCRIPTION
fused_mean

TYPE: (ndarray, shape(T))

fused_var

TYPE: (ndarray, shape(T))

Source code in fplx/inference/fusion.py
def fuse_sequences(
    hmm_gamma: np.ndarray,
    kalman_x: np.ndarray,
    kalman_P: np.ndarray,
    emission_params: dict,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Fuse full sequences of HMM posteriors and Kalman estimates.

    Per timestep, the HMM state mixture is collapsed to a (mean, variance)
    pair via the law of total variance, then combined with the Kalman
    estimate by inverse-variance weighting.

    Parameters
    ----------
    hmm_gamma : np.ndarray, shape (T, N)
        Smoothed state posteriors from HMM.
    kalman_x : np.ndarray, shape (T,)
        Kalman filtered estimates.
    kalman_P : np.ndarray, shape (T,)
        Kalman filtered uncertainties.
    emission_params : dict
        {state_index: (mean, std)} from HMM.

    Returns
    -------
    fused_mean : np.ndarray, shape (T,)
    fused_var : np.ndarray, shape (T,)
    """
    # Fix: removed unused local `T = len(kalman_x)`.
    n_states = hmm_gamma.shape[1]

    state_means = np.array([emission_params[s][0] for s in range(n_states)])
    state_vars = np.array([emission_params[s][1] ** 2 for s in range(n_states)])

    # Law of total variance: Var = E[Var|S] + E[E[.|S]^2] - (E[.])^2
    hmm_mean = hmm_gamma @ state_means
    hmm_var = (
        hmm_gamma @ state_vars
        + hmm_gamma @ (state_means ** 2)
        - hmm_mean ** 2
    )

    # Floor variances so the precisions below stay finite.
    hmm_var = np.maximum(hmm_var, 1e-6)
    kalman_P_safe = np.maximum(kalman_P, 1e-6)

    # Inverse-variance weighting — optimal for independent Gaussian sources.
    precision_hmm = 1.0 / hmm_var
    precision_kf = 1.0 / kalman_P_safe
    total_precision = precision_hmm + precision_kf

    fused_mean = (precision_hmm * hmm_mean + precision_kf * kalman_x) / total_precision
    fused_var = 1.0 / total_precision

    return fused_mean, fused_var

build_feature_matrix

build_feature_matrix(
    timeseries: DataFrame, position: str
) -> ndarray

Extract position-specific feature matrix from player timeseries.

PARAMETER DESCRIPTION
timeseries

Player gameweek history from vaastav dataset.

TYPE: DataFrame

position

GK, DEF, MID, or FWD.

TYPE: str

RETURNS DESCRIPTION
np.ndarray, shape (T, D) where D depends on position.
Source code in fplx/inference/multivariate_hmm.py
def build_feature_matrix(timeseries: pd.DataFrame, position: str) -> np.ndarray:
    """
    Extract a position-specific feature matrix from a player timeseries.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Player gameweek history from vaastav dataset.
    position : str
        GK, DEF, MID, or FWD.

    Returns
    -------
    np.ndarray, shape (T, D) where D depends on position.
    """
    num_gameweeks = len(timeseries)
    features = np.zeros((num_gameweeks, 2))

    # Column 1: fraction of a full 90 minutes played, clipped into [0, 1].
    minutes = _safe_col(timeseries, "minutes")
    features[:, 1] = np.clip(minutes / 90.0, 0.0, 1.0)

    # Column 0: structural xPts projected from the rich per-GW event space.
    features[:, 0] = compute_xpoints(timeseries, position)
    return features

enriched

Fixture-aware enriched prediction with semi-variance for downside risk.

Improvements over base enriched:

- Cards, own goals, penalties (negative pts previously unmodeled)
- Home/away adjustment from player history
- Opponent strength adjustment from player history
- Ensemble with FPL's xP when available
- Semi-variance: only penalize downside deviation below E[P]
- Longer lookback with exponential decay (more data, recency bias)

compute_xpoints
compute_xpoints(timeseries, position)

Compute per-GW expected points from ALL underlying components.

Source code in fplx/inference/enriched.py
def compute_xpoints(timeseries, position):
    """Compute per-GW expected points from ALL underlying components.

    Prices each scoring event (appearance, goals, assists, clean sheets,
    goals conceded, bonus, saves, cards, own goals, penalties) at its
    point value and sums them per gameweek. Returns an array of length
    len(timeseries); empty input yields an empty array.
    """
    n = len(timeseries)
    if n == 0:
        return np.array([])

    mins = _safe_col(timeseries, "minutes")
    played = mins > 0
    # 2 pts for a 60+ minute appearance, 1 pt for any minutes, else 0.
    appearance = np.where(mins >= 60, 2.0, np.where(played, 1.0, 0.0))

    # Prefer xG/xA; fall back to actual goals/assists when no xG/xA data.
    xg = _safe_col(timeseries, "xG")
    if np.all(xg == 0):
        xg = _safe_col(timeseries, "goals").astype(float)
    xa = _safe_col(timeseries, "xA")
    if np.all(xa == 0):
        xa = _safe_col(timeseries, "assists").astype(float)

    # Position-dependent values come from the GOAL_PTS/CS_PTS/GC_PTS lookups.
    goal_c = xg * GOAL_PTS.get(position, 4)
    assist_c = xa * ASSIST_PTS
    cs_c = _safe_col(timeseries, "clean_sheets") * CS_PTS.get(position, 0)
    gc_c = np.floor(_safe_col(timeseries, "goals_conceded") / 2.0) * GC_PTS.get(position, 0)
    bonus_c = _safe_col(timeseries, "bonus")

    # Save points apply only to goalkeepers: one point per three saves.
    saves_c = np.zeros(n)
    if position == "GK":
        saves_c = np.floor(_safe_col(timeseries, "saves") / 3.0)

    # Negative components: cards, own goals, missed penalties.
    yc = _safe_col(timeseries, "yellow_cards") * (-1)
    rc = _safe_col(timeseries, "red_cards") * (-3)
    og = _safe_col(timeseries, "own_goals") * (-2)
    pm = _safe_col(timeseries, "penalties_missed") * (-2)
    ps = np.zeros(n)
    if position == "GK":
        ps = _safe_col(timeseries, "penalties_saved") * 5

    # Sum all components, then zero out gameweeks with no minutes.
    return (
        appearance + goal_c + assist_c + cs_c + gc_c + bonus_c + saves_c + yc + rc + og + pm + ps
    ) * played
enriched_predict
enriched_predict(
    timeseries,
    position,
    alpha=0.3,
    lookback=15,
    upcoming_fixture=None,
)

Predict expected points with fixture awareness and semi-variance.

PARAMETER DESCRIPTION
timeseries

TYPE: DataFrame

position

TYPE: str

alpha

EWMA decay.

TYPE: float DEFAULT: 0.3

lookback

Max recent GWs (increased from 10 to 15 for more data).

TYPE: int DEFAULT: 15

upcoming_fixture

{"was_home": bool, "opponent_team": int, "xP": float}

TYPE: dict DEFAULT: None

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

downside_risk

TYPE: float (semi-deviation below E[P])

Source code in fplx/inference/enriched.py
def enriched_predict(timeseries, position, alpha=0.3, lookback=15, upcoming_fixture=None):
    """
    Predict expected points with fixture awareness and semi-variance.

    Parameters
    ----------
    timeseries : pd.DataFrame
    position : str
    alpha : float
        EWMA decay.
    lookback : int
        Max recent GWs (increased from 10 to 15 for more data).
    upcoming_fixture : dict, optional
        {"was_home": bool, "opponent_team": int, "xP": float}

    Returns
    -------
    expected_points : float
    variance : float
    downside_risk : float  (semi-deviation below E[P])
    """
    # No usable history: return a neutral prior (0 pts, generic variance).
    if timeseries.empty or "minutes" not in timeseries.columns:
        return 0.0, 4.0, 0.0

    # Restrict to the most recent `lookback` gameweeks.
    ts = timeseries.tail(lookback).copy()
    mins = _safe_col(ts, "minutes")
    played_mask = mins > 0
    n_played = int(played_mask.sum())

    # Too few appearances to estimate anything reliably.
    if n_played < 2:
        return 0.0, 4.0, 0.0

    # Availability proxy: share of the last (up to) 3 GWs actually played.
    avail = float(played_mask[-min(3, len(played_mask)) :].mean())
    if avail < 0.1:
        # Effectively unavailable: near-zero expectation, tight variance.
        return 0.0, 1.0, 0.0

    # Structural expected points per GW from all scoring components.
    xpts = compute_xpoints(ts, position)
    played_xpts = xpts[played_mask]

    # Recency-weighted form estimate over played GWs, floored at zero.
    conditional_ep = max(0.0, _ewma(played_xpts, alpha))

    # Fixture adjustments: scale by home/away and opponent-specific factors
    # derived from the player's own history.
    fixture_mult = 1.0
    if upcoming_fixture:
        hf, af = _home_away_factor(timeseries)
        fixture_mult = hf if upcoming_fixture.get("was_home", False) else af
        opp_id = upcoming_fixture.get("opponent_team", 0)
        if opp_id > 0:
            fixture_mult *= _opponent_mult(timeseries, opp_id)
    conditional_ep *= fixture_mult

    # Ensemble with FPL's own xP projection when supplied (70/30 blend).
    if upcoming_fixture and upcoming_fixture.get("xP", 0) > 0:
        conditional_ep = 0.7 * conditional_ep + 0.3 * upcoming_fixture["xP"]

    # Variance and semi-variance from residuals (actual minus expected points).
    downside_risk = 0.0
    if "points" in ts.columns:
        pts = _safe_col(ts, "points")
        played_pts = pts[played_mask]
        residuals = played_pts - played_xpts
        # +1.0 keeps the variance strictly positive even with a perfect fit.
        var_estimate = float(np.var(residuals)) + 1.0

        # Semi-variance: only negative residuals (actual < expected)
        neg_residuals = residuals[residuals < 0]
        if len(neg_residuals) >= 2:
            downside_risk = float(np.sqrt(np.mean(neg_residuals**2)))
        else:
            # Too few downside samples: fall back to half the full deviation.
            downside_risk = float(np.sqrt(var_estimate)) * 0.5
    else:
        var_estimate = 4.0
        downside_risk = 1.0

    # Marginalize over availability: E[P] = P(plays) * E[P | plays]; the
    # variance picks up a Bernoulli term for the play/no-play uncertainty.
    ep = conditional_ep * avail
    var_out = avail * var_estimate + avail * (1 - avail) * conditional_ep**2
    dr_out = downside_risk * avail

    return ep, var_out, dr_out
batch_enriched_predict
batch_enriched_predict(
    players, alpha=0.3, fixture_info=None
)

Run enriched prediction for all players. Returns ep, var, downside_risk dicts.

Source code in fplx/inference/enriched.py
def batch_enriched_predict(players, alpha=0.3, fixture_info=None):
    """Run enriched prediction for all players. Returns ep, var, downside_risk dicts.

    Parameters
    ----------
    players : iterable
        Objects exposing ``id``, ``timeseries`` and ``position`` attributes.
    alpha : float
        EWMA decay, forwarded to ``enriched_predict``.
    fixture_info : dict, optional
        Maps player id -> upcoming-fixture dict; players without an entry
        get no fixture adjustment.

    Returns
    -------
    (ep, ev, dr) : tuple of dicts keyed by player id
        Expected points, variance, and downside risk per player.
    """
    ep, ev, dr = {}, {}, {}
    for p in players:
        # Look up this player's upcoming fixture when fixture data was given.
        fix = fixture_info.get(p.id) if fixture_info else None
        mu, var, dsr = enriched_predict(p.timeseries, p.position, alpha=alpha, upcoming_fixture=fix)
        ep[p.id] = mu
        ev[p.id] = var
        dr[p.id] = dsr
    return ep, ev, dr

fusion

Fusion of HMM and Kalman Filter outputs.

Combines discrete state posteriors (HMM) with continuous estimates (Kalman) using inverse-variance weighting — optimal under Gaussian independence.

fuse_estimates
fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]

Fuse a single HMM estimate with a single Kalman estimate.

Uses inverse-variance weighting:

    fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
    fused_var  = 1 / (1/hmm_var + 1/kf_var)

PARAMETER DESCRIPTION
hmm_mean

HMM expected points (from state posterior weighted emission means).

TYPE: float

hmm_var

HMM variance (law of total variance over state posterior).

TYPE: float

kf_mean

Kalman filtered point estimate.

TYPE: float

kf_var

Kalman filtered uncertainty (posterior variance).

TYPE: float

RETURNS DESCRIPTION
fused_mean

TYPE: float

fused_var

TYPE: float

Source code in fplx/inference/fusion.py
def fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]:
    """
    Fuse a single HMM estimate with a single Kalman estimate.

    Uses inverse-variance weighting:
        fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
        fused_var  = 1 / (1/hmm_var + 1/kf_var)

    Parameters
    ----------
    hmm_mean : float
        HMM expected points (from state posterior weighted emission means).
    hmm_var : float
        HMM variance (law of total variance over state posterior).
    kf_mean : float
        Kalman filtered point estimate.
    kf_var : float
        Kalman filtered uncertainty (posterior variance).

    Returns
    -------
    fused_mean : float
    fused_var : float
    """
    # Floor both variances so the precisions (1/var) below stay finite.
    hmm_var = max(hmm_var, 1e-6)
    kf_var = max(kf_var, 1e-6)

    # Precision = inverse variance; weights each source by its certainty.
    precision_hmm = 1.0 / hmm_var
    precision_kf = 1.0 / kf_var
    total_precision = precision_hmm + precision_kf

    # Precision-weighted mean; fused variance is the inverse total precision.
    fused_mean = (precision_hmm * hmm_mean + precision_kf * kf_mean) / total_precision
    fused_var = 1.0 / total_precision

    return fused_mean, fused_var
fuse_sequences
fuse_sequences(
    hmm_gamma: ndarray,
    kalman_x: ndarray,
    kalman_P: ndarray,
    emission_params: dict,
) -> tuple[ndarray, ndarray]

Fuse full sequences of HMM posteriors and Kalman estimates.

PARAMETER DESCRIPTION
hmm_gamma

Smoothed state posteriors from HMM.

TYPE: (ndarray, shape(T, N))

kalman_x

Kalman filtered estimates.

TYPE: (ndarray, shape(T))

kalman_P

Kalman filtered uncertainties.

TYPE: (ndarray, shape(T))

emission_params

{state_index: (mean, std)} from HMM.

TYPE: dict

RETURNS DESCRIPTION
fused_mean

TYPE: (ndarray, shape(T))

fused_var

TYPE: (ndarray, shape(T))

Source code in fplx/inference/fusion.py
def fuse_sequences(
    hmm_gamma: np.ndarray,
    kalman_x: np.ndarray,
    kalman_P: np.ndarray,
    emission_params: dict,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Fuse full sequences of HMM posteriors and Kalman estimates.

    Per timestep, the HMM state mixture is collapsed to a (mean, variance)
    pair via the law of total variance, then combined with the Kalman
    estimate by inverse-variance weighting.

    Parameters
    ----------
    hmm_gamma : np.ndarray, shape (T, N)
        Smoothed state posteriors from HMM.
    kalman_x : np.ndarray, shape (T,)
        Kalman filtered estimates.
    kalman_P : np.ndarray, shape (T,)
        Kalman filtered uncertainties.
    emission_params : dict
        {state_index: (mean, std)} from HMM.

    Returns
    -------
    fused_mean : np.ndarray, shape (T,)
    fused_var : np.ndarray, shape (T,)
    """
    # Fix: removed unused local `T = len(kalman_x)`.
    n_states = hmm_gamma.shape[1]

    state_means = np.array([emission_params[s][0] for s in range(n_states)])
    state_vars = np.array([emission_params[s][1] ** 2 for s in range(n_states)])

    # Law of total variance: Var = E[Var|S] + E[E[.|S]^2] - (E[.])^2
    hmm_mean = hmm_gamma @ state_means
    hmm_var = (
        hmm_gamma @ state_vars
        + hmm_gamma @ (state_means ** 2)
        - hmm_mean ** 2
    )

    # Floor variances so the precisions below stay finite.
    hmm_var = np.maximum(hmm_var, 1e-6)
    kalman_P_safe = np.maximum(kalman_P, 1e-6)

    # Inverse-variance weighting — optimal for independent Gaussian sources.
    precision_hmm = 1.0 / hmm_var
    precision_kf = 1.0 / kalman_P_safe
    total_precision = precision_hmm + precision_kf

    fused_mean = (precision_hmm * hmm_mean + precision_kf * kalman_x) / total_precision
    fused_var = 1.0 / total_precision

    return fused_mean, fused_var

hmm

Hidden Markov Model for player form state inference.

Implements:

- Forward algorithm (online filtering)
- Forward-Backward (offline smoothing)
- Viterbi decoding (most likely state sequence)
- Dynamic transition matrix perturbation (news signal injection)
- Baum-Welch parameter learning (EM)
- One-step-ahead prediction with uncertainty

HMMInference
HMMInference(
    transition_matrix: Optional[ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[ndarray] = None,
)

Hidden Markov Model for discrete player form states.

Supports dynamic transition matrix perturbation so that external signals (news, injuries) can shift state probabilities mid-sequence.

PARAMETER DESCRIPTION
transition_matrix

transition_matrix[i,j] = P(S_{t+1}=j | S_t=i). Rows must sum to 1.

TYPE: (ndarray, shape(N, N)) DEFAULT: None

emission_params

{state_index: (mean, std)} for Gaussian emissions.

TYPE: dict DEFAULT: None

initial_dist

Prior over initial state.

TYPE: (ndarray, shape(N)) DEFAULT: None

Source code in fplx/inference/hmm.py
def __init__(
    self,
    transition_matrix: Optional[np.ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Initialize the HMM, copying supplied arrays and falling back to module defaults."""
    if transition_matrix is None:
        self.transition_matrix = DEFAULT_TRANSITION_MATRIX.copy()
    else:
        self.transition_matrix = transition_matrix.copy()

    self.emission_params = emission_params or dict(DEFAULT_EMISSION_PARAMS)

    if initial_dist is None:
        self.pi = DEFAULT_INITIAL_DIST.copy()
    else:
        self.pi = initial_dist.copy()
    self.n_states = len(self.pi)

    # Per-timestep transition overrides used for news-signal injection:
    # {timestep: modified transition matrix applied only at that step}.
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
)

Perturb transition matrix at a specific timestep based on news.

For each source state, the transition probability toward boosted target states is multiplied by the boost factor (scaled by confidence), then the row is renormalized.

PARAMETER DESCRIPTION
timestep

The gameweek at which the perturbation applies.

TYPE: int

state_boost

{target_state: multiplicative_boost}. E.g., {0: 10.0} means "10x more likely to transition to Injured."

TYPE: dict[int, float]

confidence

Scales the perturbation. 0 = no effect, 1 = full effect.

TYPE: float DEFAULT: 1.0

Source code in fplx/inference/hmm.py
def inject_news_perturbation(
    self,
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
):
    """
    Perturb the transition matrix at one timestep based on a news signal.

    Each row's probability of transitioning into a boosted target state is
    multiplied by the confidence-scaled boost factor, and the row is then
    renormalized so it remains a valid distribution. The base matrix is
    left untouched; the perturbed copy is stored as a per-step override.

    Parameters
    ----------
    timestep : int
        The gameweek at which the perturbation applies.
    state_boost : dict[int, float]
        {target_state: multiplicative_boost}. E.g., {0: 10.0} means
        "10x more likely to transition to Injured."
    confidence : float
        Scales the perturbation. 0 = no effect, 1 = full effect.
    """
    perturbed = self.transition_matrix.copy()

    for row in range(self.n_states):
        for target, boost in state_boost.items():
            # Interpolate between 1 (no effect) and the full boost factor.
            perturbed[row, target] *= 1.0 + confidence * (boost - 1.0)

        # Renormalize the row so it still sums to one.
        total = perturbed[row].sum()
        if total > 0:
            perturbed[row] /= total

    self._transition_overrides[timestep] = perturbed
clear_perturbations
clear_perturbations()

Remove all per-timestep transition overrides.

Source code in fplx/inference/hmm.py
def clear_perturbations(self):
    """Remove all per-timestep transition overrides.

    Clears the override dict in place (does not rebind it), so any
    external references to the same dict observe the reset as well.
    """
    self._transition_overrides.clear()
forward
forward(observations: ndarray)

Forward algorithm with dynamic transition matrices.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
forward_messages

Normalized forward messages. forward_messages[t] = P(S_t | y_1:t)

TYPE: (ndarray, shape(num_timesteps, N))

scale

Per-timestep normalization constants.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/hmm.py
def forward(self, observations: np.ndarray):
    """
    Forward algorithm with dynamic (per-timestep) transition matrices.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    forward_messages : np.ndarray, shape (num_timesteps, N)
        Normalized forward messages. forward_messages[t] = P(S_t | y_1:t)
    scale : np.ndarray, shape (num_timesteps,)
        Per-timestep normalization constants.
    """
    n_steps = len(observations)
    messages = np.zeros((n_steps, self.n_states))
    scale = np.zeros(n_steps)

    # Initialization: prior times the first emission likelihood.
    messages[0] = self.pi * self._emission_vector(observations[0])
    scale[0] = messages[0].sum()
    if scale[0] > 0:
        messages[0] /= scale[0]

    # Recursion: propagate through the (possibly overridden) transition
    # matrix for each step, weight by the emission likelihood, normalize.
    for t in range(1, n_steps):
        step_transition = self._get_transition_matrix(t)
        likelihood = self._emission_vector(observations[t])
        messages[t] = (messages[t - 1] @ step_transition) * likelihood
        scale[t] = messages[t].sum()
        if scale[t] > 0:
            messages[t] /= scale[t]

    return messages, scale
forward_backward
forward_backward(observations: ndarray) -> ndarray

Compute smoothed posteriors P(S_t | y_1:num_timesteps).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_posteriors

smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)

TYPE: (ndarray, shape(num_timesteps, N))

Source code in fplx/inference/hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """
    Compute smoothed posteriors P(S_t | y_1:num_timesteps).

    Runs the scaled forward pass, then a backward pass that reuses the
    same per-timestep scale factors, and normalizes their product.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_posteriors : np.ndarray, shape (num_timesteps, N)
        smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)
    """
    num_timesteps = len(observations)
    forward_messages, scale = self.forward(observations)

    # Backward recursion, initialized to 1 at the final timestep.
    backward_messages = np.zeros((num_timesteps, self.n_states))
    backward_messages[num_timesteps - 1] = 1.0

    for t in range(num_timesteps - 2, -1, -1):
        # Use the (possibly overridden) transition matrix for step t+1.
        transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
        b_next = self._emission_vector(observations[t + 1])
        backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
        # Rescale with the forward-pass constant to prevent underflow.
        if scale[t + 1] > 0:
            backward_messages[t] /= scale[t + 1]

    # Posterior is proportional to forward * backward; normalize each
    # row, guarding against all-zero rows.
    smoothed_posteriors = forward_messages * backward_messages
    row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    smoothed_posteriors /= row_sums

    return smoothed_posteriors
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence via Viterbi decoding.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
best_path

TYPE: np.ndarray of int, shape (num_timesteps,)

Source code in fplx/inference/hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """
    Most likely state sequence via Viterbi decoding.

    Works in log space (with a 1e-300 floor inside each log to avoid
    log(0)) and honors per-timestep transition-matrix overrides.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    best_path : np.ndarray of int, shape (num_timesteps,)
    """
    num_timesteps = len(observations)
    log_pi = np.log(self.pi + 1e-300)

    # log_probabilities[t, s]: best log-prob of any path ending in s at t.
    log_probabilities = np.zeros((num_timesteps, self.n_states))
    backpointers = np.zeros((num_timesteps, self.n_states), dtype=int)

    b0 = self._emission_vector(observations[0])
    log_probabilities[0] = log_pi + np.log(b0 + 1e-300)

    for t in range(1, num_timesteps):
        transition_matrix_t = self._get_transition_matrix(t)
        log_transition_matrix_t = np.log(transition_matrix_t + 1e-300)
        b = self._emission_vector(observations[t])
        for s in range(self.n_states):
            # Pick the best predecessor state for state s at time t.
            candidates = log_probabilities[t - 1] + log_transition_matrix_t[:, s]
            backpointers[t, s] = np.argmax(candidates)
            log_probabilities[t, s] = candidates[backpointers[t, s]] + np.log(b[s] + 1e-300)

    # Backtrack from the best final state through the backpointers.
    best_path = np.zeros(num_timesteps, dtype=int)
    best_path[num_timesteps - 1] = np.argmax(log_probabilities[num_timesteps - 1])
    for t in range(num_timesteps - 2, -1, -1):
        best_path[t] = backpointers[t + 1, best_path[t + 1]]

    return best_path
predict_next
predict_next(
    observations: ndarray,
) -> tuple[float, float, ndarray]

Predict next timestep's points distribution.

Runs forward algorithm, then propagates one step ahead via the transition matrix.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
expected_points

E[Y_{num_timesteps+1} | y_1:num_timesteps]

TYPE: float

variance

Var[Y_{num_timesteps+1} | y_1:num_timesteps] (from law of total variance)

TYPE: float

next_state_dist

P(S_{num_timesteps+1} | y_1:num_timesteps)

TYPE: (ndarray, shape(N))

Source code in fplx/inference/hmm.py
def predict_next(self, observations: np.ndarray) -> tuple[float, float, np.ndarray]:
    """
    Predict the next timestep's points distribution.

    Runs the forward algorithm to obtain the current filtered belief,
    then propagates it one step ahead through the transition matrix.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    expected_points : float
        E[Y_{T+1} | y_1:T]
    variance : float
        Var[Y_{T+1} | y_1:T] (law of total variance), floored at 0.
    next_state_dist : np.ndarray, shape (N,)
        P(S_{T+1} | y_1:T)
    """
    messages, _ = self.forward(observations)
    belief = messages[-1]  # filtered belief over the current state

    # Propagate one step through the transition matrix for the next GW.
    step_transition = self._get_transition_matrix(len(observations))
    next_state_dist = belief @ step_transition

    means = np.array([self.emission_params[s][0] for s in range(self.n_states)])
    variances = np.array([self.emission_params[s][1] ** 2 for s in range(self.n_states)])

    expected_points = next_state_dist @ means

    # Law of total variance: Var = E[Var|S] + Var[E|S].
    variance = next_state_dist @ variances + next_state_dist @ (means**2) - expected_points**2

    return expected_points, max(0.0, variance), next_state_dist
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    verbose: bool = False,
)

Learn transition matrix and emission parameters via Baum-Welch EM.

PARAMETER DESCRIPTION
observations

Training sequence.

TYPE: (ndarray, shape(num_timesteps))

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

verbose

Print progress.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
self
Source code in fplx/inference/hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    verbose: bool = False,
):
    """
    Learn transition matrix and emission parameters via Baum-Welch EM.

    Each iteration performs a scaled forward-backward pass (E-step) and
    then re-estimates the initial distribution, the transition matrix, and
    the Gaussian emission parameters (M-step). Iteration stops when the
    log-likelihood improves by less than `tol`, or after `n_iter` rounds.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)
        Training sequence.
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    verbose : bool
        Print progress.

    Returns
    -------
    self
    """
    num_timesteps = len(observations)
    prev_log_likelihood = -np.inf

    for iteration in range(n_iter):
        # E-step
        forward_messages, scale = self.forward(observations)

        # Backward pass using the same scaling factors as forward()
        backward_messages = np.zeros((num_timesteps, self.n_states))
        backward_messages[num_timesteps - 1] = 1.0
        for t in range(num_timesteps - 2, -1, -1):
            # NOTE(review): _get_transition_matrix presumably honors per-timestep
            # news overrides, mirroring the multivariate HMM — confirm.
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])
            backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
            if scale[t + 1] > 0:
                backward_messages[t] /= scale[t + 1]

        # gamma_t(i) = P(S_t=i | y_1:T)
        smoothed_posteriors = forward_messages * backward_messages
        row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0  # guard: avoid 0/0 on degenerate rows
        smoothed_posteriors /= row_sums

        # transition_posteriors: P(S_t=i, S_{t+1}=j | y_1:num_timesteps) for transition re-estimation
        transition_posteriors = np.zeros((num_timesteps - 1, self.n_states, self.n_states))
        for t in range(num_timesteps - 1):
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])

            # xi_t(i,j) = P(S_t=i, S_{t+1}=j | y_1:T)
            for i in range(self.n_states):
                for j in range(self.n_states):
                    transition_posteriors[t, i, j] = (
                        forward_messages[t, i]
                        * transition_matrix_t_plus_1[i, j]
                        * b_next[j]
                        * backward_messages[t + 1, j]
                    )

            # Normalize each timestep slice so it is a proper joint distribution.
            xi_sum = transition_posteriors[t].sum()
            if xi_sum > 0:
                transition_posteriors[t] /= xi_sum

        # M-step
        # Re-estimate initial distribution
        self.pi = smoothed_posteriors[0]

        # Re-estimate transition matrix
        for i in range(self.n_states):
            denom = smoothed_posteriors[:-1, i].sum()
            if denom > 0:
                for j in range(self.n_states):
                    self.transition_matrix[i, j] = transition_posteriors[:, i, j].sum() / denom
            # Renormalize
            row_sum = self.transition_matrix[i].sum()
            if row_sum > 0:
                self.transition_matrix[i] /= row_sum

        # re-estimate emission parameters
        for s in range(self.n_states):
            weights = smoothed_posteriors[:, s]
            w_sum = weights.sum()
            if w_sum > 1e-10:
                # Responsibility-weighted Gaussian mean / std for state s.
                mu = np.average(observations, weights=weights)
                var = np.average((observations - mu) ** 2, weights=weights)
                sigma = max(np.sqrt(var), 0.1)  # floor to prevent collapse
                self.emission_params[s] = (mu, sigma)

        # log-likelihood
        # With a scaled forward pass, the sum of log scaling constants
        # equals the sequence log-likelihood.
        log_likelihood = np.sum(np.log(scale + 1e-300))
        if verbose:
            logger.info("EM iteration %d: LL = %.4f", iteration, log_likelihood)

        if abs(log_likelihood - prev_log_likelihood) < tol:
            if verbose:
                logger.info("Converged at iteration %d", iteration)
            break
        prev_log_likelihood = log_likelihood

    return self

kalman

Kalman Filter for continuous player point potential tracking.

State model: x_{t+1} = x_t + w_t, w_t ~ N(0, Q_t) Observation: y_t = x_t + v_t, v_t ~ N(0, R_t)

Supports per-timestep noise overrides so that: - News shocks (injury) → inflate Q_t (true form can jump suddenly) - Fixture difficulty → inflate R_t (harder opponents → noisier observations)

KalmanFilter
KalmanFilter(
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
)

1D Kalman Filter for tracking latent point potential.

PARAMETER DESCRIPTION
process_noise

Default process noise variance (form drift rate).

TYPE: float DEFAULT: 1.0

observation_noise

Default observation noise variance (weekly point noise).

TYPE: float DEFAULT: 4.0

initial_state_mean

Initial state estimate.

TYPE: float DEFAULT: 4.0

initial_state_covariance

Initial state uncertainty (variance).

TYPE: float DEFAULT: 2.0

Source code in fplx/inference/kalman.py
def __init__(
    self,
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
):
    """1D Kalman filter for tracking latent point potential.

    Parameters
    ----------
    process_noise : float
        Default process-noise variance (form drift rate).
    observation_noise : float
        Default observation-noise variance (weekly point noise).
    initial_state_mean : float
        Initial state estimate.
    initial_state_covariance : float
        Initial state uncertainty (variance).
    """
    # Baseline noise levels; individual timesteps may override them below.
    self.default_process_noise = process_noise
    self.default_observation_noise = observation_noise

    # Prior on the latent state before any observation arrives.
    self.initial_state_mean = initial_state_mean
    self.initial_state_covariance = initial_state_covariance

    # timestep -> noise value, injected from news / fixture signals.
    self._process_noise_overrides: dict[int, float] = {}
    self._observation_noise_overrides: dict[int, float] = {}

    # Populated by filter(); None until the filter has been run.
    self.filtered_state_means: Optional[np.ndarray] = None
    self.filtered_state_covariances: Optional[np.ndarray] = None
    self.kalman_gains: Optional[np.ndarray] = None  # Kalman gains
inject_process_shock
inject_process_shock(timestep: int, multiplier: float)

Inflate process noise at a specific timestep.

Use when news indicates a sudden form change (injury, transfer). process_noise_t = default_process_noise * multiplier.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

multiplier

Process noise multiplier (>1 = more uncertainty about form drift).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_process_shock(self, timestep: int, multiplier: float):
    """
    Inflate process noise at one specific timestep.

    Intended for news indicating a sudden form change (injury, transfer):
    process_noise_t = default_process_noise * multiplier.

    Parameters
    ----------
    timestep : int
        Gameweek index the shock applies to.
    multiplier : float
        Process-noise multiplier (>1 means more uncertainty about form drift).
    """
    shocked_noise = self.default_process_noise * multiplier
    self._process_noise_overrides[timestep] = shocked_noise
inject_observation_noise
inject_observation_noise(timestep: int, factor: float)

Adjust observation noise at a specific timestep.

Use for fixture difficulty: harder opponents → less predictable points. observation_noise_t = default_observation_noise * factor.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

factor

Observation noise factor (>1 = harder fixture, noisier observation).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_observation_noise(self, timestep: int, factor: float):
    """
    Scale observation noise at one specific timestep.

    Intended for fixture difficulty (harder opponents -> less predictable
    points): observation_noise_t = default_observation_noise * factor.

    Parameters
    ----------
    timestep : int
        Gameweek index the adjustment applies to.
    factor : float
        Observation-noise factor (>1 means a harder, noisier fixture).
    """
    adjusted_noise = self.default_observation_noise * factor
    self._observation_noise_overrides[timestep] = adjusted_noise
clear_overrides
clear_overrides()

Remove all per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def clear_overrides(self):
    """Drop every per-timestep process/observation noise override."""
    for overrides in (self._process_noise_overrides, self._observation_noise_overrides):
        overrides.clear()
get_process_noise_override
get_process_noise_override(
    timestep: int,
) -> Optional[float]

Return explicit process noise override at timestep, if any.

Source code in fplx/inference/kalman.py
def get_process_noise_override(self, timestep: int) -> Optional[float]:
    """Return the explicit process-noise override at `timestep`, or None."""
    return self._process_noise_overrides.get(timestep, None)
set_noise_overrides
set_noise_overrides(
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
)

Replace per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def set_noise_overrides(
    self,
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
):
    """Replace all per-timestep noise overrides with copies of the inputs."""
    # Copy defensively so later caller-side mutation cannot leak in.
    self._process_noise_overrides = {**process_noise_overrides}
    self._observation_noise_overrides = {**observation_noise_overrides}
copy_with_overrides
copy_with_overrides(
    max_timestep: Optional[int] = None,
) -> KalmanFilter

Create a parameter-identical filter with copied noise overrides.

PARAMETER DESCRIPTION
max_timestep

If provided, only overrides for timesteps <= max_timestep are copied.

TYPE: int DEFAULT: None

Source code in fplx/inference/kalman.py
def copy_with_overrides(self, max_timestep: Optional[int] = None) -> "KalmanFilter":
    """Create a parameter-identical filter with copied noise overrides.

    Parameters
    ----------
    max_timestep : int, optional
        When given, only overrides at timesteps <= max_timestep are
        carried over to the copy.
    """
    clone = KalmanFilter(
        process_noise=self.default_process_noise,
        observation_noise=self.default_observation_noise,
        initial_state_mean=self.initial_state_mean,
        initial_state_covariance=self.initial_state_covariance,
    )

    def _select(overrides: dict[int, float]) -> dict[int, float]:
        # No cutoff: copy everything; otherwise keep only early timesteps.
        if max_timestep is None:
            return dict(overrides)
        return {t: v for t, v in overrides.items() if t <= max_timestep}

    clone.set_noise_overrides(
        _select(self._process_noise_overrides),
        _select(self._observation_noise_overrides),
    )
    return clone
filter
filter(observations: ndarray)

Run Kalman filter on observations with per-timestep noise.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
filtered_state_means

Filtered state estimates (posterior mean).

TYPE: (ndarray, shape(num_timesteps))

filtered_state_covariances

Filtered state uncertainties (posterior variance).

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def filter(self, observations: np.ndarray):
    """
    Run the 1D Kalman filter with per-timestep noise overrides.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    filtered_state_means : np.ndarray, shape (num_timesteps,)
        Filtered state estimates (posterior means).
    filtered_state_covariances : np.ndarray, shape (num_timesteps,)
        Filtered state uncertainties (posterior variances).
    """
    n_steps = len(observations)
    means = np.zeros(n_steps)
    covariances = np.zeros(n_steps)
    gains = np.zeros(n_steps)

    # Prior for t = 0 comes straight from the initial state distribution.
    prior_mean = self.initial_state_mean
    prior_cov = self.initial_state_covariance

    for t in range(n_steps):
        q_t = self._get_process_noise(t)
        r_t = self._get_observation_noise(t)

        # Prediction step (random-walk state model): from t >= 1 the
        # prior is the previous posterior inflated by process noise.
        if t > 0:
            prior_mean = means[t - 1]
            prior_cov = covariances[t - 1] + q_t

        # Measurement update.
        residual = observations[t] - prior_mean
        residual_cov = prior_cov + r_t  # innovation covariance
        gain = prior_cov / residual_cov  # Kalman gain

        means[t] = prior_mean + gain * residual
        covariances[t] = (1 - gain) * prior_cov
        gains[t] = gain

    # Cache the full trajectories for predict_next() and inspection.
    self.filtered_state_means = means
    self.filtered_state_covariances = covariances
    self.kalman_gains = gains

    return means, covariances
predict_next
predict_next() -> tuple[float, float]

Predict next observation with uncertainty.

Returns the predictive distribution for Y_{t+1} (the observation), not X_{t+1} (the latent state). This ensures consistency with the HMM predict_next which also returns observation-level variance.

Var[Y_{t+1}] = Var[X_{t+1}|y_{1:t}] + R = (P_t + Q) + R

Must call filter() first.

RETURNS DESCRIPTION
predicted_mean

E[Y_{t+1} | y_{1:t}].

TYPE: float

predicted_var

Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

TYPE: float

Source code in fplx/inference/kalman.py
def predict_next(self) -> tuple[float, float]:
    """
    Predict the next observation with uncertainty.

    Returns the predictive distribution of Y_{t+1} (the observation),
    not X_{t+1} (the latent state), matching the HMM predict_next
    convention: Var[Y_{t+1}] = (P_t + Q) + R.

    Must call filter() first.

    Returns
    -------
    predicted_mean : float
        E[Y_{t+1} | y_{1:t}].
    predicted_var : float
        Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

    Raises
    ------
    RuntimeError
        If filter() has not been run yet.
    """
    if self.filtered_state_means is None or self.filtered_state_covariances is None:
        raise RuntimeError("Must call filter() before predict_next().")

    next_t = len(self.filtered_state_means)
    q_next = self._get_process_noise(next_t)
    r_next = self._get_observation_noise(next_t)

    predicted_mean = self.filtered_state_means[-1]
    # One-step state prediction variance (P + Q), plus observation noise R.
    predicted_var = self.filtered_state_covariances[-1] + q_next + r_next

    return predicted_mean, predicted_var
smooth
smooth(observations: ndarray)

Run RTS smoother (backward pass after forward Kalman filter).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_state_means

Smoothed state estimates.

TYPE: (ndarray, shape(num_timesteps))

smoothed_state_covariances

Smoothed state uncertainties.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def smooth(self, observations: np.ndarray):
    """
    Run the RTS smoother: forward Kalman pass, then a backward sweep.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_state_means : np.ndarray, shape (num_timesteps,)
        Smoothed state estimates.
    smoothed_state_covariances : np.ndarray, shape (num_timesteps,)
        Smoothed state uncertainties.
    """
    means_f, covs_f = self.filter(observations)
    n_steps = len(observations)

    means_s = np.zeros(n_steps)
    covs_s = np.zeros(n_steps)

    # At the final timestep the smoothed and filtered estimates coincide.
    means_s[-1] = means_f[-1]
    covs_s[-1] = covs_f[-1]

    for t in range(n_steps - 2, -1, -1):
        q_next = self._get_process_noise(t + 1)
        cov_pred = covs_f[t] + q_next

        # RTS smoother gain; guard against a degenerate zero variance.
        gain = covs_f[t] / cov_pred if cov_pred > 0 else 0.0

        means_s[t] = means_f[t] + gain * (means_s[t + 1] - means_f[t])
        covs_s[t] = covs_f[t] + gain**2 * (covs_s[t + 1] - cov_pred)

    return means_s, covs_s

multivariate_hmm

Position-aware multivariate-emission HMM for player form inference.

Uses position-specific feature vectors extracted from the full vaastav dataset:

GK: [saves/90, xGC/90, clean_sheet, bonus, mins_frac] DEF: [xG, xA, xGC/90, clean_sheet, influence/100, bonus, mins_frac] MID: [xG, xA, creativity/100, threat/100, bonus, mins_frac] FWD: [xG, xA, threat/100, bonus, mins_frac]

Each state emits a multivariate Gaussian with diagonal covariance. Baum-Welch learns per-player emission parameters from their history.

The minutes_fraction feature (0 or ~1) lets the HMM identify the Injured state from the feature vector alone, without NLP news signals.

MultivariateHMM
MultivariateHMM(
    position: str = "MID",
    transition_matrix: Optional[ndarray] = None,
    initial_dist: Optional[ndarray] = None,
)

Position-aware HMM with multivariate diagonal Gaussian emissions.

PARAMETER DESCRIPTION
position

GK, DEF, MID, FWD. Determines feature set and default emissions.

TYPE: str DEFAULT: 'MID'

Source code in fplx/inference/multivariate_hmm.py
def __init__(
    self,
    position: str = "MID",
    transition_matrix: Optional[np.ndarray] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Position-aware HMM with multivariate diagonal Gaussian emissions.

    Parameters
    ----------
    position : str
        GK, DEF, MID, FWD. Selects the feature set and default emissions.
    transition_matrix : np.ndarray, optional
        Prior transition matrix; defaults to DEFAULT_TRANSITION.
    initial_dist : np.ndarray, optional
        Initial state distribution; defaults to DEFAULT_INITIAL.
    """
    self.position = position
    self.means, self.vars = _default_emissions(position)

    # Keep copies of the position-level priors so Baum-Welch can
    # regularize (MAP-style) toward them during fitting.
    self.prior_means = self.means.copy()
    self.prior_vars = self.vars.copy()
    if transition_matrix is None:
        self.prior_A = DEFAULT_TRANSITION.copy()
    else:
        self.prior_A = transition_matrix.copy()

    self.A = self.prior_A.copy()
    self.pi = DEFAULT_INITIAL.copy() if initial_dist is None else initial_dist.copy()
    self.n_states = N_STATES
    self.n_features = self.means.shape[1]
    # timestep -> perturbed transition matrix (news injection).
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict,
    confidence: float = 1.0,
)

Perturb transition matrix at timestep (same API as scalar HMM).

Source code in fplx/inference/multivariate_hmm.py
def inject_news_perturbation(self, timestep: int, state_boost: dict, confidence: float = 1.0):
    """Store a perturbed transition matrix for one timestep (scalar-HMM API).

    Each boosted target state's inbound probability is scaled by
    1 + confidence * (boost - 1); rows are then renormalized.
    """
    perturbed = self.A.copy()
    for row in range(self.n_states):
        for target, boost in state_boost.items():
            perturbed[row, target] *= 1.0 + confidence * (boost - 1.0)
        row_total = perturbed[row].sum()
        if row_total > 0:
            perturbed[row] /= row_total
    self._transition_overrides[timestep] = perturbed
forward
forward(observations: ndarray)

Forward algorithm. observations: (T, D).

Source code in fplx/inference/multivariate_hmm.py
def forward(self, observations: np.ndarray):
    """Scaled forward algorithm over a (T, D) observation matrix."""
    n_obs = len(observations)
    alpha = np.zeros((n_obs, self.n_states))
    scale = np.zeros(n_obs)

    # t = 0: initial distribution weighted by the emission likelihood.
    alpha[0] = self.pi * self._emission_prob_vector(observations[0])
    scale[0] = alpha[0].sum()
    if scale[0] > 0:
        alpha[0] /= scale[0]

    # t >= 1: propagate through the (possibly overridden) transition matrix.
    for t in range(1, n_obs):
        emission = self._emission_prob_vector(observations[t])
        alpha[t] = (alpha[t - 1] @ self._get_A(t)) * emission
        scale[t] = alpha[t].sum()
        if scale[t] > 0:
            alpha[t] /= scale[t]
    return alpha, scale
forward_backward
forward_backward(observations: ndarray) -> ndarray

Smoothed posteriors P(S_t | y_{1:T}).

Source code in fplx/inference/multivariate_hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """Compute smoothed state posteriors P(S_t | y_{1:T})."""
    n_obs = len(observations)
    alpha, scale = self.forward(observations)

    # Backward pass, scaled with the same constants as forward().
    beta = np.zeros((n_obs, self.n_states))
    beta[n_obs - 1] = 1.0
    for t in range(n_obs - 2, -1, -1):
        emission_next = self._emission_prob_vector(observations[t + 1])
        beta[t] = self._get_A(t + 1) @ (emission_next * beta[t + 1])
        if scale[t + 1] > 0:
            beta[t] /= scale[t + 1]

    gamma = alpha * beta
    totals = gamma.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0  # guard against all-zero rows
    return gamma / totals
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence.

Source code in fplx/inference/multivariate_hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """Return the most likely hidden-state sequence (log-space Viterbi)."""
    n_obs = len(observations)
    log_delta = np.zeros((n_obs, self.n_states))
    backpointers = np.zeros((n_obs, self.n_states), dtype=int)

    log_emission_0 = np.array(
        [self._emission_log_prob(observations[0], s) for s in range(self.n_states)]
    )
    log_delta[0] = np.log(self.pi + 1e-300) + log_emission_0

    for t in range(1, n_obs):
        log_A = np.log(self._get_A(t) + 1e-300)
        log_emission = np.array(
            [self._emission_log_prob(observations[t], s) for s in range(self.n_states)]
        )
        for s in range(self.n_states):
            candidates = log_delta[t - 1] + log_A[:, s]
            best = int(np.argmax(candidates))
            backpointers[t, s] = best
            log_delta[t, s] = candidates[best] + log_emission[s]

    # Backtrack from the best terminal state.
    path = np.zeros(n_obs, dtype=int)
    path[n_obs - 1] = np.argmax(log_delta[n_obs - 1])
    for t in range(n_obs - 2, -1, -1):
        path[t] = backpointers[t + 1, path[t + 1]]
    return path
predict_next_features
predict_next_features(observations: ndarray)

Predict next gameweek's feature vector.

Returns mean, var (per feature), and state distribution.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_features(self, observations: np.ndarray):
    """
    Predict next gameweek's feature vector.

    Returns per-feature mean and variance plus the predicted state
    distribution for the next timestep.
    """
    alpha, _ = self.forward(observations)
    state_dist = alpha[-1] @ self._get_A(len(observations))

    mean = state_dist @ self.means
    # Law of total variance, applied per feature; floored for stability.
    var = state_dist @ self.vars + state_dist @ (self.means**2) - mean**2
    return mean, np.maximum(var, 1e-8), state_dist
one_step_point_predictions
one_step_point_predictions(
    observations: ndarray,
) -> ndarray

One-step-ahead point predictions for each historical timestep.

Returns array preds where preds[t] predicts points at timestep t, using information up to t-1 (preds[0] is NaN).

Source code in fplx/inference/multivariate_hmm.py
def one_step_point_predictions(self, observations: np.ndarray) -> np.ndarray:
    """One-step-ahead point predictions for each historical timestep.

    preds[t] is the prediction for timestep t using only data up to
    t - 1; preds[0] is NaN because no prior information exists.
    """
    n_obs = len(observations)
    preds = np.full(n_obs, np.nan)
    if n_obs < 2:
        return preds

    alpha, _ = self.forward(observations)
    for t in range(1, n_obs):
        # Predicted state distribution at t given filtered belief at t-1.
        prior_dist = alpha[t - 1] @ self._get_A(t)
        preds[t] = self._expected_points_from_state_dist(prior_dist)
    return preds
predict_next_points
predict_next_points(
    observations: ndarray,
) -> tuple[float, float]

Convert predicted features → expected FPL points.

Uses FPL scoring rules applied to predicted feature rates.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_points(self, observations: np.ndarray) -> tuple[float, float]:
    """
    Convert predicted feature rates into expected FPL points.

    Reads the structural xPts feature from the predicted feature vector;
    the variance gets a +1.0 residual floor on top of the feature variance.
    """
    feature_mean, feature_var, _ = self.predict_next_features(observations)
    xpts_pos = POSITION_FEATURES[self.position].index("xPts")

    expected = max(0.0, float(feature_mean[xpts_pos]))
    variance = float(max(feature_var[xpts_pos], 1e-6) + 1.0)  # residual floor
    return expected, variance
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    prior_weight: float = 0.85,
)

Baum-Welch EM with MAP-style prior interpolation.

PARAMETER DESCRIPTION
observations

Feature matrix with shape (T, D).

TYPE: ndarray

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

prior_weight

Weight on prior parameters in [0, 1]. Higher values increase regularization toward position-level default emissions/transitions.

TYPE: float DEFAULT: 0.85

Source code in fplx/inference/multivariate_hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    prior_weight: float = 0.85,
):
    """Baum-Welch EM with MAP-style prior interpolation.

    Each EM update blends the maximum-likelihood estimate with the
    position-level prior (`prior_A`, `prior_means`, `prior_vars`) using
    `prior_weight`, which keeps short player histories from overfitting.

    Parameters
    ----------
    observations : np.ndarray
        Feature matrix with shape (T, D).
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    prior_weight : float
        Weight on prior parameters in [0, 1]. Higher values increase
        regularization toward position-level default emissions/transitions.

    Returns
    -------
    self
    """
    T = observations.shape[0]
    prev_ll = -np.inf
    prior_weight = float(np.clip(prior_weight, 0.0, 1.0))

    for _ in range(n_iter):
        alpha, scale = self.forward(observations)

        # Backward pass with scaling aligned to forward()
        beta = np.zeros((T, self.n_states))
        beta[T - 1] = 1.0
        for t in range(T - 2, -1, -1):
            b_next = self._emission_prob_vector(observations[t + 1])
            beta[t] = self._get_A(t + 1) @ (b_next * beta[t + 1])
            if scale[t + 1] > 0:
                beta[t] /= scale[t + 1]

        # gamma_t(i) = P(S_t = i | y_{1:T}); renormalize to guard zeros.
        gamma = alpha * beta
        rs = gamma.sum(axis=1, keepdims=True)
        rs[rs == 0] = 1.0
        gamma /= rs

        # M-step: initial
        self.pi = np.maximum(gamma[0], 1e-10)
        self.pi /= self.pi.sum()

        # M-step: transitions
        # xi_t(i, j) = P(S_t = i, S_{t+1} = j | y_{1:T}), normalized per t.
        xi = np.zeros((T - 1, self.n_states, self.n_states))
        for t in range(T - 1):
            b_next = self._emission_prob_vector(observations[t + 1])
            for i in range(self.n_states):
                for j in range(self.n_states):
                    xi[t, i, j] = alpha[t, i] * self._get_A(t + 1)[i, j] * b_next[j] * beta[t + 1, j]
            xs = xi[t].sum()
            if xs > 0:
                xi[t] /= xs
        for i in range(self.n_states):
            d = gamma[:-1, i].sum()
            if d > 1e-10:
                # Blend the MLE row with the prior row (MAP-style shrinkage).
                mle_A = xi[:, i, :].sum(axis=0) / d
                self.A[i] = prior_weight * self.prior_A[i] + (1.0 - prior_weight) * mle_A
            rs = self.A[i].sum()
            if rs > 0:
                self.A[i] /= rs

        # M-step: emissions
        for s in range(self.n_states):
            w = gamma[:, s]
            ws = w.sum()
            if ws > 1e-10:
                # Responsibility-weighted mean/variance, shrunk toward the
                # priors; variances are floored at 1e-4 to keep the
                # diagonal Gaussian densities well-defined.
                mle_mu = np.average(observations, axis=0, weights=w)
                diff = observations - mle_mu
                mle_var = np.average(diff**2, axis=0, weights=w)
                self.means[s] = prior_weight * self.prior_means[s] + (1.0 - prior_weight) * mle_mu
                self.vars[s] = np.maximum(
                    prior_weight * self.prior_vars[s] + (1.0 - prior_weight) * mle_var,
                    1e-4,
                )

        # Sequence log-likelihood from the forward scaling constants.
        ll = np.sum(np.log(scale + 1e-300))
        if abs(ll - prev_ll) < tol:
            break
        prev_ll = ll
    return self
build_feature_matrix
build_feature_matrix(
    timeseries: DataFrame, position: str
) -> ndarray

Extract position-specific feature matrix from player timeseries.

PARAMETER DESCRIPTION
timeseries

Player gameweek history from vaastav dataset.

TYPE: DataFrame

position

GK, DEF, MID, or FWD.

TYPE: str

RETURNS DESCRIPTION
np.ndarray, shape (T, 2): column 0 is a position-specific structural xPts projection, column 1 is the minutes fraction (minutes/90, clipped to [0, 1]).
Source code in fplx/inference/multivariate_hmm.py
def build_feature_matrix(timeseries: pd.DataFrame, position: str) -> np.ndarray:
    """
    Extract a compact per-gameweek feature matrix from player timeseries.

    The current implementation always emits exactly two columns: a
    structural expected-points projection and the minutes fraction. The
    position influences only the xPts projection (via compute_xpoints),
    not the number of features.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Player gameweek history from vaastav dataset.
    position : str
        GK, DEF, MID, or FWD (passed through to compute_xpoints).

    Returns
    -------
    np.ndarray, shape (T, 2)
        Column 0: structural xPts projection; column 1: minutes/90,
        clipped to [0, 1].
    """
    n = len(timeseries)
    features = np.zeros((n, 2))

    # mins_frac (0 or ~1) lets downstream models spot missed gameweeks.
    mins = _safe_col(timeseries, "minutes")
    features[:, 1] = np.clip(mins / 90.0, 0.0, 1.0)  # mins_frac

    # Domain-specific projection from rich event space to structural xPts.
    features[:, 0] = compute_xpoints(timeseries, position)
    return features

pipeline

Per-player inference pipeline orchestrator.

This is the single entry point that FPLModel.fit() calls for each player. It coordinates HMM, Kalman Filter, signal injection, and fusion.

Usage: pipeline = PlayerInferencePipeline() pipeline.ingest_observations(points_array) pipeline.inject_news("Player ruled out for 3 weeks", timestep=20) pipeline.inject_fixture_difficulty(difficulty=4.5, timestep=21) results = pipeline.run() ep_mean, ep_var = pipeline.predict_next()

InferenceResult dataclass
InferenceResult(
    filtered_beliefs: ndarray,
    smoothed_beliefs: ndarray,
    viterbi_path: ndarray,
    hmm_predicted_mean: float = 0.0,
    hmm_predicted_var: float = 0.0,
    kalman_filtered: ndarray = (lambda: array([]))(),
    kalman_uncertainty: ndarray = (lambda: array([]))(),
    kf_predicted_mean: float = 0.0,
    kf_predicted_var: float = 0.0,
    fused_mean: ndarray = (lambda: array([]))(),
    fused_var: ndarray = (lambda: array([]))(),
    fusion_alpha: Optional[float] = None,
    predicted_mean: float = 0.0,
    predicted_var: float = 0.0,
)

Container for inference pipeline outputs.

PlayerInferencePipeline
PlayerInferencePipeline(
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
)

Orchestrates HMM + Kalman inference for a single player.

PARAMETER DESCRIPTION
hmm_params

Override HMM parameters: transition_matrix, emission_params, initial_dist.

TYPE: dict DEFAULT: None

kf_params

Override Kalman parameters: Q, R, x0, P0.

TYPE: dict DEFAULT: None

Source code in fplx/inference/pipeline.py
def __init__(
    self,
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
):
    """
    Orchestrate HMM + Kalman inference for a single player.

    Parameters
    ----------
    hmm_params : dict, optional
        Override HMM parameters: transition_matrix, emission_params, initial_dist.
    kf_params : dict, optional
        Override Kalman parameters: process_noise, observation_noise,
        initial_state_mean, initial_state_covariance.
    hmm_variance_floor : float
        Lower bound on the HMM predictive variance (clamped to >= 1e-6).
    news_params : dict, optional
        Overrides merged into DEFAULT_NEWS_PARAMS.
    fusion_mode : str
        Either 'precision' or 'calibrated_alpha'.
    fusion_params : dict, optional
        Overrides merged into DEFAULT_FUSION_PARAMS.

    Raises
    ------
    ValueError
        If `fusion_mode` is not a supported mode.
    """
    # Fail fast: reject a bad fusion_mode BEFORE constructing any
    # components (previously validation ran last, so an invalid config
    # still paid for HMM/Kalman construction before raising).
    if fusion_mode not in {"precision", "calibrated_alpha"}:
        raise ValueError(
            f"Unknown fusion_mode '{fusion_mode}'. Expected one of: 'precision', 'calibrated_alpha'."
        )
    self.fusion_mode = fusion_mode

    hmm_params = hmm_params or {}
    kf_params = kf_params or {}

    self.hmm = HMMInference(
        transition_matrix=hmm_params.get("transition_matrix"),
        emission_params=hmm_params.get("emission_params"),
        initial_dist=hmm_params.get("initial_dist"),
    )
    self.kf = KalmanFilter(
        process_noise=kf_params.get("process_noise", 1.0),
        observation_noise=kf_params.get("observation_noise", 4.0),
        initial_state_mean=kf_params.get("initial_state_mean", 4.0),
        initial_state_covariance=kf_params.get("initial_state_covariance", 2.0),
    )
    self.hmm_variance_floor = max(float(hmm_variance_floor), 1e-6)
    self.news_params = _merge_nested_dicts(DEFAULT_NEWS_PARAMS, news_params or {})
    self.fusion_params = _merge_nested_dicts(DEFAULT_FUSION_PARAMS, fusion_params or {})

    # Populated by ingest_observations() / run().
    self.observations: Optional[np.ndarray] = None
    self._result: Optional[InferenceResult] = None
ingest_observations
ingest_observations(points: ndarray)

Set the player's historical points sequence.

PARAMETER DESCRIPTION
points

Weekly points history.

TYPE: (ndarray, shape(T))

Source code in fplx/inference/pipeline.py
def ingest_observations(self, points: np.ndarray):
    """
    Store the player's weekly points history.

    Parameters
    ----------
    points : np.ndarray, shape (T,)
        Weekly points history; coerced to a float array.
    """
    self.observations = np.asarray(points, dtype=float)
    # Any previously computed inference no longer matches the new data.
    self._result = None
inject_news
inject_news(news_signal: dict, timestep: int)

Inject a news signal into the inference at a specific gameweek.

Bridges from existing NewsSignal.generate_signal() output format.

PARAMETER DESCRIPTION
news_signal

Output from NewsSignal.generate_signal(). Must contain: 'availability', 'minutes_risk', 'confidence'.

TYPE: dict

timestep

The gameweek index to apply the perturbation.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_news(
    self,
    news_signal: dict,
    timestep: int,
):
    """
    Apply a news-derived perturbation to both the HMM and the Kalman filter.

    Bridges from the existing NewsSignal.generate_signal() output format.

    Parameters
    ----------
    news_signal : dict
        Output from NewsSignal.generate_signal(); the keys read are
        'availability', 'minutes_risk', 'confidence'.
    timestep : int
        The gameweek index to apply the perturbation.
    """
    thresholds = self.news_params.get("classification_thresholds")
    category = _classify_news(
        news_signal.get("availability", 1.0),
        news_signal.get("minutes_risk", 0.0),
        thresholds,
    )
    default_conf = float(self.news_params.get("default_confidence", 0.6))
    confidence = news_signal.get("confidence", default_conf)

    pmap = self.news_params.get("perturbation_map", DEFAULT_NEWS_PERTURBATION_MAP)
    fallback = pmap.get("neutral", {"state_boost": {}, "kalman_shock": 1.0})
    perturbation = pmap.get(category, fallback)

    # HMM side: bias the state beliefs at the given gameweek.
    boost = perturbation.get("state_boost", {})
    if boost:
        self.hmm.inject_news_perturbation(
            timestep=timestep,
            state_boost=boost,
            confidence=confidence,
        )

    # Kalman side: only apply a process shock when the multiplier is non-trivial.
    shock = float(perturbation.get("kalman_shock", 1.0))
    if shock != 1.0:
        self.kf.inject_process_shock(
            timestep=timestep,
            multiplier=shock,
        )
inject_fixture_difficulty
inject_fixture_difficulty(difficulty: float, timestep: int)

Inject fixture difficulty into Kalman observation noise.

PARAMETER DESCRIPTION
difficulty

Fixture difficulty score (1-5, from FixtureSignal).

TYPE: float

timestep

The gameweek index.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_fixture_difficulty(self, difficulty: float, timestep: int):
    """
    Inject fixture difficulty into the Kalman observation noise.

    Parameters
    ----------
    difficulty : float
        Fixture difficulty score (1-5, from FixtureSignal).
    timestep : int
        The gameweek index.
    """
    # Convert difficulty to a noise multiplier, then apply it to the filter.
    factor = _difficulty_to_noise_factor(difficulty)
    self.kf.inject_observation_noise(timestep=timestep, factor=factor)
run
run() -> InferenceResult

Run full inference pipeline: HMM + Kalman + Fusion.

RETURNS DESCRIPTION
InferenceResult

All inference outputs.

Source code in fplx/inference/pipeline.py
def run(self) -> InferenceResult:
    """
    Run full inference pipeline: HMM + Kalman + Fusion.

    Runs the HMM passes (forward, forward-backward, Viterbi, one-step
    prediction) and the Kalman filter over the ingested observations,
    then combines the two according to ``self.fusion_mode``:

    - ``"calibrated_alpha"``: convex combination of the Kalman and HMM
      tracks with weight ``alpha`` estimated from the data.
    - ``"precision"``: precision-weighted fusion of the smoothed HMM
      beliefs and the Kalman track.

    Returns
    -------
    InferenceResult
        All inference outputs. Also cached on ``self._result`` for
        ``predict_next()``.

    Raises
    ------
    RuntimeError
        If ``ingest_observations()`` has not been called.
    """
    if self.observations is None or len(self.observations) == 0:
        raise RuntimeError("No observations ingested. Call ingest_observations().")

    obs = self.observations

    # HMM: filtered (alpha) and smoothed (gamma) state beliefs, the most
    # likely state path, and a one-step-ahead predictive mean/variance.
    alpha, _ = self.hmm.forward(obs)
    gamma = self.hmm.forward_backward(obs)
    viterbi_path = self.hmm.viterbi(obs)
    hmm_pred_mean, hmm_pred_var, _ = self.hmm.predict_next(obs)

    # Kalman: filtered state track plus one-step-ahead prediction.
    kf_x, kf_P = self.kf.filter(obs)
    kf_pred_mean, kf_pred_var = self.kf.predict_next()

    fusion_alpha = None
    if self.fusion_mode == "calibrated_alpha":
        fusion_alpha = self._estimate_fusion_alpha(obs)
        hmm_seq_mean, hmm_seq_var = self._hmm_sequence_moments(gamma)

        # Convex alpha-weighted blend of the two tracks; variances combine
        # with squared weights, each floored to avoid degenerate zeros.
        fused_mean = fusion_alpha * kf_x + (1.0 - fusion_alpha) * hmm_seq_mean
        fused_var = fusion_alpha**2 * np.maximum(kf_P, 1e-6) + (1.0 - fusion_alpha) ** 2 * np.maximum(
            hmm_seq_var, self.hmm_variance_floor
        )

        # Same alpha-weighted blend for the one-step-ahead forecast.
        pred_mean = fusion_alpha * kf_pred_mean + (1.0 - fusion_alpha) * hmm_pred_mean
        pred_var = fusion_alpha**2 * max(kf_pred_var, 1e-6) + (1.0 - fusion_alpha) ** 2 * max(
            hmm_pred_var, self.hmm_variance_floor
        )
    else:
        # Fusion (full sequence, smoothed)
        # Apply an HMM variance floor so HMM does not become unrealistically
        # overconfident and dominate precision-weighted fusion.
        emission_params_for_fusion = {
            s: (mu, max(std, np.sqrt(self.hmm_variance_floor)))
            for s, (mu, std) in self.hmm.emission_params.items()
        }
        fused_mean, fused_var = fuse_sequences(gamma, kf_x, kf_P, emission_params_for_fusion)

        # Fused one-step-ahead prediction
        pred_mean, pred_var = fuse_estimates(
            hmm_pred_mean,
            max(hmm_pred_var, self.hmm_variance_floor),
            kf_pred_mean,
            kf_pred_var,
        )

    # Bundle everything; fusion_alpha stays None in "precision" mode.
    self._result = InferenceResult(
        filtered_beliefs=alpha,
        smoothed_beliefs=gamma,
        viterbi_path=viterbi_path,
        hmm_predicted_mean=hmm_pred_mean,
        hmm_predicted_var=hmm_pred_var,
        kalman_filtered=kf_x,
        kalman_uncertainty=kf_P,
        kf_predicted_mean=kf_pred_mean,
        kf_predicted_var=kf_pred_var,
        fused_mean=fused_mean,
        fused_var=fused_var,
        fusion_alpha=fusion_alpha,
        predicted_mean=pred_mean,
        predicted_var=pred_var,
    )

    return self._result
predict_next
predict_next() -> tuple[float, float]

Get the fused one-step-ahead forecast.

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

Source code in fplx/inference/pipeline.py
def predict_next(self) -> tuple[float, float]:
    """
    Return the fused one-step-ahead forecast as (expected_points, variance).

    Runs the full inference pipeline first when no cached result exists.
    """
    if self._result is None:
        self.run()
    result = self._result
    return result.predicted_mean, result.predicted_var
learn_parameters
learn_parameters(n_iter: int = 20)

Run Baum-Welch to learn HMM parameters from current observations.

Call this before run() if you want data-driven parameters.

Source code in fplx/inference/pipeline.py
def learn_parameters(self, n_iter: int = 20):
    """
    Learn HMM parameters from the current observations via Baum-Welch.

    Call before run() for data-driven (rather than default) parameters.

    Raises
    ------
    RuntimeError
        If no observations have been ingested yet.
    """
    obs = self.observations
    if obs is None:
        raise RuntimeError("No observations. Call ingest_observations() first.")
    self.hmm.fit(obs, n_iter=n_iter)

tft

Temporal Fusion Transformer (TFT) inference adapter.

This module provides optional deep-learning inference for FPLX using pytorch-forecasting.

TFTQuantilePredictions dataclass
TFTQuantilePredictions(
    p10: dict[int, float],
    p50: dict[int, float],
    p90: dict[int, float],
)

Container for TFT quantile outputs for a single gameweek.

to_optimizer_inputs
to_optimizer_inputs() -> (
    tuple[dict[int, float], dict[int, float]]
)

Map quantiles to objective mean and downside risk.

RETURNS DESCRIPTION
expected_points

Uses q50 as robust expected value proxy.

TYPE: dict[int, float]

downside_risk

Uses q50 - q10 as downside spread.

TYPE: dict[int, float]

Source code in fplx/inference/tft.py
def to_optimizer_inputs(self) -> tuple[dict[int, float], dict[int, float]]:
    """Translate quantile forecasts into optimizer inputs.

    Returns
    -------
    expected_points : dict[int, float]
        The median (q50) as a robust expected-value proxy.
    downside_risk : dict[int, float]
        The non-negative spread q50 - q10 per player (missing q10
        treated as 0.0).
    """
    expected_points: dict[int, float] = {}
    downside_risk: dict[int, float] = {}
    for pid, median in self.p50.items():
        expected_points[pid] = float(median)
        spread = float(median - self.p10.get(pid, 0.0))
        downside_risk[pid] = max(0.0, spread)
    return expected_points, downside_risk
TFTForecaster
TFTForecaster(
    quantiles: tuple[float, float, float] = (0.1, 0.5, 0.9),
    encoder_length: int = 15,
    prediction_length: int = 1,
)

Wrapper around PyTorch Forecasting's TemporalFusionTransformer.

Source code in fplx/inference/tft.py
def __init__(
    self,
    quantiles: tuple[float, float, float] = (0.1, 0.5, 0.9),
    encoder_length: int = 15,
    prediction_length: int = 1,
):
    """Configure forecast quantiles and window lengths.

    The underlying TFT model is built lazily by fit() or load().
    """
    self.quantiles = quantiles
    self.encoder_length = encoder_length
    self.prediction_length = prediction_length
    # Populated by fit() / load().
    self.model = None
    self._trainer = None
fit
fit(
    panel_df: DataFrame,
    training_cutoff: int,
    max_epochs: int = 20,
    batch_size: int = 256,
    learning_rate: float = 0.001,
    hidden_size: int = 32,
    attention_head_size: int = 4,
    dropout: float = 0.1,
)

Train TFT on panel data.

Source code in fplx/inference/tft.py
def fit(
    self,
    panel_df: pd.DataFrame,
    training_cutoff: int,
    max_epochs: int = 20,
    batch_size: int = 256,
    learning_rate: float = 1e-3,
    hidden_size: int = 32,
    attention_head_size: int = 4,
    dropout: float = 0.1,
):
    """Train TFT on panel data.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel data consumed by make_tft_datasets.
    training_cutoff : int
        Time index separating training rows from validation rows.
    max_epochs : int
        Maximum Lightning training epochs.
    batch_size : int
        Dataloader batch size for both train and validation.
    learning_rate, hidden_size, attention_head_size, dropout
        TemporalFusionTransformer hyperparameters.

    Returns
    -------
    TFTForecaster
        self, with ``self.model`` and ``self._trainer`` populated.
    """
    # Deferred import of the optional pytorch-forecasting dependency.
    pl, TemporalFusionTransformer, QuantileLoss = self._imports()

    training, validation = make_tft_datasets(
        panel_df,
        training_cutoff=training_cutoff,
        encoder_length=self.encoder_length,
        prediction_length=self.prediction_length,
    )

    train_loader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_loader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # One output per configured quantile, trained with quantile loss.
    self.model = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=learning_rate,
        hidden_size=hidden_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        loss=QuantileLoss(self.quantiles),
        output_size=len(self.quantiles),
        reduce_on_plateau_patience=4,
    )

    # Quiet trainer: no logger or checkpoints, so fit() leaves no artifacts.
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        accelerator="auto",
        devices=1,
        logger=False,
        enable_checkpointing=False,
        enable_model_summary=False,
    )
    trainer.fit(self.model, train_loader, val_loader)
    self._trainer = trainer
    return self
load
load(checkpoint_path: str | Path)

Load a trained TFT checkpoint.

Source code in fplx/inference/tft.py
def load(self, checkpoint_path: str | Path):
    """Load a trained TFT checkpoint."""
    _, TemporalFusionTransformer, _ = self._imports()
    self.model = TemporalFusionTransformer.load_from_checkpoint(str(checkpoint_path))
    return self
predict_gameweek
predict_gameweek(
    panel_df: DataFrame,
    target_gw: int,
    batch_size: int = 256,
) -> TFTQuantilePredictions

Predict quantiles for one target gameweek across all players.

Source code in fplx/inference/tft.py
def predict_gameweek(
    self,
    panel_df: pd.DataFrame,
    target_gw: int,
    batch_size: int = 256,
) -> TFTQuantilePredictions:
    """Predict quantiles for one target gameweek across all players.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel data covering history up to ``target_gw`` (rows with
        ``time_idx`` beyond the target are dropped).
    target_gw : int
        Gameweek (time index) to forecast.
    batch_size : int
        Prediction dataloader batch size.

    Returns
    -------
    TFTQuantilePredictions
        Per-player p10/p50/p90 forecasts for ``target_gw``.

    Raises
    ------
    RuntimeError
        If no model is trained/loaded, the prediction output is empty,
        or the player IDs cannot be recovered from the output.
    """
    if self.model is None:
        raise RuntimeError("Model is not trained/loaded.")

    # Cutoff just before the target so the target gameweek lands in the
    # prediction split.
    training, prediction = make_tft_datasets(
        panel_df[panel_df["time_idx"] <= target_gw].copy(),
        training_cutoff=target_gw - 1,
        encoder_length=self.encoder_length,
        prediction_length=self.prediction_length,
    )

    _ = training  # required for consistent schema creation in from_dataset
    pred_loader = prediction.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Quantile output shape: [n_samples, prediction_length, n_quantiles]
    pred_out = self.model.predict(
        pred_loader,
        mode="quantiles",
        return_x=True,
        return_index=True,
    )

    preds = None
    x = None
    index_df = None

    # Normalize predict() output: accept a named-tuple-like object, a plain
    # tuple, or a bare tensor (the shape varies between library versions).
    if hasattr(pred_out, "output"):
        preds = pred_out.output
        x = getattr(pred_out, "x", None)
        index_df = getattr(pred_out, "index", None)
    elif isinstance(pred_out, tuple):
        if len(pred_out) >= 1:
            preds = pred_out[0]
        if len(pred_out) >= 2:
            x = pred_out[1]
        if len(pred_out) >= 3:
            index_df = pred_out[2]
    else:
        preds = pred_out

    if preds is None:
        raise RuntimeError("TFT prediction output is empty.")

    q = preds.detach().cpu().numpy()
    q = q[:, 0, :]  # one-step forecast

    # Recover sample player ids from prediction index when available.
    if index_df is not None and "group_id" in index_df.columns:
        player_ids = index_df["group_id"].astype(int).to_numpy()
    elif x is not None and "groups" in x:
        groups = x["groups"].detach().cpu().numpy()
        player_ids = groups[:, 0].astype(int)
    else:
        raise RuntimeError("Unable to recover TFT sample player IDs from prediction output.")

    # Deduplicate by keeping last sample for each player in case of overlap.
    p10, p50, p90 = {}, {}, {}
    for pid, row in zip(player_ids, q, strict=False):
        p10[pid] = float(row[0])
        p50[pid] = float(row[1])
        p90[pid] = float(row[2])

    return TFTQuantilePredictions(p10=p10, p50=p50, p90=p90)

models

Machine learning models for FPL prediction.

BaselineModel

BaselineModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaseModel

Baseline model using simple heuristics.

Methods: - Rolling average of points - Weighted recent form - Form-based prediction

Initialize baseline model.

PARAMETER DESCRIPTION
method

Prediction method: 'rolling_mean', 'ewma', 'last_value'

TYPE: str DEFAULT: 'rolling_mean'

window

Window size for rolling calculations

TYPE: int DEFAULT: 5

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """Configure the baseline heuristic.

    Parameters
    ----------
    method : str
        One of 'rolling_mean', 'ewma', 'last_value'.
    window : int
        Rolling-window length.
    """
    self.method = method
    self.window = window
    # Filled by batch_predict(): player id -> predicted points.
    self.predictions = {}
fit
fit(X, y=None)

Fit the model (no-op for baseline).

Source code in fplx/models/baseline.py
def fit(self, X, y=None):
    """No-op: baseline heuristics need no training. Returns self."""
    return self
predict
predict(X: DataFrame) -> float

Predict next gameweek points for a player.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Predict next gameweek points for a player.

    Parameters
    ----------
    X : pd.DataFrame
        Player historical data; must contain a 'points' column.

    Returns
    -------
    float
        Predicted points (0.0 when no usable history).
    """
    if X.empty or "points" not in X.columns:
        return 0.0

    history = X["points"]
    method = self.method

    if method == "ewma":
        return self._ewma(history)
    if method == "last_value":
        return history.iloc[-1]
    if method != "rolling_mean":
        # Unrecognized method: warn and fall back to the default.
        logger.warning(f"Unknown method {self.method}, using rolling_mean")
    return self._rolling_mean(history)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Predict for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of predictions

Source code in fplx/models/baseline.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Predict next-gameweek points for every player.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Mapping of player ID to that player's historical data.

    Returns
    -------
    dict[str, float]
        Per-player predictions; also cached on self.predictions.
    """
    results = {pid: self.predict(frame) for pid, frame in players_data.items()}
    self.predictions = results
    return results

EnsembleModel

EnsembleModel(
    models: list, weights: Optional[list[float]] = None
)

Ensemble combining multiple models with weighted averaging.

PARAMETER DESCRIPTION
models

List of model instances

TYPE: list

weights

Weights for each model (must sum to 1)

TYPE: Optional[list[float]] DEFAULT: None

Source code in fplx/models/ensemble.py
def __init__(self, models: list, weights: Optional[list[float]] = None):
    """Store member models and their (validated) mixing weights.

    Parameters
    ----------
    models : list
        Model instances, each exposing predict(player_data).
    weights : Optional[list[float]]
        Per-model weights summing to 1; equal weights when omitted.

    Raises
    ------
    ValueError
        If the weight count mismatches the models, or weights do not
        sum to 1.
    """
    self.models = models

    if weights is None:
        # Equal weights
        uniform = 1.0 / len(models)
        self.weights = [uniform] * len(models)
        return
    if len(weights) != len(models):
        raise ValueError("Number of weights must match number of models")
    if not np.isclose(sum(weights), 1.0):
        raise ValueError("Weights must sum to 1")
    self.weights = weights
predict
predict(player_data: DataFrame) -> float

Ensemble prediction for a single player.

PARAMETER DESCRIPTION
player_data

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Ensemble prediction

Source code in fplx/models/ensemble.py
def predict(self, player_data: pd.DataFrame) -> float:
    """
    Weighted-average prediction across ensemble members.

    A member that raises contributes 0.0 (and is logged as a warning).

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data.

    Returns
    -------
    float
        Non-negative ensemble prediction.
    """
    member_preds = []
    for model in self.models:
        try:
            member_preds.append(model.predict(player_data))
        except Exception as e:
            logger.warning(f"Model {type(model).__name__} failed: {e}")
            member_preds.append(0.0)

    # Weighted average, clamped at zero.
    weighted = sum(p * w for p, w in zip(member_preds, self.weights))
    return max(0, weighted)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Ensemble predictions for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: Dict[str, DataFrame]

RETURNS DESCRIPTION
Dict[str, float]

Dictionary of ensemble predictions

Source code in fplx/models/ensemble.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Ensemble predictions for every player.

    Parameters
    ----------
    players_data : Dict[str, pd.DataFrame]
        Mapping of player ID to historical data.

    Returns
    -------
    Dict[str, float]
        Per-player ensemble predictions.
    """
    return {pid: self.predict(data) for pid, data in players_data.items()}

RegressionModel

RegressionModel(
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs
)

Bases: BaseModel

Machine learning regression model for FPL predictions.

Adapted from the MLSP project's regressor patterns.

PARAMETER DESCRIPTION
model_type

Type of model: 'ridge', 'xgboost', 'lightgbm'

TYPE: str DEFAULT: 'ridge'

initial_train_size

Size of initial training window

TYPE: int DEFAULT: 10

test_size

Forecast horizon

TYPE: int DEFAULT: 1

step

Rolling window step size

TYPE: int DEFAULT: 1

Source code in fplx/models/regression.py
def __init__(
    self,
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs,
):
    """Build the underlying estimator and rolling-CV splitter.

    Raises
    ------
    ImportError
        If the optional ML dependencies are not installed.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError(
            "sklearn, xgboost, or lightgbm not available. Install with: pip install fplx[ml]"
        )

    self.model_type = model_type
    self.cv = RollingCV(initial_train_size, test_size, step)
    self.model = self._create_model(model_type, **model_kwargs)
    # Rolling-CV artifacts, filled by fit_predict()/evaluate().
    self.predictions = []
    self.true_values = []
    self.feature_importance = None
    self.feature_names_ = None
fit
fit(X, y=None)

Fit the model.

Source code in fplx/models/regression.py
def fit(self, X, y=None):
    """Fit the wrapped estimator on X/y, remembering X's column order.

    The column order is reused by predict() to align inputs.
    """
    self.feature_names_ = list(X.columns)
    self.model.fit(X, y)
    return self
predict
predict(X)

Generate predictions.

Source code in fplx/models/regression.py
def predict(self, X):
    """Predict with the wrapped estimator, aligning X to training columns.

    Columns missing from X are filled with 0; extra columns are dropped.
    When no training columns are recorded, X is used as-is.
    """
    if not self.feature_names_:
        return self.model.predict(X)
    aligned = X.reindex(columns=self.feature_names_, fill_value=0)
    return self.model.predict(aligned)
fit_predict
fit_predict(
    y: Series, X: DataFrame, verbose: bool = False
) -> Series

Fit model and generate predictions using rolling CV.

PARAMETER DESCRIPTION
y

Target time series (points to predict)

TYPE: Series

X

Feature matrix

TYPE: DataFrame

verbose

Print progress

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Series

Predictions aligned with test indices

Source code in fplx/models/regression.py
def fit_predict(self, y: pd.Series, X: pd.DataFrame, verbose: bool = False) -> pd.Series:
    """
    Fit model and generate predictions using rolling CV.

    Each fold refits ``self.model`` on the training window (NaN rows
    dropped) and predicts the test window; predictions from all folds
    are concatenated. Also accumulates ``self.predictions`` and
    ``self.true_values`` for a later evaluate() call.

    Parameters
    ----------
    y : pd.Series
        Target time series (points to predict)
    X : pd.DataFrame
        Feature matrix
    verbose : bool
        Print progress

    Returns
    -------
    pd.Series
        Predictions aligned with test indices
    """
    X_vals = X.values
    y_vals = y.values

    # Reset accumulators so repeated calls don't mix folds.
    self.predictions = []
    self.true_values = []
    pred_indices = []

    for fold, (train_idx, test_idx) in enumerate(self.cv.split(X_vals)):
        X_train, X_test = X_vals[train_idx], X_vals[test_idx]
        y_train, y_test = y_vals[train_idx], y_vals[test_idx]

        # Handle NaN values
        # Keep only rows fully observed in both X and y.
        valid_train = ~np.isnan(X_train).any(axis=1) & ~np.isnan(y_train)
        if valid_train.sum() < 5:
            # Too little clean data to fit a meaningful model this fold.
            if verbose:
                logger.warning(f"Fold {fold}: insufficient valid training data")
            continue

        X_train_clean = X_train[valid_train]
        y_train_clean = y_train[valid_train]

        # Fit model
        self.model.fit(X_train_clean, y_train_clean)

        # Predict
        valid_test = ~np.isnan(X_test).any(axis=1)
        if not valid_test.any():
            continue

        X_test_clean = X_test[valid_test]
        y_pred = self.model.predict(X_test_clean)

        self.predictions.extend(y_pred)
        self.true_values.extend(y_test[valid_test])
        pred_indices.extend(test_idx[valid_test])

        if verbose:
            rmse = np.sqrt(mean_squared_error(y_test[valid_test], y_pred))
            logger.info(f"Fold {fold}: RMSE = {rmse:.3f}")

    return pd.Series(self.predictions, index=pred_indices, name="predicted_points")
predict_next
predict_next(X: DataFrame) -> float

Predict next value given features.

PARAMETER DESCRIPTION
X

Feature matrix (single row for next gameweek)

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/regression.py
def predict_next(self, X: pd.DataFrame) -> float:
    """
    Predict next value given features.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix (single row for next gameweek)

    Returns
    -------
    float
        Predicted points (never negative; 0.0 when X is empty or no
        model is available).
    """
    if X.empty or self.model is None:
        return 0.0

    X_vals = X.values
    if np.isnan(X_vals).any():
        # Impute missing features with 0.0 (the previous comment claimed
        # mean imputation, but nan_to_num substitutes zeros).
        X_vals = np.nan_to_num(X_vals, nan=0.0)

    pred = self.model.predict(X_vals)
    # Clamp at zero and cast so callers get a plain Python float,
    # matching the annotated return type.
    return float(max(0, pred[0]))
get_feature_importance
get_feature_importance(
    feature_names: list[str],
) -> DataFrame

Get feature importance (for tree-based models).

PARAMETER DESCRIPTION
feature_names

Names of features

TYPE: list[str]

RETURNS DESCRIPTION
DataFrame

Feature importance scores

Source code in fplx/models/regression.py
def get_feature_importance(self, feature_names: list[str]) -> pd.DataFrame:
    """
    Feature importance (for tree-based models).

    Parameters
    ----------
    feature_names : list[str]
        Names of features, in training order.

    Returns
    -------
    pd.DataFrame
        Columns 'feature' and 'importance', sorted by importance
        descending; an empty frame for non-tree model types.
    """
    if self.model_type not in ("xgboost", "lightgbm"):
        logger.warning("Feature importance only available for tree-based models")
        return pd.DataFrame()
    scores = self.model.feature_importances_
    table = pd.DataFrame({
        "feature": feature_names,
        "importance": scores,
    })
    return table.sort_values("importance", ascending=False)
evaluate
evaluate() -> dict[str, float]

Evaluate model performance.

RETURNS DESCRIPTION
dict[str, float]

Dictionary of metrics

Source code in fplx/models/regression.py
def evaluate(self) -> dict[str, float]:
    """
    Evaluate rolling-CV performance accumulated by fit_predict().

    Returns
    -------
    dict[str, float]
        'rmse', 'mae', and 'n_predictions'; an empty dict when no
        predictions have been made yet.
    """
    if not self.predictions:
        return {}

    predictions = np.array(self.predictions)
    true_values = np.array(self.true_values)

    errors = true_values - predictions
    # Compute rmse directly with numpy (like mae below) instead of
    # sklearn's mean_squared_error, so evaluation is self-contained and
    # both metrics are derived the same way.
    rmse = float(np.sqrt(np.mean(errors**2)))
    mae = float(np.mean(np.abs(errors)))

    return {
        "rmse": rmse,
        "mae": mae,
        "n_predictions": len(predictions),
    }

RollingCV

RollingCV(
    initial_train_size: int, test_size: int, step: int = 1
)

Generates indices for rolling cross-validation splits.

This is adapted from the MLSP project for time-series validation.

PARAMETER DESCRIPTION
initial_train_size

Size of the initial training set.

TYPE: int

test_size

Size of the test set (forecast horizon).

TYPE: int

step

Step size to move the training window forward.

TYPE: int DEFAULT: 1

Source code in fplx/models/rolling_cv.py
def __init__(self, initial_train_size: int, test_size: int, step: int = 1):
    """Validate and store the rolling-split sizes.

    Raises
    ------
    ValueError
        If any of the sizes is not a positive integer.
    """
    if min(initial_train_size, test_size, step) <= 0:
        raise ValueError(
            "initial_train_size, test_size, and step must be positive integers."
        )
    self.initial_train_size = initial_train_size
    self.test_size = test_size
    self.step = step
split
split(X) -> Generator[tuple[ndarray, ndarray], None, None]

Generate indices to split data into training and test sets.

PARAMETER DESCRIPTION
X

Time series data.

TYPE: array-like

YIELDS DESCRIPTION
train_indices

The training set indices for that split.

TYPE: ndarray

test_indices

The testing set indices for that split.

TYPE: ndarray

Source code in fplx/models/rolling_cv.py
def split(self, X) -> Generator[tuple[np.ndarray, np.ndarray], None, None]:
    """
    Yield (train_indices, test_indices) pairs for rolling validation.

    Parameters
    ----------
    X : array-like
        Time series data.

    Yields
    ------
    train_indices : np.ndarray
        The training set indices for that split.
    test_indices : np.ndarray
        The testing set indices for that split.

    Raises
    ------
    ValueError
        If the data is too short for even one split.
    """
    n_samples = len(X)
    window = self.initial_train_size + self.test_size
    if window > n_samples:
        raise ValueError(
            "initial_train_size + test_size is larger than the number of samples."
        )

    # Slide the window forward by `step` until it no longer fits.
    for start in range(0, n_samples - window + 1, self.step):
        train_stop = start + self.initial_train_size
        yield (
            np.arange(start, train_stop),
            np.arange(train_stop, train_stop + self.test_size),
        )

baseline

Baseline heuristic models for FPL prediction.

BaselineModel
BaselineModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaseModel

Baseline model using simple heuristics.

Methods: - Rolling average of points - Weighted recent form - Form-based prediction

Initialize baseline model.

PARAMETER DESCRIPTION
method

Prediction method: 'rolling_mean', 'ewma', 'last_value'

TYPE: str DEFAULT: 'rolling_mean'

window

Window size for rolling calculations

TYPE: int DEFAULT: 5

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """Set up the prediction heuristic.

    Parameters
    ----------
    method : str
        Prediction method: 'rolling_mean', 'ewma', or 'last_value'.
    window : int
        Window size used by the rolling calculations.
    """
    self.method = method
    self.window = window
    # Cache of the most recent batch_predict() results.
    self.predictions = {}
fit
fit(X, y=None)

Fit the model (no-op for baseline).

Source code in fplx/models/baseline.py
def fit(self, X, y=None):
    """Baselines require no fitting; return self unchanged."""
    return self
predict
predict(X: DataFrame) -> float

Predict next gameweek points for a player.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Predict next gameweek points from historical data.

    Parameters
    ----------
    X : pd.DataFrame
        Player history with a 'points' column.

    Returns
    -------
    float
        Predicted points; 0.0 when the history is empty or unusable.
    """
    if X.empty or "points" not in X.columns:
        return 0.0

    series = X["points"]

    # Dispatch on the configured method; unknown methods fall back to
    # the rolling mean (with a warning).
    handlers = {
        "rolling_mean": self._rolling_mean,
        "ewma": self._ewma,
        "last_value": lambda s: s.iloc[-1],
    }
    handler = handlers.get(self.method)
    if handler is None:
        logger.warning(f"Unknown method {self.method}, using rolling_mean")
        handler = self._rolling_mean
    return handler(series)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Predict for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of predictions

Source code in fplx/models/baseline.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Run predict() for every player and cache the results.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Mapping of player ID to historical data.

    Returns
    -------
    dict[str, float]
        Player ID -> prediction; also kept on self.predictions.
    """
    out: dict[str, float] = {}
    for pid in players_data:
        out[pid] = self.predict(players_data[pid])
    self.predictions = out
    return out
FormBasedModel
FormBasedModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaselineModel

Enhanced baseline using form indicators.

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """Initialize with the same heuristic settings as BaselineModel.

    Parameters
    ----------
    method : str
        'rolling_mean', 'ewma', or 'last_value'.
    window : int
        Rolling-window length.
    """
    self.method = method
    self.window = window
    # Cache of the most recent batch_predict() results.
    self.predictions = {}
predict
predict(X: DataFrame) -> float

Predict based on form with adjustments.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Form-adjusted prediction: the baseline estimate scaled by minutes
    and recent-trend indicators.

    Parameters
    ----------
    X : pd.DataFrame
        Player historical data.

    Returns
    -------
    float
        Non-negative predicted points.
    """
    if X.empty:
        return 0.0

    estimate = super().predict(X)
    recent = X.iloc[-1]

    # Players short of a full hour get a haircut.
    if "minutes" in recent and recent["minutes"] < 60:
        estimate *= 0.7

    # Reward a strong recent points trend, penalize a slump.
    if "points_trend_5" in recent:
        slope = recent["points_trend_5"]
        if slope > 0.5:
            estimate *= 1.1
        elif slope < -0.5:
            estimate *= 0.9

    return max(0, estimate)

ensemble

Ensemble models combining multiple predictors.

EnsembleModel
EnsembleModel(
    models: list, weights: Optional[list[float]] = None
)

Ensemble combining multiple models with weighted averaging.

PARAMETER DESCRIPTION
models

List of model instances

TYPE: list

weights

Weights for each model (must sum to 1)

TYPE: Optional[list[float]] DEFAULT: None

Source code in fplx/models/ensemble.py
def __init__(self, models: list, weights: Optional[list[float]] = None):
    """
    Build an ensemble over *models*.

    When *weights* is omitted, every model receives equal weight.
    Explicit weights must be one-per-model and sum to 1.
    """
    self.models = models

    if weights is not None:
        if len(weights) != len(models):
            raise ValueError("Number of weights must match number of models")
        if not np.isclose(sum(weights), 1.0):
            raise ValueError("Weights must sum to 1")
        self.weights = weights
    else:
        # Equal weights
        self.weights = [1.0 / len(models)] * len(models)
predict
predict(player_data: DataFrame) -> float

Ensemble prediction for a single player.

PARAMETER DESCRIPTION
player_data

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Ensemble prediction

Source code in fplx/models/ensemble.py
def predict(self, player_data: pd.DataFrame) -> float:
    """
    Ensemble prediction for a single player.

    Each member model is scored independently; a member that raises is
    logged and contributes 0.0 instead of aborting the ensemble.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data

    Returns
    -------
    float
        Weighted-average prediction, floored at zero
    """
    model_outputs = []
    for model in self.models:
        try:
            model_outputs.append(model.predict(player_data))
        except Exception as e:
            logger.warning(f"Model {type(model).__name__} failed: {e}")
            model_outputs.append(0.0)

    # Combine member outputs with the ensemble weights.
    weighted = sum(out * wt for out, wt in zip(model_outputs, self.weights))
    return max(0, weighted)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Ensemble predictions for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: Dict[str, DataFrame]

RETURNS DESCRIPTION
Dict[str, float]

Dictionary of ensemble predictions

Source code in fplx/models/ensemble.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Ensemble predictions for multiple players.

    Parameters
    ----------
    players_data : Dict[str, pd.DataFrame]
        Dictionary mapping player ID to their data

    Returns
    -------
    Dict[str, float]
        Dictionary of ensemble predictions, one entry per input player
    """
    # Score every player independently with the single-player path.
    return {
        player_id: self.predict(data)
        for player_id, data in players_data.items()
    }
AdaptiveEnsemble
AdaptiveEnsemble(models: list, learning_rate: float = 0.1)

Bases: EnsembleModel

Adaptive ensemble that adjusts weights based on recent performance.

Source code in fplx/models/ensemble.py
def __init__(self, models: list, learning_rate: float = 0.1):
    # Start from the parent's equal-weight ensemble.
    super().__init__(models)
    # Step size for the smoothed weight update in update_weights().
    self.learning_rate = learning_rate
    # One error history per model; presumably appended to by callers as
    # predictions are scored (not visible here), then consumed by
    # update_weights().
    self.model_errors = [[] for _ in models]
update_weights
update_weights()

Update weights based on recent errors.

Source code in fplx/models/ensemble.py
def update_weights(self):
    """Update weights based on recent errors."""
    # Nothing to do until at least one model has recorded errors.
    if not any(self.model_errors):
        return

    # Average each model's most recent errors (up to the last five);
    # models with no recorded errors get a neutral score of 1.0.
    recent_avg = [
        np.mean(errs[-5:]) if errs else 1.0
        for errs in self.model_errors
    ]

    # Inverse-error weighting: smaller average error -> larger weight.
    inverse = [1.0 / (err + 1e-6) for err in recent_avg]
    norm = sum(inverse)
    target_weights = [val / norm for val in inverse]

    # Move the current weights toward the targets by the learning rate.
    lr = self.learning_rate
    self.weights = [
        (1 - lr) * current + lr * target
        for current, target in zip(self.weights, target_weights)
    ]

    # Renormalize so the weights sum to exactly one.
    total = sum(self.weights)
    self.weights = [w / total for w in self.weights]

regression

ML regression models for FPL prediction.

RegressionModel
RegressionModel(
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs
)

Bases: BaseModel

Machine learning regression model for FPL predictions.

Adapted from the MLSP project's regressor patterns.

PARAMETER DESCRIPTION
model_type

Type of model: 'ridge', 'xgboost', 'lightgbm'

TYPE: str DEFAULT: 'ridge'

initial_train_size

Size of initial training window

TYPE: int DEFAULT: 10

test_size

Forecast horizon

TYPE: int DEFAULT: 1

step

Rolling window step size

TYPE: int DEFAULT: 1

Source code in fplx/models/regression.py
def __init__(
    self,
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs,
):
    """
    Initialize the regression model.

    Parameters
    ----------
    model_type : str
        Type of model: 'ridge', 'xgboost', 'lightgbm'.
    initial_train_size : int
        Size of the initial training window for rolling CV.
    test_size : int
        Forecast horizon for rolling CV.
    step : int
        Rolling window step size.
    **model_kwargs
        Extra keyword arguments forwarded to the underlying estimator.

    Raises
    ------
    ImportError
        If the optional ML dependencies are not installed.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError(
            "sklearn, xgboost, or lightgbm not available. Install with: pip install fplx[ml]"
        )

    self.model_type = model_type
    self.cv = RollingCV(initial_train_size, test_size, step)
    # Underlying estimator built by the factory -- presumably a sklearn /
    # xgboost / lightgbm regressor depending on model_type; see _create_model.
    self.model = self._create_model(model_type, **model_kwargs)
    # Populated by fit_predict during rolling cross-validation.
    self.predictions = []
    self.true_values = []
    self.feature_importance = None
    # Training column order, recorded by fit() and used by predict()
    # to realign incoming frames.
    self.feature_names_ = None
fit
fit(X, y=None)

Fit the model.

Source code in fplx/models/regression.py
def fit(self, X, y=None):
    """Fit the underlying estimator and remember the training columns."""
    # Record column order so predict() can realign incoming frames.
    self.feature_names_ = list(X.columns)
    self.model.fit(X, y)
    return self
predict
predict(X)

Generate predictions.

Source code in fplx/models/regression.py
def predict(self, X):
    """Generate predictions, realigning columns to the training schema."""
    if not self.feature_names_:
        # No recorded training columns; score the frame as-is.
        return self.model.predict(X)
    # Missing training columns are zero-filled; extra columns are dropped.
    aligned = X.reindex(columns=self.feature_names_, fill_value=0)
    return self.model.predict(aligned)
fit_predict
fit_predict(
    y: Series, X: DataFrame, verbose: bool = False
) -> Series

Fit model and generate predictions using rolling CV.

PARAMETER DESCRIPTION
y

Target time series (points to predict)

TYPE: Series

X

Feature matrix

TYPE: DataFrame

verbose

Print progress

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Series

Predictions aligned with test indices

Source code in fplx/models/regression.py
def fit_predict(self, y: pd.Series, X: pd.DataFrame, verbose: bool = False) -> pd.Series:
    """
    Fit model and generate predictions using rolling CV.

    Walks the rolling-window splits produced by ``self.cv``, refitting
    the estimator on each training window and predicting the test window.

    Parameters
    ----------
    y : pd.Series
        Target time series (points to predict)
    X : pd.DataFrame
        Feature matrix
    verbose : bool
        Print progress

    Returns
    -------
    pd.Series
        Predictions aligned with test indices
    """
    feature_matrix = X.values
    target = y.values

    # Reset accumulated state from any prior run.
    self.predictions = []
    self.true_values = []
    pred_indices = []

    for fold, (train_idx, test_idx) in enumerate(self.cv.split(feature_matrix)):
        X_train = feature_matrix[train_idx]
        X_test = feature_matrix[test_idx]
        y_train = target[train_idx]
        y_test = target[test_idx]

        # Drop rows with NaNs in either the features or the target.
        train_mask = ~np.isnan(X_train).any(axis=1) & ~np.isnan(y_train)
        if train_mask.sum() < 5:
            if verbose:
                logger.warning(f"Fold {fold}: insufficient valid training data")
            continue

        # Refit on the clean training window.
        self.model.fit(X_train[train_mask], y_train[train_mask])

        # Only score test rows with complete features.
        test_mask = ~np.isnan(X_test).any(axis=1)
        if not test_mask.any():
            continue

        y_pred = self.model.predict(X_test[test_mask])

        self.predictions.extend(y_pred)
        self.true_values.extend(y_test[test_mask])
        pred_indices.extend(test_idx[test_mask])

        if verbose:
            rmse = np.sqrt(mean_squared_error(y_test[test_mask], y_pred))
            logger.info(f"Fold {fold}: RMSE = {rmse:.3f}")

    return pd.Series(self.predictions, index=pred_indices, name="predicted_points")
predict_next
predict_next(X: DataFrame) -> float

Predict next value given features.

PARAMETER DESCRIPTION
X

Feature matrix (single row for next gameweek)

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/regression.py
def predict_next(self, X: pd.DataFrame) -> float:
    """
    Predict next value given features.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix (single row for next gameweek)

    Returns
    -------
    float
        Predicted points, floored at zero
    """
    # Guard clauses: nothing to score without features or a fitted model.
    if X.empty or self.model is None:
        return 0.0

    features = X.values
    # Replace any NaNs with zeros before scoring.
    if np.isnan(features).any():
        features = np.nan_to_num(features, nan=0.0)

    forecast = self.model.predict(features)
    return max(0, forecast[0])
get_feature_importance
get_feature_importance(
    feature_names: list[str],
) -> DataFrame

Get feature importance (for tree-based models).

PARAMETER DESCRIPTION
feature_names

Names of features

TYPE: list[str]

RETURNS DESCRIPTION
DataFrame

Feature importance scores

Source code in fplx/models/regression.py
def get_feature_importance(self, feature_names: list[str]) -> pd.DataFrame:
    """
    Get feature importance (for tree-based models).

    Parameters
    ----------
    feature_names : list[str]
        Names of features

    Returns
    -------
    pd.DataFrame
        Feature importance scores, sorted descending; empty for
        non-tree models
    """
    # Only the tree-based estimators expose feature_importances_.
    if self.model_type not in ("xgboost", "lightgbm"):
        logger.warning("Feature importance only available for tree-based models")
        return pd.DataFrame()

    table = pd.DataFrame({
        "feature": feature_names,
        "importance": self.model.feature_importances_,
    })
    return table.sort_values("importance", ascending=False)
evaluate
evaluate() -> dict[str, float]

Evaluate model performance.

RETURNS DESCRIPTION
dict[str, float]

Dictionary of metrics

Source code in fplx/models/regression.py
def evaluate(self) -> dict[str, float]:
    """
    Evaluate model performance.

    Returns
    -------
    dict[str, float]
        Dictionary of metrics (rmse, mae, n_predictions); empty when no
        predictions have been made yet
    """
    if not self.predictions:
        return {}

    preds = np.array(self.predictions)
    actuals = np.array(self.true_values)

    return {
        "rmse": np.sqrt(mean_squared_error(actuals, preds)),
        "mae": np.mean(np.abs(actuals - preds)),
        "n_predictions": len(preds),
    }

rolling_cv

Rolling cross-validation for time-series models.

RollingCV
RollingCV(
    initial_train_size: int, test_size: int, step: int = 1
)

Generates indices for rolling cross-validation splits.

This is adapted from the MLSP project for time-series validation.

PARAMETER DESCRIPTION
initial_train_size

Size of the initial training set.

TYPE: int

test_size

Size of the test set (forecast horizon).

TYPE: int

step

Step size to move the training window forward.

TYPE: int DEFAULT: 1

Source code in fplx/models/rolling_cv.py
def __init__(self, initial_train_size: int, test_size: int, step: int = 1):
    """Validate and store the rolling-window parameters."""
    # Reject any non-positive parameter up front.
    for value in (initial_train_size, test_size, step):
        if value <= 0:
            raise ValueError(
                "initial_train_size, test_size, and step must be positive integers."
            )
    self.initial_train_size = initial_train_size
    self.test_size = test_size
    self.step = step
split
split(X) -> Generator[tuple[ndarray, ndarray], None, None]

Generate indices to split data into training and test sets.

PARAMETER DESCRIPTION
X

Time series data.

TYPE: array-like

YIELDS DESCRIPTION
train_indices

The training set indices for that split.

TYPE: ndarray

test_indices

The testing set indices for that split.

TYPE: ndarray

Source code in fplx/models/rolling_cv.py
def split(self, X) -> Generator[tuple[np.ndarray, np.ndarray], None, None]:
    """
    Generate indices to split data into training and test sets.

    Each yielded pair is a fixed-size training window followed
    immediately by the test window; the window then slides forward by
    ``step`` samples.

    Parameters
    ----------
    X : array-like
        Time series data.

    Yields
    ------
    train_indices : np.ndarray
        The training set indices for that split.
    test_indices : np.ndarray
        The testing set indices for that split.
    """
    n_samples = len(X)
    if self.initial_train_size + self.test_size > n_samples:
        raise ValueError(
            "initial_train_size + test_size is larger than the number of samples."
        )

    window_span = self.initial_train_size + self.test_size
    window_start = 0
    while window_start + window_span <= n_samples:
        split_point = window_start + self.initial_train_size
        yield (
            np.arange(window_start, split_point),
            np.arange(split_point, split_point + self.test_size),
        )
        window_start += self.step

selection

Squad selection and optimization.

BudgetConstraint

BudgetConstraint(max_budget: float = 100.0)

Budget constraint for FPL squad (applied to 15-player squad).

Source code in fplx/selection/constraints.py
def __init__(self, max_budget: float = 100.0):
    # Total purse available for the full 15-player squad, in FPL millions.
    self.max_budget = max_budget

FormationConstraints

Formation constraints for FPL squad.

Rules: - Exactly 11 players - 1 GK - 3-5 DEF - 2-5 MID - 1-3 FWD

validate classmethod
validate(players: list[Player]) -> bool

Check if squad satisfies formation constraints.

PARAMETER DESCRIPTION
players

List of players in squad

TYPE: list[Player]

RETURNS DESCRIPTION
bool

True if valid formation

Source code in fplx/selection/constraints.py
@classmethod
def validate(cls, players: list[Player]) -> bool:
    """
    Check if squad satisfies formation constraints.

    Parameters
    ----------
    players : list[Player]
        List of players in squad

    Returns
    -------
    bool
        True if valid formation
    """
    if len(players) != cls.TOTAL_PLAYERS:
        return False
    # Tally players per position, then check each count against its
    # configured (min, max) band.
    tally = {"GK": 0, "DEF": 0, "MID": 0, "FWD": 0}
    for member in players:
        tally[member.position] += 1
    return all(
        low <= tally[position] <= high
        for position, (low, high) in cls.POSITION_LIMITS.items()
    )
get_valid_formations classmethod
get_valid_formations() -> list[str]

Get list of valid formation strings.

RETURNS DESCRIPTION
List[str]

Valid formations (e.g., "3-4-3", "4-3-3")

Source code in fplx/selection/constraints.py
@classmethod
def get_valid_formations(cls) -> list[str]:
    """
    Get list of valid formation strings.

    Returns
    -------
    List[str]
        Valid formations (e.g., "3-4-3", "4-3-3")
    """
    # Enumerate every (DEF, MID, FWD) combination within the position
    # bands that totals the 10 outfield players.
    return [
        f"{d}-{m}-{f}"
        for d in range(3, 6)
        for m in range(2, 6)
        for f in range(1, 4)
        if d + m + f == 10
    ]

SquadQuotas

Position quotas for the 15-player FPL squad.

Rules: - 2 GK, 5 DEF, 5 MID, 3 FWD (exactly). - Total = 15 players.

TeamDiversityConstraint

TeamDiversityConstraint(max_from_team: int = 3)

Max players from same real-world team (default 3).

Source code in fplx/selection/constraints.py
def __init__(self, max_from_team: int = 3):
    # Cap on players drawn from a single real-world club (FPL rule: 3).
    self.max_from_team = max_from_team

LagrangianOptimizer

LagrangianOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
)

Lagrangian relaxation for the FPL squad selection ILP.

Relaxes the budget constraint into the objective:

L(lambda) = max_{x in X} sum_i (mu_i - lambda * c_i) * x_i + lambda * B

where X encodes squad size, position quotas, and team caps. The inner maximization decomposes: for each position, select the top-k players by modified score (mu_i - lambda * c_i).

The dual problem min_{lambda >= 0} L(lambda) is solved via subgradient ascent.

PARAMETER DESCRIPTION
budget

Total budget (default 100.0).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

max_iter

Maximum subgradient iterations.

TYPE: int DEFAULT: 200

tol

Convergence tolerance on duality gap.

TYPE: float DEFAULT: 0.01

risk_aversion

Mean-variance penalty (same as ILP).

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/lagrangian.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
):
    """
    Configure the Lagrangian relaxation solver.

    Parameters
    ----------
    budget : float
        Total budget; relaxed into the objective by solve().
    max_from_team : int
        Maximum players from the same club.
    max_iter : int
        Maximum subgradient iterations.
    tol : float
        Convergence tolerance on the relative duality gap.
    risk_aversion : float
        Mean-variance penalty coefficient (0 = risk-neutral).
    """
    self.budget = budget
    self.max_from_team = max_from_team
    self.max_iter = max_iter
    self.tol = tol
    self.risk_aversion = risk_aversion
solve
solve(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult

Solve via Lagrangian relaxation with subgradient ascent.

PARAMETER DESCRIPTION
players

TYPE: list[Player]

expected_points

TYPE: dict[int, float]

expected_variance

TYPE: dict[int, float] DEFAULT: None

best_known_primal

Best known primal objective (e.g., from ILP). Used for better step size computation.

TYPE: float DEFAULT: None

RETURNS DESCRIPTION
LagrangianResult
Source code in fplx/selection/lagrangian.py
def solve(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult:
    """
    Solve via Lagrangian relaxation with subgradient ascent.

    The budget constraint is relaxed into the objective with multiplier
    ``lam``; each iteration solves the decomposed inner problem, updates
    the best dual bound and best feasible primal solution, and adjusts
    ``lam`` with a Polyak-style step.

    Parameters
    ----------
    players : list[Player]
    expected_points : dict[int, float]
    expected_variance : dict[int, float], optional
    best_known_primal : float, optional
        Best known primal objective (e.g., from ILP).
        Used for better step size computation.

    Returns
    -------
    LagrangianResult
    """
    start_time = time.perf_counter()

    # Initialize lambda
    lam = 0.5  # initial budget multiplier
    best_dual = np.inf
    best_primal = -np.inf
    best_squad = None
    best_lineup = None

    # Step size parameters (Polyak-style)
    theta = 2.0
    theta_decay = 0.95
    no_improve_count = 0

    result = LagrangianResult()

    # Fix: `k` must exist even when max_iter == 0, since the
    # n_iterations bookkeeping below reads `k + 1` after the loop.
    k = -1
    for k in range(self.max_iter):
        # Compute modified scores
        scores = self._compute_modified_scores(players, expected_points, expected_variance, lam)

        # Solve inner problem
        squad, lineup = self._solve_inner(players, scores)

        # Dual objective: L(lambda) = sum scores*x + lambda*B
        inner_value = sum(scores[p.id] for p in lineup)
        dual_obj = inner_value + lam * self.budget

        # Primal objective (original, without lambda penalty)
        primal_obj = sum(expected_points.get(p.id, 0.0) for p in lineup)
        if self.risk_aversion > 0 and expected_variance:
            for p in lineup:
                primal_obj -= self.risk_aversion * np.sqrt(max(expected_variance.get(p.id, 0.0), 0.0))

        # Budget slack (subgradient)
        squad_cost = sum(p.price for p in squad)
        budget_slack = squad_cost - self.budget  # positive = over budget

        # Track best
        if dual_obj < best_dual:
            best_dual = dual_obj
            no_improve_count = 0
        else:
            no_improve_count += 1

        # Only count as feasible primal if budget satisfied
        if squad_cost <= self.budget + 0.01 and primal_obj > best_primal:
            best_primal = primal_obj
            best_squad = squad
            best_lineup = lineup

        # Record history
        result.dual_history.append(float(dual_obj))
        result.primal_history.append(float(primal_obj))
        result.lambda_history.append(float(lam))
        result.budget_slack_history.append(float(budget_slack))

        # Convergence check
        gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
        if gap < self.tol and best_primal > -np.inf:
            result.converged = True
            break

        # Step size (Polyak with target).
        # Fix: compare against None explicitly -- a caller-supplied
        # best_known_primal of 0.0 is a legitimate target and must not
        # be discarded as falsy.
        target = best_known_primal if best_known_primal is not None else best_primal
        step = 0.0 if abs(budget_slack) < 1e-08 else theta * (dual_obj - target) / budget_slack**2

        # Update lambda (projected onto lambda >= 0)
        lam = max(0.0, lam + step * budget_slack)

        # Decay step size if no improvement
        if no_improve_count >= 5:
            theta *= theta_decay
            no_improve_count = 0

    elapsed = time.perf_counter() - start_time

    # Build FullSquad from best feasible solution
    if best_squad and best_lineup and len(best_squad) == 15 and len(best_lineup) == 11:
        pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
        for p in best_lineup:
            if p.position in pos_counts:
                pos_counts[p.position] += 1
        formation = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

        ep_lineup = sum(expected_points.get(p.id, 0.0) for p in best_lineup)
        # Captain = highest expected points in the lineup.
        captain = max(best_lineup, key=lambda p: expected_points.get(p.id, 0.0))

        lineup_obj = Squad(
            players=best_lineup,
            formation=formation,
            total_cost=sum(p.price for p in best_lineup),
            expected_points=ep_lineup,
            captain=captain,
        )
        result.full_squad = FullSquad(squad_players=best_squad, lineup=lineup_obj)

    result.primal_objective = best_primal
    result.dual_bound = best_dual
    result.duality_gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
    result.n_iterations = k + 1
    result.solve_time = elapsed

    logger.info(
        "Lagrangian: %d iters, primal=%.1f, dual=%.1f, gap=%.2f%%, time=%.3fs",
        result.n_iterations,
        best_primal,
        best_dual,
        result.duality_gap * 100,
        elapsed,
    )

    return result

LagrangianResult dataclass

LagrangianResult(
    full_squad: Optional[FullSquad] = None,
    primal_objective: float = 0.0,
    dual_bound: float = 0.0,
    duality_gap: float = 0.0,
    n_iterations: int = 0,
    converged: bool = False,
    solve_time: float = 0.0,
    dual_history: list[float] = list(),
    primal_history: list[float] = list(),
    lambda_history: list[float] = list(),
    budget_slack_history: list[float] = list(),
)

Convergence diagnostics for the Lagrangian solver.

GreedyOptimizer

GreedyOptimizer(
    budget: float = 100.0, max_from_team: int = 3
)

Bases: BaseOptimizer

Greedy baseline: select best-value players per position.

Fast heuristic for comparison. Selects 15-player squad, then picks best 11 as lineup.

Source code in fplx/selection/optimizer.py
def __init__(self, budget: float = 100.0, max_from_team: int = 3):
    # Total purse for the 15-player squad, in FPL millions.
    self.budget = budget
    # Cap on players drawn from a single real-world club.
    self.max_from_team = max_from_team
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Greedy squad + lineup selection.

Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """Greedy squad + lineup selection."""
    # Annotate each player with expected points and a value ratio
    # (EP per price unit; price floored at 0.1 to avoid division by zero).
    for candidate in players:
        ep = expected_points.get(candidate.id, 0.0)
        candidate.expected_points = ep
        candidate._value = ep / max(candidate.price, 0.1)

    # Bucket candidates by position, best value first.
    pools: dict[str, list[Player]] = {"GK": [], "DEF": [], "MID": [], "FWD": []}
    for candidate in players:
        pools[candidate.position].append(candidate)
    for bucket in pools.values():
        bucket.sort(key=lambda p: p._value, reverse=True)

    # Fill the 15-player squad greedily, respecting the budget and the
    # per-club cap.
    quotas = {"GK": 2, "DEF": 5, "MID": 5, "FWD": 3}
    selected_squad: list[Player] = []
    team_counts: dict[str, int] = {}
    remaining = self.budget

    for pos in ("GK", "DEF", "MID", "FWD"):
        taken = 0
        for candidate in pools[pos]:
            if taken >= quotas[pos]:
                break
            if team_counts.get(candidate.team, 0) >= self.max_from_team:
                continue
            if candidate.price > remaining:
                continue
            selected_squad.append(candidate)
            team_counts[candidate.team] = team_counts.get(candidate.team, 0) + 1
            remaining -= candidate.price
            taken += 1

    if len(selected_squad) != 15:
        logger.warning("Greedy only picked %d squad players.", len(selected_squad))
        # Pad if needed (shouldn't happen with 600+ players)
        return self._fallback(selected_squad, expected_points)

    # Select best 11 from the 15
    lineup = self._select_lineup(selected_squad, expected_points, formation)
    return FullSquad(squad_players=selected_squad, lineup=lineup)

OptimizationResult dataclass

OptimizationResult(
    full_squad: FullSquad,
    objective_value: float = 0.0,
    solve_time: float = 0.0,
    lp_objective: Optional[float] = None,
    integrality_gap: Optional[float] = None,
    shadow_prices: dict = dict(),
    binding_constraints: list = list(),
)

Container for optimization outputs including duality analysis.

TwoLevelILPOptimizer

TwoLevelILPOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
)

Bases: BaseOptimizer

Two-level ILP: select 15-player squad then 11-player lineup jointly.

Supports risk-neutral and risk-averse (mean-variance) objectives. Also exposes LP relaxation for shadow price extraction.

PARAMETER DESCRIPTION
budget

Maximum total squad budget (applied to 15 players).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

risk_aversion

Lambda for mean-variance penalty. 0 = risk-neutral.

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/optimizer.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
):
    """
    Configure the two-level ILP optimizer.

    Parameters
    ----------
    budget : float
        Maximum total squad budget (applied to the 15-player squad).
    max_from_team : int
        Maximum players from the same club.
    risk_aversion : float
        Lambda for the mean-variance penalty. 0 = risk-neutral.

    Raises
    ------
    ImportError
        If the optional `pulp` solver package is not installed.
    """
    self.budget = budget
    self.max_from_team = max_from_team
    self.risk_aversion = risk_aversion

    # pulp is an optional dependency; import lazily here so the rest of
    # the package can be used without it installed.
    try:
        import pulp

        self.pulp = pulp
    except ImportError:
        raise ImportError("pulp required for ILP optimization. Install with: pip install pulp")
solve
solve(players, **kwargs)

Solve the optimization problem.

Source code in fplx/selection/optimizer.py
def solve(self, players, **kwargs):
    """Solve the optimization problem.

    Thin alias for optimize(); all keyword arguments are forwarded
    unchanged.
    """
    return self.optimize(players, **kwargs)
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Solve the two-level ILP.

PARAMETER DESCRIPTION
players

Available player pool.

TYPE: list[Player]

expected_points

E[P_i] per player.

TYPE: dict[int, float]

expected_variance

Var[P_i] per player.

TYPE: dict[int, float] DEFAULT: None

downside_risk

Downside spread per player. If provided, risk penalty uses this directly (instead of sqrt(variance)).

TYPE: dict[int, float] DEFAULT: None

formation

Not used (formation is optimized automatically).

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
FullSquad
Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """
    Solve the two-level ILP.

    Parameters
    ----------
    players : list[Player]
        Available player pool.
    expected_points : dict[int, float]
        E[P_i] per player.
    expected_variance : dict[int, float], optional
        Var[P_i] per player.
    downside_risk : dict[int, float], optional
        Downside spread per player. If provided, risk penalty uses this
        directly (instead of sqrt(variance)).
    formation : Optional[str]
        Not used (formation is optimized automatically).

    Returns
    -------
    FullSquad
    """
    import time

    start = time.perf_counter()
    # Build the joint squad (s) / lineup (x) integer program.
    prob, s_vars, x_vars = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=False,
    )
    # CBC with msg=0 suppresses solver output.
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    elapsed = time.perf_counter() - start

    # PuLP status 1 == Optimal; anything else is logged but we still
    # extract whatever (possibly partial) solution the solver holds.
    if prob.status != 1:
        logger.error("ILP solver did not find optimal solution (status=%d).", prob.status)

    # Extract solution: a binary variable is "selected" when its value
    # rounds to 1 (> 0.5 guards against solver tolerance noise).
    squad_players = [p for p in players if s_vars[p.id].varValue and s_vars[p.id].varValue > 0.5]
    lineup_players = [p for p in players if x_vars[p.id].varValue and x_vars[p.id].varValue > 0.5]

    # Determine formation from the lineup's outfield position counts.
    pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
    for p in lineup_players:
        if p.position in pos_counts:
            pos_counts[p.position] += 1
    formation_str = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

    # Captain = highest expected points
    for p in lineup_players:
        p.expected_points = expected_points.get(p.id, 0.0)
    captain = (
        max(lineup_players, key=lambda p: expected_points.get(p.id, 0.0)) if lineup_players else None
    )

    total_ep = sum(expected_points.get(p.id, 0.0) for p in lineup_players)
    lineup_cost = sum(p.price for p in lineup_players)

    lineup = Squad(
        players=lineup_players,
        formation=formation_str,
        total_cost=lineup_cost,
        expected_points=total_ep,
        captain=captain,
    )
    full_squad = FullSquad(squad_players=squad_players, lineup=lineup)

    logger.info("ILP solved in %.3fs. Formation: %s. EP: %.2f", elapsed, formation_str, total_ep)
    return full_squad
solve_lp_relaxation
solve_lp_relaxation(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult

Solve the LP relaxation and extract shadow prices.

RETURNS DESCRIPTION
OptimizationResult

Contains LP objective, shadow prices, binding constraints.

Source code in fplx/selection/optimizer.py
def solve_lp_relaxation(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult:
    """
    Solve the LP relaxation and extract shadow prices.

    Returns
    -------
    OptimizationResult
        Contains LP objective, shadow prices, binding constraints.
    """
    import time

    start = time.perf_counter()
    # Same model as optimize(), but with relax=True the binary
    # variables become continuous, making duals well-defined.
    prob, s_vars, x_vars = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=True,
    )
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    elapsed = time.perf_counter() - start

    lp_obj = self.pulp.value(prob.objective)

    # Extract shadow prices from constraints
    shadow_prices = {}
    binding = []
    for name, constraint in prob.constraints.items():
        slack = constraint.slack
        # PuLP: pi attribute gives the dual value for LP
        dual = constraint.pi if constraint.pi is not None else 0.0
        shadow_prices[name] = {
            "dual_value": dual,
            "slack": slack,
            # A constraint is binding when it holds with (near) equality.
            "binding": abs(slack) < 1e-6,
        }
        if abs(slack) < 1e-6:
            binding.append(name)

    # Also solve ILP to compute integrality gap.
    # NOTE(review): solve_time below covers only the LP solve, not this
    # extra ILP solve -- confirm that is intended.
    full_squad = self.optimize(players, expected_points, expected_variance, downside_risk)
    # NOTE(review): the ILP "objective" here is the lineup's expected
    # points, which may differ from the solver objective when
    # risk_aversion > 0 -- the reported gap would then be approximate.
    ilp_obj = full_squad.lineup.expected_points
    gap = (lp_obj - ilp_obj) / lp_obj if lp_obj > 0 else 0.0

    return OptimizationResult(
        full_squad=full_squad,
        objective_value=ilp_obj,
        solve_time=elapsed,
        lp_objective=lp_obj,
        integrality_gap=gap,
        shadow_prices=shadow_prices,
        binding_constraints=binding,
    )

constraints

Constraints for squad selection.

SquadQuotas

Position quotas for the 15-player FPL squad.

Rules: - 2 GK, 5 DEF, 5 MID, 3 FWD (exactly). - Total = 15 players.

FormationConstraints

Formation constraints for FPL squad.

Rules: - Exactly 11 players - 1 GK - 3-5 DEF - 2-5 MID - 1-3 FWD

validate classmethod
validate(players: list[Player]) -> bool

Check if squad satisfies formation constraints.

PARAMETER DESCRIPTION
players

List of players in squad

TYPE: list[Player]

RETURNS DESCRIPTION
bool

True if valid formation

Source code in fplx/selection/constraints.py
@classmethod
def validate(cls, players: list[Player]) -> bool:
    """
    Check if squad satisfies formation constraints.

    Parameters
    ----------
    players : list[Player]
        List of players in squad

    Returns
    -------
    bool
        True if valid formation
    """
    if len(players) != cls.TOTAL_PLAYERS:
        return False
    # Tally players per position, then check each count against its
    # configured (min, max) band.
    tally = {"GK": 0, "DEF": 0, "MID": 0, "FWD": 0}
    for member in players:
        tally[member.position] += 1
    return all(
        low <= tally[position] <= high
        for position, (low, high) in cls.POSITION_LIMITS.items()
    )
get_valid_formations classmethod
get_valid_formations() -> list[str]

Get list of valid formation strings.

RETURNS DESCRIPTION
List[str]

Valid formations (e.g., "3-4-3", "4-3-3")

Source code in fplx/selection/constraints.py
@classmethod
def get_valid_formations(cls) -> list[str]:
    """
    Get list of valid formation strings.

    Returns
    -------
    List[str]
        Valid formations (e.g., "3-4-3", "4-3-3")
    """
    # Enumerate every DEF/MID/FWD split within the positional bounds;
    # the ten outfield players (keeper excluded) must sum to exactly 10.
    return [
        f"{n_def}-{n_mid}-{n_fwd}"
        for n_def in range(3, 6)
        for n_mid in range(2, 6)
        for n_fwd in range(1, 4)
        if n_def + n_mid + n_fwd == 10
    ]
BudgetConstraint
BudgetConstraint(max_budget: float = 100.0)

Budget constraint for FPL squad (applied to 15-player squad).

Source code in fplx/selection/constraints.py
def __init__(self, max_budget: float = 100.0):
    """Store the maximum total price allowed for the 15-player squad."""
    self.max_budget = max_budget
TeamDiversityConstraint
TeamDiversityConstraint(max_from_team: int = 3)

Max players from same real-world team (default 3).

Source code in fplx/selection/constraints.py
def __init__(self, max_from_team: int = 3):
    """Store the maximum number of players allowed from one real-world club."""
    self.max_from_team = max_from_team

lagrangian

Lagrangian dual decomposition for FPL squad selection.

Relaxes the budget constraint into the objective and solves via subgradient ascent. The inner problem decomposes into per-position sorting problems, each solvable in O(n log n).

This provides: - A dual upper bound on the ILP optimum - A near-optimal primal solution via rounding - Convergence diagnostics for the 18-660 report

LagrangianResult dataclass
LagrangianResult(
    full_squad: Optional[FullSquad] = None,
    primal_objective: float = 0.0,
    dual_bound: float = 0.0,
    duality_gap: float = 0.0,
    n_iterations: int = 0,
    converged: bool = False,
    solve_time: float = 0.0,
    dual_history: list[float] = list(),
    primal_history: list[float] = list(),
    lambda_history: list[float] = list(),
    budget_slack_history: list[float] = list(),
)

Convergence diagnostics for the Lagrangian solver.

LagrangianOptimizer
LagrangianOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
)

Lagrangian relaxation for the FPL squad selection ILP.

Relaxes the budget constraint into the objective:

L(lambda) = max_{x in X} sum_i (mu_i - lambda * c_i) * x_i + lambda * B

where X encodes squad size, position quotas, and team caps. The inner maximization decomposes: for each position, select the top-k players by modified score (mu_i - lambda * c_i).

The dual problem min_{lambda >= 0} L(lambda) is solved with a projected subgradient method (lambda is kept non-negative after each update).

PARAMETER DESCRIPTION
budget

Total budget (default 100.0).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

max_iter

Maximum subgradient iterations.

TYPE: int DEFAULT: 200

tol

Convergence tolerance on duality gap.

TYPE: float DEFAULT: 0.01

risk_aversion

Mean-variance penalty (same as ILP).

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/lagrangian.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
):
    """Store solver settings; see the class docstring for parameter details."""
    self.budget = budget  # total budget B in the relaxed constraint
    self.max_from_team = max_from_team  # per-club cap enforced by the inner problem
    self.max_iter = max_iter  # subgradient iteration limit
    self.tol = tol  # relative duality-gap stopping tolerance
    self.risk_aversion = risk_aversion  # mean-variance penalty weight
solve
solve(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult

Solve via Lagrangian relaxation with subgradient ascent.

PARAMETER DESCRIPTION
players

TYPE: list[Player]

expected_points

TYPE: dict[int, float]

expected_variance

TYPE: dict[int, float] DEFAULT: None

best_known_primal

Best known primal objective (e.g., from ILP). Used for better step size computation.

TYPE: float DEFAULT: None

RETURNS DESCRIPTION
LagrangianResult
Source code in fplx/selection/lagrangian.py
def solve(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult:
    """
    Solve via Lagrangian relaxation with subgradient ascent.

    Parameters
    ----------
    players : list[Player]
    expected_points : dict[int, float]
    expected_variance : dict[int, float], optional
    best_known_primal : float, optional
        Best known primal objective (e.g., from ILP).
        Used for better step size computation.

    Returns
    -------
    LagrangianResult
    """
    start_time = time.perf_counter()

    # Initialize lambda
    lam = 0.5  # initial budget multiplier
    best_dual = np.inf  # dual is minimized, so track the smallest L(lambda) seen
    best_primal = -np.inf  # best budget-feasible objective found so far
    best_squad = None
    best_lineup = None

    # Step size parameters (Polyak-style)
    theta = 2.0
    theta_decay = 0.95
    no_improve_count = 0

    result = LagrangianResult()

    # NOTE(review): requires max_iter >= 1 — `k` is read after the loop
    # and would be unbound if the loop body never runs.
    for k in range(self.max_iter):
        # Compute modified scores
        scores = self._compute_modified_scores(players, expected_points, expected_variance, lam)

        # Solve inner problem
        squad, lineup = self._solve_inner(players, scores)

        # Dual objective: L(lambda) = sum scores*x + lambda*B
        # NOTE(review): the inner value sums modified scores over the 11-man
        # lineup, while the relaxed budget below is the 15-man squad cost —
        # confirm _solve_inner prices these consistently.
        inner_value = sum(scores[p.id] for p in lineup)
        dual_obj = inner_value + lam * self.budget

        # Primal objective (original, without lambda penalty)
        primal_obj = sum(expected_points.get(p.id, 0.0) for p in lineup)
        if self.risk_aversion > 0 and expected_variance:
            for p in lineup:
                primal_obj -= self.risk_aversion * np.sqrt(max(expected_variance.get(p.id, 0.0), 0.0))

        # Budget slack (subgradient)
        squad_cost = sum(p.price for p in squad)
        budget_slack = squad_cost - self.budget  # positive = over budget

        # Track best
        if dual_obj < best_dual:
            best_dual = dual_obj
            no_improve_count = 0
        else:
            no_improve_count += 1

        # Only count as feasible primal if budget satisfied
        if squad_cost <= self.budget + 0.01 and primal_obj > best_primal:
            best_primal = primal_obj
            best_squad = squad
            best_lineup = lineup

        # Record history
        result.dual_history.append(float(dual_obj))
        result.primal_history.append(float(primal_obj))
        result.lambda_history.append(float(lam))
        result.budget_slack_history.append(float(budget_slack))

        # Convergence check
        gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
        if gap < self.tol and best_primal > -np.inf:
            result.converged = True
            break

        # Step size (Polyak with target)
        # NOTE(review): until a feasible primal is found, best_primal is -inf;
        # if best_known_primal is not supplied, the Polyak step below becomes
        # infinite — confirm callers always pass best_known_primal or that a
        # feasible iterate appears before the step is taken.
        target = best_known_primal if best_known_primal else best_primal
        step = 0.0 if abs(budget_slack) < 1e-08 else theta * (dual_obj - target) / budget_slack**2

        # Update lambda
        # Projection onto lambda >= 0 keeps the multiplier dual-feasible.
        lam = max(0.0, lam + step * budget_slack)

        # Decay step size if no improvement
        if no_improve_count >= 5:
            theta *= theta_decay
            no_improve_count = 0

    elapsed = time.perf_counter() - start_time

    # Build FullSquad from best feasible solution
    if best_squad and best_lineup and len(best_squad) == 15 and len(best_lineup) == 11:
        pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
        for p in best_lineup:
            if p.position in pos_counts:
                pos_counts[p.position] += 1
        formation = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

        ep_lineup = sum(expected_points.get(p.id, 0.0) for p in best_lineup)
        captain = max(best_lineup, key=lambda p: expected_points.get(p.id, 0.0))

        lineup_obj = Squad(
            players=best_lineup,
            formation=formation,
            total_cost=sum(p.price for p in best_lineup),
            expected_points=ep_lineup,
            captain=captain,
        )
        result.full_squad = FullSquad(squad_players=best_squad, lineup=lineup_obj)

    result.primal_objective = best_primal
    result.dual_bound = best_dual
    result.duality_gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
    result.n_iterations = k + 1
    result.solve_time = elapsed

    logger.info(
        "Lagrangian: %d iters, primal=%.1f, dual=%.1f, gap=%.2f%%, time=%.3fs",
        result.n_iterations,
        best_primal,
        best_dual,
        result.duality_gap * 100,
        elapsed,
    )

    return result

optimizer

Squad optimization: two-level ILP, mean-variance, LP relaxation.

OptimizationResult dataclass
OptimizationResult(
    full_squad: FullSquad,
    objective_value: float = 0.0,
    solve_time: float = 0.0,
    lp_objective: Optional[float] = None,
    integrality_gap: Optional[float] = None,
    shadow_prices: dict = dict(),
    binding_constraints: list = list(),
)

Container for optimization outputs including duality analysis.

TwoLevelILPOptimizer
TwoLevelILPOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
)

Bases: BaseOptimizer

Two-level ILP: select 15-player squad then 11-player lineup jointly.

Supports risk-neutral and risk-averse (mean-variance) objectives. Also exposes LP relaxation for shadow price extraction.

PARAMETER DESCRIPTION
budget

Maximum total squad budget (applied to 15 players).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

risk_aversion

Lambda for mean-variance penalty. 0 = risk-neutral.

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/optimizer.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
):
    self.budget = budget
    self.max_from_team = max_from_team
    self.risk_aversion = risk_aversion

    try:
        import pulp

        self.pulp = pulp
    except ImportError:
        raise ImportError("pulp required for ILP optimization. Install with: pip install pulp")
solve
solve(players, **kwargs)

Solve the optimization problem.

Source code in fplx/selection/optimizer.py
def solve(self, players, **kwargs):
    """Solve the optimization problem.

    Thin adapter for the BaseOptimizer interface: forwards all arguments
    unchanged to :meth:`optimize`.
    """
    return self.optimize(players, **kwargs)
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Solve the two-level ILP.

PARAMETER DESCRIPTION
players

Available player pool.

TYPE: list[Player]

expected_points

E[P_i] per player.

TYPE: dict[int, float]

expected_variance

Var[P_i] per player.

TYPE: dict[int, float] DEFAULT: None

downside_risk

Downside spread per player. If provided, risk penalty uses this directly (instead of sqrt(variance)).

TYPE: dict[int, float] DEFAULT: None

formation

Not used (formation is optimized automatically).

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
FullSquad
Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """
    Solve the two-level ILP.

    Parameters
    ----------
    players : list[Player]
        Available player pool.
    expected_points : dict[int, float]
        E[P_i] per player.
    expected_variance : dict[int, float], optional
        Var[P_i] per player.
    downside_risk : dict[int, float], optional
        Downside spread per player. If provided, risk penalty uses this
        directly (instead of sqrt(variance)).
    formation : Optional[str]
        Not used (formation is optimized automatically).

    Returns
    -------
    FullSquad
    """
    import time

    start = time.perf_counter()
    prob, s_vars, x_vars = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=False,
    )
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    elapsed = time.perf_counter() - start

    # Compare against the named PuLP status constant rather than the
    # magic number 1 (they are equal, but the name documents intent).
    if prob.status != self.pulp.LpStatusOptimal:
        logger.error("ILP solver did not find optimal solution (status=%d).", prob.status)

    # Extract solution: varValue can be None (unsolved) or carry float
    # noise, so guard with a 0.5 threshold on the binary variables.
    squad_players = [p for p in players if s_vars[p.id].varValue and s_vars[p.id].varValue > 0.5]
    lineup_players = [p for p in players if x_vars[p.id].varValue and x_vars[p.id].varValue > 0.5]

    # Derive the formation string from the lineup's outfield counts.
    pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
    for p in lineup_players:
        if p.position in pos_counts:
            pos_counts[p.position] += 1
    formation_str = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

    # Captain = highest expected points (EP is also annotated onto each
    # lineup player for downstream display).
    for p in lineup_players:
        p.expected_points = expected_points.get(p.id, 0.0)
    captain = (
        max(lineup_players, key=lambda p: expected_points.get(p.id, 0.0)) if lineup_players else None
    )

    total_ep = sum(expected_points.get(p.id, 0.0) for p in lineup_players)
    lineup_cost = sum(p.price for p in lineup_players)

    lineup = Squad(
        players=lineup_players,
        formation=formation_str,
        total_cost=lineup_cost,
        expected_points=total_ep,
        captain=captain,
    )
    full_squad = FullSquad(squad_players=squad_players, lineup=lineup)

    logger.info("ILP solved in %.3fs. Formation: %s. EP: %.2f", elapsed, formation_str, total_ep)
    return full_squad
solve_lp_relaxation
solve_lp_relaxation(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult

Solve the LP relaxation and extract shadow prices.

RETURNS DESCRIPTION
OptimizationResult

Contains LP objective, shadow prices, binding constraints.

Source code in fplx/selection/optimizer.py
def solve_lp_relaxation(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult:
    """
    Solve the LP relaxation and extract shadow prices.

    Returns
    -------
    OptimizationResult
        Contains LP objective, shadow prices, binding constraints.
    """
    import time

    t0 = time.perf_counter()
    prob, _, _ = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=True,
    )
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    lp_time = time.perf_counter() - t0

    lp_obj = self.pulp.value(prob.objective)

    # Shadow prices (constraint.pi) from the relaxed LP; a constraint is
    # binding when its slack is numerically zero.
    shadow_prices = {}
    binding = []
    for name, constraint in prob.constraints.items():
        slack = constraint.slack
        is_binding = abs(slack) < 1e-6
        shadow_prices[name] = {
            "dual_value": constraint.pi if constraint.pi is not None else 0.0,
            "slack": slack,
            "binding": is_binding,
        }
        if is_binding:
            binding.append(name)

    # Re-solve the integer program to measure the integrality gap.
    full_squad = self.optimize(players, expected_points, expected_variance, downside_risk)
    ilp_obj = full_squad.lineup.expected_points
    gap = (lp_obj - ilp_obj) / lp_obj if lp_obj > 0 else 0.0

    return OptimizationResult(
        full_squad=full_squad,
        objective_value=ilp_obj,
        solve_time=lp_time,
        lp_objective=lp_obj,
        integrality_gap=gap,
        shadow_prices=shadow_prices,
        binding_constraints=binding,
    )
GreedyOptimizer
GreedyOptimizer(
    budget: float = 100.0, max_from_team: int = 3
)

Bases: BaseOptimizer

Greedy baseline: select best-value players per position.

Fast heuristic for comparison. Selects 15-player squad, then picks best 11 as lineup.

Source code in fplx/selection/optimizer.py
def __init__(self, budget: float = 100.0, max_from_team: int = 3):
    """Store the squad budget and per-club cap used by the greedy fill."""
    self.budget = budget
    self.max_from_team = max_from_team
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Greedy squad + lineup selection.

Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """Greedy squad + lineup selection (value = expected points / price)."""
    # Annotate every player with EP and a points-per-cost value ratio.
    for player in players:
        pts = expected_points.get(player.id, 0.0)
        player.expected_points = pts
        player._value = pts / max(player.price, 0.1)

    # Bucket players by position, best value first.
    buckets: dict[str, list[Player]] = {"GK": [], "DEF": [], "MID": [], "FWD": []}
    for player in players:
        buckets[player.position].append(player)
    for bucket in buckets.values():
        bucket.sort(key=lambda pl: pl._value, reverse=True)

    # Fill the 15-man squad greedily, respecting the per-position quota,
    # the per-club cap, and the remaining budget.
    quotas = {"GK": 2, "DEF": 5, "MID": 5, "FWD": 3}
    squad: list[Player] = []
    per_team: dict[str, int] = {}
    budget_left = self.budget

    for pos in ("GK", "DEF", "MID", "FWD"):
        taken = 0
        for cand in buckets[pos]:
            if taken >= quotas[pos]:
                break
            if per_team.get(cand.team, 0) >= self.max_from_team:
                continue
            if cand.price > budget_left:
                continue
            squad.append(cand)
            per_team[cand.team] = per_team.get(cand.team, 0) + 1
            budget_left -= cand.price
            taken += 1

    if len(squad) != 15:
        logger.warning("Greedy only picked %d squad players.", len(squad))
        # Pad if needed (shouldn't happen with 600+ players)
        return self._fallback(squad, expected_points)

    # Select best 11 from the 15
    lineup = self._select_lineup(squad, expected_points, formation)
    return FullSquad(squad_players=squad, lineup=lineup)

signals

Signal generation modules for player scoring.

FixtureSignal

FixtureSignal(
    difficulty_ratings: Optional[dict[str, int]] = None,
)

Bases: BaseSignal

Generate signals based on fixture difficulty and schedule.

Initialize with team difficulty ratings.

PARAMETER DESCRIPTION
difficulty_ratings

Team strength ratings (1-5, higher = harder opponent)

TYPE: Optional[dict[str, int]] DEFAULT: None

Source code in fplx/signals/fixtures.py
def __init__(self, difficulty_ratings: Optional[dict[str, int]] = None):
    """
    Initialize with team difficulty ratings.

    Parameters
    ----------
    difficulty_ratings : Optional[dict[str, int]]
        Team strength ratings (1-5, higher = harder opponent)
    """
    # Falsy input (None or {}) normalizes to an empty mapping; lookups on
    # unknown teams later fall back to a neutral rating of 3.
    self.difficulty_ratings = difficulty_ratings or {}
generate_signal
generate_signal(data)

Generate fixture-based signal.

Source code in fplx/signals/fixtures.py
def generate_signal(self, data):
    """Generate fixture-based signal.

    Placeholder implementation: delegates to compute_fixture_advantage
    using the ``team``, ``upcoming_opponents`` and ``is_home`` entries
    of *data*.
    """
    team = data["team"]
    opponents = data["upcoming_opponents"]
    home_flags = data["is_home"]
    return self.compute_fixture_advantage(team, opponents, home_flags)
set_difficulty_ratings
set_difficulty_ratings(ratings: dict[str, int])

Set or update difficulty ratings.

PARAMETER DESCRIPTION
ratings

Team strength ratings

TYPE: Dict[str, int]

Source code in fplx/signals/fixtures.py
def set_difficulty_ratings(self, ratings: dict[str, int]):
    """
    Set or update difficulty ratings.

    Parameters
    ----------
    ratings : Dict[str, int]
        Team strength ratings
    """
    # Wholesale replacement — existing ratings are discarded, not merged.
    self.difficulty_ratings = ratings
compute_fixture_difficulty
compute_fixture_difficulty(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture difficulty score for upcoming games.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Difficulty score (lower = easier fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_difficulty(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture difficulty score for upcoming games.

    Parameters
    ----------
    team : str
        Player's team
    upcoming_opponents : list[str]
        List of upcoming opponent teams
    is_home : list[bool]
        Whether each fixture is home

    Returns
    -------
    float
        Difficulty score (lower = easier fixtures)
    """
    if not upcoming_opponents:
        return 3.0  # neutral when no fixtures are known

    adjusted = []
    for opponent, at_home in zip(upcoming_opponents, is_home):
        # Unknown opponents default to a neutral rating of 3.
        rating = self.difficulty_ratings.get(opponent, 3)

        # Home fixtures are slightly easier, away slightly harder,
        # clamped to the 1-5 rating scale.
        rating = max(1, rating - 0.5) if at_home else min(5, rating + 0.5)
        adjusted.append(rating)

    # Mean difficulty over the (opponent, venue) pairs.
    return sum(adjusted) / len(adjusted)
compute_fixture_advantage
compute_fixture_advantage(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture advantage (inverse of difficulty).

Higher score = easier fixtures = better for player.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Advantage score (0-1, higher = better fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_advantage(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture advantage (inverse of difficulty).

    Higher score = easier fixtures = better for player.

    Parameters
    ----------
    team : str
        Player's team
    upcoming_opponents : list[str]
        List of upcoming opponent teams
    is_home : list[bool]
        Whether each fixture is home

    Returns
    -------
    float
        Advantage score (0-1, higher = better fixtures)
    """
    difficulty = self.compute_fixture_difficulty(team, upcoming_opponents, is_home)

    # Map difficulty 1 (easiest) .. 5 (hardest) onto advantage
    # 1 (best) .. 0 (worst), then clamp to [0, 1].
    raw = (6 - difficulty) / 5
    return max(0, min(1, raw))
compute_fixture_congestion
compute_fixture_congestion(
    fixtures: DataFrame, team: str, days_window: int = 14
) -> float

Compute fixture congestion (number of games in short period).

PARAMETER DESCRIPTION
fixtures

Fixtures dataframe

TYPE: DataFrame

team

Team name

TYPE: str

days_window

Days to look ahead

TYPE: int DEFAULT: 14

RETURNS DESCRIPTION
float

Congestion score (0-1, higher = more congested)

Source code in fplx/signals/fixtures.py
def compute_fixture_congestion(
    self, fixtures: pd.DataFrame, team: str, days_window: int = 14
) -> float:
    """
    Compute fixture congestion (number of games in short period).

    Parameters
    ----------
    fixtures : pd.DataFrame
        Fixtures dataframe
    team : str
        Team name
    days_window : int
        Days to look ahead

    Returns
    -------
    float
        Congestion score (0-1, higher = more congested)
    """
    # Filter fixtures for the team (either the home or the away side).
    team_fixtures = fixtures[
        (fixtures["team_h"] == team) | (fixtures["team_a"] == team)
    ]

    if team_fixtures.empty:
        return 0.0

    # Count fixtures in window
    # NOTE(review): no date filtering happens here — every fixture for the
    # team in the frame is counted, so `days_window` only rescales the
    # normalization. Confirm callers pre-filter `fixtures` to the window.
    num_fixtures = len(team_fixtures)

    # Normalize: 1 game/week = 0, 3+ games/week = 1
    games_per_week = num_fixtures / (days_window / 7)
    congestion = min(1.0, (games_per_week - 1) / 2)

    # Clamp below at zero (may return the int 0 rather than 0.0).
    return max(0, congestion)
batch_compute_advantages
batch_compute_advantages(
    players_teams: dict[str, str],
    fixtures_data: dict[str, tuple],
) -> dict[str, float]

Compute fixture advantages for multiple players.

PARAMETER DESCRIPTION
players_teams

Mapping of player ID to team

TYPE: dict[str, str]

fixtures_data

Mapping of team to (opponents, is_home) tuples

TYPE: dict[str, tuple]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player fixture advantage scores

Source code in fplx/signals/fixtures.py
def batch_compute_advantages(
    self, players_teams: dict[str, str], fixtures_data: dict[str, tuple]
) -> dict[str, float]:
    """
    Compute fixture advantages for multiple players.

    Parameters
    ----------
    players_teams : dict[str, str]
        Mapping of player ID to team
    fixtures_data : dict[str, tuple]
        Mapping of team to (opponents, is_home) tuples

    Returns
    -------
    dict[str, float]
        Dictionary of player fixture advantage scores
    """
    scores: dict[str, float] = {}
    for player_id, team in players_teams.items():
        # Players whose team has no fixture data get a neutral 0.5.
        if team not in fixtures_data:
            scores[player_id] = 0.5
            continue
        opponents, home_flags = fixtures_data[team]
        scores[player_id] = self.compute_fixture_advantage(team, opponents, home_flags)
    return scores

NewsParser

Parse and interpret FPL news text into structured signals.

parse_availability
parse_availability(news_text: str) -> float

Parse availability from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Availability score (0-1)

Source code in fplx/signals/news.py
def parse_availability(self, news_text: str) -> float:
    """
    Parse availability from news text.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    float
        Availability score (0-1)
    """
    # No news is good news: fully available.
    if not news_text or not news_text.strip():
        return 1.0

    text = news_text.lower()

    # Ordered checks: hard absences first, then doubts, then good news.
    if any(re.search(pattern, text) for pattern in self.UNAVAILABLE_PATTERNS):
        return 0.0
    if any(re.search(pattern, text) for pattern in self.DOUBTFUL_PATTERNS):
        return 0.5
    if any(re.search(pattern, text) for pattern in self.POSITIVE_PATTERNS):
        return 0.9

    # Default: assume available if no negative signals
    return 1.0
parse_minutes_risk
parse_minutes_risk(news_text: str) -> float

Parse minutes risk from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Minutes risk score (0-1, higher = more risk)

Source code in fplx/signals/news.py
def parse_minutes_risk(self, news_text: str) -> float:
    """
    Parse minutes risk from news text.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    float
        Minutes risk score (0-1, higher = more risk)
    """
    # No news implies no known rotation or fitness concern.
    if not news_text or not news_text.strip():
        return 0.0

    text = news_text.lower()

    # Rotation talk is the strongest minutes-risk indicator.
    if any(re.search(pattern, text) for pattern in self.ROTATION_PATTERNS):
        return 0.7
    # A fitness doubt implies moderate risk of reduced minutes.
    if any(re.search(pattern, text) for pattern in self.DOUBTFUL_PATTERNS):
        return 0.3

    return 0.0
parse_confidence
parse_confidence(news_text: str) -> float

Estimate confidence in the parsed signal.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Confidence score (0-1)

Source code in fplx/signals/news.py
def parse_confidence(self, news_text: str) -> float:
    """
    Estimate confidence in the parsed signal.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    float
        Confidence score (0-1)
    """
    if not news_text or not news_text.strip():
        return 1.0  # High confidence when no news

    text = news_text.lower()

    # Map clarity cues to confidence tiers, most certain wording first.
    tiers = (
        (("ruled out", "confirmed", "definitely"), 0.9),
        (("likely", "expected", "should"), 0.7),
        (("maybe", "possible", "unclear"), 0.4),
    )
    for cues, score in tiers:
        if any(re.search(cue, text) for cue in cues):
            return score

    return 0.6  # Default medium confidence

NewsSignal

NewsSignal()

Bases: BaseSignal

Generate structured news signals for players.

Source code in fplx/signals/news.py
def __init__(self):
    # Single parser instance reused for every generate_signal call.
    self.parser = NewsParser()
generate_signal
generate_signal(news_text: str) -> dict[str, float]

Generate signal from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
dict[str, float]

Dictionary with availability, minutes_risk, confidence

Source code in fplx/signals/news.py
def generate_signal(self, news_text: str) -> dict[str, float]:
    """Generate signal from news text.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    dict[str, float]
        Dictionary with availability, minutes_risk, confidence
    """
    parser = self.parser
    availability = parser.parse_availability(news_text)
    risk = parser.parse_minutes_risk(news_text)

    return {
        "availability": availability,
        "minutes_risk": risk,
        "confidence": parser.parse_confidence(news_text),
        # Net multiplier applied to expected points downstream.
        "adjustment_factor": availability * (1 - risk),
    }
batch_generate
batch_generate(
    news_dict: dict[str, str],
) -> dict[str, dict[str, float]]

Generate signals for multiple players.

PARAMETER DESCRIPTION
news_dict

Dictionary mapping player ID to news text

TYPE: dict[str, str]

RETURNS DESCRIPTION
dict[str, dict[str, float]]

Dictionary of player signals

Source code in fplx/signals/news.py
def batch_generate(self, news_dict: dict[str, str]) -> dict[str, dict[str, float]]:
    """
    Generate signals for multiple players.

    Parameters
    ----------
    news_dict : dict[str, str]
        Dictionary mapping player ID to news text

    Returns
    -------
    dict[str, dict[str, float]]
        Dictionary of player signals
    """
    # One structured signal per player, keyed identically to the input.
    return {
        player_id: self.generate_signal(news_text)
        for player_id, news_text in news_dict.items()
    }

StatsSignal

StatsSignal(weights: Optional[dict[str, float]] = None)

Generate performance signals from statistical data.

Combines multiple statistical indicators into a unified score.

Initialize with custom weights for different stats.

PARAMETER DESCRIPTION
weights

Weights for different statistics

TYPE: Optional[dict[str, float]] DEFAULT: None

Source code in fplx/signals/stats.py
def __init__(self, weights: Optional[dict[str, float]] = None):
    """
    Initialize with custom weights for different stats.

    Parameters
    ----------
    weights : Optional[dict[str, float]]
        Weights for different statistics; the built-in defaults are
        used when omitted or empty.
    """
    # Default weighting reflects the relative importance of recent
    # form, expected goal involvement, minutes reliability, and trend.
    default_weights = {
        "points_mean": 0.3,
        "xG_mean": 0.15,
        "xA_mean": 0.15,
        "minutes_consistency": 0.2,
        "form_trend": 0.2,
    }
    self.weights = weights if weights else default_weights
compute_signal
compute_signal(player_data: DataFrame) -> float

Compute aggregated signal score from player statistics.

PARAMETER DESCRIPTION
player_data

Player historical data with engineered features

TYPE: DataFrame

RETURNS DESCRIPTION
float

Aggregated signal score (0-100)

Source code in fplx/signals/stats.py
def compute_signal(self, player_data: pd.DataFrame) -> float:
    """
    Compute aggregated signal score from player statistics.

    The score is a weighted sum over the most recent row's engineered
    features (form, expected goal involvement, minutes reliability,
    and trend), floored at zero. Missing feature columns simply
    contribute nothing.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data with engineered features.

    Returns
    -------
    float
        Aggregated signal score (0-100).
    """
    if player_data.empty:
        return 0.0

    # Only the most recent observation feeds the score.
    latest = player_data.iloc[-1]
    weights = self.weights

    components = []

    # Recent points form, weighted directly.
    if "points_rolling_5_mean" in latest:
        components.append(
            latest["points_rolling_5_mean"] * weights["points_mean"]
        )

    # Expected goals, scaled up to roughly match the points range.
    if "xG_rolling_5_mean" in latest:
        components.append(latest["xG_rolling_5_mean"] * 10 * weights["xG_mean"])

    # Expected assists, same scaling as xG.
    if "xA_rolling_5_mean" in latest:
        components.append(latest["xA_rolling_5_mean"] * 10 * weights["xA_mean"])

    # Minutes reliability: more variation -> smaller contribution.
    if "minutes_consistency_5" in latest:
        steadiness = 1.0 / (1.0 + latest["minutes_consistency_5"])
        components.append(steadiness * 10 * weights["minutes_consistency"])

    # Only an upward form trend is rewarded; declines add nothing.
    if "points_trend_5" in latest:
        components.append(
            max(0, latest["points_trend_5"]) * 5 * weights["form_trend"]
        )

    return max(0, sum(components))
batch_compute
batch_compute(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Compute signals for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID/name to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player signals

Source code in fplx/signals/stats.py
def batch_compute(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Compute signals for multiple players.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Dictionary mapping player ID/name to their data.

    Returns
    -------
    dict[str, float]
        Signal score per player ID.
    """
    return {
        player_id: self.compute_signal(frame)
        for player_id, frame in players_data.items()
    }

fixtures

Fixture difficulty signals.

FixtureSignal
FixtureSignal(
    difficulty_ratings: Optional[dict[str, int]] = None,
)

Bases: BaseSignal

Generate signals based on fixture difficulty and schedule.

Initialize with team difficulty ratings.

PARAMETER DESCRIPTION
difficulty_ratings

Team strength ratings (1-5, higher = harder opponent)

TYPE: Optional[dict[str, int]] DEFAULT: None

Source code in fplx/signals/fixtures.py
def __init__(self, difficulty_ratings: Optional[dict[str, int]] = None):
    """
    Initialize with team difficulty ratings.

    Parameters
    ----------
    difficulty_ratings : Optional[dict[str, int]]
        Team strength ratings (1-5, higher = harder opponent); an
        empty mapping is stored when omitted.
    """
    # Unknown teams later fall back to a neutral rating of 3.
    if difficulty_ratings:
        self.difficulty_ratings = difficulty_ratings
    else:
        self.difficulty_ratings = {}
generate_signal
generate_signal(data)

Generate fixture-based signal.

Source code in fplx/signals/fixtures.py
def generate_signal(self, data):
    """Generate fixture-based signal.

    ``data`` must provide ``team``, ``upcoming_opponents`` and
    ``is_home`` entries; the signal is the fixture advantage score.
    """
    # This is a placeholder. The actual implementation would take
    # fixture data and compute a signal.
    team = data["team"]
    opponents = data["upcoming_opponents"]
    home_flags = data["is_home"]
    return self.compute_fixture_advantage(team, opponents, home_flags)
set_difficulty_ratings
set_difficulty_ratings(ratings: dict[str, int])

Set or update difficulty ratings.

PARAMETER DESCRIPTION
ratings

Team strength ratings

TYPE: Dict[str, int]

Source code in fplx/signals/fixtures.py
def set_difficulty_ratings(self, ratings: dict[str, int]):
    """
    Set or update difficulty ratings.

    Replaces any previously stored ratings wholesale (no merging);
    the mapping is stored by reference, not copied.

    Parameters
    ----------
    ratings : dict[str, int]
        Team strength ratings (1-5, higher = harder opponent).
    """
    self.difficulty_ratings = ratings
compute_fixture_difficulty
compute_fixture_difficulty(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture difficulty score for upcoming games.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Difficulty score (lower = easier fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_difficulty(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture difficulty score for upcoming games.

    Each opponent contributes its strength rating (neutral 3 when
    unknown), shifted half a point easier at home and half a point
    harder away (clamped to the 1-5 scale), then averaged.

    Parameters
    ----------
    team : str
        Player's team (not used in the calculation itself).
    upcoming_opponents : list[str]
        List of upcoming opponent teams.
    is_home : list[bool]
        Whether each fixture is home.

    Returns
    -------
    float
        Difficulty score (lower = easier fixtures).
    """
    # No fixtures known: report neutral difficulty.
    if not upcoming_opponents:
        return 3.0

    per_fixture = []
    for opponent, at_home in zip(upcoming_opponents, is_home):
        rating = self.difficulty_ratings.get(opponent, 3)

        # Home fixtures are slightly easier, away slightly harder.
        if at_home:
            rating = max(1, rating - 0.5)
        else:
            rating = min(5, rating + 0.5)

        per_fixture.append(rating)

    return sum(per_fixture) / len(per_fixture)
compute_fixture_advantage
compute_fixture_advantage(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture advantage (inverse of difficulty).

Higher score = easier fixtures = better for player.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Advantage score (0-1, higher = better fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_advantage(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture advantage (inverse of difficulty).

    Higher score = easier fixtures = better for player.

    Parameters
    ----------
    team : str
        Player's team.
    upcoming_opponents : list[str]
        List of upcoming opponent teams.
    is_home : list[bool]
        Whether each fixture is home.

    Returns
    -------
    float
        Advantage score (0-1, higher = better fixtures).
    """
    difficulty = self.compute_fixture_difficulty(team, upcoming_opponents, is_home)

    # Map difficulty 1 (easiest) -> 1.0 down to difficulty 5 -> 0.2,
    # then clamp the result into [0, 1].
    raw_advantage = (6 - difficulty) / 5
    return min(1, max(0, raw_advantage))
compute_fixture_congestion
compute_fixture_congestion(
    fixtures: DataFrame, team: str, days_window: int = 14
) -> float

Compute fixture congestion (number of games in short period).

PARAMETER DESCRIPTION
fixtures

Fixtures dataframe

TYPE: DataFrame

team

Team name

TYPE: str

days_window

Days to look ahead

TYPE: int DEFAULT: 14

RETURNS DESCRIPTION
float

Congestion score (0-1, higher = more congested)

Source code in fplx/signals/fixtures.py
def compute_fixture_congestion(
    self, fixtures: pd.DataFrame, team: str, days_window: int = 14
) -> float:
    """
    Compute fixture congestion (number of games in short period).

    NOTE(review): the fixtures frame is filtered by team only —
    ``days_window`` is used purely as the normalization denominator,
    so callers must pre-filter ``fixtures`` to the window of interest.

    Parameters
    ----------
    fixtures : pd.DataFrame
        Fixtures dataframe with ``team_h`` / ``team_a`` columns.
    team : str
        Team name.
    days_window : int
        Days to look ahead.

    Returns
    -------
    float
        Congestion score (0-1, higher = more congested).
    """
    # Keep only fixtures where the team plays, home or away.
    involves_team = (fixtures["team_h"] == team) | (fixtures["team_a"] == team)
    team_fixtures = fixtures[involves_team]

    if team_fixtures.empty:
        return 0.0

    # Normalize: 1 game/week -> 0, 3+ games/week -> 1.
    weeks = days_window / 7
    games_per_week = len(team_fixtures) / weeks
    congestion = (games_per_week - 1) / 2

    return max(0, min(1.0, congestion))
batch_compute_advantages
batch_compute_advantages(
    players_teams: dict[str, str],
    fixtures_data: dict[str, tuple],
) -> dict[str, float]

Compute fixture advantages for multiple players.

PARAMETER DESCRIPTION
players_teams

Mapping of player ID to team

TYPE: dict[str, str]

fixtures_data

Mapping of team to (opponents, is_home) tuples

TYPE: dict[str, tuple]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player fixture advantage scores

Source code in fplx/signals/fixtures.py
def batch_compute_advantages(
    self, players_teams: dict[str, str], fixtures_data: dict[str, tuple]
) -> dict[str, float]:
    """
    Compute fixture advantages for multiple players.

    Players whose team has no entry in *fixtures_data* receive a
    neutral 0.5.

    Parameters
    ----------
    players_teams : dict[str, str]
        Mapping of player ID to team.
    fixtures_data : dict[str, tuple]
        Mapping of team to (opponents, is_home) tuples.

    Returns
    -------
    dict[str, float]
        Dictionary of player fixture advantage scores.
    """
    advantages = {}

    for player_id, team in players_teams.items():
        if team not in fixtures_data:
            # No schedule known for this team: assume neutral fixtures.
            advantages[player_id] = 0.5
            continue

        opponents, is_home = fixtures_data[team]
        advantages[player_id] = self.compute_fixture_advantage(
            team, opponents, is_home
        )

    return advantages

news

News and injury signal processing.

NewsParser

Parse and interpret FPL news text into structured signals.

parse_availability
parse_availability(news_text: str) -> float

Parse availability from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Availability score (0-1)

Source code in fplx/signals/news.py
def parse_availability(self, news_text: str) -> float:
    """
    Parse availability from news text.

    Pattern groups are checked in priority order: unavailable (0.0),
    doubtful (0.5), positive (0.9). Blank news or no match at all
    means fully available (1.0).

    Parameters
    ----------
    news_text : str
        News text.

    Returns
    -------
    float
        Availability score (0-1).
    """
    # Blank news means no concerns were reported.
    if not news_text or not news_text.strip():
        return 1.0

    text_lower = news_text.lower()

    # Most-severe patterns win; stop at the first matching group.
    graded_patterns = (
        (self.UNAVAILABLE_PATTERNS, 0.0),
        (self.DOUBTFUL_PATTERNS, 0.5),
        (self.POSITIVE_PATTERNS, 0.9),
    )
    for patterns, score in graded_patterns:
        if any(re.search(p, text_lower) for p in patterns):
            return score

    # Default: assume available if no negative signals.
    return 1.0
parse_minutes_risk
parse_minutes_risk(news_text: str) -> float

Parse minutes risk from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Minutes risk score (0-1, higher = more risk)

Source code in fplx/signals/news.py
def parse_minutes_risk(self, news_text: str) -> float:
    """
    Parse minutes risk from news text.

    Rotation warnings score 0.7, doubtful wording 0.3; blank or
    unremarkable news scores 0.0.

    Parameters
    ----------
    news_text : str
        News text.

    Returns
    -------
    float
        Minutes risk score (0-1, higher = more risk).
    """
    if not news_text or not news_text.strip():
        return 0.0

    text_lower = news_text.lower()

    # Rotation threat outranks mere doubtfulness.
    tiers = (
        (self.ROTATION_PATTERNS, 0.7),
        (self.DOUBTFUL_PATTERNS, 0.3),
    )
    for patterns, risk in tiers:
        if any(re.search(p, text_lower) for p in patterns):
            return risk

    return 0.0
parse_confidence
parse_confidence(news_text: str) -> float

Estimate confidence in the parsed signal.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Confidence score (0-1)

Source code in fplx/signals/news.py
def parse_confidence(self, news_text: str) -> float:
    """
    Estimate confidence in the parsed signal.

    Definitive wording scores 0.9, probable wording 0.7, speculative
    wording 0.4; anything else gets a medium 0.6. Absent news is
    high confidence (1.0) that nothing is wrong.

    Parameters
    ----------
    news_text : str
        News text.

    Returns
    -------
    float
        Confidence score (0-1).
    """
    if not news_text or not news_text.strip():
        return 1.0  # High confidence when no news

    text_lower = news_text.lower()

    # Tiers ordered from strongest to weakest wording.
    confidence_tiers = (
        (("ruled out", "confirmed", "definitely"), 0.9),
        (("likely", "expected", "should"), 0.7),
        (("maybe", "possible", "unclear"), 0.4),
    )
    for phrases, score in confidence_tiers:
        if any(re.search(p, text_lower) for p in phrases):
            return score

    return 0.6  # Default medium confidence
NewsSignal
NewsSignal()

Bases: BaseSignal

Generate structured news signals for players.

Source code in fplx/signals/news.py
def __init__(self):
    """Create the signal generator with its own news parser."""
    # Single NewsParser instance reused for every generate_signal call.
    self.parser = NewsParser()
generate_signal
generate_signal(news_text: str) -> dict[str, float]

Generate signal from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
dict[str, float]

Dictionary with availability, minutes_risk, confidence

Source code in fplx/signals/news.py
def generate_signal(self, news_text: str) -> dict[str, float]:
    """Turn a raw news snippet into a structured player signal.

    Parameters
    ----------
    news_text : str
        Raw news text for a single player.

    Returns
    -------
    dict[str, float]
        Keys ``availability``, ``minutes_risk``, ``confidence``, and
        ``adjustment_factor`` (availability discounted by minutes risk).
    """
    parser = self.parser
    signal = {
        "availability": parser.parse_availability(news_text),
        "minutes_risk": parser.parse_minutes_risk(news_text),
        "confidence": parser.parse_confidence(news_text),
    }
    # Combined multiplier applied to expected points downstream.
    signal["adjustment_factor"] = signal["availability"] * (
        1 - signal["minutes_risk"]
    )
    return signal
batch_generate
batch_generate(
    news_dict: dict[str, str],
) -> dict[str, dict[str, float]]

Generate signals for multiple players.

PARAMETER DESCRIPTION
news_dict

Dictionary mapping player ID to news text

TYPE: dict[str, str]

RETURNS DESCRIPTION
dict[str, dict[str, float]]

Dictionary of player signals

Source code in fplx/signals/news.py
def batch_generate(self, news_dict: dict[str, str]) -> dict[str, dict[str, float]]:
    """Generate a news signal for every player in *news_dict*.

    Parameters
    ----------
    news_dict : dict[str, str]
        Maps player ID to that player's news text.

    Returns
    -------
    dict[str, dict[str, float]]
        Per-player signal dictionaries keyed by player ID.
    """
    return {
        player_id: self.generate_signal(text)
        for player_id, text in news_dict.items()
    }

stats

Statistical performance signals.

StatsSignal
StatsSignal(weights: Optional[dict[str, float]] = None)

Generate performance signals from statistical data.

Combines multiple statistical indicators into a unified score.

Initialize with custom weights for different stats.

PARAMETER DESCRIPTION
weights

Weights for different statistics

TYPE: Optional[dict[str, float]] DEFAULT: None

Source code in fplx/signals/stats.py
def __init__(self, weights: Optional[dict[str, float]] = None):
    """
    Initialize with custom weights for different stats.

    Parameters
    ----------
    weights : Optional[dict[str, float]]
        Weights for different statistics; the built-in defaults are
        used when omitted or empty.
    """
    # Default weighting reflects the relative importance of recent
    # form, expected goal involvement, minutes reliability, and trend.
    default_weights = {
        "points_mean": 0.3,
        "xG_mean": 0.15,
        "xA_mean": 0.15,
        "minutes_consistency": 0.2,
        "form_trend": 0.2,
    }
    self.weights = weights if weights else default_weights
compute_signal
compute_signal(player_data: DataFrame) -> float

Compute aggregated signal score from player statistics.

PARAMETER DESCRIPTION
player_data

Player historical data with engineered features

TYPE: DataFrame

RETURNS DESCRIPTION
float

Aggregated signal score (0-100)

Source code in fplx/signals/stats.py
def compute_signal(self, player_data: pd.DataFrame) -> float:
    """
    Compute aggregated signal score from player statistics.

    The score is a weighted sum over the most recent row's engineered
    features (form, expected goal involvement, minutes reliability,
    and trend), floored at zero. Missing feature columns simply
    contribute nothing.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data with engineered features.

    Returns
    -------
    float
        Aggregated signal score (0-100).
    """
    if player_data.empty:
        return 0.0

    # Only the most recent observation feeds the score.
    latest = player_data.iloc[-1]
    weights = self.weights

    components = []

    # Recent points form, weighted directly.
    if "points_rolling_5_mean" in latest:
        components.append(
            latest["points_rolling_5_mean"] * weights["points_mean"]
        )

    # Expected goals, scaled up to roughly match the points range.
    if "xG_rolling_5_mean" in latest:
        components.append(latest["xG_rolling_5_mean"] * 10 * weights["xG_mean"])

    # Expected assists, same scaling as xG.
    if "xA_rolling_5_mean" in latest:
        components.append(latest["xA_rolling_5_mean"] * 10 * weights["xA_mean"])

    # Minutes reliability: more variation -> smaller contribution.
    if "minutes_consistency_5" in latest:
        steadiness = 1.0 / (1.0 + latest["minutes_consistency_5"])
        components.append(steadiness * 10 * weights["minutes_consistency"])

    # Only an upward form trend is rewarded; declines add nothing.
    if "points_trend_5" in latest:
        components.append(
            max(0, latest["points_trend_5"]) * 5 * weights["form_trend"]
        )

    return max(0, sum(components))
batch_compute
batch_compute(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Compute signals for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID/name to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player signals

Source code in fplx/signals/stats.py
def batch_compute(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Compute signals for multiple players.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Dictionary mapping player ID/name to their data.

    Returns
    -------
    dict[str, float]
        Signal score per player ID.
    """
    return {
        player_id: self.compute_signal(frame)
        for player_id, frame in players_data.items()
    }

timeseries

Time-series feature engineering and transformations.

FeatureEngineer

FeatureEngineer(config: Optional[dict] = None)

Feature engineering pipeline for player time-series data.

PARAMETER DESCRIPTION
config

Feature configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/timeseries/features.py
def __init__(self, config: Optional[dict] = None):
    """Build the feature-engineering configuration.

    Parameters
    ----------
    config : Optional[dict]
        Overrides merged on top of ``DEFAULT_CONFIG``; keys supplied
        here win over the class defaults.
    """
    merged = dict(self.DEFAULT_CONFIG)
    merged.update(config or {})
    self.config = merged
fit_transform
fit_transform(df: DataFrame) -> DataFrame

Apply all feature engineering transformations.

PARAMETER DESCRIPTION
df

Input player timeseries data

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

Transformed data with engineered features

Source code in fplx/timeseries/features.py
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply all feature engineering transformations.

    Layers rolling, lag, EWMA, trend, first-difference, and
    consistency features on top of the configured key columns.
    The input frame is never mutated.

    Parameters
    ----------
    df : pd.DataFrame
        Input player timeseries data

    Returns
    -------
    pd.DataFrame
        Transformed data with engineered features
    """
    df = df.copy()

    # Identify available columns and ensure they are numeric
    key_cols = [c for c in self.config["key_columns"] if c in df.columns]

    # Coerce to numeric; unparseable entries become 0 rather than NaN.
    for col in key_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

    # Nothing to engineer: return the (copied) frame unchanged.
    if not key_cols:
        logger.warning("No key columns found for feature engineering")
        return df

    # Apply transformations. Each helper returns a new frame with
    # extra columns appended; the base columns are left untouched.
    logger.info("Adding rolling features...")
    df = add_rolling_features(
        df,
        columns=key_cols,
        windows=self.config["rolling_windows"],
        agg_funcs=["mean", "std"],
    )

    logger.info("Adding lag features...")
    df = add_lag_features(df, columns=key_cols, lags=self.config["lag_periods"])

    logger.info("Adding EWMA features...")
    df = add_ewma_features(df, columns=key_cols, alphas=self.config["ewma_alphas"])

    logger.info("Adding trend features...")
    df = add_trend_features(
        df, columns=key_cols, windows=self.config["trend_windows"]
    )

    logger.info("Adding difference features...")
    df = add_diff_features(df, columns=key_cols, periods=[1])

    # Consistency is always computed over minutes/points, regardless
    # of which key columns were present.
    logger.info("Adding consistency features...")
    df = add_consistency_features(df, columns=["minutes", "points"], window=5)

    return df
get_feature_names
get_feature_names(base_columns: list[str]) -> list[str]

Get list of all generated feature names.

PARAMETER DESCRIPTION
base_columns

Base column names

TYPE: list[str]

RETURNS DESCRIPTION
list[str]

Generated feature names

Source code in fplx/timeseries/features.py
def get_feature_names(self, base_columns: list[str]) -> list[str]:
    """
    Get list of all generated feature names.

    Mirrors the columns produced by ``fit_transform`` for the given
    base columns, plus the fixed consistency features.

    Parameters
    ----------
    base_columns : list[str]
        Base column names.

    Returns
    -------
    list[str]
        Generated feature names.
    """
    cfg = self.config
    features: list[str] = []

    for col in base_columns:
        # Rolling mean/std per window.
        for window in cfg["rolling_windows"]:
            features.append(f"{col}_rolling_{window}_mean")
            features.append(f"{col}_rolling_{window}_std")

        # One lagged copy per configured lag.
        features += [f"{col}_lag_{lag}" for lag in cfg["lag_periods"]]

        # EWMA columns are tagged with alpha as a percentage.
        features += [
            f"{col}_ewma_{int(alpha * 100)}" for alpha in cfg["ewma_alphas"]
        ]

        # Linear-trend slope per window.
        features += [f"{col}_trend_{window}" for window in cfg["trend_windows"]]

        # Single first-difference column.
        features.append(f"{col}_diff_1")

    # Fixed consistency features, added unconditionally.
    features += ["minutes_consistency_5", "points_consistency_5"]

    return features
create_future_features
create_future_features(
    df: DataFrame, horizon: int
) -> DataFrame

Create features for future predictions.

This method extends the historical data by horizon periods, applies the full feature engineering pipeline, and returns the newly created future feature set.

PARAMETER DESCRIPTION
df

Historical data

TYPE: DataFrame

horizon

Number of future gameweeks to predict

TYPE: int

RETURNS DESCRIPTION
DataFrame

DataFrame with features for future gameweeks

Source code in fplx/timeseries/features.py
def create_future_features(self, df: pd.DataFrame, horizon: int) -> pd.DataFrame:
    """
    Create features for future predictions.

    This method extends the historical data by `horizon` periods,
    applies the full feature engineering pipeline, and returns
    the newly created future feature set. Future rows are naive
    copies of the last observed row, so engineered features for them
    reflect a "no change" assumption.

    Parameters
    ----------
    df : pd.DataFrame
        Historical data
    horizon : int
        Number of future gameweeks to predict

    Returns
    -------
    pd.DataFrame
        DataFrame with features for future gameweeks
    """
    if df.empty:
        return pd.DataFrame()

    # Create future placeholders by repeating the last known data point
    last_row = df.iloc[-1:].copy()

    # Avoid duplicating index if it's a timestamp or gameweek
    is_numeric_index = pd.api.types.is_numeric_dtype(df.index)
    if isinstance(df.index, pd.DatetimeIndex) or is_numeric_index:
        last_index = df.index[-1]
        # NOTE(review): for a DatetimeIndex this adds an integer to a
        # Timestamp, which modern pandas rejects — confirm this branch
        # is ever taken with datetime-indexed frames.
        future_index = pd.RangeIndex(
            start=last_index + 1, stop=last_index + 1 + horizon
        )
        last_row.index = [future_index[0]]  # Temporarily align for concat
    else:
        # Fall back to positional indexing for non-numeric indexes.
        future_index = pd.RangeIndex(start=len(df), stop=len(df) + horizon)

    # ignore_index=True here; the intended index is re-applied below.
    future_rows = pd.concat([last_row] * horizon, ignore_index=True)
    if isinstance(df.index, pd.DatetimeIndex) or is_numeric_index:
        future_rows.index = future_index

    # Combine historical and future data
    combined_df = pd.concat([df, future_rows])

    # Run the full feature engineering pipeline on the combined data
    # This ensures that rolling/lag features are calculated correctly
    # based on the historical context.
    engineered_df = self.fit_transform(combined_df)

    # Return only the future part
    return engineered_df.tail(horizon)

add_ewma_features

add_ewma_features(
    df: DataFrame,
    columns: list[str],
    alphas: list[float] = [0.3, 0.5, 0.7],
) -> DataFrame

Add exponentially weighted moving average features.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute EWMA for

TYPE: list[str]

alphas

Smoothing factors (0 < alpha < 1)

TYPE: list[float] DEFAULT: [0.3, 0.5, 0.7]

RETURNS DESCRIPTION
DataFrame

DataFrame with EWMA features

Source code in fplx/timeseries/transforms.py
def add_ewma_features(
    df: pd.DataFrame,
    columns: list[str],
    alphas: tuple[float, ...] = (0.3, 0.5, 0.7),
) -> pd.DataFrame:
    """
    Add exponentially weighted moving average features.

    Creates one ``{col}_ewma_{int(alpha*100)}`` column per requested
    column/alpha pair; columns absent from *df* are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list[str]
        Columns to compute EWMA for.
    alphas : tuple[float, ...]
        Smoothing factors (0 < alpha < 1). Default is immutable to
        avoid the shared-mutable-default-argument pitfall.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with EWMA features appended.
    """
    df = df.copy()  # never mutate the caller's frame

    for col in columns:
        if col not in df.columns:
            continue

        for alpha in alphas:
            feature_name = f"{col}_ewma_{int(alpha * 100)}"
            # adjust=False gives the standard recursive EWMA form.
            df[feature_name] = df[col].ewm(alpha=alpha, adjust=False).mean()

    return df

add_lag_features

add_lag_features(
    df: DataFrame,
    columns: list[str],
    lags: list[int] = [1, 2, 3, 7],
) -> DataFrame

Add lagged features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to create lags for

TYPE: list[str]

lags

Lag periods

TYPE: list[int] DEFAULT: [1, 2, 3, 7]

RETURNS DESCRIPTION
DataFrame

DataFrame with lagged features

Source code in fplx/timeseries/transforms.py
def add_lag_features(
    df: pd.DataFrame,
    columns: list[str],
    lags: tuple[int, ...] = (1, 2, 3, 7),
) -> pd.DataFrame:
    """
    Add lagged features to dataframe.

    Creates one ``{col}_lag_{lag}`` column per requested column/lag
    pair; the first *lag* rows of each new column are NaN. Columns
    absent from *df* are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list[str]
        Columns to create lags for.
    lags : tuple[int, ...]
        Lag periods. Default is immutable to avoid the
        shared-mutable-default-argument pitfall.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with lagged features appended.
    """
    df = df.copy()  # never mutate the caller's frame

    for col in columns:
        if col not in df.columns:
            continue

        for lag in lags:
            feature_name = f"{col}_lag_{lag}"
            df[feature_name] = df[col].shift(lag)

    return df

add_rolling_features

add_rolling_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [3, 5, 10],
    agg_funcs: list[str] = ["mean", "std"],
    min_periods: int = 1,
) -> DataFrame

Add rolling window features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe with time-series data

TYPE: DataFrame

columns

Columns to compute rolling features for

TYPE: list[str]

windows

Window sizes for rolling computation

TYPE: list[int] DEFAULT: [3, 5, 10]

agg_funcs

Aggregation functions ('mean', 'std', 'min', 'max', 'sum')

TYPE: list[str] DEFAULT: ['mean', 'std']

min_periods

Minimum observations in window

TYPE: int DEFAULT: 1

RETURNS DESCRIPTION
DataFrame

DataFrame with added rolling features

Source code in fplx/timeseries/transforms.py
def add_rolling_features(
    df: pd.DataFrame,
    columns: list[str],
    windows: tuple[int, ...] = (3, 5, 10),
    agg_funcs: tuple[str, ...] = ("mean", "std"),
    min_periods: int = 1,
) -> pd.DataFrame:
    """
    Add rolling window features to dataframe.

    Creates one ``{col}_rolling_{window}_{func}`` column per
    column/window/aggregation combination; columns absent from *df*
    are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe with time-series data.
    columns : list[str]
        Columns to compute rolling features for.
    windows : tuple[int, ...]
        Window sizes for rolling computation. Default is immutable to
        avoid the shared-mutable-default-argument pitfall.
    agg_funcs : tuple[str, ...]
        Aggregation functions ('mean', 'std', 'min', 'max', 'sum').
    min_periods : int
        Minimum observations in window.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with added rolling features.
    """
    df = df.copy()  # never mutate the caller's frame

    for col in columns:
        if col not in df.columns:
            continue

        for window in windows:
            for func in agg_funcs:
                feature_name = f"{col}_rolling_{window}_{func}"
                df[feature_name] = (
                    df[col].rolling(window=window, min_periods=min_periods).agg(func)
                )

    return df

add_trend_features

add_trend_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [5, 10],
) -> DataFrame

Add trend features (slope) using linear regression.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute trends for

TYPE: list[str]

windows

Window sizes for trend calculation

TYPE: list[int] DEFAULT: [5, 10]

RETURNS DESCRIPTION
DataFrame

DataFrame with trend features

Source code in fplx/timeseries/transforms.py
def add_trend_features(
    df: pd.DataFrame,
    columns: list[str],
    windows: tuple[int, ...] = (5, 10),
) -> pd.DataFrame:
    """
    Add trend features (slope) using linear regression.

    For each column/window pair, fits a least-squares line over the
    rolling window and stores its slope as ``{col}_trend_{window}``.
    Windows with fewer than two finite points yield NaN.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list[str]
        Columns to compute trends for.
    windows : tuple[int, ...]
        Window sizes for trend calculation. Default is immutable to
        avoid the shared-mutable-default-argument pitfall.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with trend features appended.
    """
    df = df.copy()  # never mutate the caller's frame

    def calculate_slope(series):
        """Calculate slope of linear fit over the window, ignoring NaNs."""
        if len(series) < 2 or series.isna().all():
            return np.nan
        x = np.arange(len(series))
        y = series.values
        mask = ~np.isnan(y)
        # A line needs at least two finite points.
        if mask.sum() < 2:
            return np.nan
        slope = np.polyfit(x[mask], y[mask], 1)[0]
        return slope

    for col in columns:
        if col not in df.columns:
            continue

        for window in windows:
            feature_name = f"{col}_trend_{window}"
            df[feature_name] = (
                df[col]
                .rolling(window=window, min_periods=2)
                .apply(calculate_slope, raw=False)
            )

    return df

features

Feature engineering pipeline for FPL time-series data.

FeatureEngineer
FeatureEngineer(config: Optional[dict] = None)

Feature engineering pipeline for player time-series data.

PARAMETER DESCRIPTION
config

Feature configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/timeseries/features.py
def __init__(self, config: Optional[dict] = None):
    """Build the feature-engineering configuration.

    Parameters
    ----------
    config : Optional[dict]
        Overrides merged on top of ``DEFAULT_CONFIG``; keys supplied
        here win over the class defaults.
    """
    merged = dict(self.DEFAULT_CONFIG)
    merged.update(config or {})
    self.config = merged
fit_transform
fit_transform(df: DataFrame) -> DataFrame

Apply all feature engineering transformations.

PARAMETER DESCRIPTION
df

Input player timeseries data

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

Transformed data with engineered features

Source code in fplx/timeseries/features.py
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply all feature engineering transformations.

    Rolling, lag, EWMA, trend, difference, and consistency features are
    appended in sequence.  Configured key columns are coerced to numeric
    (unparseable values become 0) before any transformation runs.

    Parameters
    ----------
    df : pd.DataFrame
        Input player timeseries data

    Returns
    -------
    pd.DataFrame
        Transformed data with engineered features
    """
    df = df.copy()

    # Restrict the pipeline to configured columns that actually exist.
    key_cols = [c for c in self.config["key_columns"] if c in df.columns]
    if not key_cols:
        logger.warning("No key columns found for feature engineering")
        return df

    # Force numeric dtype so downstream rolling/EWMA math cannot fail.
    for col in key_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

    logger.info("Adding rolling features...")
    df = add_rolling_features(
        df,
        columns=key_cols,
        windows=self.config["rolling_windows"],
        agg_funcs=["mean", "std"],
    )

    logger.info("Adding lag features...")
    df = add_lag_features(df, columns=key_cols, lags=self.config["lag_periods"])

    logger.info("Adding EWMA features...")
    df = add_ewma_features(df, columns=key_cols, alphas=self.config["ewma_alphas"])

    logger.info("Adding trend features...")
    df = add_trend_features(
        df, columns=key_cols, windows=self.config["trend_windows"]
    )

    logger.info("Adding difference features...")
    df = add_diff_features(df, columns=key_cols, periods=[1])

    logger.info("Adding consistency features...")
    df = add_consistency_features(df, columns=["minutes", "points"], window=5)

    return df
get_feature_names
get_feature_names(base_columns: list[str]) -> list[str]

Get list of all generated feature names.

PARAMETER DESCRIPTION
base_columns

Base column names

TYPE: list[str]

RETURNS DESCRIPTION
list[str]

Generated feature names

Source code in fplx/timeseries/features.py
def get_feature_names(self, base_columns: list[str]) -> list[str]:
    """
    Enumerate every feature name the pipeline would generate.

    Parameters
    ----------
    base_columns : list[str]
        Base column names

    Returns
    -------
    list[str]
        Generated feature names, in pipeline order (rolling, lag, EWMA,
        trend, diff per column; consistency features last).
    """
    names: list[str] = []

    for col in base_columns:
        # Rolling aggregates (mean + std per window).
        for window in self.config["rolling_windows"]:
            names.append(f"{col}_rolling_{window}_mean")
            names.append(f"{col}_rolling_{window}_std")

        names.extend(f"{col}_lag_{lag}" for lag in self.config["lag_periods"])
        names.extend(
            f"{col}_ewma_{int(alpha * 100)}" for alpha in self.config["ewma_alphas"]
        )
        names.extend(f"{col}_trend_{window}" for window in self.config["trend_windows"])
        names.append(f"{col}_diff_1")

    # Consistency features are fixed, regardless of base_columns.
    names.extend(["minutes_consistency_5", "points_consistency_5"])

    return names
create_future_features
create_future_features(
    df: DataFrame, horizon: int
) -> DataFrame

Create features for future predictions.

This method extends the historical data by horizon periods, applies the full feature engineering pipeline, and returns the newly created future feature set.

PARAMETER DESCRIPTION
df

Historical data

TYPE: DataFrame

horizon

Number of future gameweeks to predict

TYPE: int

RETURNS DESCRIPTION
DataFrame

DataFrame with features for future gameweeks

Source code in fplx/timeseries/features.py
def create_future_features(self, df: pd.DataFrame, horizon: int) -> pd.DataFrame:
    """
    Create features for future predictions.

    This method extends the historical data by `horizon` rows (each a
    copy of the last observation), applies the full feature engineering
    pipeline to the combined frame so rolling/lag features see the real
    history, and returns only the newly created future rows.

    Parameters
    ----------
    df : pd.DataFrame
        Historical data
    horizon : int
        Number of future gameweeks to predict

    Returns
    -------
    pd.DataFrame
        DataFrame with features for future gameweeks
    """
    if df.empty:
        return pd.DataFrame()

    # Create future placeholders by repeating the last known data point.
    last_row = df.iloc[-1:].copy()

    # Build an index for the future rows that continues the existing one.
    # BUG FIX: the old code routed DatetimeIndex through the numeric
    # branch, where `Timestamp + 1` and `pd.RangeIndex(start=Timestamp)`
    # raise TypeError.  Datetime indexes are now extended via their
    # (inferred) frequency when available, otherwise positionally.
    # BUG FIX: the old code also computed `future_index` in the fallback
    # branch but never assigned it, leaving future rows indexed 0..h-1.
    if isinstance(df.index, pd.DatetimeIndex):
        freq = df.index.freq or pd.infer_freq(df.index)
        if freq is not None:
            future_index = pd.date_range(
                df.index[-1], periods=horizon + 1, freq=freq
            )[1:]
        else:
            future_index = pd.RangeIndex(start=len(df), stop=len(df) + horizon)
    elif pd.api.types.is_numeric_dtype(df.index):
        last_index = df.index[-1]
        future_index = pd.RangeIndex(
            start=last_index + 1, stop=last_index + 1 + horizon
        )
    else:
        future_index = pd.RangeIndex(start=len(df), stop=len(df) + horizon)

    future_rows = pd.concat([last_row] * horizon, ignore_index=True)
    future_rows.index = future_index

    # Combine historical and future data.
    combined_df = pd.concat([df, future_rows])

    # Run the full feature engineering pipeline on the combined data
    # so rolling/lag features are calculated with historical context.
    engineered_df = self.fit_transform(combined_df)

    # Return only the future part.
    return engineered_df.tail(horizon)

transforms

Time-series transformations for FPL data.

add_rolling_features
add_rolling_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [3, 5, 10],
    agg_funcs: list[str] = ["mean", "std"],
    min_periods: int = 1,
) -> DataFrame

Add rolling window features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe with time-series data

TYPE: DataFrame

columns

Columns to compute rolling features for

TYPE: list[str]

windows

Window sizes for rolling computation

TYPE: list[int] DEFAULT: [3, 5, 10]

agg_funcs

Aggregation functions ('mean', 'std', 'min', 'max', 'sum')

TYPE: list[str] DEFAULT: ['mean', 'std']

min_periods

Minimum observations in window

TYPE: int DEFAULT: 1

RETURNS DESCRIPTION
DataFrame

DataFrame with added rolling features

Source code in fplx/timeseries/transforms.py
def add_rolling_features(
    df: pd.DataFrame,
    columns: list[str],
    windows: list[int] = [3, 5, 10],
    agg_funcs: list[str] = ["mean", "std"],
    min_periods: int = 1,
) -> pd.DataFrame:
    """
    Append rolling-window aggregate columns to a dataframe.

    Each generated column is named ``{col}_rolling_{window}_{func}``.
    Requested columns absent from ``df`` are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe with time-series data
    columns : list[str]
        Columns to compute rolling features for
    windows : list[int]
        Window sizes for rolling computation
    agg_funcs : list[str]
        Aggregation functions ('mean', 'std', 'min', 'max', 'sum')
    min_periods : int
        Minimum observations in window

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the rolling feature columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for window in windows:
            # One rolling object per (column, window); reused per aggregate.
            roller = out[col].rolling(window=window, min_periods=min_periods)
            for func in agg_funcs:
                out[f"{col}_rolling_{window}_{func}"] = roller.agg(func)

    return out
add_lag_features
add_lag_features(
    df: DataFrame,
    columns: list[str],
    lags: list[int] = [1, 2, 3, 7],
) -> DataFrame

Add lagged features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to create lags for

TYPE: list[str]

lags

Lag periods

TYPE: list[int] DEFAULT: [1, 2, 3, 7]

RETURNS DESCRIPTION
DataFrame

DataFrame with lagged features

Source code in fplx/timeseries/transforms.py
def add_lag_features(
    df: pd.DataFrame, columns: list[str], lags: list[int] = [1, 2, 3, 7]
) -> pd.DataFrame:
    """
    Append lagged copies of the given columns.

    Each generated column is named ``{col}_lag_{lag}``; the first ``lag``
    rows of a lagged column are NaN.  Requested columns absent from
    ``df`` are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to create lags for
    lags : list[int]
        Lag periods

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the lag columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for lag in lags:
            out[f"{col}_lag_{lag}"] = out[col].shift(lag)

    return out
add_ewma_features
add_ewma_features(
    df: DataFrame,
    columns: list[str],
    alphas: list[float] = [0.3, 0.5, 0.7],
) -> DataFrame

Add exponentially weighted moving average features.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute EWMA for

TYPE: list[str]

alphas

Smoothing factors (0 < alpha < 1)

TYPE: list[float] DEFAULT: [0.3, 0.5, 0.7]

RETURNS DESCRIPTION
DataFrame

DataFrame with EWMA features

Source code in fplx/timeseries/transforms.py
def add_ewma_features(
    df: pd.DataFrame, columns: list[str], alphas: list[float] = [0.3, 0.5, 0.7]
) -> pd.DataFrame:
    """
    Append exponentially weighted moving average columns.

    Each generated column is named ``{col}_ewma_{alpha*100}`` (alpha
    encoded as an integer percentage) and uses ``adjust=False``
    (recursive EWMA).  Requested columns absent from ``df`` are skipped.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to compute EWMA for
    alphas : list[float]
        Smoothing factors (0 < alpha < 1)

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the EWMA columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for alpha in alphas:
            out[f"{col}_ewma_{int(alpha * 100)}"] = (
                out[col].ewm(alpha=alpha, adjust=False).mean()
            )

    return out
add_trend_features
add_trend_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [5, 10],
) -> DataFrame

Add trend features (slope) using linear regression.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute trends for

TYPE: list[str]

windows

Window sizes for trend calculation

TYPE: list[int] DEFAULT: [5, 10]

RETURNS DESCRIPTION
DataFrame

DataFrame with trend features

Source code in fplx/timeseries/transforms.py
def add_trend_features(
    df: pd.DataFrame, columns: list[str], windows: list[int] = [5, 10]
) -> pd.DataFrame:
    """
    Add linear-trend (slope) features over rolling windows.

    For each requested column and window size, a first-degree polynomial
    is fitted to the windowed values and its slope is stored in a new
    column named ``{col}_trend_{window}``.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to compute trends for
    windows : list[int]
        Window sizes for trend calculation

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the trend columns appended
    """
    out = df.copy()

    def _window_slope(window_values):
        """Slope of the least-squares line through the window; NaN if <2 valid points."""
        if len(window_values) < 2 or window_values.isna().all():
            return np.nan
        y = window_values.values
        valid = ~np.isnan(y)
        if valid.sum() < 2:
            return np.nan
        x = np.arange(len(window_values))
        return np.polyfit(x[valid], y[valid], 1)[0]

    # Silently skip requested columns that are absent from the frame.
    for col in (c for c in columns if c in out.columns):
        for window in windows:
            out[f"{col}_trend_{window}"] = (
                out[col]
                .rolling(window=window, min_periods=2)
                .apply(_window_slope, raw=False)
            )

    return out
add_diff_features
add_diff_features(
    df: DataFrame,
    columns: list[str],
    periods: list[int] = [1, 2],
) -> DataFrame

Add difference features (current - previous).

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute differences for

TYPE: list[str]

periods

Difference periods

TYPE: list[int] DEFAULT: [1, 2]

RETURNS DESCRIPTION
DataFrame

DataFrame with difference features

Source code in fplx/timeseries/transforms.py
def add_diff_features(
    df: pd.DataFrame, columns: list[str], periods: list[int] = [1, 2]
) -> pd.DataFrame:
    """
    Append difference columns (current value minus a previous value).

    Each generated column is named ``{col}_diff_{period}``; the first
    ``period`` rows are NaN.  Requested columns absent from ``df`` are
    skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to compute differences for
    periods : list[int]
        Difference periods

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the difference columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for period in periods:
            out[f"{col}_diff_{period}"] = out[col].diff(periods=period)

    return out
add_consistency_features
add_consistency_features(
    df: DataFrame, columns: list[str], window: int = 5
) -> DataFrame

Add consistency measures (coefficient of variation).

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to measure consistency for

TYPE: list[str]

window

Window size

TYPE: int DEFAULT: 5

RETURNS DESCRIPTION
DataFrame

DataFrame with consistency features

Source code in fplx/timeseries/transforms.py
def add_consistency_features(
    df: pd.DataFrame, columns: list[str], window: int = 5
) -> pd.DataFrame:
    """
    Append rolling coefficient-of-variation columns (lower = more consistent).

    Each generated column is named ``{col}_consistency_{window}`` and
    holds rolling std divided by rolling mean.  Requested columns absent
    from ``df`` are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to measure consistency for
    window : int
        Window size

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the consistency columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        windowed = out[col].rolling(window=window, min_periods=1)
        # Epsilon keeps the ratio finite when the rolling mean is zero.
        out[f"{col}_consistency_{window}"] = windowed.std() / (
            windowed.mean() + 1e-6
        )

    return out

utils

Utility modules.

Config

Config(config: Optional[dict] = None)

Configuration manager for FPLX.

PARAMETER DESCRIPTION
config

Configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/utils/config.py
def __init__(self, config: Optional[dict] = None):
    """Initialize from DEFAULT_CONFIG, then merge any user overrides via _update_nested."""
    # Shallow-copy the class-level defaults so instances never mutate
    # DEFAULT_CONFIG itself.
    self.config = {**self.DEFAULT_CONFIG}
    if config:
        self._update_nested(self.config, config)
get
get(key: str, default: Any = None) -> Any

Get configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

default

Default value if key not found

TYPE: Any DEFAULT: None

RETURNS DESCRIPTION
Any

Configuration value

Source code in fplx/utils/config.py
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up a configuration value by (possibly dotted) key.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` walks nested dictionaries.
    default : Any
        Value returned when the key path does not resolve.

    Returns
    -------
    Any
        The configured value, or ``default`` if any path segment is
        missing or a non-dict is encountered mid-path.
    """
    node = self.config

    for part in key.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]

    return node
set
set(key: str, value: Any)

Set configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

value

Value to set

TYPE: Any

Source code in fplx/utils/config.py
def set(self, key: str, value: Any):
    """
    Store a configuration value, creating intermediate dicts as needed.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` writes into nested dictionaries.
    value : Any
        Value to set
    """
    *parents, leaf = key.split(".")
    node = self.config

    # Walk/create the nested dictionaries down to the leaf's parent.
    for part in parents:
        node = node.setdefault(part, {})

    node[leaf] = value
load_from_file
load_from_file(filepath: Path)

Load configuration from JSON file.

PARAMETER DESCRIPTION
filepath

Path to configuration file

TYPE: Path

Source code in fplx/utils/config.py
def load_from_file(self, filepath: Path):
    """
    Load configuration from JSON file.

    Loaded values are merged into the current configuration via
    ``_update_nested`` rather than replacing it wholesale.

    Parameters
    ----------
    filepath : Path
        Path to configuration file
    """
    with open(filepath) as f:
        file_config = json.load(f)

    self._update_nested(self.config, file_config)
save_to_file
save_to_file(filepath: Path)

Save configuration to JSON file.

PARAMETER DESCRIPTION
filepath

Path to save configuration

TYPE: Path

Source code in fplx/utils/config.py
def save_to_file(self, filepath: Path):
    """
    Write the current configuration to ``filepath`` as pretty-printed JSON.

    Parameters
    ----------
    filepath : Path
        Path to save configuration
    """
    with open(filepath, "w") as out:
        json.dump(self.config, out, indent=2)
to_dict
to_dict() -> dict

Get configuration as dictionary.

RETURNS DESCRIPTION
Dict

Configuration dictionary

Source code in fplx/utils/config.py
def to_dict(self) -> dict:
    """
    Return a shallow copy of the configuration dictionary.

    Returns
    -------
    Dict
        Copy of the configuration; top-level mutations do not affect
        this Config, but nested dict values are still shared.
    """
    return dict(self.config)

validate_data

validate_data(
    df: DataFrame, required_columns: list[str]
) -> bool

Validate that dataframe has required columns.

PARAMETER DESCRIPTION
df

Dataframe to validate

TYPE: DataFrame

required_columns

Required column names

TYPE: list[str]

RETURNS DESCRIPTION
bool

True if valid

Source code in fplx/utils/validation.py
def validate_data(df: pd.DataFrame, required_columns: list[str]) -> bool:
    """
    Validate that dataframe has required columns.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe to validate
    required_columns : list[str]
        Required column names

    Returns
    -------
    bool
        True if every required column is present; otherwise the missing
        set is logged as an error and False is returned.
    """
    missing = set(required_columns).difference(df.columns)

    if not missing:
        return True

    logger.error(f"Missing required columns: {missing}")
    return False

config

Configuration management.

Config
Config(config: Optional[dict] = None)

Configuration manager for FPLX.

PARAMETER DESCRIPTION
config

Configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/utils/config.py
def __init__(self, config: Optional[dict] = None):
    """Initialize from DEFAULT_CONFIG, then merge any user overrides via _update_nested."""
    # Shallow-copy the class-level defaults so instances never mutate
    # DEFAULT_CONFIG itself.
    self.config = {**self.DEFAULT_CONFIG}
    if config:
        self._update_nested(self.config, config)
get
get(key: str, default: Any = None) -> Any

Get configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

default

Default value if key not found

TYPE: Any DEFAULT: None

RETURNS DESCRIPTION
Any

Configuration value

Source code in fplx/utils/config.py
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up a configuration value by (possibly dotted) key.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` walks nested dictionaries.
    default : Any
        Value returned when the key path does not resolve.

    Returns
    -------
    Any
        The configured value, or ``default`` if any path segment is
        missing or a non-dict is encountered mid-path.
    """
    node = self.config

    for part in key.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]

    return node
set
set(key: str, value: Any)

Set configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

value

Value to set

TYPE: Any

Source code in fplx/utils/config.py
def set(self, key: str, value: Any):
    """
    Store a configuration value, creating intermediate dicts as needed.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` writes into nested dictionaries.
    value : Any
        Value to set
    """
    *parents, leaf = key.split(".")
    node = self.config

    # Walk/create the nested dictionaries down to the leaf's parent.
    for part in parents:
        node = node.setdefault(part, {})

    node[leaf] = value
load_from_file
load_from_file(filepath: Path)

Load configuration from JSON file.

PARAMETER DESCRIPTION
filepath

Path to configuration file

TYPE: Path

Source code in fplx/utils/config.py
def load_from_file(self, filepath: Path):
    """
    Load configuration from JSON file.

    Loaded values are merged into the current configuration via
    ``_update_nested`` rather than replacing it wholesale.

    Parameters
    ----------
    filepath : Path
        Path to configuration file
    """
    with open(filepath) as f:
        file_config = json.load(f)

    self._update_nested(self.config, file_config)
save_to_file
save_to_file(filepath: Path)

Save configuration to JSON file.

PARAMETER DESCRIPTION
filepath

Path to save configuration

TYPE: Path

Source code in fplx/utils/config.py
def save_to_file(self, filepath: Path):
    """
    Write the current configuration to ``filepath`` as pretty-printed JSON.

    Parameters
    ----------
    filepath : Path
        Path to save configuration
    """
    with open(filepath, "w") as out:
        json.dump(self.config, out, indent=2)
to_dict
to_dict() -> dict

Get configuration as dictionary.

RETURNS DESCRIPTION
Dict

Configuration dictionary

Source code in fplx/utils/config.py
def to_dict(self) -> dict:
    """
    Return a shallow copy of the configuration dictionary.

    Returns
    -------
    Dict
        Copy of the configuration; top-level mutations do not affect
        this Config, but nested dict values are still shared.
    """
    return dict(self.config)

validation

Data validation utilities.

validate_data
validate_data(
    df: DataFrame, required_columns: list[str]
) -> bool

Validate that dataframe has required columns.

PARAMETER DESCRIPTION
df

Dataframe to validate

TYPE: DataFrame

required_columns

Required column names

TYPE: list[str]

RETURNS DESCRIPTION
bool

True if valid

Source code in fplx/utils/validation.py
def validate_data(df: pd.DataFrame, required_columns: list[str]) -> bool:
    """
    Validate that dataframe has required columns.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe to validate
    required_columns : list[str]
        Required column names

    Returns
    -------
    bool
        True if every required column is present; otherwise the missing
        set is logged as an error and False is returned.
    """
    missing = set(required_columns).difference(df.columns)

    if not missing:
        return True

    logger.error(f"Missing required columns: {missing}")
    return False
check_data_quality
check_data_quality(
    df: DataFrame, max_missing_pct: float = 0.3
) -> dict[str, float]

Check data quality and report issues.

PARAMETER DESCRIPTION
df

Data to check

TYPE: DataFrame

max_missing_pct

Maximum acceptable percentage of missing values

TYPE: float DEFAULT: 0.3

RETURNS DESCRIPTION
Dict[str, float]

Quality metrics

Source code in fplx/utils/validation.py
def check_data_quality(
    df: pd.DataFrame, max_missing_pct: float = 0.3
) -> dict:
    """
    Check data quality and report issues.

    Logs a warning when the overall missing-value fraction exceeds
    ``max_missing_pct``, and another listing columns that individually
    exceed it.

    Parameters
    ----------
    df : pd.DataFrame
        Data to check
    max_missing_pct : float
        Maximum acceptable fraction (0-1) of missing values

    Returns
    -------
    dict
        Quality metrics with keys ``total_rows`` (int),
        ``total_columns`` (int), ``missing_percentage`` (float, 0-100),
        and ``problematic_columns`` (list of column labels).
        Note: the previous ``dict[str, float]`` annotation was wrong —
        the values are of mixed types.
    """
    total_cells = df.shape[0] * df.shape[1]
    missing_cells = df.isna().sum().sum()
    # Guard against an empty frame to avoid division by zero.
    missing_pct = missing_cells / total_cells if total_cells > 0 else 0

    # Per-column missing fraction.
    col_missing = df.isna().mean()
    problematic_cols = col_missing[col_missing > max_missing_pct].index.tolist()

    metrics = {
        "total_rows": df.shape[0],
        "total_columns": df.shape[1],
        "missing_percentage": missing_pct * 100,
        "problematic_columns": problematic_cols,
    }

    if missing_pct > max_missing_pct:
        logger.warning(f"High missing data: {missing_pct * 100:.2f}%")

    if problematic_cols:
        logger.warning(f"Columns with high missing data: {problematic_cols}")

    return metrics
impute_missing
impute_missing(
    df: DataFrame, strategy: str = "mean"
) -> DataFrame

Impute missing values.

PARAMETER DESCRIPTION
df

Data with missing values

TYPE: DataFrame

strategy

Imputation strategy: 'mean', 'median', 'forward_fill', 'zero'

TYPE: str DEFAULT: 'mean'

RETURNS DESCRIPTION
DataFrame

Data with imputed values

Source code in fplx/utils/validation.py
def impute_missing(df: pd.DataFrame, strategy: str = "mean") -> pd.DataFrame:
    """
    Impute missing values.

    Parameters
    ----------
    df : pd.DataFrame
        Data with missing values
    strategy : str
        Imputation strategy: 'mean', 'median', 'forward_fill', 'zero'.
        Unknown strategies fall back to 'mean' with a warning.

    Returns
    -------
    pd.DataFrame
        Data with imputed values (input frame is not modified)
    """
    df = df.copy()

    if strategy == "mean":
        df = df.fillna(df.mean())
    elif strategy == "median":
        df = df.fillna(df.median())
    elif strategy == "forward_fill":
        # BUG FIX: fillna(method="ffill") is deprecated since pandas 2.1
        # and removed in pandas 3.0; DataFrame.ffill() is the supported API.
        df = df.ffill()
    elif strategy == "zero":
        df = df.fillna(0)
    else:
        logger.warning(f"Unknown strategy {strategy}, using mean")
        df = df.fillna(df.mean())

    return df

Subpackages