Skip to content

fplx

fplx

FPLX - Fantasy Premier League Time-Series Analysis & Squad Optimization

A production-ready Python library for: - FPL player time-series data analysis - News & injury signal integration - Expected performance scoring - Optimal 15-player squad and 11-player lineup selection

FPLModel

FPLModel(
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
)

High-level interface for FPL analysis and squad optimization.

This is the main user-facing API. It orchestrates data loading, feature engineering, model fitting, and squad optimization.

PARAMETER DESCRIPTION
budget

Maximum squad budget (default 100.0)

TYPE: float DEFAULT: 100.0

horizon

Prediction horizon in gameweeks (default 1)

TYPE: int DEFAULT: 1

formation

Desired formation, or "auto" for optimization

TYPE: str DEFAULT: 'auto'

config

Custom configuration

TYPE: Optional[Dict] DEFAULT: None

Examples:

>>> from fplx import FPLModel
>>> model = FPLModel(budget=100, horizon=1)
>>> model.load_data()
>>> model.fit()
>>> squad = model.select_best_11()
>>> squad.summary()
Source code in fplx/api/interface.py
def __init__(
    self,
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
):
    """Set up the model facade; heavy components are created lazily.

    Parameters
    ----------
    budget : float
        Maximum squad budget in £m.
    horizon : int
        Prediction horizon in gameweeks.
    formation : str
        Desired formation, or "auto" to let the optimizer decide.
    config : Optional[dict]
        Custom configuration overrides.
    """
    self.budget = budget
    self.horizon = horizon
    self.formation = formation
    self.config = Config(config)

    # Pipeline components are instantiated on first use, not here.
    lazy_components = (
        "_data_loader",
        "_feature_engineer",
        "_news_collector",
        "_stats_signal",
        "_news_signal",
        "_fixture_signal",
        "_model",
        "_optimizer",
    )
    for attr in lazy_components:
        setattr(self, attr, None)

    # Mutable state, populated by load_data() / fit().
    self.players: list[Player] = []
    self.players_data: dict[int, pd.DataFrame] = {}
    self.expected_points: dict[int, float] = {}
    self.expected_variance: dict[int, float] = {}
    self.current_gameweek: int = 1

load_data

load_data(
    source: str = "api",
    path: Optional[Union[str, Path]] = None,
) -> None

Load player and fixture data.

PARAMETER DESCRIPTION
source

Data source: 'api' or 'local'

TYPE: str DEFAULT: 'api'

path

Path to local data (if source is 'local')

TYPE: Optional[Union[str, Path]] DEFAULT: None

Source code in fplx/api/interface.py
def load_data(self, source: str = "api", path: Optional[Union[str, Path]] = None) -> None:
    """
    Load player and fixture data.

    Parameters
    ----------
    source : str
        Data source: 'api' or 'local'
    path : Optional[Union[str, Path]]
        Path to local data (if source is 'local')

    Raises
    ------
    ValueError
        If ``source`` is unknown, or 'local' is requested without ``path``.
    """
    logger.info(f"Loading data from {source}...")
    if source == "api":
        bootstrap_data = self.data_loader.fetch_bootstrap_data()
        # BUG FIX: load_players(force_refresh: bool = False) does not take the
        # bootstrap payload; passing the dict made force_refresh truthy and
        # triggered a second, cache-bypassing API fetch. Calling it without
        # arguments lets it reuse the bootstrap data cached just above.
        self.players = self.data_loader.load_players()

        # Determine current gameweek from bootstrap events
        for event in bootstrap_data.get("events", []):
            if event.get("is_current"):
                self.current_gameweek = event["id"]
                break

        # Collect per-gameweek news snapshots for inference
        self.news_collector.collect_from_bootstrap(bootstrap_data, self.current_gameweek)

    elif source == "local":
        if path is None:
            raise ValueError("Path must be provided for local data source.")
        # NOTE(review): load_from_csv returns a DataFrame while self.players is
        # annotated list[Player] — confirm downstream code handles this shape.
        self.players = self.data_loader.load_from_csv(path)

    else:
        # Previously an unknown source fell through silently and logged
        # "Loaded 0 players."; fail fast instead.
        raise ValueError(f"Unknown data source: {source!r} (expected 'api' or 'local').")

    logger.info(f"Loaded {len(self.players)} players.")

    # Keep a per-player time-series lookup. This is a simplification; a full
    # implementation would fetch detailed history here.
    for player in self.players:
        self.players_data[player.id] = player.timeseries

fit

fit() -> None

Fit the prediction model.

Uses the probabilistic inference pipeline (HMM + Kalman + Fusion) when model_type is 'inference'. Falls back to the original feature engineering pipeline for baseline/ML models.

Source code in fplx/api/interface.py
def fit(self) -> None:
    """
    Fit the prediction model.

    Dispatches to the probabilistic inference pipeline (HMM + Kalman +
    Fusion) when the configured model_type is 'inference'; any other
    value falls through to the legacy feature-engineering pipeline.

    Raises
    ------
    RuntimeError
        If load_data() has not been called yet.
    """
    if not self.players:
        raise RuntimeError("Data not loaded. Call load_data() first.")

    model_type = self.config.get("model_type", "baseline")
    logger.info(f"Fitting model '{model_type}'...")

    fit_impl = (
        self._fit_inference
        if model_type == "inference"
        else lambda: self._fit_legacy(model_type)
    )
    fit_impl()

    logger.info("Model fitting complete.")

select_best_11

select_best_11() -> FullSquad

Select the optimal 15-player squad and 11-player starting lineup.

RETURNS DESCRIPTION
FullSquad

The optimized squad with lineup.

Source code in fplx/api/interface.py
def select_best_11(self) -> FullSquad:
    """
    Select the optimal 15-player squad and 11-player starting lineup.

    Returns
    -------
    FullSquad
        The optimized squad with lineup.

    Raises
    ------
    RuntimeError
        If fit() has not been called yet.
    """
    if not self.expected_points:
        raise RuntimeError("Model not fitted. Call fit() first.")

    optimizer_name = self.config.get("optimizer", "greedy")
    logger.info("Optimizing squad with %s optimizer...", optimizer_name)

    # An empty variance dict is passed through as None.
    variance = self.expected_variance if self.expected_variance else None
    squad = self.optimizer.solve(
        players=self.players,
        expected_points=self.expected_points,
        expected_variance=variance,
    )

    logger.info("Squad optimization complete.")
    return squad

Matchweek dataclass

Matchweek(
    gameweek: int,
    date: datetime,
    fixtures: list[dict],
    team_difficulty: dict[str, float],
)

Represents a matchweek with global context.

ATTRIBUTE DESCRIPTION
gameweek

Gameweek number

TYPE: int

date

Date of the gameweek

TYPE: datetime

fixtures

List of fixtures

TYPE: list[dict]

team_difficulty

Team-level difficulty ratings

TYPE: dict[str, float]

Player dataclass

Player(
    id: int,
    name: str,
    team: str,
    position: str,
    price: float,
    timeseries: DataFrame,
    news: Optional[dict] = None,
)

Represents a Fantasy Premier League player.

ATTRIBUTE DESCRIPTION
id

Unique player identifier

TYPE: int

name

Player full name

TYPE: str

team

Current team

TYPE: str

position

Position (GK, DEF, MID, FWD)

TYPE: str

price

Current price in FPL

TYPE: float

timeseries

Historical stats (points, xG, minutes, etc.)

TYPE: DataFrame

news

Latest news/injury information

TYPE: Optional[dict]

last_5_points property

last_5_points: float

Average points over last 5 gameweeks.

availability property

availability: float

Availability score (0-1) based on news.

FullSquad dataclass

FullSquad(
    squad_players: list[Player],
    lineup: Squad,
    bench: list[Player] = list(),
    squad_cost: float = 0.0,
    expected_points: float = 0.0,
)

Represents a 15-player FPL squad with a selected 11-player lineup.

The two-level FPL structure: Level 1: 15-player squad (2 GK, 5 DEF, 5 MID, 3 FWD) under budget. Level 2: 11-player starting lineup chosen from the squad each gameweek.

ATTRIBUTE DESCRIPTION
squad_players

All 15 squad members.

TYPE: list[Player]

lineup

The 11-player starting lineup (subset of squad_players).

TYPE: Squad

bench

The 4 bench players.

TYPE: list[Player]

squad_cost

Total cost of all 15 players.

TYPE: float

expected_points

Expected points for the starting 11.

TYPE: float

summary

summary() -> str

Returns a formatted string summary of the full squad.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the full 15-man squad.

    Includes total and remaining budget (against the fixed £100.0m cap),
    the starting-XI summary, and the bench, one player per line.
    """
    report = [
        f"Squad Cost: £{self.squad_cost:.1f}m / £100.0m",
        f"Remaining Budget: £{100.0 - self.squad_cost:.1f}m",
        "",
        self.lineup.summary(),
        "",
        "--- Bench ---",
    ]
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in self.bench
    )
    return "\n".join(report)

Squad dataclass

Squad(
    players: list[Player],
    formation: str,
    total_cost: float,
    expected_points: float,
    captain: Optional[Player] = None,
)

Represents an 11-player starting lineup.

ATTRIBUTE DESCRIPTION
players

Selected starters (exactly 11).

TYPE: list[Player]

formation

Formation string (e.g., "3-4-3").

TYPE: str

total_cost

Total cost of the starting 11.

TYPE: float

expected_points

Expected total points for the starting 11.

TYPE: float

captain

Captain selection (earns double points).

TYPE: Optional[Player]

summary

summary() -> str

Returns a formatted string summary of the lineup.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the starting lineup.

    Shows formation, cost, expected points and captain, followed by the
    starters sorted GK -> DEF -> MID -> FWD (unknown positions last).
    """
    captain_name = self.captain.name if self.captain else "None"
    report = [
        f"Formation: {self.formation}",
        f"Total Cost: £{self.total_cost:.1f}m",
        f"Expected Points: {self.expected_points:.2f}",
        f"Captain: {captain_name}",
        "",
        "--- Starting XI ---",
    ]
    rank = {"GK": 0, "DEF": 1, "MID": 2, "FWD": 3}
    ordered = sorted(self.players, key=lambda member: rank.get(member.position, 9))
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in ordered
    )
    return "\n".join(report)

FPLDataLoader

FPLDataLoader(cache_dir: Optional[Path] = None)

Load and manage FPL data from various sources (API, CSV, cache).

PARAMETER DESCRIPTION
cache_dir

Directory to cache downloaded data

TYPE: Optional[Path] DEFAULT: None

Source code in fplx/data/loaders.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Create a loader, ensuring the on-disk cache directory exists.

    Parameters
    ----------
    cache_dir : Optional[Path]
        Directory for cached downloads; defaults to ~/.fplx/cache.
    """
    fallback = Path.home() / ".fplx" / "cache"
    self.cache_dir = cache_dir or fallback
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    # In-memory copy of the last bootstrap payload (set by fetch_bootstrap_data).
    self._bootstrap_data = None

fetch_bootstrap_data

fetch_bootstrap_data(force_refresh: bool = False) -> dict

Fetch main FPL data (players, teams, gameweeks).

PARAMETER DESCRIPTION
force_refresh

Force refresh even if cached

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Dict

Bootstrap data containing players, teams, events

Source code in fplx/data/loaders.py
def fetch_bootstrap_data(self, force_refresh: bool = False) -> dict:
    """
    Fetch main FPL data (players, teams, gameweeks).

    Results are cached on disk and mirrored in ``self._bootstrap_data``
    so repeated calls avoid the network.

    Parameters
    ----------
    force_refresh : bool
        Force refresh even if cached

    Returns
    -------
    Dict
        Bootstrap data containing players, teams, events
    """
    # Single local import (the original imported json twice, once per branch).
    import json

    cache_file = self.cache_dir / "bootstrap.json"

    if not force_refresh and cache_file.exists():
        logger.info("Loading bootstrap data from cache")
        with open(cache_file) as f:
            data = json.load(f)
        # CONSISTENCY FIX: the cache-hit path previously returned without
        # setting self._bootstrap_data, unlike the network path below.
        self._bootstrap_data = data
        return data

    logger.info("Fetching bootstrap data from FPL API")
    # ROBUSTNESS: a timeout prevents the request from hanging indefinitely
    # on a stalled connection (requests has no default timeout).
    response = requests.get(self.BOOTSTRAP_URL, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Cache the data for subsequent calls
    with open(cache_file, "w") as f:
        json.dump(data, f)

    self._bootstrap_data = data
    return data

load_players

load_players(force_refresh: bool = False) -> list[Player]

Load all players with basic info.

PARAMETER DESCRIPTION
force_refresh

Force refresh from API

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Player]

List of Player objects

Source code in fplx/data/loaders.py
def load_players(self, force_refresh: bool = False) -> list[Player]:
    """
    Load all players with basic info.

    Parameters
    ----------
    force_refresh : bool
        Force refresh from API. For backward compatibility, a bootstrap
        payload (dict) may also be passed here and is used directly
        instead of re-fetching.

    Returns
    -------
    list[Player]
        List of Player objects
    """
    # COMPAT FIX: callers have been observed passing the bootstrap dict
    # positionally (load_players(bootstrap_data)). Treat a dict as
    # pre-fetched data rather than as a truthy force_refresh flag, which
    # previously triggered a needless cache-bypassing refetch.
    if isinstance(force_refresh, dict):
        data = force_refresh
    else:
        data = self.fetch_bootstrap_data(force_refresh)

    # Map API team ids and element types to display values
    teams = {t["id"]: t["name"] for t in data["teams"]}
    positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}

    players = []
    for element in data["elements"]:
        # Minimal single-row timeseries (enriched later via enrich_player_history)
        ts_data = {
            "gameweek": [0],
            "points": [element.get("total_points", 0)],
            "minutes": [element.get("minutes", 0)],
            # ROBUSTNESS: the API serves "form" as a string and may serve
            # None/"" — float(None) crashed before; coerce falsy values to 0.
            "form": [float(element.get("form", 0) or 0)],
        }

        chance = element.get("chance_of_playing_next_round")
        player = Player(
            id=element["id"],
            name=element["web_name"],
            team=teams[element["team"]],
            position=positions[element["element_type"]],
            price=element["now_cost"] / 10.0,  # API price is in tenths of £m
            timeseries=pd.DataFrame(ts_data),
            news={
                "text": element.get("news", ""),
                # None means "no doubt reported" -> fully available
                "availability": 1.0 if chance is None else chance / 100.0,
            },
        )
        players.append(player)

    logger.info(f"Loaded {len(players)} players")
    return players

load_player_history

load_player_history(player_id: int) -> DataFrame

Load detailed historical data for a specific player.

PARAMETER DESCRIPTION
player_id

Player ID

TYPE: int

RETURNS DESCRIPTION
DataFrame

Historical gameweek stats

Source code in fplx/data/loaders.py
def load_player_history(self, player_id: int) -> pd.DataFrame:
    """
    Load detailed historical data for a specific player.

    Parameters
    ----------
    player_id : int
        Player ID

    Returns
    -------
    pd.DataFrame
        Historical gameweek stats
    """
    url = self.PLAYER_DETAIL_URL.format(player_id=player_id)
    # ROBUSTNESS: timeout so a stalled API call cannot hang the caller
    # (requests has no default timeout).
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    data = response.json()
    history = pd.DataFrame(data["history"])

    # Normalise API column names to the library's conventions.
    # (The original's identity mappings "minutes"->"minutes" and
    # "assists"->"assists" were no-ops and have been dropped.)
    if not history.empty:
        history = history.rename(
            columns={
                "round": "gameweek",
                "total_points": "points",
                "goals_scored": "goals",
                "expected_goals": "xG",
                "expected_assists": "xA",
            }
        )

    return history

load_fixtures

load_fixtures() -> DataFrame

Load all fixtures.

RETURNS DESCRIPTION
DataFrame

Fixtures data

Source code in fplx/data/loaders.py
def load_fixtures(self) -> pd.DataFrame:
    """
    Load all fixtures.

    Returns
    -------
    pd.DataFrame
        Fixtures data
    """
    # ROBUSTNESS: timeout so the request cannot hang indefinitely
    # (requests has no default timeout).
    response = requests.get(self.FIXTURES_URL, timeout=30)
    response.raise_for_status()

    return pd.DataFrame(response.json())

load_from_csv

load_from_csv(filepath: Path) -> DataFrame

Load data from CSV file.

PARAMETER DESCRIPTION
filepath

Path to CSV file

TYPE: Path

RETURNS DESCRIPTION
DataFrame

Loaded data

Source code in fplx/data/loaders.py
def load_from_csv(self, filepath: Path) -> pd.DataFrame:
    """
    Load data from CSV file.

    Parameters
    ----------
    filepath : Path
        Path to CSV file

    Returns
    -------
    pd.DataFrame
        Loaded data
    """
    logger.info("Loading data from %s", filepath)
    return pd.read_csv(filepath)

enrich_player_history

enrich_player_history(
    players: list[Player],
) -> list[Player]

Enrich players with full historical data.

PARAMETER DESCRIPTION
players

List of players to enrich

TYPE: list[Player]

RETURNS DESCRIPTION
list[Player]

Players with enriched timeseries

Source code in fplx/data/loaders.py
def enrich_player_history(self, players: list[Player]) -> list[Player]:
    """
    Enrich players with full historical data.

    Players whose history cannot be fetched keep their existing
    timeseries and are still included in the result.

    Parameters
    ----------
    players : list[Player]
        List of players to enrich

    Returns
    -------
    list[Player]
        Players with enriched timeseries
    """
    enriched = []
    for player in players:
        try:
            history = self.load_player_history(player.id)
            if not history.empty:
                player.timeseries = history
        except Exception as e:
            # BUG FIX: the message was an f-string AND used %-style lazy
            # args; the spurious f-prefix is removed so logging formats
            # the placeholders lazily as intended.
            logger.warning("Could not load history for %s : %s", player.name, e)
        enriched.append(player)

    return enriched

api

API module.

FPLModel

FPLModel(
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
)

High-level interface for FPL analysis and squad optimization.

This is the main user-facing API. It orchestrates data loading, feature engineering, model fitting, and squad optimization.

PARAMETER DESCRIPTION
budget

Maximum squad budget (default 100.0)

TYPE: float DEFAULT: 100.0

horizon

Prediction horizon in gameweeks (default 1)

TYPE: int DEFAULT: 1

formation

Desired formation, or "auto" for optimization

TYPE: str DEFAULT: 'auto'

config

Custom configuration

TYPE: Optional[Dict] DEFAULT: None

Examples:

>>> from fplx import FPLModel
>>> model = FPLModel(budget=100, horizon=1)
>>> model.load_data()
>>> model.fit()
>>> squad = model.select_best_11()
>>> squad.summary()
Source code in fplx/api/interface.py
def __init__(
    self,
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
):
    """Set up the model facade; heavy components are created lazily.

    Parameters
    ----------
    budget : float
        Maximum squad budget in £m.
    horizon : int
        Prediction horizon in gameweeks.
    formation : str
        Desired formation, or "auto" to let the optimizer decide.
    config : Optional[dict]
        Custom configuration overrides.
    """
    self.budget = budget
    self.horizon = horizon
    self.formation = formation
    self.config = Config(config)

    # Pipeline components are instantiated on first use, not here.
    lazy_components = (
        "_data_loader",
        "_feature_engineer",
        "_news_collector",
        "_stats_signal",
        "_news_signal",
        "_fixture_signal",
        "_model",
        "_optimizer",
    )
    for attr in lazy_components:
        setattr(self, attr, None)

    # Mutable state, populated by load_data() / fit().
    self.players: list[Player] = []
    self.players_data: dict[int, pd.DataFrame] = {}
    self.expected_points: dict[int, float] = {}
    self.expected_variance: dict[int, float] = {}
    self.current_gameweek: int = 1
load_data
load_data(
    source: str = "api",
    path: Optional[Union[str, Path]] = None,
) -> None

Load player and fixture data.

PARAMETER DESCRIPTION
source

Data source: 'api' or 'local'

TYPE: str DEFAULT: 'api'

path

Path to local data (if source is 'local')

TYPE: Optional[Union[str, Path]] DEFAULT: None

Source code in fplx/api/interface.py
def load_data(self, source: str = "api", path: Optional[Union[str, Path]] = None) -> None:
    """
    Load player and fixture data.

    Parameters
    ----------
    source : str
        Data source: 'api' or 'local'
    path : Optional[Union[str, Path]]
        Path to local data (if source is 'local')

    Raises
    ------
    ValueError
        If ``source`` is unknown, or 'local' is requested without ``path``.
    """
    logger.info(f"Loading data from {source}...")
    if source == "api":
        bootstrap_data = self.data_loader.fetch_bootstrap_data()
        # BUG FIX: load_players(force_refresh: bool = False) does not take the
        # bootstrap payload; passing the dict made force_refresh truthy and
        # triggered a second, cache-bypassing API fetch. Calling it without
        # arguments lets it reuse the bootstrap data cached just above.
        self.players = self.data_loader.load_players()

        # Determine current gameweek from bootstrap events
        for event in bootstrap_data.get("events", []):
            if event.get("is_current"):
                self.current_gameweek = event["id"]
                break

        # Collect per-gameweek news snapshots for inference
        self.news_collector.collect_from_bootstrap(bootstrap_data, self.current_gameweek)

    elif source == "local":
        if path is None:
            raise ValueError("Path must be provided for local data source.")
        # NOTE(review): load_from_csv returns a DataFrame while self.players is
        # annotated list[Player] — confirm downstream code handles this shape.
        self.players = self.data_loader.load_from_csv(path)

    else:
        # Previously an unknown source fell through silently and logged
        # "Loaded 0 players."; fail fast instead.
        raise ValueError(f"Unknown data source: {source!r} (expected 'api' or 'local').")

    logger.info(f"Loaded {len(self.players)} players.")

    # Keep a per-player time-series lookup. This is a simplification; a full
    # implementation would fetch detailed history here.
    for player in self.players:
        self.players_data[player.id] = player.timeseries
fit
fit() -> None

Fit the prediction model.

Uses the probabilistic inference pipeline (HMM + Kalman + Fusion) when model_type is 'inference'. Falls back to the original feature engineering pipeline for baseline/ML models.

Source code in fplx/api/interface.py
def fit(self) -> None:
    """
    Fit the prediction model.

    Dispatches to the probabilistic inference pipeline (HMM + Kalman +
    Fusion) when the configured model_type is 'inference'; any other
    value falls through to the legacy feature-engineering pipeline.

    Raises
    ------
    RuntimeError
        If load_data() has not been called yet.
    """
    if not self.players:
        raise RuntimeError("Data not loaded. Call load_data() first.")

    model_type = self.config.get("model_type", "baseline")
    logger.info(f"Fitting model '{model_type}'...")

    fit_impl = (
        self._fit_inference
        if model_type == "inference"
        else lambda: self._fit_legacy(model_type)
    )
    fit_impl()

    logger.info("Model fitting complete.")
select_best_11
select_best_11() -> FullSquad

Select the optimal 15-player squad and 11-player starting lineup.

RETURNS DESCRIPTION
FullSquad

The optimized squad with lineup.

Source code in fplx/api/interface.py
def select_best_11(self) -> FullSquad:
    """
    Select the optimal 15-player squad and 11-player starting lineup.

    Returns
    -------
    FullSquad
        The optimized squad with lineup.

    Raises
    ------
    RuntimeError
        If fit() has not been called yet.
    """
    if not self.expected_points:
        raise RuntimeError("Model not fitted. Call fit() first.")

    optimizer_name = self.config.get("optimizer", "greedy")
    logger.info("Optimizing squad with %s optimizer...", optimizer_name)

    # An empty variance dict is passed through as None.
    variance = self.expected_variance if self.expected_variance else None
    squad = self.optimizer.solve(
        players=self.players,
        expected_points=self.expected_points,
        expected_variance=variance,
    )

    logger.info("Squad optimization complete.")
    return squad

interface

High-level API interface for FPLX.

FPLModel
FPLModel(
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
)

High-level interface for FPL analysis and squad optimization.

This is the main user-facing API. It orchestrates data loading, feature engineering, model fitting, and squad optimization.

PARAMETER DESCRIPTION
budget

Maximum squad budget (default 100.0)

TYPE: float DEFAULT: 100.0

horizon

Prediction horizon in gameweeks (default 1)

TYPE: int DEFAULT: 1

formation

Desired formation, or "auto" for optimization

TYPE: str DEFAULT: 'auto'

config

Custom configuration

TYPE: Optional[Dict] DEFAULT: None

Examples:

>>> from fplx import FPLModel
>>> model = FPLModel(budget=100, horizon=1)
>>> model.load_data()
>>> model.fit()
>>> squad = model.select_best_11()
>>> squad.summary()
Source code in fplx/api/interface.py
def __init__(
    self,
    budget: float = 100.0,
    horizon: int = 1,
    formation: str = "auto",
    config: Optional[dict] = None,
):
    """Set up the model facade; heavy components are created lazily.

    Parameters
    ----------
    budget : float
        Maximum squad budget in £m.
    horizon : int
        Prediction horizon in gameweeks.
    formation : str
        Desired formation, or "auto" to let the optimizer decide.
    config : Optional[dict]
        Custom configuration overrides.
    """
    self.budget = budget
    self.horizon = horizon
    self.formation = formation
    self.config = Config(config)

    # Pipeline components are instantiated on first use, not here.
    lazy_components = (
        "_data_loader",
        "_feature_engineer",
        "_news_collector",
        "_stats_signal",
        "_news_signal",
        "_fixture_signal",
        "_model",
        "_optimizer",
    )
    for attr in lazy_components:
        setattr(self, attr, None)

    # Mutable state, populated by load_data() / fit().
    self.players: list[Player] = []
    self.players_data: dict[int, pd.DataFrame] = {}
    self.expected_points: dict[int, float] = {}
    self.expected_variance: dict[int, float] = {}
    self.current_gameweek: int = 1
load_data
load_data(
    source: str = "api",
    path: Optional[Union[str, Path]] = None,
) -> None

Load player and fixture data.

PARAMETER DESCRIPTION
source

Data source: 'api' or 'local'

TYPE: str DEFAULT: 'api'

path

Path to local data (if source is 'local')

TYPE: Optional[Union[str, Path]] DEFAULT: None

Source code in fplx/api/interface.py
def load_data(self, source: str = "api", path: Optional[Union[str, Path]] = None) -> None:
    """
    Load player and fixture data.

    Parameters
    ----------
    source : str
        Data source: 'api' or 'local'
    path : Optional[Union[str, Path]]
        Path to local data (if source is 'local')

    Raises
    ------
    ValueError
        If ``source`` is unknown, or 'local' is requested without ``path``.
    """
    logger.info(f"Loading data from {source}...")
    if source == "api":
        bootstrap_data = self.data_loader.fetch_bootstrap_data()
        # BUG FIX: load_players(force_refresh: bool = False) does not take the
        # bootstrap payload; passing the dict made force_refresh truthy and
        # triggered a second, cache-bypassing API fetch. Calling it without
        # arguments lets it reuse the bootstrap data cached just above.
        self.players = self.data_loader.load_players()

        # Determine current gameweek from bootstrap events
        for event in bootstrap_data.get("events", []):
            if event.get("is_current"):
                self.current_gameweek = event["id"]
                break

        # Collect per-gameweek news snapshots for inference
        self.news_collector.collect_from_bootstrap(bootstrap_data, self.current_gameweek)

    elif source == "local":
        if path is None:
            raise ValueError("Path must be provided for local data source.")
        # NOTE(review): load_from_csv returns a DataFrame while self.players is
        # annotated list[Player] — confirm downstream code handles this shape.
        self.players = self.data_loader.load_from_csv(path)

    else:
        # Previously an unknown source fell through silently and logged
        # "Loaded 0 players."; fail fast instead.
        raise ValueError(f"Unknown data source: {source!r} (expected 'api' or 'local').")

    logger.info(f"Loaded {len(self.players)} players.")

    # Keep a per-player time-series lookup. This is a simplification; a full
    # implementation would fetch detailed history here.
    for player in self.players:
        self.players_data[player.id] = player.timeseries
fit
fit() -> None

Fit the prediction model.

Uses the probabilistic inference pipeline (HMM + Kalman + Fusion) when model_type is 'inference'. Falls back to the original feature engineering pipeline for baseline/ML models.

Source code in fplx/api/interface.py
def fit(self) -> None:
    """
    Fit the prediction model.

    Dispatches to the probabilistic inference pipeline (HMM + Kalman +
    Fusion) when the configured model_type is 'inference'; any other
    value falls through to the legacy feature-engineering pipeline.

    Raises
    ------
    RuntimeError
        If load_data() has not been called yet.
    """
    if not self.players:
        raise RuntimeError("Data not loaded. Call load_data() first.")

    model_type = self.config.get("model_type", "baseline")
    logger.info(f"Fitting model '{model_type}'...")

    fit_impl = (
        self._fit_inference
        if model_type == "inference"
        else lambda: self._fit_legacy(model_type)
    )
    fit_impl()

    logger.info("Model fitting complete.")
select_best_11
select_best_11() -> FullSquad

Select the optimal 15-player squad and 11-player starting lineup.

RETURNS DESCRIPTION
FullSquad

The optimized squad with lineup.

Source code in fplx/api/interface.py
def select_best_11(self) -> FullSquad:
    """
    Select the optimal 15-player squad and 11-player starting lineup.

    Returns
    -------
    FullSquad
        The optimized squad with lineup.

    Raises
    ------
    RuntimeError
        If fit() has not been called yet.
    """
    if not self.expected_points:
        raise RuntimeError("Model not fitted. Call fit() first.")

    optimizer_name = self.config.get("optimizer", "greedy")
    logger.info("Optimizing squad with %s optimizer...", optimizer_name)

    # An empty variance dict is passed through as None.
    variance = self.expected_variance if self.expected_variance else None
    squad = self.optimizer.solve(
        players=self.players,
        expected_points=self.expected_points,
        expected_variance=variance,
    )

    logger.info("Squad optimization complete.")
    return squad

core

Matchweek dataclass

Matchweek(
    gameweek: int,
    date: datetime,
    fixtures: list[dict],
    team_difficulty: dict[str, float],
)

Represents a matchweek with global context.

ATTRIBUTE DESCRIPTION
gameweek

Gameweek number

TYPE: int

date

Date of the gameweek

TYPE: datetime

fixtures

List of fixtures

TYPE: list[dict]

team_difficulty

Team-level difficulty ratings

TYPE: dict[str, float]

Player dataclass

Player(
    id: int,
    name: str,
    team: str,
    position: str,
    price: float,
    timeseries: DataFrame,
    news: Optional[dict] = None,
)

Represents a Fantasy Premier League player.

ATTRIBUTE DESCRIPTION
id

Unique player identifier

TYPE: int

name

Player full name

TYPE: str

team

Current team

TYPE: str

position

Position (GK, DEF, MID, FWD)

TYPE: str

price

Current price in FPL

TYPE: float

timeseries

Historical stats (points, xG, minutes, etc.)

TYPE: DataFrame

news

Latest news/injury information

TYPE: Optional[dict]

last_5_points property
last_5_points: float

Average points over last 5 gameweeks.

availability property
availability: float

Availability score (0-1) based on news.

FullSquad dataclass

FullSquad(
    squad_players: list[Player],
    lineup: Squad,
    bench: list[Player] = list(),
    squad_cost: float = 0.0,
    expected_points: float = 0.0,
)

Represents a 15-player FPL squad with a selected 11-player lineup.

The two-level FPL structure: Level 1: 15-player squad (2 GK, 5 DEF, 5 MID, 3 FWD) under budget. Level 2: 11-player starting lineup chosen from the squad each gameweek.

ATTRIBUTE DESCRIPTION
squad_players

All 15 squad members.

TYPE: list[Player]

lineup

The 11-player starting lineup (subset of squad_players).

TYPE: Squad

bench

The 4 bench players.

TYPE: list[Player]

squad_cost

Total cost of all 15 players.

TYPE: float

expected_points

Expected points for the starting 11.

TYPE: float

summary
summary() -> str

Returns a formatted string summary of the full squad.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the full 15-man squad.

    Includes total and remaining budget (against the fixed £100.0m cap),
    the starting-XI summary, and the bench, one player per line.
    """
    report = [
        f"Squad Cost: £{self.squad_cost:.1f}m / £100.0m",
        f"Remaining Budget: £{100.0 - self.squad_cost:.1f}m",
        "",
        self.lineup.summary(),
        "",
        "--- Bench ---",
    ]
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in self.bench
    )
    return "\n".join(report)

Squad dataclass

Squad(
    players: list[Player],
    formation: str,
    total_cost: float,
    expected_points: float,
    captain: Optional[Player] = None,
)

Represents an 11-player starting lineup.

ATTRIBUTE DESCRIPTION
players

Selected starters (exactly 11).

TYPE: list[Player]

formation

Formation string (e.g., "3-4-3").

TYPE: str

total_cost

Total cost of the starting 11.

TYPE: float

expected_points

Expected total points for the starting 11.

TYPE: float

captain

Captain selection (earns double points).

TYPE: Optional[Player]

summary
summary() -> str

Returns a formatted string summary of the lineup.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable overview of the starting lineup.

    Shows formation, cost, expected points and captain, followed by the
    starters sorted GK -> DEF -> MID -> FWD (unknown positions last).
    """
    captain_name = self.captain.name if self.captain else "None"
    report = [
        f"Formation: {self.formation}",
        f"Total Cost: £{self.total_cost:.1f}m",
        f"Expected Points: {self.expected_points:.2f}",
        f"Captain: {captain_name}",
        "",
        "--- Starting XI ---",
    ]
    rank = {"GK": 0, "DEF": 1, "MID": 2, "FWD": 3}
    ordered = sorted(self.players, key=lambda member: rank.get(member.position, 9))
    report.extend(
        f"  {member.name} ({member.position}, {member.team}, £{member.price}m)"
        for member in ordered
    )
    return "\n".join(report)

matchweek

Matchweek domain object.

Matchweek dataclass
Matchweek(
    gameweek: int,
    date: datetime,
    fixtures: list[dict],
    team_difficulty: dict[str, float],
)

Represents a matchweek with global context.

ATTRIBUTE DESCRIPTION
gameweek

Gameweek number

TYPE: int

date

Date of the gameweek

TYPE: datetime

fixtures

List of fixtures

TYPE: list[dict]

team_difficulty

Team-level difficulty ratings

TYPE: dict[str, float]

player

Player domain object.

Player dataclass
Player(
    id: int,
    name: str,
    team: str,
    position: str,
    price: float,
    timeseries: DataFrame,
    news: Optional[dict] = None,
)

Represents a Fantasy Premier League player.

ATTRIBUTE DESCRIPTION
id

Unique player identifier

TYPE: int

name

Player full name

TYPE: str

team

Current team

TYPE: str

position

Position (GK, DEF, MID, FWD)

TYPE: str

price

Current price in FPL

TYPE: float

timeseries

Historical stats (points, xG, minutes, etc.)

TYPE: DataFrame

news

Latest news/injury information

TYPE: Optional[dict]

last_5_points property
last_5_points: float

Average points over last 5 gameweeks.

availability property
availability: float

Availability score (0-1) based on news.

squad

Squad and FullSquad domain objects.

Squad dataclass
Squad(
    players: list[Player],
    formation: str,
    total_cost: float,
    expected_points: float,
    captain: Optional[Player] = None,
)

Represents an 11-player starting lineup.

ATTRIBUTE DESCRIPTION
players

Selected starters (exactly 11).

TYPE: list[Player]

formation

Formation string (e.g., "3-4-3").

TYPE: str

total_cost

Total cost of the starting 11.

TYPE: float

expected_points

Expected total points for the starting 11.

TYPE: float

captain

Captain selection (earns double points).

TYPE: Optional[Player]

summary
summary() -> str

Returns a formatted string summary of the lineup.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable, multi-line summary of the starting XI.

    The header lists formation, cost, expected points and captain; the
    body lists the eleven starters grouped GK → DEF → MID → FWD.
    """
    order = {"GK": 0, "DEF": 1, "MID": 2, "FWD": 3}
    captain_name = self.captain.name if self.captain else "None"
    header = [
        f"Formation: {self.formation}",
        f"Total Cost: £{self.total_cost:.1f}m",
        f"Expected Points: {self.expected_points:.2f}",
        f"Captain: {captain_name}",
        "",
        "--- Starting XI ---",
    ]
    # Unknown positions sort last (key 9); sort is stable within a position.
    starters = sorted(self.players, key=lambda pl: order.get(pl.position, 9))
    body = [f"  {pl.name} ({pl.position}, {pl.team}, £{pl.price}m)" for pl in starters]
    return "\n".join(header + body)
FullSquad dataclass
FullSquad(
    squad_players: list[Player],
    lineup: Squad,
    bench: list[Player] = list(),
    squad_cost: float = 0.0,
    expected_points: float = 0.0,
)

Represents a 15-player FPL squad with a selected 11-player lineup.

The two-level FPL structure: Level 1: 15-player squad (2 GK, 5 DEF, 5 MID, 3 FWD) under budget. Level 2: 11-player starting lineup chosen from the squad each gameweek.

ATTRIBUTE DESCRIPTION
squad_players

All 15 squad members.

TYPE: list[Player]

lineup

The 11-player starting lineup (subset of squad_players).

TYPE: Squad

bench

The 4 bench players.

TYPE: list[Player]

squad_cost

Total cost of all 15 players.

TYPE: float

expected_points

Expected points for the starting 11.

TYPE: float

summary
summary() -> str

Returns a formatted string summary of the full squad.

Source code in fplx/core/squad.py
def summary(self) -> str:
    """Return a human-readable summary of the full 15-player squad.

    Shows total squad cost versus the budget, the starting-XI summary,
    and the bench players.

    NOTE(review): the £100.0m budget shown here is hard-coded — confirm
    whether it should track a configurable budget instead.
    """
    parts = [
        f"Squad Cost: £{self.squad_cost:.1f}m / £100.0m",
        f"Remaining Budget: £{100.0 - self.squad_cost:.1f}m",
        "",
        self.lineup.summary(),
        "",
        "--- Bench ---",
    ]
    parts.extend(
        f"  {bp.name} ({bp.position}, {bp.team}, £{bp.price}m)" for bp in self.bench
    )
    return "\n".join(parts)

data

Data loading and schema definitions.

FPLDataLoader

FPLDataLoader(cache_dir: Optional[Path] = None)

Load and manage FPL data from various sources (API, CSV, cache).

PARAMETER DESCRIPTION
cache_dir

Directory to cache downloaded data

TYPE: Optional[Path] DEFAULT: None

Source code in fplx/data/loaders.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Initialise the loader and ensure the cache directory exists."""
    default_dir = Path.home() / ".fplx" / "cache"
    self.cache_dir = cache_dir or default_dir
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    # Bootstrap payload is fetched lazily and memoised here.
    self._bootstrap_data = None
fetch_bootstrap_data
fetch_bootstrap_data(force_refresh: bool = False) -> dict

Fetch main FPL data (players, teams, gameweeks).

PARAMETER DESCRIPTION
force_refresh

Force refresh even if cached

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Dict

Bootstrap data containing players, teams, events

Source code in fplx/data/loaders.py
def fetch_bootstrap_data(self, force_refresh: bool = False) -> dict:
    """
    Fetch main FPL data (players, teams, gameweeks).

    The parsed payload is cached on disk (``bootstrap.json`` under
    ``cache_dir``) and memoised on ``self._bootstrap_data`` regardless of
    whether it came from the cache or the API.

    Parameters
    ----------
    force_refresh : bool
        Force refresh even if cached

    Returns
    -------
    Dict
        Bootstrap data containing players, teams, events
    """
    # Single local import (the original imported json twice, once per path).
    import json

    cache_file = self.cache_dir / "bootstrap.json"

    if not force_refresh and cache_file.exists():
        logger.info("Loading bootstrap data from cache")
        with open(cache_file) as f:
            data = json.load(f)
        # Fix: the cache-hit path previously returned without memoising,
        # leaving self._bootstrap_data stale — keep both paths consistent.
        self._bootstrap_data = data
        return data

    logger.info("Fetching bootstrap data from FPL API")
    response = requests.get(self.BOOTSTRAP_URL)
    response.raise_for_status()

    data = response.json()

    # Cache the payload for subsequent runs.
    with open(cache_file, "w") as f:
        json.dump(data, f)

    self._bootstrap_data = data
    return data
load_players
load_players(force_refresh: bool = False) -> list[Player]

Load all players with basic info.

PARAMETER DESCRIPTION
force_refresh

Force refresh from API

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Player]

List of Player objects

Source code in fplx/data/loaders.py
def load_players(self, force_refresh: bool = False) -> list[Player]:
    """
    Load all players with basic info.

    Parameters
    ----------
    force_refresh : bool
        Force refresh from API

    Returns
    -------
    list[Player]
        List of Player objects
    """
    data = self.fetch_bootstrap_data(force_refresh)

    # Build lookup tables: team id -> name, element_type -> position code.
    teams = {t["id"]: t["name"] for t in data["teams"]}
    positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}

    players = []
    for element in data["elements"]:
        # Create minimal timeseries (can be enriched later)
        ts_data = {
            "gameweek": [0],
            "points": [element.get("total_points", 0)],
            "minutes": [element.get("minutes", 0)],
            "form": [float(element.get("form", 0))],
        }

        # The API reports None when no fitness doubt has been flagged,
        # which means the player is fully available.
        chance = element.get("chance_of_playing_next_round")
        availability = 1.0 if chance is None else chance / 100.0

        player = Player(
            id=element["id"],
            name=element["web_name"],
            team=teams[element["team"]],
            position=positions[element["element_type"]],
            price=element["now_cost"] / 10.0,  # Convert to £m
            timeseries=pd.DataFrame(ts_data),
            news={
                "text": element.get("news", ""),
                "availability": availability,
            },
        )
        players.append(player)

    logger.info(f"Loaded {len(players)} players")
    return players
load_player_history
load_player_history(player_id: int) -> DataFrame

Load detailed historical data for a specific player.

PARAMETER DESCRIPTION
player_id

Player ID

TYPE: int

RETURNS DESCRIPTION
DataFrame

Historical gameweek stats

Source code in fplx/data/loaders.py
def load_player_history(self, player_id: int) -> pd.DataFrame:
    """
    Load detailed historical data for a specific player.

    Parameters
    ----------
    player_id : int
        Player ID

    Returns
    -------
    pd.DataFrame
        Historical gameweek stats
    """
    url = self.PLAYER_DETAIL_URL.format(player_id=player_id)
    response = requests.get(url)
    response.raise_for_status()

    payload = response.json()
    history = pd.DataFrame(payload["history"])

    # Map FPL API field names onto the canonical column names used
    # throughout the library.
    rename_map = {
        "round": "gameweek",
        "total_points": "points",
        "minutes": "minutes",
        "goals_scored": "goals",
        "assists": "assists",
        "expected_goals": "xG",
        "expected_assists": "xA",
    }
    if not history.empty:
        history = history.rename(columns=rename_map)

    return history
load_fixtures
load_fixtures() -> DataFrame

Load all fixtures.

RETURNS DESCRIPTION
DataFrame

Fixtures data

Source code in fplx/data/loaders.py
def load_fixtures(self) -> pd.DataFrame:
    """
    Load all fixtures.

    Returns
    -------
    pd.DataFrame
        Fixtures data
    """
    resp = requests.get(self.FIXTURES_URL)
    resp.raise_for_status()
    return pd.DataFrame(resp.json())
load_from_csv
load_from_csv(filepath: Path) -> DataFrame

Load data from CSV file.

PARAMETER DESCRIPTION
filepath

Path to CSV file

TYPE: Path

RETURNS DESCRIPTION
DataFrame

Loaded data

Source code in fplx/data/loaders.py
def load_from_csv(self, filepath: Path) -> pd.DataFrame:
    """
    Load data from CSV file.

    Parameters
    ----------
    filepath : Path
        Path to CSV file

    Returns
    -------
    pd.DataFrame
        Loaded data
    """
    logger.info("Loading data from %s", filepath)
    return pd.read_csv(filepath)
enrich_player_history
enrich_player_history(
    players: list[Player],
) -> list[Player]

Enrich players with full historical data.

PARAMETER DESCRIPTION
players

List of players to enrich

TYPE: list[Player]

RETURNS DESCRIPTION
list[Player]

Players with enriched timeseries

Source code in fplx/data/loaders.py
def enrich_player_history(self, players: list[Player]) -> list[Player]:
    """
    Enrich players with full historical data.

    Best-effort: a player whose history cannot be fetched is kept with
    their existing (minimal) timeseries rather than dropped.

    Parameters
    ----------
    players : list[Player]
        List of players to enrich

    Returns
    -------
    list[Player]
        Players with enriched timeseries
    """
    enriched = []
    for player in players:
        try:
            history = self.load_player_history(player.id)
            if not history.empty:
                player.timeseries = history
            enriched.append(player)
        except Exception as e:
            # Fix: dropped the stray f-prefix — the message uses lazy
            # %-style arguments, so an f-string prefix was misleading
            # (and would break if braces were ever added).
            logger.warning("Could not load history for %s : %s", player.name, e)
            enriched.append(player)

    return enriched

VaastavLoader

VaastavLoader(
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
)

Load historical FPL data from the vaastav dataset.

PARAMETER DESCRIPTION
season

Season string, e.g. "2023-24".

TYPE: str DEFAULT: '2023-24'

data_dir

Path to a local clone. If None, fetches from GitHub.

TYPE: str or Path DEFAULT: None

cache_dir

Where to cache downloaded CSVs. Defaults to ~/.fplx/vaastav/.

TYPE: str or Path DEFAULT: None

Source code in fplx/data/vaastav_loader.py
def __init__(
    self,
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
):
    """Validate the season, resolve directories, and prepare the cache."""
    self.season = self._validate_season(season)
    self.data_dir = Path(data_dir) if data_dir else None

    # Default cache is project-local to keep artifacts within the workspace.
    if cache_dir:
        self.cache_dir = Path(cache_dir)
    else:
        repo_root = Path(__file__).resolve().parents[2]
        self.cache_dir = repo_root / ".fplx" / "vaastav"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Lazily-populated caches for the two CSV-backed tables.
    self._merged_gw: Optional[pd.DataFrame] = None
    self._player_raw: Optional[pd.DataFrame] = None
load_merged_gw
load_merged_gw() -> DataFrame

Load the merged gameweek file (all GWs, all players, one CSV).

RETURNS DESCRIPTION
DataFrame

One row per player-gameweek appearance.

Source code in fplx/data/vaastav_loader.py
def load_merged_gw(self) -> pd.DataFrame:
    """
    Load the merged gameweek file (all GWs, all players, one CSV).

    The result is memoised on ``self._merged_gw`` after the first read.

    Returns
    -------
    pd.DataFrame
        One row per player-gameweek appearance.
    """
    if self._merged_gw is not None:
        return self._merged_gw

    df = self._read_csv("gws/merged_gw.csv")
    # Map raw vaastav column names onto the library's canonical names.
    df = df.rename(columns={c: COLUMN_MAP.get(c, c) for c in df.columns})
    df = self._coalesce_duplicate_columns(df)

    if "gameweek" in df.columns:
        df["gameweek"] = pd.to_numeric(df["gameweek"], errors="coerce")

    self._merged_gw = df

    # Fix: guard the summary log. Previously a malformed CSV with a
    # missing "element"/"gameweek" column raised KeyError here, and an
    # all-NaN gameweek column raised ValueError via "%d" % nan — a log
    # statement could abort the whole load.
    n_players = df["element"].nunique() if "element" in df.columns else 0
    if "gameweek" in df.columns and df["gameweek"].notna().any():
        gw_lo = int(df["gameweek"].min())
        gw_hi = int(df["gameweek"].max())
    else:
        gw_lo = gw_hi = 0
    logger.info(
        "Loaded merged_gw: %d rows, %d players, GW %d-%d",
        len(df),
        n_players,
        gw_lo,
        gw_hi,
    )
    return df
load_player_raw
load_player_raw() -> DataFrame

Load season-level player metadata.

Source code in fplx/data/vaastav_loader.py
def load_player_raw(self) -> pd.DataFrame:
    """Load season-level player metadata (cached after the first read)."""
    if self._player_raw is None:
        self._player_raw = self._read_csv("players_raw.csv")
    return self._player_raw
load_gameweek
load_gameweek(gw: int) -> DataFrame

Load a single gameweek from merged data.

Source code in fplx/data/vaastav_loader.py
def load_gameweek(self, gw: int) -> pd.DataFrame:
    """Return a copy of the merged-GW rows belonging to gameweek ``gw``."""
    merged = self.load_merged_gw()
    mask = merged["gameweek"] == gw
    return merged[mask].copy()
build_player_objects
build_player_objects(
    up_to_gw: Optional[int] = None,
) -> list[Player]

Build Player objects with timeseries up to a given gameweek.

PARAMETER DESCRIPTION
up_to_gw

Only include gameweeks 1..up_to_gw. If None, include all.

TYPE: int DEFAULT: None

RETURNS DESCRIPTION
list[Player]
Source code in fplx/data/vaastav_loader.py
def build_player_objects(
    self,
    up_to_gw: Optional[int] = None,
) -> list[Player]:
    """
    Build Player objects with timeseries up to a given gameweek.

    Parameters
    ----------
    up_to_gw : int, optional
        Only include gameweeks 1..up_to_gw. If None, include all.

    Returns
    -------
    list[Player]
        One Player per distinct ``element`` id, each carrying a
        DGW-aggregated timeseries (exactly one row per gameweek).
    """
    all_gw = self.load_merged_gw()

    # Truncate history so no future gameweeks leak into the timeseries.
    if up_to_gw is not None:
        all_gw = all_gw[all_gw["gameweek"] <= up_to_gw]

    if all_gw.empty:
        return []

    players = []
    # "element" is the FPL player id in the vaastav merged data.
    grouped = all_gw.groupby("element")

    for pid, grp in grouped:
        pid = int(pid)
        grp = grp.sort_values("gameweek").reset_index(drop=True)

        # Player metadata from the row itself
        # (first row for identity fields; last row for the current price,
        # which is stored in tenths of £m in the raw data).
        name = str(grp["name"].iloc[0]) if "name" in grp.columns else f"Player_{pid}"
        team = str(grp["team"].iloc[0]) if "team" in grp.columns else "Unknown"
        pos_raw = grp["position"].iloc[0] if "position" in grp.columns else "MID"
        price = grp["value"].iloc[-1] / 10.0 if "value" in grp.columns else 5.0

        # Map the raw position key first, then its string form; fall back
        # to "MID" when the value is unknown to POSITION_MAP.
        position = POSITION_MAP.get(pos_raw, POSITION_MAP.get(str(pos_raw), "MID"))

        # Build timeseries with available columns
        # (only keep columns actually present in this season's CSV).
        keep = [
            c
            for c in [
                "gameweek",
                "points",
                "minutes",
                "starts",
                "goals",
                "assists",
                "xG",
                "xA",
                "bonus",
                "bps",
                "clean_sheets",
                "goals_conceded",
                "saves",
                "yellow_cards",
                "red_cards",
                "own_goals",
                "penalties_missed",
                "penalties_saved",
                "influence",
                "creativity",
                "threat",
                "ict_index",
                "was_home",
                "opponent_team",
                "expected_goals_conceded",
                "xP",
                "value",
                "selected",
                "transfers_in",
                "transfers_out",
            ]
            if c in grp.columns
        ]
        timeseries = grp[keep].copy()
        # Coerce everything numeric; unparsable entries become NaN.
        for col in timeseries.columns:
            timeseries[col] = pd.to_numeric(timeseries[col], errors="coerce")

        # ── DGW aggregation ───────────────────────────────────────────
        # Always collapse to one row per GW decision period.
        # DGW gameweeks receive per-fixture normalised scores so that the
        # inference pipeline (HMM, enriched, KF) operates on single-game-
        # equivalent observations. See double_gameweek.py for details.
        timeseries = aggregate_dgw_timeseries(timeseries)

        player = Player(
            id=pid,
            name=name,
            team=team,
            position=position,
            price=float(price),
            timeseries=timeseries,
        )
        players.append(player)

    logger.info("Built %d Player objects (up_to_gw=%s).", len(players), up_to_gw)
    return players
get_actual_points
get_actual_points(gw: int) -> dict[int, float]

Get actual points scored by each player in a specific gameweek.

For Double Gameweek players (two fixtures in the same round) the points from both fixtures are summed, which is the correct FPL score for that gameweek. The previous implementation used dict(zip(…)) which silently discarded the first fixture row when a player appeared twice, underreporting DGW scores.

RETURNS DESCRIPTION
dict[int, float]

{player_id: actual_points} (summed across fixtures for DGW players)

Source code in fplx/data/vaastav_loader.py
def get_actual_points(self, gw: int) -> dict[int, float]:
    """
    Get actual points scored by each player in a specific gameweek.

    For Double Gameweek players (two fixtures in the same round) the
    points from both fixtures are **summed**, which is the correct FPL
    score for that gameweek. The previous implementation used ``dict(zip(…))``
    which silently discarded the first fixture row when a player appeared
    twice, underreporting DGW scores.

    Returns
    -------
    dict[int, float]
        {player_id: actual_points}  (summed across fixtures for DGW players)
    """
    frame = self.load_gameweek(gw)
    points_col = "points" if "points" in frame.columns else "total_points"
    # Summing per element handles SGW (one row) and DGW (two rows) alike.
    totals = frame.groupby("element")[points_col].sum()
    return {int(pid): float(pts) for pid, pts in totals.items()}
get_fixture_info
get_fixture_info(gw: int) -> dict[int, dict]

Get fixture context (opponent, home/away, xP) per player for a GW.

Source code in fplx/data/vaastav_loader.py
def get_fixture_info(self, gw: int) -> dict[int, dict]:
    """Get fixture context (opponent, home/away, xP) per player for a GW."""
    frame = self.load_gameweek(gw)
    # Column presence is invariant across rows; check once, not per row.
    has_opponent = "opponent_team" in frame.columns
    has_xp = "xP" in frame.columns

    context: dict[int, dict] = {}
    for _, rec in frame.iterrows():
        pid = int(rec.get("element", 0))
        context[pid] = {
            "was_home": bool(rec.get("was_home", False)),
            "opponent_team": int(rec.get("opponent_team", 0)) if has_opponent else 0,
            "xP": float(rec.get("xP", 0.0)) if has_xp else 0.0,
        }
    return context

double_gameweek

Double Gameweek (DGW) detection, timeseries aggregation, and prediction scaling.

A Double Gameweek occurs when a team plays two Premier League fixtures in the same FPL gameweek. From the perspective of the inference pipeline and optimizer, this has two distinct effects:

  1. Historical timeseries (training/inference input) The vaastav dataset stores each fixture as a separate row. A DGW player therefore has two rows sharing the same gameweek value. If not aggregated, the HMM will see them as two sequential timesteps with single-game-calibrated emissions, causing the model to misinterpret a large total (e.g. 14 pts from two good games) as a single "Star" observation when it is actually two "Good" observations.

  2. Forward prediction (next-GW forecast for ILP) When the upcoming gameweek is a DGW, a player plays twice. Their expected FPL points should be approximately 2× the single-game prediction (under independence), and their variance should also scale accordingly.

Usage

>>> from fplx.data.double_gameweek import (
...     detect_dgw_gameweeks,
...     aggregate_dgw_timeseries,
...     scale_predictions_for_dgw,
...     get_fixture_counts_from_bootstrap,
... )

detect_dgw_gameweeks
detect_dgw_gameweeks(
    timeseries: DataFrame,
) -> dict[int, int]

Return a mapping of {gameweek: n_fixtures} for a single player's timeseries.

A gameweek with n_fixtures > 1 is a Double (or Triple) Gameweek.

PARAMETER DESCRIPTION
timeseries

Per-fixture timeseries as returned by VaastavLoader.build_player_objects. Must contain a gameweek column.

TYPE: DataFrame

RETURNS DESCRIPTION
dict[int, int]

{gameweek_number: fixture_count} for all gameweeks in the data. Gameweeks with a single fixture have value 1.

Examples:

>>> counts = detect_dgw_gameweeks(player.timeseries)
>>> dgw_gws = [gw for gw, n in counts.items() if n > 1]
Source code in fplx/data/double_gameweek.py
def detect_dgw_gameweeks(timeseries: pd.DataFrame) -> dict[int, int]:
    """Return ``{gameweek: n_fixtures}`` for a single player's timeseries.

    Any gameweek whose fixture count exceeds 1 is a Double (or Triple)
    Gameweek.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Per-fixture timeseries as returned by ``VaastavLoader.build_player_objects``.
        Must contain a ``gameweek`` column.

    Returns
    -------
    dict[int, int]
        ``{gameweek_number: fixture_count}`` for all gameweeks in the data.
        Gameweeks with a single fixture have value 1.

    Examples
    --------
    >>> counts = detect_dgw_gameweeks(player.timeseries)
    >>> dgw_gws = [gw for gw, n in counts.items() if n > 1]
    """
    missing = timeseries.empty or "gameweek" not in timeseries.columns
    if missing:
        return {}
    # value_counts gives the same row-count-per-gameweek mapping as
    # groupby(...).size().
    return timeseries["gameweek"].value_counts().to_dict()
aggregate_dgw_timeseries
aggregate_dgw_timeseries(
    timeseries: DataFrame,
) -> DataFrame

Collapse per-fixture rows into one normalised row per gameweek.

This is the single place where Double Gameweek handling lives. All downstream consumers (inference pipeline, enriched predictor, MV-HMM, Kalman Filter) always receive exactly one row per FPL decision period and never need to be aware of DGWs.

For a DGW gameweek (n_fixtures == 2):

  • Additive stats (goals, minutes, bonus, …) are summed to reflect the total accumulated across both matches.
  • Per-fixture normalisation is applied to points and to every additive stat that forms an inference feature. The normalised column is stored alongside the raw total:

.. code-block:: text

  points         # raw total (used for scoring / oracle)
  points_norm    # per-fixture average (used by inference / HMM)

The HMM emission distributions are calibrated on points_norm, so a DGW observation of 10 total points (points_norm = 5) is correctly interpreted as an "Average" game rather than misidentified as a "Star" event (8.5 pts single-game emission mean).

  • Rate / expected stats (xG, xA, …) are averaged — they already represent per-match rates.

  • Context columns (price, opponent) take the last-fixture value.

For a single-fixture gameweek (n_fixtures == 1) the row is returned unchanged and points_norm == points.

PARAMETER DESCRIPTION
timeseries

Raw per-fixture timeseries (may contain duplicate gameweek values for DGW players).

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

One row per gameweek, sorted ascending by gameweek. New columns added: - n_fixtures : int, number of fixtures played that round - points_norm : float, per-fixture normalised points

Source code in fplx/data/double_gameweek.py
def aggregate_dgw_timeseries(timeseries: pd.DataFrame) -> pd.DataFrame:
    """Collapse per-fixture rows into one normalised row per gameweek.

    This is the **single place** where Double Gameweek handling lives.  All
    downstream consumers (inference pipeline, enriched predictor, MV-HMM,
    Kalman Filter) always receive exactly one row per FPL decision period and
    never need to be aware of DGWs.

    For a DGW gameweek (``n_fixtures == 2``):

    * **Additive stats** (goals, minutes, bonus, …) are **summed** to reflect
      the total accumulated across both matches.
    * **Per-fixture normalisation** is applied to ``points`` and to every
      additive stat that forms an inference feature.  The normalised column is
      stored alongside the raw total:

      .. code-block:: text

          points         # raw total (used for scoring / oracle)
          points_norm    # per-fixture average (used by inference / HMM)

      The HMM emission distributions are calibrated on ``points_norm``, so a
      DGW observation of 10 total points (``points_norm = 5``) is correctly
      interpreted as an "Average" game rather than misidentified as a "Star"
      event (8.5 pts single-game emission mean).

    * **Rate / expected stats** (xG, xA, …) are averaged — they already
      represent per-match rates.

    * **Context columns** (price, opponent) take the last-fixture value.

    For a single-fixture gameweek (``n_fixtures == 1``) the row is returned
    unchanged and ``points_norm == points``.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Raw per-fixture timeseries (may contain duplicate ``gameweek`` values
        for DGW players).

    Returns
    -------
    pd.DataFrame
        One row per gameweek, sorted ascending by ``gameweek``.
        New columns added:
        - ``n_fixtures``  : int, number of fixtures played that round
        - ``points_norm`` : float, per-fixture normalised points
    """
    # Nothing to aggregate without a gameweek column; return an untouched copy.
    if timeseries.empty or "gameweek" not in timeseries.columns:
        return timeseries.copy()

    # Rows per gameweek; any count > 1 indicates a DGW to collapse.
    gw_counts = timeseries.groupby("gameweek").size()
    has_multi = (gw_counts > 1).any()

    # Fast path: no DGW rows — just tag the metadata columns and return.
    if not has_multi:
        ts = timeseries.copy()
        ts["n_fixtures"] = 1
        ts["points_norm"] = ts["points"] if "points" in ts.columns else 0.0
        return ts.sort_values("gameweek").reset_index(drop=True)

    agg_rows = []
    for gw, grp in timeseries.groupby("gameweek"):
        n = len(grp)
        row: dict = {"gameweek": gw, "n_fixtures": n}

        # ── Additive stats: sum across fixtures ───────────────────────────
        for col in _ADDITIVE_COLS:
            if col in grp.columns:
                row[col] = pd.to_numeric(grp[col], errors="coerce").fillna(0.0).sum()

        # ── Per-fixture normalisation of inference-facing columns ─────────
        # points_norm is what the HMM / enriched predictor trains on.
        # All other additive stat norms follow the same pattern.
        pts_total = row.get("points", 0.0)
        row["points_norm"] = pts_total / n if n > 0 else 0.0

        for col in _ADDITIVE_COLS:
            if col in row and col != "points":
                row[f"{col}_norm"] = row[col] / n if n > 0 else 0.0

        # ── Rate / expected stats: average across fixtures ────────────────
        for col in _RATE_COLS:
            if col in grp.columns:
                row[col] = pd.to_numeric(grp[col], errors="coerce").mean()

        # ── Context: last fixture value ───────────────────────────────────
        for col in _LAST_COLS:
            if col in grp.columns:
                row[col] = grp[col].iloc[-1]

        # Remaining columns: last value
        # (suppress rather than fail — stray non-scalar columns must not
        # abort the aggregation of the whole timeseries).
        handled = set(_ADDITIVE_COLS + _RATE_COLS + _LAST_COLS + ["gameweek"])
        for col in grp.columns:
            if col not in handled:
                with contextlib.suppress(Exception):
                    row[col] = grp[col].iloc[-1]
        agg_rows.append(row)

    result = pd.DataFrame(agg_rows).sort_values("gameweek").reset_index(drop=True)

    # Best-effort numeric coercion of every non-key column.
    for col in result.columns:
        if col != "gameweek":
            with contextlib.suppress(Exception):
                result[col] = pd.to_numeric(result[col], errors="coerce")

    return result
scale_predictions_for_dgw
scale_predictions_for_dgw(
    expected_points: dict[int, float],
    variances: dict[int, float],
    downside_risks: dict[int, float],
    fixture_counts: dict[int, int],
    variance_mode: str = "additive",
) -> tuple[
    dict[int, float], dict[int, float], dict[int, float]
]

Scale single-game predictions to account for a Double Gameweek.

For a player with n fixtures in the upcoming gameweek:

  • Expected points: E[P_total] = n * E[P_single]
  • Variance (additive, under independence): Var[P_total] = n * Var[P_single]
  • Downside risk: DR_total = sqrt(n) * DR_single

This is exact under independence of the two match performances. The independence assumption is acceptable because FPL points in different matches are only weakly correlated (shared clean sheet probability for the same game counts for both defenders, but that is captured in the single-game variance estimate).

PARAMETER DESCRIPTION
expected_points

Single-game expected points per player id.

TYPE: dict[int, float]

variances

Single-game predictive variance per player id.

TYPE: dict[int, float]

downside_risks

Single-game semi-deviation per player id.

TYPE: dict[int, float]

fixture_counts

Number of upcoming fixtures per player id (1 for SGW, 2 for DGW). Players absent from this dict are assumed to have 1 fixture.

TYPE: dict[int, int]

variance_mode

"additive" (default): Var[P_total] = n * Var[P_single] — correct under independence. "conservative": multiply variance by n^2 to account for possible correlation (e.g. both games against the same strong opponent).

TYPE: str DEFAULT: 'additive'

RETURNS DESCRIPTION
ep_scaled, var_scaled, dr_scaled : tuple of dicts

Scaled prediction dicts with the same keys as the inputs.

Notes

Blank gameweek (BGW) players (n = 0) receive E[P] = 0, Var[P] = 0.1, DR = 0. The optimizer will naturally exclude them since their expected points are zero.

Examples:

>>> ep_scaled, var_scaled, dr_scaled = scale_predictions_for_dgw(
...     expected_points, variances, downside_risks, fixture_counts
... )
Source code in fplx/data/double_gameweek.py
def scale_predictions_for_dgw(
    expected_points: dict[int, float],
    variances: dict[int, float],
    downside_risks: dict[int, float],
    fixture_counts: dict[int, int],
    variance_mode: str = "additive",
) -> tuple[dict[int, float], dict[int, float], dict[int, float]]:
    """Scale single-game predictions to account for a Double Gameweek.

    For a player with ``n`` fixtures in the upcoming gameweek:

    - Expected points: ``E[P_total] = n * E[P_single]``
    - Variance (additive, under independence): ``Var[P_total] = n * Var[P_single]``
    - Downside risk: ``DR_total = sqrt(n) * DR_single``

    These scalings are exact under independence of the match performances,
    which is an acceptable approximation since FPL points in different
    matches are only weakly correlated.

    Parameters
    ----------
    expected_points : dict[int, float]
        Single-game expected points per player id.
    variances : dict[int, float]
        Single-game predictive variance per player id.
    downside_risks : dict[int, float]
        Single-game semi-deviation per player id.
    fixture_counts : dict[int, int]
        Number of upcoming fixtures per player id (1 for SGW, 2 for DGW).
        Players absent from this dict are assumed to have 1 fixture.
    variance_mode : str
        ``"additive"`` (default): ``Var[P_total] = n * Var[P_single]`` — correct
        under independence.
        ``"conservative"``: multiply variance by ``n^2`` to account for possible
        correlation (e.g. both games against the same strong opponent).

    Returns
    -------
    ep_scaled, var_scaled, dr_scaled : tuple of dicts
        Scaled prediction dicts with the same keys as the inputs.

    Notes
    -----
    Blank gameweek (BGW) players (``n = 0``) receive ``E[P] = 0``,
    ``Var[P] = 0.1``, ``DR = 0``; the optimizer naturally excludes them.
    """
    ep_scaled: dict[int, float] = {}
    var_scaled: dict[int, float] = {}
    dr_scaled: dict[int, float] = {}

    for pid, single_ep in expected_points.items():
        n_fixtures = fixture_counts.get(pid, 1)

        # Blank gameweek: no fixture means zero points, token variance.
        if n_fixtures == 0:
            ep_scaled[pid], var_scaled[pid], dr_scaled[pid] = 0.0, 0.1, 0.0
            continue

        single_var = variances.get(pid, 4.0)
        single_dr = downside_risks.get(pid, single_var**0.5 / 2**0.5)

        ep_scaled[pid] = single_ep * n_fixtures

        # Additive: Var scales linearly under independence; conservative
        # uses n^2 as an upper bound for fully-correlated fixtures.
        if variance_mode == "additive":
            var_factor = n_fixtures
        else:
            var_factor = n_fixtures * n_fixtures
        var_scaled[pid] = single_var * var_factor

        # Semi-deviation scales with sqrt(n) under independence.
        dr_scaled[pid] = single_dr * n_fixtures**0.5

    return ep_scaled, var_scaled, dr_scaled
get_fixture_counts_from_bootstrap
get_fixture_counts_from_bootstrap(
    bootstrap: dict, target_gw: int
) -> dict[int, int]

Derive per-player fixture counts for a gameweek from FPL bootstrap data.

Parses the fixtures list in the bootstrap-static response to count how many fixtures each team plays in target_gw. Returns a player-level mapping derived from each player's team id.

PARAMETER DESCRIPTION
bootstrap

Full bootstrap-static API response containing "fixtures" and "elements" lists.

TYPE: dict

target_gw

The gameweek to inspect.

TYPE: int

RETURNS DESCRIPTION
dict[int, int]

{player_id: n_fixtures} for all players. Players whose team has no fixture in target_gw (BGW) receive 0.

Source code in fplx/data/double_gameweek.py
def get_fixture_counts_from_bootstrap(
    bootstrap: dict,
    target_gw: int,
) -> dict[int, int]:
    """Derive per-player fixture counts for a gameweek from FPL bootstrap data.

    Parses the ``fixtures`` list in the bootstrap-static response to count how
    many fixtures each team plays in ``target_gw``. Returns a player-level
    mapping derived from each player's ``team`` id.

    Parameters
    ----------
    bootstrap : dict
        Full bootstrap-static API response containing ``"fixtures"`` and
        ``"elements"`` lists.
    target_gw : int
        The gameweek to inspect.

    Returns
    -------
    dict[int, int]
        ``{player_id: n_fixtures}`` for all players. Players whose team has no
        fixture in ``target_gw`` (BGW) receive 0. If the response contains no
        fixture data for ``target_gw`` at all, every player defaults to 1 so
        downstream scaling degrades gracefully.
    """
    fixtures = bootstrap.get("fixtures", [])
    elements = bootstrap.get("elements", [])

    # Count fixtures per team in target_gw
    team_fixture_counts: dict[int, int] = {}
    for fix in fixtures:
        if fix.get("event") != target_gw:
            continue
        for team_id in (fix.get("team_h"), fix.get("team_a")):
            if team_id is not None:
                team_fixture_counts[team_id] = team_fixture_counts.get(team_id, 0) + 1

    # BUG FIX: a team absent from team_fixture_counts has no fixture in
    # target_gw, so its players must receive 0 (blank gameweek) as the
    # docstring promises — the old default of 1 silently treated BGW
    # players as having a normal fixture. When there is no fixture data
    # at all, keep the old graceful default of 1 per player.
    default_count = 0 if team_fixture_counts else 1

    # Map player -> team -> fixture count
    player_counts: dict[int, int] = {}
    for elem in elements:
        pid = elem["id"]
        team = elem.get("team")
        player_counts[pid] = team_fixture_counts.get(team, default_count)

    n_dgw = sum(1 for n in team_fixture_counts.values() if n > 1)
    # BGW teams: teams that field players but have no fixture this gameweek.
    # (The old computation scanned team_fixture_counts, which by construction
    # never contains zero entries, so it always reported 0.)
    element_teams = {e.get("team") for e in elements if e.get("team") is not None}
    n_bgw = len(element_teams - set(team_fixture_counts)) if team_fixture_counts else 0
    if n_dgw:
        logger.info("GW%d: %d teams with DGW, %d teams with BGW.", target_gw, n_dgw, n_bgw)

    return player_counts
get_fixture_counts_from_vaastav
get_fixture_counts_from_vaastav(
    loader, target_gw: int
) -> dict[int, int]

Derive per-player fixture counts for a historical gameweek from vaastav data.

Uses the merged_gw CSV to count how many rows each player has for target_gw. This is the ground-truth fixture count for backtesting.

PARAMETER DESCRIPTION
loader

An initialised loader instance.

TYPE: VaastavLoader

target_gw

Gameweek to inspect.

TYPE: int

RETURNS DESCRIPTION
dict[int, int]

{player_id: n_fixtures} — 1 for SGW, 2 for DGW. Players with no row for the gameweek (no fixture) are omitted from the mapping.

Source code in fplx/data/double_gameweek.py
def get_fixture_counts_from_vaastav(
    loader,
    target_gw: int,
) -> dict[int, int]:
    """Derive per-player fixture counts for a historical gameweek from vaastav data.

    Counts how many rows each player has in the merged_gw CSV for
    ``target_gw``; this is the ground-truth fixture count for backtesting.

    Parameters
    ----------
    loader : VaastavLoader
        An initialised loader instance.
    target_gw : int
        Gameweek to inspect.

    Returns
    -------
    dict[int, int]
        ``{player_id: n_fixtures}`` — 1 for SGW, 2 for DGW. Players with no
        row for ``target_gw`` do not appear in the mapping.
    """
    gw_rows = loader.load_gameweek(target_gw)
    if gw_rows.empty:
        return {}
    # One row per fixture appearance, so the per-player row count is the
    # fixture count.
    return gw_rows["element"].value_counts().to_dict()

loaders

Data loaders for FPL data sources.

FPLDataLoader
FPLDataLoader(cache_dir: Optional[Path] = None)

Load and manage FPL data from various sources (API, CSV, cache).

PARAMETER DESCRIPTION
cache_dir

Directory to cache downloaded data

TYPE: Optional[Path] DEFAULT: None

Source code in fplx/data/loaders.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Initialise the loader.

    Parameters
    ----------
    cache_dir : Optional[Path]
        Directory to cache downloaded data; defaults to ``~/.fplx/cache``.
        Created eagerly so later cache writes cannot fail on a missing dir.
    """
    self.cache_dir = cache_dir or Path.home() / ".fplx" / "cache"
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    # Memoized bootstrap-static payload (populated by fetch_bootstrap_data).
    self._bootstrap_data = None
fetch_bootstrap_data
fetch_bootstrap_data(force_refresh: bool = False) -> dict

Fetch main FPL data (players, teams, gameweeks).

PARAMETER DESCRIPTION
force_refresh

Force refresh even if cached

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Dict

Bootstrap data containing players, teams, events

Source code in fplx/data/loaders.py
def fetch_bootstrap_data(self, force_refresh: bool = False) -> dict:
    """
    Fetch main FPL data (players, teams, gameweeks).

    Responses are cached on disk (``bootstrap.json`` under ``cache_dir``)
    and memoized on ``self._bootstrap_data``.

    Parameters
    ----------
    force_refresh : bool
        Force refresh even if cached

    Returns
    -------
    Dict
        Bootstrap data containing players, teams, events
    """
    # Single function-local import (the old version imported json twice).
    import json

    cache_file = self.cache_dir / "bootstrap.json"

    if not force_refresh and cache_file.exists():
        logger.info("Loading bootstrap data from cache")
        with open(cache_file) as f:
            data = json.load(f)
        # Keep the in-memory memo consistent with the on-disk cache
        # (the old version only set it on the API path).
        self._bootstrap_data = data
        return data

    logger.info("Fetching bootstrap data from FPL API")
    # A timeout prevents the request from hanging indefinitely on a
    # stalled connection (requests has no default timeout).
    response = requests.get(self.BOOTSTRAP_URL, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Cache the data
    with open(cache_file, "w") as f:
        json.dump(data, f)

    self._bootstrap_data = data
    return data
load_players
load_players(force_refresh: bool = False) -> list[Player]

Load all players with basic info.

PARAMETER DESCRIPTION
force_refresh

Force refresh from API

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Player]

List of Player objects

Source code in fplx/data/loaders.py
def load_players(self, force_refresh: bool = False) -> list[Player]:
    """
    Load all players with basic info.

    Parameters
    ----------
    force_refresh : bool
        Force refresh from API

    Returns
    -------
    list[Player]
        List of Player objects
    """
    data = self.fetch_bootstrap_data(force_refresh)

    # Lookup tables: team id -> team name, element_type -> position code.
    teams = {t["id"]: t["name"] for t in data["teams"]}
    positions = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}

    players = []
    for element in data["elements"]:
        # Create minimal timeseries (can be enriched later).
        # "form" arrives as a string and may be empty or None for a
        # present key, which the old float(get(..., 0)) call crashed on.
        ts_data = {
            "gameweek": [0],
            "points": [element.get("total_points", 0)],
            "minutes": [element.get("minutes", 0)],
            "form": [float(element.get("form") or 0)],
        }

        # None from the API means "no doubt flagged" -> fully available.
        chance = element.get("chance_of_playing_next_round")

        player = Player(
            id=element["id"],
            name=element["web_name"],
            team=teams[element["team"]],
            position=positions[element["element_type"]],
            price=element["now_cost"] / 10.0,  # API stores price in tenths of £m
            timeseries=pd.DataFrame(ts_data),
            news={
                "text": element.get("news", ""),
                "availability": 1.0 if chance is None else chance / 100.0,
            },
        )
        players.append(player)

    # Lazy %-style args to match the logging convention used elsewhere here.
    logger.info("Loaded %d players", len(players))
    return players
load_player_history
load_player_history(player_id: int) -> DataFrame

Load detailed historical data for a specific player.

PARAMETER DESCRIPTION
player_id

Player ID

TYPE: int

RETURNS DESCRIPTION
DataFrame

Historical gameweek stats

Source code in fplx/data/loaders.py
def load_player_history(self, player_id: int) -> pd.DataFrame:
    """
    Load detailed historical data for a specific player.

    Parameters
    ----------
    player_id : int
        Player ID

    Returns
    -------
    pd.DataFrame
        Historical gameweek stats (empty frame when the API reports none)
    """
    url = self.PLAYER_DETAIL_URL.format(player_id=player_id)
    # A timeout prevents the request from hanging indefinitely on a
    # stalled connection (requests has no default timeout).
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    data = response.json()
    history = pd.DataFrame(data["history"])

    # Rename API columns to the library's canonical names.
    # (Identity mappings like "minutes" -> "minutes" were no-ops and removed.)
    if not history.empty:
        history = history.rename(
            columns={
                "round": "gameweek",
                "total_points": "points",
                "goals_scored": "goals",
                "expected_goals": "xG",
                "expected_assists": "xA",
            }
        )

    return history
load_fixtures
load_fixtures() -> DataFrame

Load all fixtures.

RETURNS DESCRIPTION
DataFrame

Fixtures data

Source code in fplx/data/loaders.py
def load_fixtures(self) -> pd.DataFrame:
    """
    Load all fixtures from the FPL API.

    Returns
    -------
    pd.DataFrame
        Fixtures data
    """
    # A timeout prevents the request from hanging indefinitely on a
    # stalled connection (requests has no default timeout).
    response = requests.get(self.FIXTURES_URL, timeout=30)
    response.raise_for_status()

    return pd.DataFrame(response.json())
load_from_csv
load_from_csv(filepath: Path) -> DataFrame

Load data from CSV file.

PARAMETER DESCRIPTION
filepath

Path to CSV file

TYPE: Path

RETURNS DESCRIPTION
DataFrame

Loaded data

Source code in fplx/data/loaders.py
def load_from_csv(self, filepath: Path) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame.

    Parameters
    ----------
    filepath : Path
        Path to CSV file

    Returns
    -------
    pd.DataFrame
        Loaded data
    """
    logger.info("Loading data from %s", filepath)
    return pd.read_csv(filepath)
enrich_player_history
enrich_player_history(
    players: list[Player],
) -> list[Player]

Enrich players with full historical data.

PARAMETER DESCRIPTION
players

List of players to enrich

TYPE: list[Player]

RETURNS DESCRIPTION
list[Player]

Players with enriched timeseries

Source code in fplx/data/loaders.py
def enrich_player_history(self, players: list[Player]) -> list[Player]:
    """
    Enrich players with full historical data.

    Every input player is returned (in order); players whose history could
    not be fetched keep their existing timeseries and the failure is logged.

    Parameters
    ----------
    players : list[Player]
        List of players to enrich

    Returns
    -------
    list[Player]
        Players with enriched timeseries
    """
    enriched = []
    for player in players:
        try:
            history = self.load_player_history(player.id)
            if not history.empty:
                player.timeseries = history
        except Exception as e:
            # Best-effort enrichment: keep the player with its original
            # timeseries. (Fixed: the logging call had a spurious f-prefix
            # on a lazy %-style format string.)
            logger.warning("Could not load history for %s : %s", player.name, e)
        enriched.append(player)

    return enriched

news_collector

News collection and per-gameweek persistence.

NewsSnapshot
NewsSnapshot(
    player_id: int,
    gameweek: int,
    news_text: str = "",
    status: str = "a",
    chance_this_round: Optional[float] = None,
    chance_next_round: Optional[float] = None,
    timestamp: str = "",
)

A single player's news state at a specific gameweek.

ATTRIBUTE DESCRIPTION
player_id

TYPE: int

gameweek

TYPE: int

news_text

Raw news string from FPL API.

TYPE: str

status

FPL status code: "a", "d", "i", "s", "u", "n".

TYPE: str

chance_this_round

Probability of playing this round (0-100 scale from API, stored as 0-1).

TYPE: float or None

chance_next_round

Probability of playing next round (0-1).

TYPE: float or None

timestamp

When the news was added (ISO format from API).

TYPE: str

Source code in fplx/data/news_collector.py
def __init__(
    self,
    player_id: int,
    gameweek: int,
    news_text: str = "",
    status: str = "a",
    chance_this_round: Optional[float] = None,
    chance_next_round: Optional[float] = None,
    timestamp: str = "",
):
    """Store one player's news state at a specific gameweek.

    Parameters
    ----------
    player_id : int
        FPL element id.
    gameweek : int
        Gameweek this snapshot belongs to.
    news_text : str
        Raw news string from the FPL API.
    status : str
        FPL status code: "a", "d", "i", "s", "u", "n".
    chance_this_round, chance_next_round : float or None
        Probability of playing (0-1 scale; None when the API gives none).
    timestamp : str
        When the news was added (ISO format from API).
    """
    self.player_id = player_id
    self.gameweek = gameweek
    self.news_text = news_text
    self.status = status
    self.chance_this_round = chance_this_round
    self.chance_next_round = chance_next_round
    self.timestamp = timestamp
to_news_signal_input
to_news_signal_input() -> str

Convert to the text format that NewsSignal.generate_signal() expects.

Combines the raw news text with status information to give the existing NewsParser richer input.

Source code in fplx/data/news_collector.py
def to_news_signal_input(self) -> str:
    """
    Build the text input expected by NewsSignal.generate_signal().

    The raw news text is augmented with a readable status label and the
    chance-of-playing percentage so the existing NewsParser gets richer
    input than the bare news string.
    """
    segments: list[str] = []

    raw_text = self.news_text.strip() if self.news_text else ""
    if raw_text:
        segments.append(raw_text)

    # Readable labels for FPL status codes; only appended when the text
    # gathered so far does not already imply the label.
    labels = {
        "i": "injured",
        "s": "suspended",
        "u": "unavailable",
        "d": "doubtful",
        "n": "not in squad",
    }
    label = labels.get(self.status)
    if label is not None and label not in " ".join(segments).lower():
        segments.append(f"Status: {label}")

    # Append an explicit playing-chance percentage when one is known and < 100%.
    chance = self.chance_next_round
    if chance is not None and chance < 1.0:
        segments.append(f"{int(chance * 100)}% chance of playing")

    # ". ".join of an empty list is already "", matching the no-news case.
    return ". ".join(segments)
NewsCollector
NewsCollector(cache_dir: Optional[Path] = None)

Collects and persists player news snapshots per gameweek.

Usage (live): collector = NewsCollector(cache_dir="~/.fplx/news") collector.collect_from_bootstrap(bootstrap_data, gameweek=25) # Later, feed into inference: snapshots = collector.get_player_history(player_id=123)

Usage (backtest): collector = NewsCollector(cache_dir="~/.fplx/news") # Load all pre-collected snapshots for gw in range(1, 39): snapshots = collector.get_gameweek(gw) # inject into pipeline per player

PARAMETER DESCRIPTION
cache_dir

Directory to persist snapshots as JSON.

TYPE: Path or str DEFAULT: None

Source code in fplx/data/news_collector.py
def __init__(self, cache_dir: Optional[Path] = None):
    """Initialise the collector.

    Parameters
    ----------
    cache_dir : Path or str, optional
        Directory to persist snapshots as JSON; defaults to ``~/.fplx/news``.
        Created eagerly so later writes cannot fail on a missing directory.
    """
    self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / ".fplx" / "news"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # In-memory store: {gameweek: {player_id: NewsSnapshot}}
    self._store: dict[int, dict[int, NewsSnapshot]] = {}
collect_from_bootstrap
collect_from_bootstrap(
    bootstrap_data: dict, gameweek: int
) -> int

Extract news from a bootstrap-static API response.

This is the key method. Call it each gameweek with fresh API data.

PARAMETER DESCRIPTION
bootstrap_data

Response from https://fantasy.premierleague.com/api/bootstrap-static/

TYPE: dict

gameweek

Current gameweek number.

TYPE: int

RETURNS DESCRIPTION
int

Number of players with active news.

Source code in fplx/data/news_collector.py
def collect_from_bootstrap(self, bootstrap_data: dict, gameweek: int) -> int:
    """
    Extract news snapshots from a bootstrap-static API response.

    This is the key method. Call it each gameweek with fresh API data;
    the snapshots are stored in memory and persisted to disk.

    Parameters
    ----------
    bootstrap_data : dict
        Response from https://fantasy.premierleague.com/api/bootstrap-static/
    gameweek : int
        Current gameweek number.

    Returns
    -------
    int
        Number of players with active news.
    """
    snapshots: dict[int, NewsSnapshot] = {}
    active = 0

    for element in bootstrap_data.get("elements", []):
        pid = element["id"]
        text = element.get("news", "") or ""
        status = element.get("status", "a") or "a"

        # The API reports chances on a 0-100 scale (or None); store 0-1.
        chance_this = element.get("chance_of_playing_this_round")
        if chance_this is not None:
            chance_this /= 100.0
        chance_next = element.get("chance_of_playing_next_round")
        if chance_next is not None:
            chance_next /= 100.0

        snapshots[pid] = NewsSnapshot(
            player_id=pid,
            gameweek=gameweek,
            news_text=text,
            status=status,
            chance_this_round=chance_this,
            chance_next_round=chance_next,
            timestamp=element.get("news_added", ""),
        )

        # "Active news" = any news text, or any status other than available.
        if text.strip() or status != "a":
            active += 1

    self._store[gameweek] = snapshots
    self._persist_gameweek(gameweek)

    logger.info(
        "GW %s: collected news for %d players (%d with active news)",
        gameweek,
        len(snapshots),
        active,
    )
    return active
get_player_news
get_player_news(
    player_id: int, gameweek: int
) -> Optional[NewsSnapshot]

Get a specific player's news at a specific gameweek.

Source code in fplx/data/news_collector.py
def get_player_news(self, player_id: int, gameweek: int) -> Optional[NewsSnapshot]:
    """Return one player's snapshot at one gameweek, or None if absent."""
    self._ensure_loaded(gameweek)
    return self._store.get(gameweek, {}).get(player_id)
get_player_history
get_player_history(player_id: int) -> list[NewsSnapshot]

Get all news snapshots for a player across all collected gameweeks.

Returns list sorted by gameweek.

Source code in fplx/data/news_collector.py
def get_player_history(self, player_id: int) -> list[NewsSnapshot]:
    """
    Return every collected snapshot for a player, ordered by gameweek.
    """
    self._load_all()
    per_gw = (self._store[gw].get(player_id) for gw in sorted(self._store))
    return [snap for snap in per_gw if snap is not None]
get_gameweek
get_gameweek(gameweek: int) -> dict[int, NewsSnapshot]

Get all player news for a specific gameweek.

Source code in fplx/data/news_collector.py
def get_gameweek(self, gameweek: int) -> dict[int, NewsSnapshot]:
    """Return {player_id: NewsSnapshot} for one gameweek (empty if none)."""
    self._ensure_loaded(gameweek)
    snapshots = self._store.get(gameweek)
    return snapshots if snapshots is not None else {}
get_players_with_news
get_players_with_news(gameweek: int) -> list[NewsSnapshot]

Get only players with non-trivial news at a gameweek.

Source code in fplx/data/news_collector.py
def get_players_with_news(self, gameweek: int) -> list[NewsSnapshot]:
    """Return snapshots carrying real news (text, or a non-available status)."""
    noteworthy = []
    for snap in self.get_gameweek(gameweek).values():
        if snap.news_text.strip() or snap.status != "a":
            noteworthy.append(snap)
    return noteworthy
collect_season_from_api
collect_season_from_api(data_loader) -> int

Collect news for all gameweeks in a season.

Requires calling the FPL API once per gameweek (the bootstrap-static endpoint only gives current-week news). For backtesting, you'd need to have cached the bootstrap data weekly during the season.

For a single-shot collection (current state only), just call collect_from_bootstrap() once with the current bootstrap data and the current gameweek number.

PARAMETER DESCRIPTION
data_loader

Your existing data loader.

TYPE: FPLDataLoader

RETURNS DESCRIPTION
int

Number of gameweeks collected.

Source code in fplx/data/news_collector.py
def collect_season_from_api(self, data_loader) -> int:
    """
    Collect news for the current gameweek via a single API call.

    The bootstrap-static endpoint only exposes current-week news, so a
    single call can only ever cover one gameweek. For backtesting you
    would need bootstrap data that was cached weekly during the season.

    For a single-shot collection (current state only), just call
    collect_from_bootstrap() once with the current bootstrap data and
    the current gameweek number.

    Parameters
    ----------
    data_loader : FPLDataLoader
        Your existing data loader.

    Returns
    -------
    int
        Number of gameweeks collected.
    """
    bootstrap = data_loader.fetch_bootstrap_data(force_refresh=True)

    # The current gameweek is the event flagged is_current; default to 1.
    current_gw = next(
        (event["id"] for event in bootstrap.get("events", []) if event.get("is_current")),
        1,
    )

    self.collect_from_bootstrap(bootstrap, current_gw)
    return 1  # Only current GW available from a single API call

schemas

Data validation schemas for FPL data sources.

BootstrapStatic

Bases: BaseModel

Schema for the main FPL bootstrap-static endpoint.

Fixture

Bases: BaseModel

Schema for a single fixture.

PlayerHistory

Bases: BaseModel

Schema for a player's historical performance data.

PlayerSummary

Bases: BaseModel

Schema for a player's summary data.

tft_dataset

Dataset utilities for Temporal Fusion Transformer (TFT).

This module converts vaastav merged gameweek data into a global panel format compatible with pytorch_forecasting.TimeSeriesDataSet.

build_tft_panel
build_tft_panel(merged_gw: DataFrame) -> DataFrame

Build TFT panel dataframe from merged gameweek data.

Output schema includes: - group_id: player identifier - time_idx: gameweek index - static categoricals: position, team - known covariates: fixture_difficulty, is_home - unknown covariates: xPts, mins_frac, news_sentiment, actual_points

Source code in fplx/data/tft_dataset.py
def build_tft_panel(merged_gw: pd.DataFrame) -> pd.DataFrame:
    """Build TFT panel dataframe from merged gameweek data.

    Output schema includes:
    - group_id: player identifier
    - time_idx: gameweek index
    - static categoricals: position, team
    - known covariates: fixture_difficulty, is_home
    - unknown covariates: xPts, mins_frac, news_sentiment, actual_points

    Parameters
    ----------
    merged_gw : pd.DataFrame
        One row per player-gameweek appearance (vaastav merged_gw format).

    Returns
    -------
    pd.DataFrame
        Panel with exactly the schema above, sorted by (group_id, time_idx).
    """
    # BUG FIX: the xPts buffer below is written via df.index values used as
    # positions, which silently assumed merged_gw always carried a default
    # RangeIndex. Reset the index up front so label- and position-based
    # indexing coincide even for filtered/re-ordered input frames.
    df = merged_gw.copy().reset_index(drop=True)

    rename_map = {
        "element": "group_id",
        "gameweek": "time_idx",
        "points": "actual_points",
    }
    df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

    # Defensive fallbacks; .to_numpy() avoids index alignment against the
    # caller's (possibly non-default) index now that df has a fresh one.
    if "group_id" not in df.columns and "element" in merged_gw.columns:
        df["group_id"] = merged_gw["element"].to_numpy()
    if "time_idx" not in df.columns and "gameweek" in merged_gw.columns:
        df["time_idx"] = merged_gw["gameweek"].to_numpy()
    if "actual_points" not in df.columns and "points" in merged_gw.columns:
        df["actual_points"] = merged_gw["points"].to_numpy()

    df["group_id"] = pd.to_numeric(df["group_id"], errors="coerce").astype("Int64")
    df["time_idx"] = pd.to_numeric(df["time_idx"], errors="coerce")
    df["actual_points"] = pd.to_numeric(df["actual_points"], errors="coerce").fillna(0.0)

    # Static categoricals with safe defaults.
    if "position" not in df.columns:
        df["position"] = "MID"
    if "team" not in df.columns:
        df["team"] = "Unknown"
    df["position"] = df["position"].astype(str)
    df["team"] = df["team"].astype(str)

    # Known future covariates.
    if "was_home" in df.columns:
        df["is_home"] = pd.to_numeric(df["was_home"], errors="coerce").fillna(0.0)
    else:
        df["is_home"] = 0.0

    if "fixture_difficulty" in df.columns:
        df["fixture_difficulty"] = pd.to_numeric(df["fixture_difficulty"], errors="coerce").fillna(3.0)
    else:
        df["fixture_difficulty"] = 3.0

    if "minutes" in df.columns:
        mins = pd.to_numeric(df["minutes"], errors="coerce").fillna(0.0)
        df["mins_frac"] = np.clip(mins / 90.0, 0.0, 1.0)
    else:
        df["mins_frac"] = 0.0

    # Placeholder until historical NLP news pipeline is fully integrated.
    df["news_sentiment"] = 0.0

    # Structural xPts projection per player-position trajectory. With the
    # RangeIndex guaranteed above, grp_sorted.index doubles as row positions.
    xpts = np.zeros(len(df), dtype=float)
    for _, grp in df.groupby("group_id", dropna=True):
        grp_sorted = grp.sort_values("time_idx")
        pos = str(grp_sorted["position"].iloc[0])
        x_vals = compute_xpoints(grp_sorted, pos)
        xpts[grp_sorted.index.to_numpy()] = x_vals
    df["xPts"] = xpts

    keep_cols = [
        "group_id",
        "time_idx",
        "position",
        "team",
        "fixture_difficulty",
        "is_home",
        "xPts",
        "mins_frac",
        "news_sentiment",
        "actual_points",
    ]
    df = df[keep_cols].dropna(subset=["group_id", "time_idx"]).copy()
    df["group_id"] = df["group_id"].astype(int)
    df["time_idx"] = df["time_idx"].astype(int)
    return df.sort_values(["group_id", "time_idx"]).reset_index(drop=True)
make_tft_datasets
make_tft_datasets(
    panel_df: DataFrame,
    training_cutoff: int,
    encoder_length: int = 15,
    prediction_length: int = 1,
)

Create TFT training and prediction datasets.

Requires optional dependency pytorch-forecasting.

Source code in fplx/data/tft_dataset.py
def make_tft_datasets(
    panel_df: pd.DataFrame,
    training_cutoff: int,
    encoder_length: int = 15,
    prediction_length: int = 1,
):
    """Create TFT training and prediction datasets.

    Requires optional dependency `pytorch-forecasting`.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel in the build_tft_panel schema (group_id/time_idx plus
        covariate and target columns).
    training_cutoff : int
        Last time_idx (inclusive) to use for training.
    encoder_length : int
        Desired encoder window; automatically shrunk when history is shorter.
    prediction_length : int
        Forecast horizon in gameweeks.

    Returns
    -------
    tuple
        (training, prediction) TimeSeriesDataSet instances; the prediction
        dataset reuses the training dataset's encoders/normalisation.

    Raises
    ------
    ImportError
        If pytorch-forecasting is not installed.
    ValueError
        If no group has enough history for one encoder+decoder window.
    """
    try:
        from pytorch_forecasting import TimeSeriesDataSet
        from pytorch_forecasting.data.encoders import NaNLabelEncoder
    except ImportError as e:
        raise ImportError(
            "TFT dataset creation requires pytorch-forecasting. "
            "Install with: pip install pytorch-forecasting lightning"
        ) from e

    # Training split: everything up to and including the cutoff, with a
    # numeric, NaN-free target.
    train_df = panel_df[panel_df["time_idx"] <= training_cutoff].copy()
    train_df["actual_points"] = (
        pd.to_numeric(train_df["actual_points"], errors="coerce").fillna(0.0).astype(float)
    )

    # Ensure encoder length is feasible for available history.
    hist_len = train_df.groupby("group_id")["time_idx"].nunique()
    if hist_len.empty:
        raise ValueError("No training data available for TFT dataset creation.")

    # The longest encoder any group could support, after reserving the horizon.
    max_possible_encoder = int(hist_len.max() - prediction_length)
    if max_possible_encoder < 1:
        raise ValueError(
            "Insufficient history to build TFT windows. Increase training cutoff or reduce prediction length."
        )

    eff_encoder_length = min(int(encoder_length), max_possible_encoder)
    min_required_len = eff_encoder_length + prediction_length

    # Keep only groups with at least one full encoder+decoder window.
    valid_ids = hist_len[hist_len >= min_required_len].index
    train_df = train_df[train_df["group_id"].isin(valid_ids)].copy()
    if train_df.empty:
        raise ValueError(
            "No groups have enough history after encoder-length adjustment. "
            f"Required per-group length: {min_required_len}."
        )

    training = TimeSeriesDataSet(
        train_df,
        time_idx="time_idx",
        target="actual_points",
        group_ids=["group_id"],
        # Fixed-size windows: min == max for both encoder and decoder.
        min_encoder_length=eff_encoder_length,
        max_encoder_length=eff_encoder_length,
        min_prediction_length=prediction_length,
        max_prediction_length=prediction_length,
        static_categoricals=["position", "team"],
        time_varying_known_reals=["time_idx", "fixture_difficulty", "is_home"],
        time_varying_unknown_reals=["xPts", "mins_frac", "news_sentiment", "actual_points"],
        categorical_encoders={
            # add_nan=True gives unseen categories a NaN bucket at predict time.
            "position": NaNLabelEncoder(add_nan=True),
            "team": NaNLabelEncoder(add_nan=True),
        },
        allow_missing_timesteps=True,
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
    )

    # Prediction dataset sees the full panel (same valid groups) so the
    # decoder can run past the training cutoff.
    pred_df = panel_df[panel_df["group_id"].isin(valid_ids)].copy()
    pred_df["actual_points"] = (
        pd.to_numeric(pred_df["actual_points"], errors="coerce").fillna(0.0).astype(float)
    )

    prediction = TimeSeriesDataSet.from_dataset(
        training,
        pred_df,
        predict=True,
        stop_randomization=True,
    )

    return training, prediction

vaastav_loader

Loader for the vaastav/Fantasy-Premier-League dataset.

Supports two modes: 1. Remote: fetch CSVs directly from GitHub (no clone needed). 2. Local: read from a cloned repo directory.

Usage (remote): loader = VaastavLoader(season="2023-24") players = loader.build_player_objects(up_to_gw=20)

Usage (local): loader = VaastavLoader(season="2023-24", data_dir="./Fantasy-Premier-League") players = loader.build_player_objects(up_to_gw=20)

Dataset: https://github.com/vaastav/Fantasy-Premier-League

Double Gameweek handling

build_player_objects automatically calls aggregate_dgw_timeseries on every player's raw timeseries before constructing the Player object. This means all downstream consumers (inference pipeline, MV-HMM, enriched predictor, Kalman Filter) always receive exactly one row per FPL decision period.

For DGW gameweeks, the resulting row contains: points – raw total (both fixtures summed, used for scoring / oracle) points_norm – per-fixture average (used by inference components) n_fixtures – number of fixtures played (1 for SGW, 2 for DGW)

The inference pipeline uses points_norm so that HMM emission distributions remain calibrated on single-game-equivalent observations. The ILP objective then scales back via scale_predictions_for_dgw to reflect the full DGW opportunity.

VaastavLoader
VaastavLoader(
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
)

Load historical FPL data from the vaastav dataset.

PARAMETER DESCRIPTION
season

Season string, e.g. "2023-24".

TYPE: str DEFAULT: '2023-24'

data_dir

Path to a local clone. If None, fetches from GitHub.

TYPE: str or Path DEFAULT: None

cache_dir

Where to cache downloaded CSVs. Defaults to ~/.fplx/vaastav/.

TYPE: str or Path DEFAULT: None

Source code in fplx/data/vaastav_loader.py
def __init__(
    self,
    season: str = "2023-24",
    data_dir: Optional[str | Path] = None,
    cache_dir: Optional[str | Path] = None,
):
    """Set up season, source directory, and cache location."""
    self.season = self._validate_season(season)
    self.data_dir = Path(data_dir) if data_dir else None

    if cache_dir:
        self.cache_dir = Path(cache_dir)
    else:
        # Default cache is project-local to keep artifacts within the workspace.
        project_root = Path(__file__).resolve().parents[2]
        self.cache_dir = project_root / ".fplx" / "vaastav"
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Memoized dataframes, filled lazily by the load_* methods.
    self._merged_gw: Optional[pd.DataFrame] = None
    self._player_raw: Optional[pd.DataFrame] = None
load_merged_gw
load_merged_gw() -> DataFrame

Load the merged gameweek file (all GWs, all players, one CSV).

RETURNS DESCRIPTION
DataFrame

One row per player-gameweek appearance.

Source code in fplx/data/vaastav_loader.py
def load_merged_gw(self) -> pd.DataFrame:
    """
    Load the merged gameweek file (all GWs, all players, one CSV).

    The result is memoized on the instance, so repeated calls are cheap
    and the load/log happens at most once.

    Returns
    -------
    pd.DataFrame
        One row per player-gameweek appearance.
    """
    if self._merged_gw is None:
        df = self._read_csv("gws/merged_gw.csv")
        # Normalise raw column names to the library's canonical names.
        df = df.rename(columns={col: COLUMN_MAP.get(col, col) for col in df.columns})
        df = self._coalesce_duplicate_columns(df)

        if "gameweek" in df.columns:
            df["gameweek"] = pd.to_numeric(df["gameweek"], errors="coerce")

        self._merged_gw = df
        logger.info(
            "Loaded merged_gw: %d rows, %d players, GW %d-%d",
            len(df),
            df["element"].nunique(),
            df["gameweek"].min(),
            df["gameweek"].max(),
        )
    return self._merged_gw
load_player_raw
load_player_raw() -> DataFrame

Load season-level player metadata.

Source code in fplx/data/vaastav_loader.py
def load_player_raw(self) -> pd.DataFrame:
    """Load season-level player metadata (memoized on the instance)."""
    if self._player_raw is None:
        self._player_raw = self._read_csv("players_raw.csv")
    return self._player_raw
load_gameweek
load_gameweek(gw: int) -> DataFrame

Load a single gameweek from merged data.

Source code in fplx/data/vaastav_loader.py
def load_gameweek(self, gw: int) -> pd.DataFrame:
    """Return a copy of the merged-GW rows belonging to gameweek ``gw``."""
    merged = self.load_merged_gw()
    return merged.loc[merged["gameweek"] == gw].copy()
build_player_objects
build_player_objects(
    up_to_gw: Optional[int] = None,
) -> list[Player]

Build Player objects with timeseries up to a given gameweek.

PARAMETER DESCRIPTION
up_to_gw

Only include gameweeks 1..up_to_gw. If None, include all.

TYPE: int DEFAULT: None

RETURNS DESCRIPTION
list[Player]
Source code in fplx/data/vaastav_loader.py
def build_player_objects(
    self,
    up_to_gw: Optional[int] = None,
) -> list[Player]:
    """
    Build Player objects with timeseries up to a given gameweek.

    Parameters
    ----------
    up_to_gw : int, optional
        Only include gameweeks 1..up_to_gw. If None, include all.

    Returns
    -------
    list[Player]
    """
    all_gw = self.load_merged_gw()

    # Restrict history to the requested decision point (inclusive).
    if up_to_gw is not None:
        all_gw = all_gw[all_gw["gameweek"] <= up_to_gw]

    if all_gw.empty:
        return []

    players = []
    grouped = all_gw.groupby("element")  # one group per FPL player id

    for pid, grp in grouped:
        pid = int(pid)
        # Chronological order so iloc[0]/iloc[-1] mean "first seen"/"latest".
        grp = grp.sort_values("gameweek").reset_index(drop=True)

        # Player metadata from the row itself
        name = str(grp["name"].iloc[0]) if "name" in grp.columns else f"Player_{pid}"
        team = str(grp["team"].iloc[0]) if "team" in grp.columns else "Unknown"
        pos_raw = grp["position"].iloc[0] if "position" in grp.columns else "MID"
        # "value" appears to be price in tenths (hence /10.0) — TODO confirm
        price = grp["value"].iloc[-1] / 10.0 if "value" in grp.columns else 5.0

        # Map the raw position label; retry with its str() form, default "MID".
        position = POSITION_MAP.get(pos_raw, POSITION_MAP.get(str(pos_raw), "MID"))

        # Build timeseries with available columns
        keep = [
            c
            for c in [
                "gameweek",
                "points",
                "minutes",
                "starts",
                "goals",
                "assists",
                "xG",
                "xA",
                "bonus",
                "bps",
                "clean_sheets",
                "goals_conceded",
                "saves",
                "yellow_cards",
                "red_cards",
                "own_goals",
                "penalties_missed",
                "penalties_saved",
                "influence",
                "creativity",
                "threat",
                "ict_index",
                "was_home",
                "opponent_team",
                "expected_goals_conceded",
                "xP",
                "value",
                "selected",
                "transfers_in",
                "transfers_out",
            ]
            if c in grp.columns
        ]
        timeseries = grp[keep].copy()
        # Coerce everything numeric; unparseable entries become NaN.
        for col in timeseries.columns:
            timeseries[col] = pd.to_numeric(timeseries[col], errors="coerce")

        # ── DGW aggregation ───────────────────────────────────────────
        # Always collapse to one row per GW decision period.
        # DGW gameweeks receive per-fixture normalised scores so that the
        # inference pipeline (HMM, enriched, KF) operates on single-game-
        # equivalent observations. See double_gameweek.py for details.
        timeseries = aggregate_dgw_timeseries(timeseries)

        player = Player(
            id=pid,
            name=name,
            team=team,
            position=position,
            price=float(price),
            timeseries=timeseries,
        )
        players.append(player)

    logger.info("Built %d Player objects (up_to_gw=%s).", len(players), up_to_gw)
    return players
get_actual_points
get_actual_points(gw: int) -> dict[int, float]

Get actual points scored by each player in a specific gameweek.

For Double Gameweek players (two fixtures in the same round) the points from both fixtures are summed, which is the correct FPL score for that gameweek. The previous implementation used dict(zip(…)) which silently discarded the first fixture row when a player appeared twice, underreporting DGW scores.

RETURNS DESCRIPTION
dict[int, float]

{player_id: actual_points} (summed across fixtures for DGW players)

Source code in fplx/data/vaastav_loader.py
def get_actual_points(self, gw: int) -> dict[int, float]:
    """
    Get actual points scored by each player in a specific gameweek.

    Double Gameweek players (two fixtures in one round) contribute the
    sum of both fixtures' points, which matches the official FPL score
    for the round. A naive ``dict(zip(...))`` over the raw rows would
    keep only the last fixture row per player.

    Returns
    -------
    dict[int, float]
        {player_id: actual_points}  (summed across fixtures for DGW players)
    """
    frame = self.load_gameweek(gw)
    points_column = "points" if "points" in frame.columns else "total_points"
    # Summing per player id covers both single- and double-GW cases.
    per_player = frame.groupby("element")[points_column].sum()
    return {int(pid): float(total) for pid, total in per_player.items()}
get_fixture_info
get_fixture_info(gw: int) -> dict[int, dict]

Get fixture context (opponent, home/away, xP) per player for a GW.

Source code in fplx/data/vaastav_loader.py
def get_fixture_info(self, gw: int) -> dict[int, dict]:
    """
    Get fixture context (opponent, home/away, xP) per player for a GW.

    Returns
    -------
    dict[int, dict]
        {player_id: {"was_home": bool, "opponent_team": int, "xP": float}}
        Missing columns and NaN values fall back to False / 0 / 0.0.
    """
    df = self.load_gameweek(gw)

    # Hoist loop-invariant column checks out of the per-row loop.
    has_opponent = "opponent_team" in df.columns
    has_xp = "xP" in df.columns

    def _to_int(value, default: int = 0) -> int:
        # int(NaN) raises ValueError; blank GW rows can carry NaN here.
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def _to_float(value, default: float = 0.0) -> float:
        try:
            f = float(value)
        except (TypeError, ValueError):
            return default
        return default if f != f else f  # NaN != NaN

    info: dict[int, dict] = {}
    for _, row in df.iterrows():
        pid = _to_int(row.get("element", 0))
        info[pid] = {
            "was_home": bool(row.get("was_home", False)),
            "opponent_team": _to_int(row.get("opponent_team", 0)) if has_opponent else 0,
            "xP": _to_float(row.get("xP", 0.0)) if has_xp else 0.0,
        }
    return info

evaluation

Evaluation metrics for inference and optimization.

InferenceMetrics dataclass

InferenceMetrics(
    predicted_means: list[float] = list(),
    predicted_vars: list[float] = list(),
    actuals: list[float] = list(),
    model_predictions: dict[str, list[float]] = dict(),
)

Collects and computes inference evaluation metrics.

Usage:

    metrics = InferenceMetrics()
    # for each player-gameweek:
    metrics.add(predicted_mean, predicted_var, actual_points)
    report = metrics.compute()

add
add(
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
)

Record a single prediction-actual pair.

Source code in fplx/evaluation/metrics.py
def add(
    self,
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
):
    """Record a single prediction-actual pair."""
    self.predicted_means.append(predicted_mean)
    self.predicted_vars.append(predicted_var)
    self.actuals.append(actual)

    if model_preds:
        for name, pred in model_preds.items():
            if name not in self.model_predictions:
                self.model_predictions[name] = []
            self.model_predictions[name].append(pred)
compute
compute() -> dict

Compute all inference metrics.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute all inference metrics.

    Returns a dict with accuracy (mse/rmse/mae/bias), empirical coverage
    of the 95% and 50% Gaussian intervals, mean predictive std, mean
    Gaussian log-likelihood, and per-model ablation errors when present.
    Returns {} when nothing has been recorded.
    """
    mu = np.asarray(self.predicted_means, dtype=float)
    var = np.asarray(self.predicted_vars, dtype=float)
    y = np.asarray(self.actuals, dtype=float)

    if mu.size == 0:
        return {}

    resid = mu - y
    report = {
        "n_predictions": int(mu.size),
        "mse": float(np.mean(resid ** 2)),
        "rmse": float(np.sqrt(np.mean(resid ** 2))),
        "mae": float(np.mean(np.abs(resid))),
        "mean_bias": float(np.mean(resid)),
    }

    # Floor variances before sqrt/division to avoid zero-variance blowups.
    safe_var = np.maximum(var, 1e-8)
    sigma = np.sqrt(safe_var)

    # Empirical coverage of the central 95% and 50% Gaussian intervals.
    for label, z in (("calibration_95", 1.96), ("calibration_50", 0.674)):
        covered = (y >= mu - z * sigma) & (y <= mu + z * sigma)
        report[label] = float(np.mean(covered))

    # Average predictive uncertainty.
    report["mean_predicted_std"] = float(np.mean(sigma))

    # Gaussian predictive log-likelihood:
    # log p(y | mu, s^2) = -0.5 * (log(2*pi*s^2) + (y - mu)^2 / s^2)
    log_lik = -0.5 * (np.log(2 * np.pi * safe_var) + resid ** 2 / safe_var)
    report["mean_log_likelihood"] = float(np.mean(log_lik))

    # Per-model ablation errors (only when lengths line up with actuals).
    ablation = {
        name: {
            "mse": float(np.mean((np.asarray(preds) - y) ** 2)),
            "mae": float(np.mean(np.abs(np.asarray(preds) - y))),
        }
        for name, preds in self.model_predictions.items()
        if len(preds) == len(y)
    }
    if ablation:
        report["ablation"] = ablation

    return report

OptimizationMetrics dataclass

OptimizationMetrics(
    strategy_points: dict[str, list[float]] = dict(),
    oracle_points: list[float] = list(),
    gameweeks: list[int] = list(),
)

Collects and computes optimization evaluation metrics.

Tracks actual points earned per gameweek under different strategies, and compares against oracle (hindsight-optimal).

Usage:

    metrics = OptimizationMetrics()
    # for each gameweek:
    metrics.add_gameweek(gw, actual_points, oracle_points)
    report = metrics.compute()

add_gameweek
add_gameweek(
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
)

Record actual points for one gameweek across strategies.

PARAMETER DESCRIPTION
gw

Gameweek number.

TYPE: int

strategy_results

{strategy_name: actual_points_earned}

TYPE: dict[str, float]

oracle

Best possible points with hindsight.

TYPE: float

Source code in fplx/evaluation/metrics.py
def add_gameweek(
    self,
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
):
    """
    Record actual points for one gameweek across strategies.

    Parameters
    ----------
    gw : int
        Gameweek number.
    strategy_results : dict[str, float]
        {strategy_name: actual_points_earned}
    oracle : float
        Best possible points with hindsight.
    """
    self.gameweeks.append(gw)
    self.oracle_points.append(oracle)

    for strategy_name, earned in strategy_results.items():
        self.strategy_points.setdefault(strategy_name, []).append(earned)
compute
compute() -> dict

Compute optimization metrics for all strategies.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute optimization metrics for all strategies.

    Summarizes per-strategy totals, per-GW variability, worst-case GW,
    and the optimality gap relative to the hindsight-optimal (oracle)
    score.
    """
    oracle_arr = np.asarray(self.oracle_points, dtype=float)
    oracle_total = float(oracle_arr.sum())
    report = {
        "n_gameweeks": len(self.gameweeks),
        "oracle_total": oracle_total,
        "oracle_mean_per_gw": float(oracle_arr.mean()) if oracle_arr.size > 0 else 0.0,
        "strategies": {},
    }

    for strategy_name, earned_list in self.strategy_points.items():
        earned = np.asarray(earned_list, dtype=float)
        n = earned.size
        total = float(earned.sum())
        mean_gw = float(earned.mean()) if n > 0 else 0.0
        std_gw = float(earned.std()) if n > 0 else 0.0

        # Per-GW optimality gap: (oracle - strategy) / oracle, guarded
        # against division by zero.
        truncated_oracle = oracle_arr[:n]
        gap_arr = (truncated_oracle - earned) / np.maximum(truncated_oracle, 1e-6)
        mean_gap = float(gap_arr.mean()) if n > 0 else 0.0

        report["strategies"][strategy_name] = {
            "total_points": total,
            "mean_per_gw": mean_gw,
            "std_per_gw": std_gw,
            # Coefficient of variation as a consistency measure.
            "cv": std_gw / mean_gw if mean_gw > 0 else 0.0,
            # Worst single-gameweek haul.
            "worst_gw_points": float(earned.min()) if n > 0 else 0.0,
            "mean_optimality_gap": mean_gap,
            "pct_of_oracle": total / oracle_total * 100 if oracle_total > 0 else 0,
        }

    return report

metrics

Metrics for evaluating inference accuracy and optimization quality.

Part I (18-662) metrics: prediction accuracy, calibration, ablation. Part II (18-660) metrics: actual points, optimality gap, consistency.

InferenceMetrics dataclass
InferenceMetrics(
    predicted_means: list[float] = list(),
    predicted_vars: list[float] = list(),
    actuals: list[float] = list(),
    model_predictions: dict[str, list[float]] = dict(),
)

Collects and computes inference evaluation metrics.

Usage:

    metrics = InferenceMetrics()
    # for each player-gameweek:
    metrics.add(predicted_mean, predicted_var, actual_points)
    report = metrics.compute()

add
add(
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
)

Record a single prediction-actual pair.

Source code in fplx/evaluation/metrics.py
def add(
    self,
    predicted_mean: float,
    predicted_var: float,
    actual: float,
    model_preds: dict[str, float] | None = None,
):
    """Record a single prediction-actual pair."""
    self.predicted_means.append(predicted_mean)
    self.predicted_vars.append(predicted_var)
    self.actuals.append(actual)

    if model_preds:
        for name, pred in model_preds.items():
            if name not in self.model_predictions:
                self.model_predictions[name] = []
            self.model_predictions[name].append(pred)
compute
compute() -> dict

Compute all inference metrics.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute all inference metrics.

    Returns a dict with accuracy (mse/rmse/mae/bias), empirical coverage
    of the 95% and 50% Gaussian intervals, mean predictive std, mean
    Gaussian log-likelihood, and per-model ablation errors when present.
    Returns {} when nothing has been recorded.
    """
    mu = np.asarray(self.predicted_means, dtype=float)
    var = np.asarray(self.predicted_vars, dtype=float)
    y = np.asarray(self.actuals, dtype=float)

    if mu.size == 0:
        return {}

    resid = mu - y
    report = {
        "n_predictions": int(mu.size),
        "mse": float(np.mean(resid ** 2)),
        "rmse": float(np.sqrt(np.mean(resid ** 2))),
        "mae": float(np.mean(np.abs(resid))),
        "mean_bias": float(np.mean(resid)),
    }

    # Floor variances before sqrt/division to avoid zero-variance blowups.
    safe_var = np.maximum(var, 1e-8)
    sigma = np.sqrt(safe_var)

    # Empirical coverage of the central 95% and 50% Gaussian intervals.
    for label, z in (("calibration_95", 1.96), ("calibration_50", 0.674)):
        covered = (y >= mu - z * sigma) & (y <= mu + z * sigma)
        report[label] = float(np.mean(covered))

    # Average predictive uncertainty.
    report["mean_predicted_std"] = float(np.mean(sigma))

    # Gaussian predictive log-likelihood:
    # log p(y | mu, s^2) = -0.5 * (log(2*pi*s^2) + (y - mu)^2 / s^2)
    log_lik = -0.5 * (np.log(2 * np.pi * safe_var) + resid ** 2 / safe_var)
    report["mean_log_likelihood"] = float(np.mean(log_lik))

    # Per-model ablation errors (only when lengths line up with actuals).
    ablation = {
        name: {
            "mse": float(np.mean((np.asarray(preds) - y) ** 2)),
            "mae": float(np.mean(np.abs(np.asarray(preds) - y))),
        }
        for name, preds in self.model_predictions.items()
        if len(preds) == len(y)
    }
    if ablation:
        report["ablation"] = ablation

    return report
OptimizationMetrics dataclass
OptimizationMetrics(
    strategy_points: dict[str, list[float]] = dict(),
    oracle_points: list[float] = list(),
    gameweeks: list[int] = list(),
)

Collects and computes optimization evaluation metrics.

Tracks actual points earned per gameweek under different strategies, and compares against oracle (hindsight-optimal).

Usage:

    metrics = OptimizationMetrics()
    # for each gameweek:
    metrics.add_gameweek(gw, actual_points, oracle_points)
    report = metrics.compute()

add_gameweek
add_gameweek(
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
)

Record actual points for one gameweek across strategies.

PARAMETER DESCRIPTION
gw

Gameweek number.

TYPE: int

strategy_results

{strategy_name: actual_points_earned}

TYPE: dict[str, float]

oracle

Best possible points with hindsight.

TYPE: float

Source code in fplx/evaluation/metrics.py
def add_gameweek(
    self,
    gw: int,
    strategy_results: dict[str, float],
    oracle: float,
):
    """
    Record actual points for one gameweek across strategies.

    Parameters
    ----------
    gw : int
        Gameweek number.
    strategy_results : dict[str, float]
        {strategy_name: actual_points_earned}
    oracle : float
        Best possible points with hindsight.
    """
    self.gameweeks.append(gw)
    self.oracle_points.append(oracle)

    for strategy_name, earned in strategy_results.items():
        self.strategy_points.setdefault(strategy_name, []).append(earned)
compute
compute() -> dict

Compute optimization metrics for all strategies.

Source code in fplx/evaluation/metrics.py
def compute(self) -> dict:
    """
    Compute optimization metrics for all strategies.

    Summarizes per-strategy totals, per-GW variability, worst-case GW,
    and the optimality gap relative to the hindsight-optimal (oracle)
    score.
    """
    oracle_arr = np.asarray(self.oracle_points, dtype=float)
    oracle_total = float(oracle_arr.sum())
    report = {
        "n_gameweeks": len(self.gameweeks),
        "oracle_total": oracle_total,
        "oracle_mean_per_gw": float(oracle_arr.mean()) if oracle_arr.size > 0 else 0.0,
        "strategies": {},
    }

    for strategy_name, earned_list in self.strategy_points.items():
        earned = np.asarray(earned_list, dtype=float)
        n = earned.size
        total = float(earned.sum())
        mean_gw = float(earned.mean()) if n > 0 else 0.0
        std_gw = float(earned.std()) if n > 0 else 0.0

        # Per-GW optimality gap: (oracle - strategy) / oracle, guarded
        # against division by zero.
        truncated_oracle = oracle_arr[:n]
        gap_arr = (truncated_oracle - earned) / np.maximum(truncated_oracle, 1e-6)
        mean_gap = float(gap_arr.mean()) if n > 0 else 0.0

        report["strategies"][strategy_name] = {
            "total_points": total,
            "mean_per_gw": mean_gw,
            "std_per_gw": std_gw,
            # Coefficient of variation as a consistency measure.
            "cv": std_gw / mean_gw if mean_gw > 0 else 0.0,
            # Worst single-gameweek haul.
            "worst_gw_points": float(earned.min()) if n > 0 else 0.0,
            "mean_optimality_gap": mean_gap,
            "pct_of_oracle": total / oracle_total * 100 if oracle_total > 0 else 0,
        }

    return report

inference

Probabilistic inference modules for FPLX.

HMMInference

HMMInference(
    transition_matrix: Optional[ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[ndarray] = None,
)

Hidden Markov Model for discrete player form states.

Supports dynamic transition matrix perturbation so that external signals (news, injuries) can shift state probabilities mid-sequence.

PARAMETER DESCRIPTION
transition_matrix

transition_matrix[i,j] = P(S_{t+1}=j | S_t=i). Rows must sum to 1.

TYPE: (ndarray, shape(N, N)) DEFAULT: None

emission_params

{state_index: (mean, std)} for Gaussian emissions.

TYPE: dict DEFAULT: None

initial_dist

Prior over initial state.

TYPE: (ndarray, shape(N)) DEFAULT: None

Source code in fplx/inference/hmm.py
def __init__(
    self,
    transition_matrix: Optional[np.ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Set up the HMM, copying user-supplied parameters or falling back to defaults."""
    if transition_matrix is None:
        self.transition_matrix = DEFAULT_TRANSITION_MATRIX.copy()
    else:
        self.transition_matrix = transition_matrix.copy()

    # NOTE: a falsy (empty) emission dict also falls back to the defaults,
    # matching the original short-circuit behavior.
    self.emission_params = emission_params if emission_params else dict(DEFAULT_EMISSION_PARAMS)

    if initial_dist is None:
        self.pi = DEFAULT_INITIAL_DIST.copy()
    else:
        self.pi = initial_dist.copy()

    self.n_states = len(self.pi)

    # per-timestep transition overrides (for news injection)
    # key: timestep t, value: modified transition matrix for that step
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
)

Perturb transition matrix at a specific timestep based on news.

For each source state, the transition probability toward boosted target states is multiplied by the boost factor (scaled by confidence), then the row is renormalized.

PARAMETER DESCRIPTION
timestep

The gameweek at which the perturbation applies.

TYPE: int

state_boost

{target_state: multiplicative_boost}. E.g., {0: 10.0} means "10x more likely to transition to Injured."

TYPE: dict[int, float]

confidence

Scales the perturbation. 0 = no effect, 1 = full effect.

TYPE: float DEFAULT: 1.0

Source code in fplx/inference/hmm.py
def inject_news_perturbation(
    self,
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
):
    """
    Perturb transition matrix at a specific timestep based on news.

    For each source state, the transition probability toward boosted
    target states is multiplied by the boost factor (scaled by confidence),
    then the row is renormalized.

    Parameters
    ----------
    timestep : int
        The gameweek at which the perturbation applies.
    state_boost : dict[int, float]
        {target_state: multiplicative_boost}. E.g., {0: 10.0} means
        "10x more likely to transition to Injured."
    confidence : float
        Scales the perturbation. 0 = no effect, 1 = full effect.
    """
    # Work on a copy so the base matrix stays untouched.
    modified = self.transition_matrix.copy()

    for row_idx in range(self.n_states):
        for boosted_state, raw_boost in state_boost.items():
            # Confidence interpolates between no change (1.0) and full boost.
            scaled_boost = 1.0 + confidence * (raw_boost - 1.0)
            modified[row_idx, boosted_state] *= scaled_boost

        # Keep the row a valid probability distribution.
        row_total = modified[row_idx].sum()
        if row_total > 0:
            modified[row_idx] /= row_total

    self._transition_overrides[timestep] = modified
clear_perturbations
clear_perturbations()

Remove all per-timestep transition overrides.

Source code in fplx/inference/hmm.py
def clear_perturbations(self):
    """Remove all per-timestep transition overrides, restoring the base matrix at every step."""
    # In-place clear so any external references to the dict observe the reset.
    self._transition_overrides.clear()
forward
forward(observations: ndarray)

Forward algorithm with dynamic transition matrices.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
forward_messages

Normalized forward messages. forward_messages[t] = P(S_t | y_1:t)

TYPE: (ndarray, shape(num_timesteps, N))

scale

Per-timestep normalization constants.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/hmm.py
def forward(self, observations: np.ndarray):
    """
    Forward algorithm with dynamic transition matrices.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    forward_messages : np.ndarray, shape (num_timesteps, N)
        Normalized forward messages, row t = P(S_t | y_1:t).
    scale : np.ndarray, shape (num_timesteps,)
        Per-timestep normalization constants.
    """
    n_obs = len(observations)
    alpha = np.zeros((n_obs, self.n_states))
    norm = np.zeros(n_obs)

    # Initialisation: prior times emission likelihood of the first observation.
    alpha[0] = self.pi * self._emission_vector(observations[0])
    norm[0] = alpha[0].sum()
    if norm[0] > 0:
        alpha[0] /= norm[0]

    # Recursion: propagate through the (possibly overridden) transition
    # matrix for each step, re-weight by the emission, then normalize.
    for t in range(1, n_obs):
        step_matrix = self._get_transition_matrix(t)
        likelihood = self._emission_vector(observations[t])
        alpha[t] = (alpha[t - 1] @ step_matrix) * likelihood
        norm[t] = alpha[t].sum()
        if norm[t] > 0:
            alpha[t] /= norm[t]

    return alpha, norm
forward_backward
forward_backward(observations: ndarray) -> ndarray

Compute smoothed posteriors P(S_t | y_1:num_timesteps).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_posteriors

smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)

TYPE: (ndarray, shape(num_timesteps, N))

Source code in fplx/inference/hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """
    Compute smoothed posteriors P(S_t | y_1:num_timesteps).

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_posteriors : np.ndarray, shape (num_timesteps, N)
        smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)
    """
    num_timesteps = len(observations)
    # Forward pass also yields the per-step normalizers reused for the
    # backward scaling below.
    forward_messages, scale = self.forward(observations)

    backward_messages = np.zeros((num_timesteps, self.n_states))
    # beta_T = 1 by convention.
    backward_messages[num_timesteps - 1] = 1.0

    for t in range(num_timesteps - 2, -1, -1):
        # Transition/emission for step t -> t+1 (per-timestep overrides respected).
        transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
        b_next = self._emission_vector(observations[t + 1])
        backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
        # Rescale with the forward normalizer to keep magnitudes bounded.
        if scale[t + 1] > 0:
            backward_messages[t] /= scale[t + 1]

    # gamma_t is proportional to alpha_t * beta_t; normalize each row,
    # defensively treating all-zero rows as uniform-safe (divide by 1).
    smoothed_posteriors = forward_messages * backward_messages
    row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    smoothed_posteriors /= row_sums

    return smoothed_posteriors
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence via Viterbi decoding.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
best_path

TYPE: np.ndarray of int, shape (num_timesteps,)

Source code in fplx/inference/hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """
    Most likely state sequence via Viterbi decoding.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    best_path : np.ndarray of int, shape (num_timesteps,)
    """
    n_obs = len(observations)
    n = self.n_states

    log_delta = np.zeros((n_obs, n))
    psi = np.zeros((n_obs, n), dtype=int)

    first_emission = self._emission_vector(observations[0])
    log_delta[0] = np.log(self.pi + 1e-300) + np.log(first_emission + 1e-300)

    for t in range(1, n_obs):
        log_trans = np.log(self._get_transition_matrix(t) + 1e-300)
        log_emit = np.log(self._emission_vector(observations[t]) + 1e-300)
        # scores[i, j]: best log-prob of being in i at t-1 then moving to j.
        scores = log_delta[t - 1][:, None] + log_trans
        # np.argmax keeps the first maximizer, matching a per-state argmax loop.
        psi[t] = np.argmax(scores, axis=0)
        log_delta[t] = scores[psi[t], np.arange(n)] + log_emit

    # Backtrack from the best terminal state.
    best_path = np.zeros(n_obs, dtype=int)
    best_path[n_obs - 1] = np.argmax(log_delta[n_obs - 1])
    for t in range(n_obs - 2, -1, -1):
        best_path[t] = psi[t + 1, best_path[t + 1]]

    return best_path
predict_next
predict_next(
    observations: ndarray,
) -> tuple[float, float, ndarray]

Predict next timestep's points distribution.

Runs forward algorithm, then propagates one step ahead via the transition matrix.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
expected_points

E[Y_{num_timesteps+1} | y_1:num_timesteps]

TYPE: float

variance

Var[Y_{num_timesteps+1} | y_1:num_timesteps] (from law of total variance)

TYPE: float

next_state_dist

P(S_{num_timesteps+1} | y_1:num_timesteps)

TYPE: (ndarray, shape(N))

Source code in fplx/inference/hmm.py
def predict_next(self, observations: np.ndarray) -> tuple[float, float, np.ndarray]:
    """
    Predict next timestep's points distribution.

    Runs the forward algorithm, then propagates the final belief one
    step ahead through the transition matrix.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    expected_points : float
        E[Y_{T+1} | y_1:T]
    variance : float
        Var[Y_{T+1} | y_1:T] (law of total variance), floored at 0.
    next_state_dist : np.ndarray, shape (N,)
        P(S_{T+1} | y_1:T)
    """
    alpha, _ = self.forward(observations)
    current_belief = alpha[-1]  # filtering distribution at the last step

    step_matrix = self._get_transition_matrix(len(observations))
    next_state_dist = current_belief @ step_matrix

    means = np.array([self.emission_params[s][0] for s in range(self.n_states)])
    variances = np.array([self.emission_params[s][1] ** 2 for s in range(self.n_states)])

    expected_points = next_state_dist @ means

    # Law of total variance: Var = E[Var|S] + Var[E|S].
    second_moment = next_state_dist @ (means ** 2)
    variance = next_state_dist @ variances + second_moment - expected_points ** 2

    return expected_points, max(0.0, variance), next_state_dist
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    verbose: bool = False,
)

Learn transition matrix and emission parameters via Baum-Welch EM.

PARAMETER DESCRIPTION
observations

Training sequence.

TYPE: (ndarray, shape(num_timesteps))

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

verbose

Print progress.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
self
Source code in fplx/inference/hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    verbose: bool = False,
):
    """
    Learn transition matrix and emission parameters via Baum-Welch EM.

    Mutates ``self.pi``, ``self.transition_matrix`` and
    ``self.emission_params`` in place across iterations.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)
        Training sequence.
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    verbose : bool
        Print progress.

    Returns
    -------
    self
    """
    num_timesteps = len(observations)
    prev_log_likelihood = -np.inf

    for iteration in range(n_iter):
        # E-step
        forward_messages, scale = self.forward(observations)

        # Backward pass using the same scaling factors as forward()
        backward_messages = np.zeros((num_timesteps, self.n_states))
        backward_messages[num_timesteps - 1] = 1.0
        for t in range(num_timesteps - 2, -1, -1):
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])
            backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
            if scale[t + 1] > 0:
                backward_messages[t] /= scale[t + 1]

        # gamma_t(i) = P(S_t=i | y_1:T)
        smoothed_posteriors = forward_messages * backward_messages
        row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0  # guard against 0/0 on degenerate rows
        smoothed_posteriors /= row_sums

        # transition_posteriors: P(S_t=i, S_{t+1}=j | y_1:num_timesteps) for transition re-estimation
        transition_posteriors = np.zeros((num_timesteps - 1, self.n_states, self.n_states))
        for t in range(num_timesteps - 1):
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])

            # xi_t(i,j) = P(S_t=i, S_{t+1}=j | y_1:T)
            for i in range(self.n_states):
                for j in range(self.n_states):
                    transition_posteriors[t, i, j] = (
                        forward_messages[t, i]
                        * transition_matrix_t_plus_1[i, j]
                        * b_next[j]
                        * backward_messages[t + 1, j]
                    )

            xi_sum = transition_posteriors[t].sum()
            if xi_sum > 0:
                transition_posteriors[t] /= xi_sum

        # M-step
        # Re-estimate initial distribution
        # NOTE(review): this aliases the first gamma row rather than copying;
        # self.pi would track later mutation of smoothed_posteriors — confirm intended.
        self.pi = smoothed_posteriors[0]

        # Re-estimate transition matrix
        for i in range(self.n_states):
            denom = smoothed_posteriors[:-1, i].sum()
            if denom > 0:
                for j in range(self.n_states):
                    self.transition_matrix[i, j] = transition_posteriors[:, i, j].sum() / denom
            # Renormalize
            row_sum = self.transition_matrix[i].sum()
            if row_sum > 0:
                self.transition_matrix[i] /= row_sum

        # re-estimate emission parameters (weighted Gaussian MLE per state)
        for s in range(self.n_states):
            weights = smoothed_posteriors[:, s]
            w_sum = weights.sum()
            if w_sum > 1e-10:
                mu = np.average(observations, weights=weights)
                var = np.average((observations - mu) ** 2, weights=weights)
                sigma = max(np.sqrt(var), 0.1)  # floor to prevent collapse
                self.emission_params[s] = (mu, sigma)

        # log-likelihood (sum of log normalizers from the scaled forward pass)
        log_likelihood = np.sum(np.log(scale + 1e-300))
        if verbose:
            logger.info("EM iteration %d: LL = %.4f", iteration, log_likelihood)

        if abs(log_likelihood - prev_log_likelihood) < tol:
            if verbose:
                logger.info("Converged at iteration %d", iteration)
            break
        prev_log_likelihood = log_likelihood

    return self

KalmanFilter

KalmanFilter(
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
)

1D Kalman Filter for tracking latent point potential.

PARAMETER DESCRIPTION
process_noise

Default process noise variance (form drift rate).

TYPE: float DEFAULT: 1.0

observation_noise

Default observation noise variance (weekly point noise).

TYPE: float DEFAULT: 4.0

initial_state_mean

Initial state estimate.

TYPE: float DEFAULT: 4.0

initial_state_covariance

Initial state uncertainty (variance).

TYPE: float DEFAULT: 2.0

Source code in fplx/inference/kalman.py
def __init__(
    self,
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
):
    """Configure a 1D Kalman filter for tracking latent point potential.

    Parameters
    ----------
    process_noise : float
        Default process noise variance Q (form drift rate).
    observation_noise : float
        Default observation noise variance R (weekly point noise).
    initial_state_mean : float
        Prior mean of the latent state.
    initial_state_covariance : float
        Prior variance of the latent state.
    """
    self.default_process_noise = process_noise
    self.default_observation_noise = observation_noise
    self.initial_state_mean = initial_state_mean
    self.initial_state_covariance = initial_state_covariance

    # Timestep-specific noise overrides, populated via the inject_* methods.
    self._process_noise_overrides: dict[int, float] = {}
    self._observation_noise_overrides: dict[int, float] = {}

    # Filled in by filter(): posterior moments and per-step Kalman gains.
    self.filtered_state_means: Optional[np.ndarray] = None
    self.filtered_state_covariances: Optional[np.ndarray] = None
    self.kalman_gains: Optional[np.ndarray] = None
inject_process_shock
inject_process_shock(timestep: int, multiplier: float)

Inflate process noise at a specific timestep.

Use when news indicates a sudden form change (injury, transfer). process_noise_t = default_process_noise * multiplier.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

multiplier

Process noise multiplier (>1 = more uncertainty about form drift).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_process_shock(self, timestep: int, multiplier: float):
    """Inflate process noise variance at one timestep.

    Intended for news-driven form shocks (injury, transfer): the
    effective process noise becomes default_process_noise * multiplier.

    Parameters
    ----------
    timestep : int
        Gameweek index.
    multiplier : float
        Process noise multiplier (>1 = more uncertainty about form drift).
    """
    shocked_noise = self.default_process_noise * multiplier
    self._process_noise_overrides[timestep] = shocked_noise
inject_observation_noise
inject_observation_noise(timestep: int, factor: float)

Adjust observation noise at a specific timestep.

Use for fixture difficulty: harder opponents → less predictable points. observation_noise_t = default_observation_noise * factor.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

factor

Observation noise factor (>1 = harder fixture, noisier observation).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_observation_noise(self, timestep: int, factor: float):
    """Scale observation noise at one timestep.

    Intended for fixture difficulty: harder opponents make weekly points
    less predictable. The effective observation noise becomes
    default_observation_noise * factor.

    Parameters
    ----------
    timestep : int
        Gameweek index.
    factor : float
        Observation noise factor (>1 = harder fixture, noisier observation).
    """
    scaled_noise = self.default_observation_noise * factor
    self._observation_noise_overrides[timestep] = scaled_noise
clear_overrides
clear_overrides()

Remove all per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def clear_overrides(self):
    """Drop every per-timestep process/observation noise override."""
    for overrides in (self._process_noise_overrides, self._observation_noise_overrides):
        overrides.clear()
get_process_noise_override
get_process_noise_override(
    timestep: int,
) -> Optional[float]

Return explicit process noise override at timestep, if any.

Source code in fplx/inference/kalman.py
def get_process_noise_override(self, timestep: int) -> Optional[float]:
    """Return the process noise override at *timestep*, or None if unset."""
    try:
        return self._process_noise_overrides[timestep]
    except KeyError:
        return None
set_noise_overrides
set_noise_overrides(
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
)

Replace per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def set_noise_overrides(
    self,
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
):
    """Replace both override maps with shallow copies of the given dicts."""
    self._process_noise_overrides = {**process_noise_overrides}
    self._observation_noise_overrides = {**observation_noise_overrides}
copy_with_overrides
copy_with_overrides(
    max_timestep: Optional[int] = None,
) -> KalmanFilter

Create a parameter-identical filter with copied noise overrides.

PARAMETER DESCRIPTION
max_timestep

If provided, only overrides for timesteps <= max_timestep are copied.

TYPE: int DEFAULT: None

Source code in fplx/inference/kalman.py
def copy_with_overrides(self, max_timestep: Optional[int] = None) -> "KalmanFilter":
    """Clone this filter's parameters together with its noise overrides.

    Parameters
    ----------
    max_timestep : int, optional
        If provided, only overrides at timesteps <= max_timestep are
        carried over to the clone.
    """
    clone = KalmanFilter(
        process_noise=self.default_process_noise,
        observation_noise=self.default_observation_noise,
        initial_state_mean=self.initial_state_mean,
        initial_state_covariance=self.initial_state_covariance,
    )

    def _kept(overrides: dict) -> dict:
        # Copy everything, or only the prefix up to max_timestep.
        if max_timestep is None:
            return dict(overrides)
        return {t: v for t, v in overrides.items() if t <= max_timestep}

    clone.set_noise_overrides(
        _kept(self._process_noise_overrides),
        _kept(self._observation_noise_overrides),
    )
    return clone
filter
filter(observations: ndarray)

Run Kalman filter on observations with per-timestep noise.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
filtered_state_means

Filtered state estimates (posterior mean).

TYPE: (ndarray, shape(num_timesteps))

filtered_state_covariances

Filtered state uncertainties (posterior variance).

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def filter(self, observations: np.ndarray):
    """Run the forward Kalman filter with per-timestep noise levels.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)
        Observed weekly points.

    Returns
    -------
    filtered_state_means : np.ndarray, shape (num_timesteps,)
        Posterior state mean after each update.
    filtered_state_covariances : np.ndarray, shape (num_timesteps,)
        Posterior state variance after each update.
    """
    n = len(observations)
    means = np.zeros(n)
    covariances = np.zeros(n)
    gains = np.zeros(n)

    # The t=0 prior is the configured initial state.
    prior_mean = self.initial_state_mean
    prior_cov = self.initial_state_covariance

    for t, y in enumerate(observations):
        q_t = self._get_process_noise(t)
        r_t = self._get_observation_noise(t)

        # Predict step: propagate the previous posterior (skipped at t=0).
        if t:
            prior_mean = means[t - 1]
            prior_cov = covariances[t - 1] + q_t

        # Update step: standard scalar Kalman correction.
        residual = y - prior_mean
        residual_cov = prior_cov + r_t
        gain = prior_cov / residual_cov

        means[t] = prior_mean + gain * residual
        covariances[t] = (1 - gain) * prior_cov
        gains[t] = gain

    self.filtered_state_means = means
    self.filtered_state_covariances = covariances
    self.kalman_gains = gains

    return means, covariances
predict_next
predict_next() -> tuple[float, float]

Predict next observation with uncertainty.

Returns the predictive distribution for Y_{t+1} (the observation), not X_{t+1} (the latent state). This ensures consistency with the HMM predict_next which also returns observation-level variance.

Var[Y_{t+1}] = Var[X_{t+1}|y_{1:t}] + R = (P_t + Q) + R

Must call filter() first.

RETURNS DESCRIPTION
predicted_mean

E[Y_{t+1} | y_{1:t}].

TYPE: float

predicted_var

Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

TYPE: float

Source code in fplx/inference/kalman.py
def predict_next(self) -> tuple[float, float]:
    """One-step-ahead predictive distribution for the next observation.

    Returns the moments of Y_{t+1} (the observation), not the latent
    state X_{t+1}: Var[Y_{t+1}] = (P_t + Q) + R. This matches the HMM
    predict_next, which also reports observation-level variance.

    filter() must have been run first.

    Returns
    -------
    predicted_mean : float
        E[Y_{t+1} | y_{1:t}].
    predicted_var : float
        Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

    Raises
    ------
    RuntimeError
        If filter() has not been called.
    """
    if self.filtered_state_means is None or self.filtered_state_covariances is None:
        raise RuntimeError("Must call filter() before predict_next().")

    next_t = len(self.filtered_state_means)
    q_next = self._get_process_noise(next_t)
    r_next = self._get_observation_noise(next_t)

    predicted_mean = self.filtered_state_means[-1]
    # State-level predicted variance (P + Q) plus observation noise R.
    predicted_var = self.filtered_state_covariances[-1] + q_next + r_next

    return predicted_mean, predicted_var
smooth
smooth(observations: ndarray)

Run RTS smoother (backward pass after forward Kalman filter).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_state_means

Smoothed state estimates.

TYPE: (ndarray, shape(num_timesteps))

smoothed_state_covariances

Smoothed state uncertainties.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def smooth(self, observations: np.ndarray):
    """Rauch-Tung-Striebel smoother: forward filter, then backward pass.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_state_means : np.ndarray, shape (num_timesteps,)
        Smoothed state estimates.
    smoothed_state_covariances : np.ndarray, shape (num_timesteps,)
        Smoothed state uncertainties.
    """
    filt_means, filt_covs = self.filter(observations)
    n = len(observations)

    sm_means = np.zeros(n)
    sm_covs = np.zeros(n)

    # Last smoothed estimate coincides with the last filtered one.
    sm_means[-1] = filt_means[-1]
    sm_covs[-1] = filt_covs[-1]

    for t in reversed(range(n - 1)):
        q_next = self._get_process_noise(t + 1)
        pred_cov = filt_covs[t] + q_next

        # Smoother gain; guard against a degenerate zero predicted variance.
        gain = filt_covs[t] / pred_cov if pred_cov > 0 else 0.0

        sm_means[t] = filt_means[t] + gain * (sm_means[t + 1] - filt_means[t])
        sm_covs[t] = filt_covs[t] + gain * gain * (sm_covs[t + 1] - pred_cov)

    return sm_means, sm_covs

MultivariateHMM

MultivariateHMM(
    position: str = "MID",
    transition_matrix: Optional[ndarray] = None,
    initial_dist: Optional[ndarray] = None,
)

Position-aware HMM with multivariate diagonal Gaussian emissions.

PARAMETER DESCRIPTION
position

GK, DEF, MID, FWD. Determines feature set and default emissions.

TYPE: str DEFAULT: 'MID'

Source code in fplx/inference/multivariate_hmm.py
def __init__(
    self,
    position: str = "MID",
    transition_matrix: Optional[np.ndarray] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Set up a position-aware HMM with diagonal-Gaussian emissions.

    Parameters
    ----------
    position : str
        One of GK, DEF, MID, FWD; selects the feature set and the
        default emission parameters.
    transition_matrix : np.ndarray, optional
        Prior transition matrix; DEFAULT_TRANSITION when omitted.
    initial_dist : np.ndarray, optional
        Initial state distribution; DEFAULT_INITIAL when omitted.
    """
    self.position = position
    self.means, self.vars = _default_emissions(position)

    # Keep copies of the defaults as priors for the MAP-style
    # regularization used inside Baum-Welch (see fit()).
    self.prior_means = self.means.copy()
    self.prior_vars = self.vars.copy()
    if transition_matrix is not None:
        self.prior_A = transition_matrix.copy()
    else:
        self.prior_A = DEFAULT_TRANSITION.copy()

    self.A = self.prior_A.copy()
    if initial_dist is not None:
        self.pi = initial_dist.copy()
    else:
        self.pi = DEFAULT_INITIAL.copy()
    self.n_states = N_STATES
    self.n_features = self.means.shape[1]

    # Per-timestep transition-matrix overrides from news perturbations.
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict,
    confidence: float = 1.0,
)

Perturb transition matrix at timestep (same API as scalar HMM).

Source code in fplx/inference/multivariate_hmm.py
def inject_news_perturbation(self, timestep: int, state_boost: dict, confidence: float = 1.0):
    """Perturb the transition matrix at one timestep (scalar-HMM-compatible API).

    Each boosted target column is scaled by 1 + confidence * (boost - 1),
    then every row is renormalized to sum to one.
    """
    perturbed = self.A.copy()
    for row in range(self.n_states):
        for target, boost in state_boost.items():
            perturbed[row, target] *= 1.0 + confidence * (boost - 1.0)
        total = perturbed[row].sum()
        if total > 0:
            perturbed[row] = perturbed[row] / total
    self._transition_overrides[timestep] = perturbed
forward
forward(observations: ndarray)

Forward algorithm. observations: (T, D).

Source code in fplx/inference/multivariate_hmm.py
def forward(self, observations: np.ndarray):
    """Scaled forward algorithm over a (T, D) observation matrix.

    Returns
    -------
    alpha : np.ndarray, shape (T, n_states)
        Normalized forward messages.
    scale : np.ndarray, shape (T,)
        Per-timestep normalization constants.
    """
    T = len(observations)
    alpha = np.zeros((T, self.n_states))
    scale = np.zeros(T)

    # Initialization from the prior state distribution.
    alpha[0] = self.pi * self._emission_prob_vector(observations[0])
    scale[0] = alpha[0].sum()
    if scale[0] > 0:
        alpha[0] /= scale[0]

    # Recursion through (possibly timestep-overridden) transitions.
    for t in range(1, T):
        emission = self._emission_prob_vector(observations[t])
        alpha[t] = (alpha[t - 1] @ self._get_A(t)) * emission
        scale[t] = alpha[t].sum()
        if scale[t] > 0:
            alpha[t] /= scale[t]
    return alpha, scale
forward_backward
forward_backward(observations: ndarray) -> ndarray

Smoothed posteriors P(S_t | y_{1:T}).

Source code in fplx/inference/multivariate_hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """Smoothed state posteriors P(S_t | y_{1:T}) for each timestep."""
    T = len(observations)
    alpha, scale = self.forward(observations)

    # Backward messages, scaled consistently with forward().
    beta = np.zeros((T, self.n_states))
    beta[T - 1] = 1.0
    for t in reversed(range(T - 1)):
        emission_next = self._emission_prob_vector(observations[t + 1])
        beta[t] = self._get_A(t + 1) @ (emission_next * beta[t + 1])
        if scale[t + 1] > 0:
            beta[t] /= scale[t + 1]

    # Combine and row-normalize (guarding all-zero rows).
    gamma = alpha * beta
    totals = gamma.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0
    return gamma / totals
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence.

Source code in fplx/inference/multivariate_hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """Most likely hidden-state sequence (log-space Viterbi decoding)."""
    T = len(observations)
    log_delta = np.zeros((T, self.n_states))
    backptr = np.zeros((T, self.n_states), dtype=int)

    # Initialization: prior plus first emission, in log space.
    init_log_b = np.array(
        [self._emission_log_prob(observations[0], s) for s in range(self.n_states)]
    )
    log_delta[0] = np.log(self.pi + 1e-300) + init_log_b

    # Recursion: record the best predecessor for each state.
    for t in range(1, T):
        log_A = np.log(self._get_A(t) + 1e-300)
        log_b = np.array(
            [self._emission_log_prob(observations[t], s) for s in range(self.n_states)]
        )
        for s in range(self.n_states):
            candidates = log_delta[t - 1] + log_A[:, s]
            best = int(np.argmax(candidates))
            backptr[t, s] = best
            log_delta[t, s] = candidates[best] + log_b[s]

    # Backtrack from the best final state.
    path = np.zeros(T, dtype=int)
    path[T - 1] = np.argmax(log_delta[T - 1])
    for t in reversed(range(T - 1)):
        path[t] = backptr[t + 1, path[t + 1]]
    return path
predict_next_features
predict_next_features(observations: ndarray)

Predict next gameweek's feature vector.

Returns mean, var (per feature), and state distribution.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_features(self, observations: np.ndarray):
    """Predict the next gameweek's feature vector.

    Returns the per-feature mean and variance of the one-step-ahead
    state mixture, plus the predicted state distribution.
    """
    alpha, _ = self.forward(observations)
    state_dist = alpha[-1] @ self._get_A(len(observations))

    # Mixture moments: E[X], then Var[X] = E[X^2] - E[X]^2 with
    # E[X^2] = sum_s p(s) * (var_s + mean_s^2).
    mean = state_dist @ self.means
    second_moment = state_dist @ self.vars + state_dist @ (self.means**2)
    var = second_moment - mean**2
    return mean, np.maximum(var, 1e-8), state_dist
one_step_point_predictions
one_step_point_predictions(
    observations: ndarray,
) -> ndarray

One-step-ahead point predictions for each historical timestep.

Returns array preds where preds[t] predicts points at timestep t, using information up to t-1 (preds[0] is NaN).

Source code in fplx/inference/multivariate_hmm.py
def one_step_point_predictions(self, observations: np.ndarray) -> np.ndarray:
    """One-step-ahead point predictions for each historical timestep.

    preds[t] is the expected points at timestep t given data up to t-1;
    preds[0] is NaN since there is no history to predict from.
    """
    T = len(observations)
    preds = np.full(T, np.nan)
    if T < 2:
        return preds

    alpha, _ = self.forward(observations)
    for t in range(1, T):
        # Propagate the filtered belief at t-1 one step forward.
        state_dist = alpha[t - 1] @ self._get_A(t)
        preds[t] = self._expected_points_from_state_dist(state_dist)
    return preds
predict_next_points
predict_next_points(
    observations: ndarray,
) -> tuple[float, float]

Convert predicted features → expected FPL points.

Uses FPL scoring rules applied to predicted feature rates.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_points(self, observations: np.ndarray) -> tuple[float, float]:
    """Map the predicted feature vector to expected FPL points.

    Reads the position's "xPts" feature from the one-step feature
    forecast; the expectation is clipped at zero and the variance gets
    a +1 residual floor so downstream fusion never sees near-zero
    uncertainty.
    """
    feat_mean, feat_var, _ = self.predict_next_features(observations)
    xpts_idx = POSITION_FEATURES[self.position].index("xPts")

    expected = max(0.0, float(feat_mean[xpts_idx]))
    variance = float(max(feat_var[xpts_idx], 1e-6) + 1.0)
    return expected, variance
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    prior_weight: float = 0.85,
)

Baum-Welch EM with MAP-style prior interpolation.

PARAMETER DESCRIPTION
observations

Feature matrix with shape (T, D).

TYPE: ndarray

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

prior_weight

Weight on prior parameters in [0, 1]. Higher values increase regularization toward position-level default emissions/transitions.

TYPE: float DEFAULT: 0.85

Source code in fplx/inference/multivariate_hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    prior_weight: float = 0.85,
):
    """Baum-Welch EM with MAP-style prior interpolation.

    Each M-step blends the maximum-likelihood update with the prior
    parameters captured at construction, so short histories cannot drag
    the model far from the position-level defaults.

    Parameters
    ----------
    observations : np.ndarray
        Feature matrix with shape (T, D).
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    prior_weight : float
        Weight on prior parameters in [0, 1]. Higher values increase
        regularization toward position-level default emissions/transitions.
    """
    T = observations.shape[0]
    prev_ll = -np.inf
    prior_weight = float(np.clip(prior_weight, 0.0, 1.0))

    for _ in range(n_iter):
        # E-step: scaled forward pass.
        alpha, scale = self.forward(observations)

        # Backward pass with scaling aligned to forward()
        beta = np.zeros((T, self.n_states))
        beta[T - 1] = 1.0
        for t in range(T - 2, -1, -1):
            b_next = self._emission_prob_vector(observations[t + 1])
            beta[t] = self._get_A(t + 1) @ (b_next * beta[t + 1])
            if scale[t + 1] > 0:
                beta[t] /= scale[t + 1]

        # State posteriors: gamma[t, s] = P(S_t = s | y_{1:T}).
        gamma = alpha * beta
        rs = gamma.sum(axis=1, keepdims=True)
        rs[rs == 0] = 1.0
        gamma /= rs

        # M-step: initial distribution, floored to stay strictly positive.
        self.pi = np.maximum(gamma[0], 1e-10)
        self.pi /= self.pi.sum()

        # M-step: transitions via pairwise posteriors xi[t, i, j],
        # each slice normalized before accumulation.
        xi = np.zeros((T - 1, self.n_states, self.n_states))
        for t in range(T - 1):
            b_next = self._emission_prob_vector(observations[t + 1])
            for i in range(self.n_states):
                for j in range(self.n_states):
                    xi[t, i, j] = alpha[t, i] * self._get_A(t + 1)[i, j] * b_next[j] * beta[t + 1, j]
            xs = xi[t].sum()
            if xs > 0:
                xi[t] /= xs
        for i in range(self.n_states):
            d = gamma[:-1, i].sum()
            if d > 1e-10:
                # Blend the MLE row with the prior row (MAP-style shrinkage),
                # then renormalize the row.
                mle_A = xi[:, i, :].sum(axis=0) / d
                self.A[i] = prior_weight * self.prior_A[i] + (1.0 - prior_weight) * mle_A
            rs = self.A[i].sum()
            if rs > 0:
                self.A[i] /= rs

        # M-step: diagonal-Gaussian emissions, shrunk toward priors with a
        # variance floor to avoid degenerate (collapsed) components.
        for s in range(self.n_states):
            w = gamma[:, s]
            ws = w.sum()
            if ws > 1e-10:
                mle_mu = np.average(observations, axis=0, weights=w)
                diff = observations - mle_mu
                mle_var = np.average(diff**2, axis=0, weights=w)
                self.means[s] = prior_weight * self.prior_means[s] + (1.0 - prior_weight) * mle_mu
                self.vars[s] = np.maximum(
                    prior_weight * self.prior_vars[s] + (1.0 - prior_weight) * mle_var,
                    1e-4,
                )

        # Log-likelihood from the forward scaling constants; stop once the
        # improvement falls below tol.
        ll = np.sum(np.log(scale + 1e-300))
        if abs(ll - prev_ll) < tol:
            break
        prev_ll = ll
    return self

InferenceResult dataclass

InferenceResult(
    filtered_beliefs: ndarray,
    smoothed_beliefs: ndarray,
    viterbi_path: ndarray,
    hmm_predicted_mean: float = 0.0,
    hmm_predicted_var: float = 0.0,
    kalman_filtered: ndarray = (lambda: array([]))(),
    kalman_uncertainty: ndarray = (lambda: array([]))(),
    kf_predicted_mean: float = 0.0,
    kf_predicted_var: float = 0.0,
    fused_mean: ndarray = (lambda: array([]))(),
    fused_var: ndarray = (lambda: array([]))(),
    fusion_alpha: Optional[float] = None,
    predicted_mean: float = 0.0,
    predicted_var: float = 0.0,
)

Container for inference pipeline outputs.

PlayerInferencePipeline

PlayerInferencePipeline(
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
)

Orchestrates HMM + Kalman inference for a single player.

PARAMETER DESCRIPTION
hmm_params

Override HMM parameters: transition_matrix, emission_params, initial_dist.

TYPE: dict DEFAULT: None

kf_params

Override Kalman parameters: process_noise (Q), observation_noise (R), initial_state_mean (x0), initial_state_covariance (P0).

TYPE: dict DEFAULT: None

Source code in fplx/inference/pipeline.py
def __init__(
    self,
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
):
    """Build the per-player HMM + Kalman inference pipeline.

    Parameters
    ----------
    hmm_params : dict, optional
        HMM overrides: transition_matrix, emission_params, initial_dist.
    kf_params : dict, optional
        Kalman overrides: process_noise, observation_noise,
        initial_state_mean, initial_state_covariance.
    hmm_variance_floor : float
        Lower bound applied to HMM variances during fusion.
    news_params : dict, optional
        Overrides merged over DEFAULT_NEWS_PARAMS.
    fusion_mode : str
        Either "precision" or "calibrated_alpha".
    fusion_params : dict, optional
        Overrides merged over DEFAULT_FUSION_PARAMS.

    Raises
    ------
    ValueError
        If fusion_mode is not a recognized mode.
    """
    hmm_overrides = hmm_params or {}
    kf_overrides = kf_params or {}

    self.hmm = HMMInference(
        transition_matrix=hmm_overrides.get("transition_matrix"),
        emission_params=hmm_overrides.get("emission_params"),
        initial_dist=hmm_overrides.get("initial_dist"),
    )
    self.kf = KalmanFilter(
        process_noise=kf_overrides.get("process_noise", 1.0),
        observation_noise=kf_overrides.get("observation_noise", 4.0),
        initial_state_mean=kf_overrides.get("initial_state_mean", 4.0),
        initial_state_covariance=kf_overrides.get("initial_state_covariance", 2.0),
    )
    self.hmm_variance_floor = max(float(hmm_variance_floor), 1e-6)
    self.news_params = _merge_nested_dicts(DEFAULT_NEWS_PARAMS, news_params or {})
    self.fusion_mode = fusion_mode
    self.fusion_params = _merge_nested_dicts(DEFAULT_FUSION_PARAMS, fusion_params or {})
    if self.fusion_mode not in {"precision", "calibrated_alpha"}:
        raise ValueError(
            f"Unknown fusion_mode '{self.fusion_mode}'. Expected one of: 'precision', 'calibrated_alpha'."
        )

    # Set by ingest_observations() / run() respectively.
    self.observations: Optional[np.ndarray] = None
    self._result: Optional[InferenceResult] = None
ingest_observations
ingest_observations(points: ndarray)

Set the player's historical points sequence.

PARAMETER DESCRIPTION
points

Weekly points history.

TYPE: (ndarray, shape(T))

Source code in fplx/inference/pipeline.py
def ingest_observations(self, points: np.ndarray):
    """Store the player's weekly points history as a float array.

    Any cached inference result is discarded, since it no longer
    corresponds to the new observations.

    Parameters
    ----------
    points : np.ndarray, shape (T,)
        Weekly points history.
    """
    self.observations = np.asarray(points, dtype=float)
    self._result = None
inject_news
inject_news(news_signal: dict, timestep: int)

Inject a news signal into the inference at a specific gameweek.

Bridges from existing NewsSignal.generate_signal() output format.

PARAMETER DESCRIPTION
news_signal

Output from NewsSignal.generate_signal(). Must contain: 'availability', 'minutes_risk', 'confidence'.

TYPE: dict

timestep

The gameweek index to apply the perturbation.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_news(
    self,
    news_signal: dict,
    timestep: int,
):
    """Apply a news signal to both models at a given gameweek.

    Bridges from the NewsSignal.generate_signal() output format: the
    signal is classified into a category, the category is mapped to a
    perturbation, and the perturbation is injected as an HMM transition
    boost and/or a Kalman process shock.

    Parameters
    ----------
    news_signal : dict
        Output from NewsSignal.generate_signal(). Must contain:
        'availability', 'minutes_risk', 'confidence'.
    timestep : int
        The gameweek index to apply the perturbation.
    """
    category = _classify_news(
        news_signal.get("availability", 1.0),
        news_signal.get("minutes_risk", 0.0),
        self.news_params.get("classification_thresholds"),
    )
    default_conf = float(self.news_params.get("default_confidence", 0.6))
    confidence = news_signal.get("confidence", default_conf)

    perturbation_map = self.news_params.get("perturbation_map", DEFAULT_NEWS_PERTURBATION_MAP)
    neutral = perturbation_map.get("neutral", {"state_boost": {}, "kalman_shock": 1.0})
    perturbation = perturbation_map.get(category, neutral)

    # HMM side: boost transition probabilities into the indicated states.
    state_boost = perturbation.get("state_boost", {})
    if state_boost:
        self.hmm.inject_news_perturbation(
            timestep=timestep,
            state_boost=state_boost,
            confidence=confidence,
        )

    # Kalman side: widen process noise when the shock is non-trivial.
    kalman_shock = float(perturbation.get("kalman_shock", 1.0))
    if kalman_shock != 1.0:
        self.kf.inject_process_shock(
            timestep=timestep,
            multiplier=kalman_shock,
        )
inject_fixture_difficulty
inject_fixture_difficulty(difficulty: float, timestep: int)

Inject fixture difficulty into Kalman observation noise.

PARAMETER DESCRIPTION
difficulty

Fixture difficulty score (1-5, from FixtureSignal).

TYPE: float

timestep

The gameweek index.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_fixture_difficulty(self, difficulty: float, timestep: int):
    """Translate fixture difficulty into Kalman observation noise.

    Parameters
    ----------
    difficulty : float
        Fixture difficulty score (1-5, from FixtureSignal); harder
        fixtures inflate the observation noise.
    timestep : int
        The gameweek index.
    """
    factor = _difficulty_to_noise_factor(difficulty)
    self.kf.inject_observation_noise(timestep=timestep, factor=factor)
run
run() -> InferenceResult

Run full inference pipeline: HMM + Kalman + Fusion.

RETURNS DESCRIPTION
InferenceResult

All inference outputs.

Source code in fplx/inference/pipeline.py
def run(self) -> InferenceResult:
    """
    Run full inference pipeline: HMM + Kalman + Fusion.

    Runs the HMM track (forward, forward-backward, Viterbi, one-step
    prediction) and the Kalman track over the ingested observations,
    fuses the two according to self.fusion_mode, and caches the result.

    Returns
    -------
    InferenceResult
        All inference outputs.

    Raises
    ------
    RuntimeError
        If no observations were ingested.
    """
    if self.observations is None or len(self.observations) == 0:
        raise RuntimeError("No observations ingested. Call ingest_observations().")

    obs = self.observations

    # HMM track: filtered/smoothed beliefs, best path, one-step forecast.
    alpha, _ = self.hmm.forward(obs)
    gamma = self.hmm.forward_backward(obs)
    viterbi_path = self.hmm.viterbi(obs)
    hmm_pred_mean, hmm_pred_var, _ = self.hmm.predict_next(obs)

    # Kalman track: filtered means/variances and one-step forecast.
    kf_x, kf_P = self.kf.filter(obs)
    kf_pred_mean, kf_pred_var = self.kf.predict_next()

    fusion_alpha = None
    if self.fusion_mode == "calibrated_alpha":
        # Convex combination with a data-calibrated weight:
        # mean = a*KF + (1-a)*HMM; variances combine with a^2 / (1-a)^2,
        # both floored to avoid overconfident components.
        fusion_alpha = self._estimate_fusion_alpha(obs)
        hmm_seq_mean, hmm_seq_var = self._hmm_sequence_moments(gamma)

        fused_mean = fusion_alpha * kf_x + (1.0 - fusion_alpha) * hmm_seq_mean
        fused_var = fusion_alpha**2 * np.maximum(kf_P, 1e-6) + (1.0 - fusion_alpha) ** 2 * np.maximum(
            hmm_seq_var, self.hmm_variance_floor
        )

        pred_mean = fusion_alpha * kf_pred_mean + (1.0 - fusion_alpha) * hmm_pred_mean
        pred_var = fusion_alpha**2 * max(kf_pred_var, 1e-6) + (1.0 - fusion_alpha) ** 2 * max(
            hmm_pred_var, self.hmm_variance_floor
        )
    else:
        # Fusion (full sequence, smoothed)
        # Apply an HMM variance floor so HMM does not become unrealistically
        # overconfident and dominate precision-weighted fusion.
        emission_params_for_fusion = {
            s: (mu, max(std, np.sqrt(self.hmm_variance_floor)))
            for s, (mu, std) in self.hmm.emission_params.items()
        }
        fused_mean, fused_var = fuse_sequences(gamma, kf_x, kf_P, emission_params_for_fusion)

        # Fused one-step-ahead prediction
        pred_mean, pred_var = fuse_estimates(
            hmm_pred_mean,
            max(hmm_pred_var, self.hmm_variance_floor),
            kf_pred_mean,
            kf_pred_var,
        )

    # Cache so predict_next() can reuse the result without recomputing.
    self._result = InferenceResult(
        filtered_beliefs=alpha,
        smoothed_beliefs=gamma,
        viterbi_path=viterbi_path,
        hmm_predicted_mean=hmm_pred_mean,
        hmm_predicted_var=hmm_pred_var,
        kalman_filtered=kf_x,
        kalman_uncertainty=kf_P,
        kf_predicted_mean=kf_pred_mean,
        kf_predicted_var=kf_pred_var,
        fused_mean=fused_mean,
        fused_var=fused_var,
        fusion_alpha=fusion_alpha,
        predicted_mean=pred_mean,
        predicted_var=pred_var,
    )

    return self._result
predict_next
predict_next() -> tuple[float, float]

Get the fused one-step-ahead forecast.

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

Source code in fplx/inference/pipeline.py
def predict_next(self) -> tuple[float, float]:
    """Return the fused one-step-ahead forecast, running run() on demand.

    Returns
    -------
    expected_points : float
    variance : float
    """
    if self._result is None:
        self.run()
    result = self._result
    return result.predicted_mean, result.predicted_var
learn_parameters
learn_parameters(n_iter: int = 20)

Run Baum-Welch to learn HMM parameters from current observations.

Call this before run() if you want data-driven parameters.

Source code in fplx/inference/pipeline.py
def learn_parameters(self, n_iter: int = 20):
    """Fit HMM parameters to the ingested observations via Baum-Welch.

    Call before run() when data-driven parameters are desired.

    Raises
    ------
    RuntimeError
        If no observations have been ingested yet.
    """
    if self.observations is None:
        raise RuntimeError("No observations. Call ingest_observations() first.")
    self.hmm.fit(self.observations, n_iter=n_iter)

batch_enriched_predict

batch_enriched_predict(
    players, alpha=0.3, fixture_info=None
)

Run enriched prediction for all players. Returns ep, var, downside_risk dicts.

Source code in fplx/inference/enriched.py
def batch_enriched_predict(players, alpha=0.3, fixture_info=None):
    """Run enriched prediction for every player; return (ep, var, downside_risk) dicts keyed by player id."""
    expected, variance, downside = {}, {}, {}
    for player in players:
        # Look up this player's upcoming fixture when fixture data was supplied.
        upcoming = None
        if fixture_info:
            upcoming = fixture_info.get(player.id)
        mean, var, risk = enriched_predict(
            player.timeseries, player.position, alpha=alpha, upcoming_fixture=upcoming
        )
        expected[player.id] = mean
        variance[player.id] = var
        downside[player.id] = risk
    return expected, variance, downside

compute_xpoints

compute_xpoints(timeseries, position)

Compute per-GW expected points from ALL underlying components.

Source code in fplx/inference/enriched.py
def compute_xpoints(timeseries, position):
    """Compute per-GW expected points from ALL underlying scoring components."""
    num_gameweeks = len(timeseries)
    if num_gameweeks == 0:
        return np.array([])

    minutes = _safe_col(timeseries, "minutes")
    played = minutes > 0
    # 2 pts for a 60+ minute appearance, 1 pt for any minutes at all, else 0.
    total = np.where(minutes >= 60, 2.0, np.where(played, 1.0, 0.0))

    # Prefer expected goals/assists; fall back to actuals when xG/xA are absent.
    exp_goals = _safe_col(timeseries, "xG")
    if np.all(exp_goals == 0):
        exp_goals = _safe_col(timeseries, "goals").astype(float)
    exp_assists = _safe_col(timeseries, "xA")
    if np.all(exp_assists == 0):
        exp_assists = _safe_col(timeseries, "assists").astype(float)

    # Positive components (position-dependent values via the *_PTS lookups).
    total = total + exp_goals * GOAL_PTS.get(position, 4)
    total = total + exp_assists * ASSIST_PTS
    total = total + _safe_col(timeseries, "clean_sheets") * CS_PTS.get(position, 0)
    total = total + np.floor(_safe_col(timeseries, "goals_conceded") / 2.0) * GC_PTS.get(position, 0)
    total = total + _safe_col(timeseries, "bonus")
    if position == "GK":
        # Goalkeepers earn a point per three saves.
        total = total + np.floor(_safe_col(timeseries, "saves") / 3.0)

    # Negative components: cards, own goals, missed penalties.
    total = total - _safe_col(timeseries, "yellow_cards")
    total = total - _safe_col(timeseries, "red_cards") * 3
    total = total - _safe_col(timeseries, "own_goals") * 2
    total = total - _safe_col(timeseries, "penalties_missed") * 2
    if position == "GK":
        total = total + _safe_col(timeseries, "penalties_saved") * 5

    # Zero out gameweeks with no minutes played.
    return total * played

enriched_predict

enriched_predict(
    timeseries,
    position,
    alpha=0.3,
    lookback=15,
    upcoming_fixture=None,
)

Predict expected points with fixture awareness and semi-variance.

PARAMETER DESCRIPTION
timeseries

TYPE: DataFrame

position

TYPE: str

alpha

EWMA decay.

TYPE: float DEFAULT: 0.3

lookback

Max recent GWs (increased from 10 to 15 for more data).

TYPE: int DEFAULT: 15

upcoming_fixture

{"was_home": bool, "opponent_team": int, "xP": float}

TYPE: dict DEFAULT: None

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

downside_risk

TYPE: float (semi-deviation below E[P])

Source code in fplx/inference/enriched.py
def enriched_predict(timeseries, position, alpha=0.3, lookback=15, upcoming_fixture=None):
    """
    Predict expected points with fixture awareness and semi-variance.

    Parameters
    ----------
    timeseries : pd.DataFrame
    position : str
    alpha : float
        EWMA decay.
    lookback : int
        Max recent GWs (increased from 10 to 15 for more data).
    upcoming_fixture : dict, optional
        {"was_home": bool, "opponent_team": int, "xP": float}

    Returns
    -------
    expected_points : float
    variance : float
    downside_risk : float  (semi-deviation below E[P])
    """
    # No usable history: return a neutral prior (0 pts, generic variance).
    if timeseries.empty or "minutes" not in timeseries.columns:
        return 0.0, 4.0, 0.0

    # Restrict to the most recent `lookback` gameweeks.
    ts = timeseries.tail(lookback).copy()
    mins = _safe_col(ts, "minutes")
    played_mask = mins > 0
    n_played = int(played_mask.sum())

    # Too few appearances to estimate anything reliably.
    if n_played < 2:
        return 0.0, 4.0, 0.0

    # Availability proxy: share of the last (up to) 3 GWs actually played.
    avail = float(played_mask[-min(3, len(played_mask)) :].mean())
    if avail < 0.1:
        # Effectively unavailable: near-zero expectation, tight variance.
        return 0.0, 1.0, 0.0

    # Structural expected points per GW from all scoring components.
    xpts = compute_xpoints(ts, position)
    played_xpts = xpts[played_mask]

    # Recency-weighted form estimate over played GWs, floored at zero.
    conditional_ep = max(0.0, _ewma(played_xpts, alpha))

    # Fixture adjustments: scale by home/away and opponent-specific factors
    # derived from the player's own history.
    fixture_mult = 1.0
    if upcoming_fixture:
        hf, af = _home_away_factor(timeseries)
        fixture_mult = hf if upcoming_fixture.get("was_home", False) else af
        opp_id = upcoming_fixture.get("opponent_team", 0)
        if opp_id > 0:
            fixture_mult *= _opponent_mult(timeseries, opp_id)
    conditional_ep *= fixture_mult

    # Ensemble with FPL's own xP projection when supplied (70/30 blend).
    if upcoming_fixture and upcoming_fixture.get("xP", 0) > 0:
        conditional_ep = 0.7 * conditional_ep + 0.3 * upcoming_fixture["xP"]

    # Variance and semi-variance from residuals (actual minus expected points).
    downside_risk = 0.0
    if "points" in ts.columns:
        pts = _safe_col(ts, "points")
        played_pts = pts[played_mask]
        residuals = played_pts - played_xpts
        # +1.0 keeps the variance strictly positive even with a perfect fit.
        var_estimate = float(np.var(residuals)) + 1.0

        # Semi-variance: only negative residuals (actual < expected)
        neg_residuals = residuals[residuals < 0]
        if len(neg_residuals) >= 2:
            downside_risk = float(np.sqrt(np.mean(neg_residuals**2)))
        else:
            # Too few downside samples: fall back to half the full deviation.
            downside_risk = float(np.sqrt(var_estimate)) * 0.5
    else:
        var_estimate = 4.0
        downside_risk = 1.0

    # Marginalize over availability: E[P] = P(plays) * E[P | plays]; the
    # variance picks up a Bernoulli term for the play/no-play uncertainty.
    ep = conditional_ep * avail
    var_out = avail * var_estimate + avail * (1 - avail) * conditional_ep**2
    dr_out = downside_risk * avail

    return ep, var_out, dr_out

fuse_estimates

fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]

Fuse a single HMM estimate with a single Kalman estimate.

Uses inverse-variance weighting:

    fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
    fused_var  = 1 / (1/hmm_var + 1/kf_var)

PARAMETER DESCRIPTION
hmm_mean

HMM expected points (from state posterior weighted emission means).

TYPE: float

hmm_var

HMM variance (law of total variance over state posterior).

TYPE: float

kf_mean

Kalman filtered point estimate.

TYPE: float

kf_var

Kalman filtered uncertainty (posterior variance).

TYPE: float

RETURNS DESCRIPTION
fused_mean

TYPE: float

fused_var

TYPE: float

Source code in fplx/inference/fusion.py
def fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]:
    """
    Fuse one HMM estimate with one Kalman estimate via inverse-variance weighting.

        fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
        fused_var  = 1 / (1/hmm_var + 1/kf_var)

    Parameters
    ----------
    hmm_mean : float
        HMM expected points (from state posterior weighted emission means).
    hmm_var : float
        HMM variance (law of total variance over state posterior).
    kf_mean : float
        Kalman filtered point estimate.
    kf_var : float
        Kalman filtered uncertainty (posterior variance).

    Returns
    -------
    fused_mean : float
    fused_var : float
    """
    # Floor variances so the precisions below stay finite.
    safe_hmm_var = max(hmm_var, 1e-6)
    safe_kf_var = max(kf_var, 1e-6)

    # Precision (= inverse variance) weights each source by its certainty.
    weight_hmm = 1.0 / safe_hmm_var
    weight_kf = 1.0 / safe_kf_var
    weight_total = weight_hmm + weight_kf

    fused = (weight_hmm * hmm_mean + weight_kf * kf_mean) / weight_total
    return fused, 1.0 / weight_total

fuse_sequences

fuse_sequences(
    hmm_gamma: ndarray,
    kalman_x: ndarray,
    kalman_P: ndarray,
    emission_params: dict,
) -> tuple[ndarray, ndarray]

Fuse full sequences of HMM posteriors and Kalman estimates.

PARAMETER DESCRIPTION
hmm_gamma

Smoothed state posteriors from HMM.

TYPE: (ndarray, shape(T, N))

kalman_x

Kalman filtered estimates.

TYPE: (ndarray, shape(T))

kalman_P

Kalman filtered uncertainties.

TYPE: (ndarray, shape(T))

emission_params

{state_index: (mean, std)} from HMM.

TYPE: dict

RETURNS DESCRIPTION
fused_mean

TYPE: (ndarray, shape(T))

fused_var

TYPE: (ndarray, shape(T))

Source code in fplx/inference/fusion.py
def fuse_sequences(
    hmm_gamma: np.ndarray,
    kalman_x: np.ndarray,
    kalman_P: np.ndarray,
    emission_params: dict,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Fuse full sequences of HMM posteriors and Kalman estimates.

    Per timestep, the HMM state mixture is collapsed to a (mean, variance)
    pair via the law of total variance, then combined with the Kalman
    estimate by inverse-variance weighting.

    Parameters
    ----------
    hmm_gamma : np.ndarray, shape (T, N)
        Smoothed state posteriors from HMM.
    kalman_x : np.ndarray, shape (T,)
        Kalman filtered estimates.
    kalman_P : np.ndarray, shape (T,)
        Kalman filtered uncertainties.
    emission_params : dict
        {state_index: (mean, std)} from HMM.

    Returns
    -------
    fused_mean : np.ndarray, shape (T,)
    fused_var : np.ndarray, shape (T,)
    """
    # Fix: removed unused local `T = len(kalman_x)`.
    n_states = hmm_gamma.shape[1]

    state_means = np.array([emission_params[s][0] for s in range(n_states)])
    state_vars = np.array([emission_params[s][1] ** 2 for s in range(n_states)])

    # Law of total variance: Var = E[Var|S] + E[E[.|S]^2] - (E[.])^2
    hmm_mean = hmm_gamma @ state_means
    hmm_var = (
        hmm_gamma @ state_vars
        + hmm_gamma @ (state_means ** 2)
        - hmm_mean ** 2
    )

    # Floor variances so the precisions below stay finite.
    hmm_var = np.maximum(hmm_var, 1e-6)
    kalman_P_safe = np.maximum(kalman_P, 1e-6)

    # Inverse-variance weighting — optimal for independent Gaussian sources.
    precision_hmm = 1.0 / hmm_var
    precision_kf = 1.0 / kalman_P_safe
    total_precision = precision_hmm + precision_kf

    fused_mean = (precision_hmm * hmm_mean + precision_kf * kalman_x) / total_precision
    fused_var = 1.0 / total_precision

    return fused_mean, fused_var

build_feature_matrix

build_feature_matrix(
    timeseries: DataFrame, position: str
) -> ndarray

Extract position-specific feature matrix from player timeseries.

PARAMETER DESCRIPTION
timeseries

Player gameweek history from vaastav dataset.

TYPE: DataFrame

position

GK, DEF, MID, or FWD.

TYPE: str

RETURNS DESCRIPTION
np.ndarray, shape (T, D) where D depends on position.
Source code in fplx/inference/multivariate_hmm.py
def build_feature_matrix(timeseries: pd.DataFrame, position: str) -> np.ndarray:
    """
    Extract a position-specific feature matrix from a player timeseries.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Player gameweek history from vaastav dataset.
    position : str
        GK, DEF, MID, or FWD.

    Returns
    -------
    np.ndarray, shape (T, D) where D depends on position.
    """
    num_gameweeks = len(timeseries)
    features = np.zeros((num_gameweeks, 2))

    # Column 1: fraction of a full 90 minutes played, clipped into [0, 1].
    minutes = _safe_col(timeseries, "minutes")
    features[:, 1] = np.clip(minutes / 90.0, 0.0, 1.0)

    # Column 0: structural xPts projected from the rich per-GW event space.
    features[:, 0] = compute_xpoints(timeseries, position)
    return features

enriched

Fixture-aware enriched prediction with semi-variance for downside risk.

Improvements over base enriched:

- Cards, own goals, penalties (negative pts previously unmodeled)
- Home/away adjustment from player history
- Opponent strength adjustment from player history
- Ensemble with FPL's xP when available
- Semi-variance: only penalize downside deviation below E[P]
- Longer lookback with exponential decay (more data, recency bias)

compute_xpoints
compute_xpoints(timeseries, position)

Compute per-GW expected points from ALL underlying components.

Source code in fplx/inference/enriched.py
def compute_xpoints(timeseries, position):
    """Compute per-GW expected points from ALL underlying components.

    Prices each scoring event (appearance, goals, assists, clean sheets,
    goals conceded, bonus, saves, cards, own goals, penalties) at its
    point value and sums them per gameweek. Returns an array of length
    len(timeseries); empty input yields an empty array.
    """
    n = len(timeseries)
    if n == 0:
        return np.array([])

    mins = _safe_col(timeseries, "minutes")
    played = mins > 0
    # 2 pts for a 60+ minute appearance, 1 pt for any minutes, else 0.
    appearance = np.where(mins >= 60, 2.0, np.where(played, 1.0, 0.0))

    # Prefer xG/xA; fall back to actual goals/assists when no xG/xA data.
    xg = _safe_col(timeseries, "xG")
    if np.all(xg == 0):
        xg = _safe_col(timeseries, "goals").astype(float)
    xa = _safe_col(timeseries, "xA")
    if np.all(xa == 0):
        xa = _safe_col(timeseries, "assists").astype(float)

    # Position-dependent values come from the GOAL_PTS/CS_PTS/GC_PTS lookups.
    goal_c = xg * GOAL_PTS.get(position, 4)
    assist_c = xa * ASSIST_PTS
    cs_c = _safe_col(timeseries, "clean_sheets") * CS_PTS.get(position, 0)
    gc_c = np.floor(_safe_col(timeseries, "goals_conceded") / 2.0) * GC_PTS.get(position, 0)
    bonus_c = _safe_col(timeseries, "bonus")

    # Save points apply only to goalkeepers: one point per three saves.
    saves_c = np.zeros(n)
    if position == "GK":
        saves_c = np.floor(_safe_col(timeseries, "saves") / 3.0)

    # Negative components: cards, own goals, missed penalties.
    yc = _safe_col(timeseries, "yellow_cards") * (-1)
    rc = _safe_col(timeseries, "red_cards") * (-3)
    og = _safe_col(timeseries, "own_goals") * (-2)
    pm = _safe_col(timeseries, "penalties_missed") * (-2)
    ps = np.zeros(n)
    if position == "GK":
        ps = _safe_col(timeseries, "penalties_saved") * 5

    # Sum all components, then zero out gameweeks with no minutes.
    return (
        appearance + goal_c + assist_c + cs_c + gc_c + bonus_c + saves_c + yc + rc + og + pm + ps
    ) * played
enriched_predict
enriched_predict(
    timeseries,
    position,
    alpha=0.3,
    lookback=15,
    upcoming_fixture=None,
)

Predict expected points with fixture awareness and semi-variance.

PARAMETER DESCRIPTION
timeseries

TYPE: DataFrame

position

TYPE: str

alpha

EWMA decay.

TYPE: float DEFAULT: 0.3

lookback

Max recent GWs (increased from 10 to 15 for more data).

TYPE: int DEFAULT: 15

upcoming_fixture

{"was_home": bool, "opponent_team": int, "xP": float}

TYPE: dict DEFAULT: None

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

downside_risk

TYPE: float (semi-deviation below E[P])

Source code in fplx/inference/enriched.py
def enriched_predict(timeseries, position, alpha=0.3, lookback=15, upcoming_fixture=None):
    """
    Predict expected points with fixture awareness and semi-variance.

    Parameters
    ----------
    timeseries : pd.DataFrame
    position : str
    alpha : float
        EWMA decay.
    lookback : int
        Max recent GWs (increased from 10 to 15 for more data).
    upcoming_fixture : dict, optional
        {"was_home": bool, "opponent_team": int, "xP": float}

    Returns
    -------
    expected_points : float
    variance : float
    downside_risk : float  (semi-deviation below E[P])
    """
    # No usable history: return a neutral prior (0 pts, generic variance).
    if timeseries.empty or "minutes" not in timeseries.columns:
        return 0.0, 4.0, 0.0

    # Restrict to the most recent `lookback` gameweeks.
    ts = timeseries.tail(lookback).copy()
    mins = _safe_col(ts, "minutes")
    played_mask = mins > 0
    n_played = int(played_mask.sum())

    # Too few appearances to estimate anything reliably.
    if n_played < 2:
        return 0.0, 4.0, 0.0

    # Availability proxy: share of the last (up to) 3 GWs actually played.
    avail = float(played_mask[-min(3, len(played_mask)) :].mean())
    if avail < 0.1:
        # Effectively unavailable: near-zero expectation, tight variance.
        return 0.0, 1.0, 0.0

    # Structural expected points per GW from all scoring components.
    xpts = compute_xpoints(ts, position)
    played_xpts = xpts[played_mask]

    # Recency-weighted form estimate over played GWs, floored at zero.
    conditional_ep = max(0.0, _ewma(played_xpts, alpha))

    # Fixture adjustments: scale by home/away and opponent-specific factors
    # derived from the player's own history.
    fixture_mult = 1.0
    if upcoming_fixture:
        hf, af = _home_away_factor(timeseries)
        fixture_mult = hf if upcoming_fixture.get("was_home", False) else af
        opp_id = upcoming_fixture.get("opponent_team", 0)
        if opp_id > 0:
            fixture_mult *= _opponent_mult(timeseries, opp_id)
    conditional_ep *= fixture_mult

    # Ensemble with FPL's own xP projection when supplied (70/30 blend).
    if upcoming_fixture and upcoming_fixture.get("xP", 0) > 0:
        conditional_ep = 0.7 * conditional_ep + 0.3 * upcoming_fixture["xP"]

    # Variance and semi-variance from residuals (actual minus expected points).
    downside_risk = 0.0
    if "points" in ts.columns:
        pts = _safe_col(ts, "points")
        played_pts = pts[played_mask]
        residuals = played_pts - played_xpts
        # +1.0 keeps the variance strictly positive even with a perfect fit.
        var_estimate = float(np.var(residuals)) + 1.0

        # Semi-variance: only negative residuals (actual < expected)
        neg_residuals = residuals[residuals < 0]
        if len(neg_residuals) >= 2:
            downside_risk = float(np.sqrt(np.mean(neg_residuals**2)))
        else:
            # Too few downside samples: fall back to half the full deviation.
            downside_risk = float(np.sqrt(var_estimate)) * 0.5
    else:
        var_estimate = 4.0
        downside_risk = 1.0

    # Marginalize over availability: E[P] = P(plays) * E[P | plays]; the
    # variance picks up a Bernoulli term for the play/no-play uncertainty.
    ep = conditional_ep * avail
    var_out = avail * var_estimate + avail * (1 - avail) * conditional_ep**2
    dr_out = downside_risk * avail

    return ep, var_out, dr_out
batch_enriched_predict
batch_enriched_predict(
    players, alpha=0.3, fixture_info=None
)

Run enriched prediction for all players. Returns ep, var, downside_risk dicts.

Source code in fplx/inference/enriched.py
def batch_enriched_predict(players, alpha=0.3, fixture_info=None):
    """Run enriched prediction for all players. Returns ep, var, downside_risk dicts.

    Parameters
    ----------
    players : iterable
        Objects exposing ``id``, ``timeseries`` and ``position`` attributes.
    alpha : float
        EWMA decay, forwarded to ``enriched_predict``.
    fixture_info : dict, optional
        Maps player id -> upcoming-fixture dict; players without an entry
        get no fixture adjustment.

    Returns
    -------
    (ep, ev, dr) : tuple of dicts keyed by player id
        Expected points, variance, and downside risk per player.
    """
    ep, ev, dr = {}, {}, {}
    for p in players:
        # Look up this player's upcoming fixture when fixture data was given.
        fix = fixture_info.get(p.id) if fixture_info else None
        mu, var, dsr = enriched_predict(p.timeseries, p.position, alpha=alpha, upcoming_fixture=fix)
        ep[p.id] = mu
        ev[p.id] = var
        dr[p.id] = dsr
    return ep, ev, dr

fusion

Fusion of HMM and Kalman Filter outputs.

Combines discrete state posteriors (HMM) with continuous estimates (Kalman) using inverse-variance weighting — optimal under Gaussian independence.

fuse_estimates
fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]

Fuse a single HMM estimate with a single Kalman estimate.

Uses inverse-variance weighting:

    fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
    fused_var  = 1 / (1/hmm_var + 1/kf_var)

PARAMETER DESCRIPTION
hmm_mean

HMM expected points (from state posterior weighted emission means).

TYPE: float

hmm_var

HMM variance (law of total variance over state posterior).

TYPE: float

kf_mean

Kalman filtered point estimate.

TYPE: float

kf_var

Kalman filtered uncertainty (posterior variance).

TYPE: float

RETURNS DESCRIPTION
fused_mean

TYPE: float

fused_var

TYPE: float

Source code in fplx/inference/fusion.py
def fuse_estimates(
    hmm_mean: float,
    hmm_var: float,
    kf_mean: float,
    kf_var: float,
) -> tuple[float, float]:
    """
    Fuse a single HMM estimate with a single Kalman estimate.

    Uses inverse-variance weighting:
        fused_mean = (hmm_mean/hmm_var + kf_mean/kf_var) / (1/hmm_var + 1/kf_var)
        fused_var  = 1 / (1/hmm_var + 1/kf_var)

    Parameters
    ----------
    hmm_mean : float
        HMM expected points (from state posterior weighted emission means).
    hmm_var : float
        HMM variance (law of total variance over state posterior).
    kf_mean : float
        Kalman filtered point estimate.
    kf_var : float
        Kalman filtered uncertainty (posterior variance).

    Returns
    -------
    fused_mean : float
    fused_var : float
    """
    # Floor both variances so the precisions (1/var) below stay finite.
    hmm_var = max(hmm_var, 1e-6)
    kf_var = max(kf_var, 1e-6)

    # Precision = inverse variance; weights each source by its certainty.
    precision_hmm = 1.0 / hmm_var
    precision_kf = 1.0 / kf_var
    total_precision = precision_hmm + precision_kf

    # Precision-weighted mean; fused variance is the inverse total precision.
    fused_mean = (precision_hmm * hmm_mean + precision_kf * kf_mean) / total_precision
    fused_var = 1.0 / total_precision

    return fused_mean, fused_var
fuse_sequences
fuse_sequences(
    hmm_gamma: ndarray,
    kalman_x: ndarray,
    kalman_P: ndarray,
    emission_params: dict,
) -> tuple[ndarray, ndarray]

Fuse full sequences of HMM posteriors and Kalman estimates.

PARAMETER DESCRIPTION
hmm_gamma

Smoothed state posteriors from HMM.

TYPE: (ndarray, shape(T, N))

kalman_x

Kalman filtered estimates.

TYPE: (ndarray, shape(T))

kalman_P

Kalman filtered uncertainties.

TYPE: (ndarray, shape(T))

emission_params

{state_index: (mean, std)} from HMM.

TYPE: dict

RETURNS DESCRIPTION
fused_mean

TYPE: (ndarray, shape(T))

fused_var

TYPE: (ndarray, shape(T))

Source code in fplx/inference/fusion.py
def fuse_sequences(
    hmm_gamma: np.ndarray,
    kalman_x: np.ndarray,
    kalman_P: np.ndarray,
    emission_params: dict,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Fuse full sequences of HMM posteriors and Kalman estimates.

    Per timestep, the HMM state mixture is collapsed to a (mean, variance)
    pair via the law of total variance, then combined with the Kalman
    estimate by inverse-variance weighting.

    Parameters
    ----------
    hmm_gamma : np.ndarray, shape (T, N)
        Smoothed state posteriors from HMM.
    kalman_x : np.ndarray, shape (T,)
        Kalman filtered estimates.
    kalman_P : np.ndarray, shape (T,)
        Kalman filtered uncertainties.
    emission_params : dict
        {state_index: (mean, std)} from HMM.

    Returns
    -------
    fused_mean : np.ndarray, shape (T,)
    fused_var : np.ndarray, shape (T,)
    """
    # Fix: removed unused local `T = len(kalman_x)`.
    n_states = hmm_gamma.shape[1]

    state_means = np.array([emission_params[s][0] for s in range(n_states)])
    state_vars = np.array([emission_params[s][1] ** 2 for s in range(n_states)])

    # Law of total variance: Var = E[Var|S] + E[E[.|S]^2] - (E[.])^2
    hmm_mean = hmm_gamma @ state_means
    hmm_var = (
        hmm_gamma @ state_vars
        + hmm_gamma @ (state_means ** 2)
        - hmm_mean ** 2
    )

    # Floor variances so the precisions below stay finite.
    hmm_var = np.maximum(hmm_var, 1e-6)
    kalman_P_safe = np.maximum(kalman_P, 1e-6)

    # Inverse-variance weighting — optimal for independent Gaussian sources.
    precision_hmm = 1.0 / hmm_var
    precision_kf = 1.0 / kalman_P_safe
    total_precision = precision_hmm + precision_kf

    fused_mean = (precision_hmm * hmm_mean + precision_kf * kalman_x) / total_precision
    fused_var = 1.0 / total_precision

    return fused_mean, fused_var

hmm

Hidden Markov Model for player form state inference.

Implements:

- Forward algorithm (online filtering)
- Forward-Backward (offline smoothing)
- Viterbi decoding (most likely state sequence)
- Dynamic transition matrix perturbation (news signal injection)
- Baum-Welch parameter learning (EM)
- One-step-ahead prediction with uncertainty

HMMInference
HMMInference(
    transition_matrix: Optional[ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[ndarray] = None,
)

Hidden Markov Model for discrete player form states.

Supports dynamic transition matrix perturbation so that external signals (news, injuries) can shift state probabilities mid-sequence.

PARAMETER DESCRIPTION
transition_matrix

transition_matrix[i,j] = P(S_{t+1}=j | S_t=i). Rows must sum to 1.

TYPE: (ndarray, shape(N, N)) DEFAULT: None

emission_params

{state_index: (mean, std)} for Gaussian emissions.

TYPE: dict DEFAULT: None

initial_dist

Prior over initial state.

TYPE: (ndarray, shape(N)) DEFAULT: None

Source code in fplx/inference/hmm.py
def __init__(
    self,
    transition_matrix: Optional[np.ndarray] = None,
    emission_params: Optional[dict] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Initialize the HMM, copying supplied arrays and falling back to module defaults."""
    if transition_matrix is None:
        self.transition_matrix = DEFAULT_TRANSITION_MATRIX.copy()
    else:
        self.transition_matrix = transition_matrix.copy()

    self.emission_params = emission_params or dict(DEFAULT_EMISSION_PARAMS)

    if initial_dist is None:
        self.pi = DEFAULT_INITIAL_DIST.copy()
    else:
        self.pi = initial_dist.copy()
    self.n_states = len(self.pi)

    # Per-timestep transition overrides used for news-signal injection:
    # {timestep: modified transition matrix applied only at that step}.
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
)

Perturb transition matrix at a specific timestep based on news.

For each source state, the transition probability toward boosted target states is multiplied by the boost factor (scaled by confidence), then the row is renormalized.

PARAMETER DESCRIPTION
timestep

The gameweek at which the perturbation applies.

TYPE: int

state_boost

{target_state: multiplicative_boost}. E.g., {0: 10.0} means "10x more likely to transition to Injured."

TYPE: dict[int, float]

confidence

Scales the perturbation. 0 = no effect, 1 = full effect.

TYPE: float DEFAULT: 1.0

Source code in fplx/inference/hmm.py
def inject_news_perturbation(
    self,
    timestep: int,
    state_boost: dict[int, float],
    confidence: float = 1.0,
):
    """
    Perturb the transition matrix at one timestep based on a news signal.

    Each row's probability of transitioning into a boosted target state is
    multiplied by the confidence-scaled boost factor, and the row is then
    renormalized so it remains a valid distribution. The base matrix is
    left untouched; the perturbed copy is stored as a per-step override.

    Parameters
    ----------
    timestep : int
        The gameweek at which the perturbation applies.
    state_boost : dict[int, float]
        {target_state: multiplicative_boost}. E.g., {0: 10.0} means
        "10x more likely to transition to Injured."
    confidence : float
        Scales the perturbation. 0 = no effect, 1 = full effect.
    """
    perturbed = self.transition_matrix.copy()

    for row in range(self.n_states):
        for target, boost in state_boost.items():
            # Interpolate between 1 (no effect) and the full boost factor.
            perturbed[row, target] *= 1.0 + confidence * (boost - 1.0)

        # Renormalize the row so it still sums to one.
        total = perturbed[row].sum()
        if total > 0:
            perturbed[row] /= total

    self._transition_overrides[timestep] = perturbed
clear_perturbations
clear_perturbations()

Remove all per-timestep transition overrides.

Source code in fplx/inference/hmm.py
def clear_perturbations(self):
    """Remove all per-timestep transition overrides.

    Clears the override dict in place (does not rebind it), so any
    external references to the same dict observe the reset as well.
    """
    self._transition_overrides.clear()
forward
forward(observations: ndarray)

Forward algorithm with dynamic transition matrices.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
forward_messages

Normalized forward messages. forward_messages[t] = P(S_t | y_1:t)

TYPE: (ndarray, shape(num_timesteps, N))

scale

Per-timestep normalization constants.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/hmm.py
def forward(self, observations: np.ndarray):
    """
    Forward algorithm with dynamic (per-timestep) transition matrices.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    forward_messages : np.ndarray, shape (num_timesteps, N)
        Normalized forward messages. forward_messages[t] = P(S_t | y_1:t)
    scale : np.ndarray, shape (num_timesteps,)
        Per-timestep normalization constants.
    """
    n_steps = len(observations)
    messages = np.zeros((n_steps, self.n_states))
    scale = np.zeros(n_steps)

    # Initialization: prior times the first emission likelihood.
    messages[0] = self.pi * self._emission_vector(observations[0])
    scale[0] = messages[0].sum()
    if scale[0] > 0:
        messages[0] /= scale[0]

    # Recursion: propagate through the (possibly overridden) transition
    # matrix for each step, weight by the emission likelihood, normalize.
    for t in range(1, n_steps):
        step_transition = self._get_transition_matrix(t)
        likelihood = self._emission_vector(observations[t])
        messages[t] = (messages[t - 1] @ step_transition) * likelihood
        scale[t] = messages[t].sum()
        if scale[t] > 0:
            messages[t] /= scale[t]

    return messages, scale
forward_backward
forward_backward(observations: ndarray) -> ndarray

Compute smoothed posteriors P(S_t | y_1:num_timesteps).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_posteriors

smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)

TYPE: (ndarray, shape(num_timesteps, N))

Source code in fplx/inference/hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """
    Compute smoothed posteriors P(S_t | y_1:num_timesteps).

    Runs the scaled forward pass, then a backward pass that reuses the
    same per-timestep scale factors, and normalizes their product.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_posteriors : np.ndarray, shape (num_timesteps, N)
        smoothed_posteriors[t, s] = P(S_t=s | y_1:num_timesteps)
    """
    num_timesteps = len(observations)
    forward_messages, scale = self.forward(observations)

    # Backward recursion, initialized to 1 at the final timestep.
    backward_messages = np.zeros((num_timesteps, self.n_states))
    backward_messages[num_timesteps - 1] = 1.0

    for t in range(num_timesteps - 2, -1, -1):
        # Use the (possibly overridden) transition matrix for step t+1.
        transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
        b_next = self._emission_vector(observations[t + 1])
        backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
        # Rescale with the forward-pass constant to prevent underflow.
        if scale[t + 1] > 0:
            backward_messages[t] /= scale[t + 1]

    # Posterior is proportional to forward * backward; normalize each
    # row, guarding against all-zero rows.
    smoothed_posteriors = forward_messages * backward_messages
    row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    smoothed_posteriors /= row_sums

    return smoothed_posteriors
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence via Viterbi decoding.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
best_path

TYPE: np.ndarray of int, shape (num_timesteps,)

Source code in fplx/inference/hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """
    Most likely state sequence via Viterbi decoding.

    Works in log space (with a 1e-300 floor inside each log to avoid
    log(0)) and honors per-timestep transition-matrix overrides.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    best_path : np.ndarray of int, shape (num_timesteps,)
    """
    num_timesteps = len(observations)
    log_pi = np.log(self.pi + 1e-300)

    # log_probabilities[t, s]: best log-prob of any path ending in s at t.
    log_probabilities = np.zeros((num_timesteps, self.n_states))
    backpointers = np.zeros((num_timesteps, self.n_states), dtype=int)

    b0 = self._emission_vector(observations[0])
    log_probabilities[0] = log_pi + np.log(b0 + 1e-300)

    for t in range(1, num_timesteps):
        transition_matrix_t = self._get_transition_matrix(t)
        log_transition_matrix_t = np.log(transition_matrix_t + 1e-300)
        b = self._emission_vector(observations[t])
        for s in range(self.n_states):
            # Pick the best predecessor state for state s at time t.
            candidates = log_probabilities[t - 1] + log_transition_matrix_t[:, s]
            backpointers[t, s] = np.argmax(candidates)
            log_probabilities[t, s] = candidates[backpointers[t, s]] + np.log(b[s] + 1e-300)

    # Backtrack from the best final state through the backpointers.
    best_path = np.zeros(num_timesteps, dtype=int)
    best_path[num_timesteps - 1] = np.argmax(log_probabilities[num_timesteps - 1])
    for t in range(num_timesteps - 2, -1, -1):
        best_path[t] = backpointers[t + 1, best_path[t + 1]]

    return best_path
predict_next
predict_next(
    observations: ndarray,
) -> tuple[float, float, ndarray]

Predict next timestep's points distribution.

Runs forward algorithm, then propagates one step ahead via the transition matrix.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
expected_points

E[Y_{num_timesteps+1} | y_1:num_timesteps]

TYPE: float

variance

Var[Y_{num_timesteps+1} | y_1:num_timesteps] (from law of total variance)

TYPE: float

next_state_dist

P(S_{num_timesteps+1} | y_1:num_timesteps)

TYPE: (ndarray, shape(N))

Source code in fplx/inference/hmm.py
def predict_next(self, observations: np.ndarray) -> tuple[float, float, np.ndarray]:
    """
    Predict the next timestep's points distribution.

    Runs the forward algorithm to obtain the current filtered belief,
    then propagates it one step ahead through the transition matrix.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    expected_points : float
        E[Y_{T+1} | y_1:T]
    variance : float
        Var[Y_{T+1} | y_1:T] (law of total variance), floored at 0.
    next_state_dist : np.ndarray, shape (N,)
        P(S_{T+1} | y_1:T)
    """
    messages, _ = self.forward(observations)
    belief = messages[-1]  # filtered belief over the current state

    # Propagate one step through the transition matrix for the next GW.
    step_transition = self._get_transition_matrix(len(observations))
    next_state_dist = belief @ step_transition

    means = np.array([self.emission_params[s][0] for s in range(self.n_states)])
    variances = np.array([self.emission_params[s][1] ** 2 for s in range(self.n_states)])

    expected_points = next_state_dist @ means

    # Law of total variance: Var = E[Var|S] + Var[E|S].
    variance = next_state_dist @ variances + next_state_dist @ (means**2) - expected_points**2

    return expected_points, max(0.0, variance), next_state_dist
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    verbose: bool = False,
)

Learn transition matrix and emission parameters via Baum-Welch EM.

PARAMETER DESCRIPTION
observations

Training sequence.

TYPE: (ndarray, shape(num_timesteps))

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

verbose

Print progress.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
self
Source code in fplx/inference/hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    verbose: bool = False,
):
    """
    Learn transition matrix and emission parameters via Baum-Welch EM.

    Each iteration performs a scaled forward-backward pass (E-step) and
    then re-estimates the initial distribution, the transition matrix, and
    the Gaussian emission parameters (M-step). Iteration stops when the
    log-likelihood improves by less than `tol`, or after `n_iter` rounds.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)
        Training sequence.
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    verbose : bool
        Print progress.

    Returns
    -------
    self
    """
    num_timesteps = len(observations)
    prev_log_likelihood = -np.inf

    for iteration in range(n_iter):
        # E-step
        forward_messages, scale = self.forward(observations)

        # Backward pass using the same scaling factors as forward()
        backward_messages = np.zeros((num_timesteps, self.n_states))
        backward_messages[num_timesteps - 1] = 1.0
        for t in range(num_timesteps - 2, -1, -1):
            # NOTE(review): _get_transition_matrix presumably honors per-timestep
            # news overrides, mirroring the multivariate HMM — confirm.
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])
            backward_messages[t] = transition_matrix_t_plus_1 @ (b_next * backward_messages[t + 1])
            if scale[t + 1] > 0:
                backward_messages[t] /= scale[t + 1]

        # gamma_t(i) = P(S_t=i | y_1:T)
        smoothed_posteriors = forward_messages * backward_messages
        row_sums = smoothed_posteriors.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0  # guard: avoid 0/0 on degenerate rows
        smoothed_posteriors /= row_sums

        # transition_posteriors: P(S_t=i, S_{t+1}=j | y_1:num_timesteps) for transition re-estimation
        transition_posteriors = np.zeros((num_timesteps - 1, self.n_states, self.n_states))
        for t in range(num_timesteps - 1):
            transition_matrix_t_plus_1 = self._get_transition_matrix(t + 1)
            b_next = self._emission_vector(observations[t + 1])

            # xi_t(i,j) = P(S_t=i, S_{t+1}=j | y_1:T)
            for i in range(self.n_states):
                for j in range(self.n_states):
                    transition_posteriors[t, i, j] = (
                        forward_messages[t, i]
                        * transition_matrix_t_plus_1[i, j]
                        * b_next[j]
                        * backward_messages[t + 1, j]
                    )

            # Normalize each timestep slice so it is a proper joint distribution.
            xi_sum = transition_posteriors[t].sum()
            if xi_sum > 0:
                transition_posteriors[t] /= xi_sum

        # M-step
        # Re-estimate initial distribution
        self.pi = smoothed_posteriors[0]

        # Re-estimate transition matrix
        for i in range(self.n_states):
            denom = smoothed_posteriors[:-1, i].sum()
            if denom > 0:
                for j in range(self.n_states):
                    self.transition_matrix[i, j] = transition_posteriors[:, i, j].sum() / denom
            # Renormalize
            row_sum = self.transition_matrix[i].sum()
            if row_sum > 0:
                self.transition_matrix[i] /= row_sum

        # re-estimate emission parameters
        for s in range(self.n_states):
            weights = smoothed_posteriors[:, s]
            w_sum = weights.sum()
            if w_sum > 1e-10:
                # Responsibility-weighted Gaussian mean / std for state s.
                mu = np.average(observations, weights=weights)
                var = np.average((observations - mu) ** 2, weights=weights)
                sigma = max(np.sqrt(var), 0.1)  # floor to prevent collapse
                self.emission_params[s] = (mu, sigma)

        # log-likelihood
        # With a scaled forward pass, the sum of log scaling constants
        # equals the sequence log-likelihood.
        log_likelihood = np.sum(np.log(scale + 1e-300))
        if verbose:
            logger.info("EM iteration %d: LL = %.4f", iteration, log_likelihood)

        if abs(log_likelihood - prev_log_likelihood) < tol:
            if verbose:
                logger.info("Converged at iteration %d", iteration)
            break
        prev_log_likelihood = log_likelihood

    return self

kalman

Kalman Filter for continuous player point potential tracking.

State model: x_{t+1} = x_t + w_t, w_t ~ N(0, Q_t) Observation: y_t = x_t + v_t, v_t ~ N(0, R_t)

Supports per-timestep noise overrides so that: - News shocks (injury) → inflate Q_t (true form can jump suddenly) - Fixture difficulty → inflate R_t (harder opponents → noisier observations)

KalmanFilter
KalmanFilter(
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
)

1D Kalman Filter for tracking latent point potential.

PARAMETER DESCRIPTION
process_noise

Default process noise variance (form drift rate).

TYPE: float DEFAULT: 1.0

observation_noise

Default observation noise variance (weekly point noise).

TYPE: float DEFAULT: 4.0

initial_state_mean

Initial state estimate.

TYPE: float DEFAULT: 4.0

initial_state_covariance

Initial state uncertainty (variance).

TYPE: float DEFAULT: 2.0

Source code in fplx/inference/kalman.py
def __init__(
    self,
    process_noise: float = 1.0,
    observation_noise: float = 4.0,
    initial_state_mean: float = 4.0,
    initial_state_covariance: float = 2.0,
):
    """1D Kalman filter for tracking latent point potential.

    Parameters
    ----------
    process_noise : float
        Default process-noise variance (form drift rate).
    observation_noise : float
        Default observation-noise variance (weekly point noise).
    initial_state_mean : float
        Initial state estimate.
    initial_state_covariance : float
        Initial state uncertainty (variance).
    """
    # Baseline noise levels; individual timesteps may override them below.
    self.default_process_noise = process_noise
    self.default_observation_noise = observation_noise

    # Prior on the latent state before any observation arrives.
    self.initial_state_mean = initial_state_mean
    self.initial_state_covariance = initial_state_covariance

    # timestep -> noise value, injected from news / fixture signals.
    self._process_noise_overrides: dict[int, float] = {}
    self._observation_noise_overrides: dict[int, float] = {}

    # Populated by filter(); None until the filter has been run.
    self.filtered_state_means: Optional[np.ndarray] = None
    self.filtered_state_covariances: Optional[np.ndarray] = None
    self.kalman_gains: Optional[np.ndarray] = None  # Kalman gains
inject_process_shock
inject_process_shock(timestep: int, multiplier: float)

Inflate process noise at a specific timestep.

Use when news indicates a sudden form change (injury, transfer). process_noise_t = default_process_noise * multiplier.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

multiplier

Process noise multiplier (>1 = more uncertainty about form drift).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_process_shock(self, timestep: int, multiplier: float):
    """
    Inflate process noise at one specific timestep.

    Intended for news indicating a sudden form change (injury, transfer):
    process_noise_t = default_process_noise * multiplier.

    Parameters
    ----------
    timestep : int
        Gameweek index the shock applies to.
    multiplier : float
        Process-noise multiplier (>1 means more uncertainty about form drift).
    """
    shocked_noise = self.default_process_noise * multiplier
    self._process_noise_overrides[timestep] = shocked_noise
inject_observation_noise
inject_observation_noise(timestep: int, factor: float)

Adjust observation noise at a specific timestep.

Use for fixture difficulty: harder opponents → less predictable points. observation_noise_t = default_observation_noise * factor.

PARAMETER DESCRIPTION
timestep

Gameweek index.

TYPE: int

factor

Observation noise factor (>1 = harder fixture, noisier observation).

TYPE: float

Source code in fplx/inference/kalman.py
def inject_observation_noise(self, timestep: int, factor: float):
    """
    Scale observation noise at one specific timestep.

    Intended for fixture difficulty (harder opponents -> less predictable
    points): observation_noise_t = default_observation_noise * factor.

    Parameters
    ----------
    timestep : int
        Gameweek index the adjustment applies to.
    factor : float
        Observation-noise factor (>1 means a harder, noisier fixture).
    """
    adjusted_noise = self.default_observation_noise * factor
    self._observation_noise_overrides[timestep] = adjusted_noise
clear_overrides
clear_overrides()

Remove all per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def clear_overrides(self):
    """Drop every per-timestep process/observation noise override."""
    for overrides in (self._process_noise_overrides, self._observation_noise_overrides):
        overrides.clear()
get_process_noise_override
get_process_noise_override(
    timestep: int,
) -> Optional[float]

Return explicit process noise override at timestep, if any.

Source code in fplx/inference/kalman.py
def get_process_noise_override(self, timestep: int) -> Optional[float]:
    """Return the explicit process-noise override at `timestep`, or None."""
    return self._process_noise_overrides.get(timestep, None)
set_noise_overrides
set_noise_overrides(
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
)

Replace per-timestep noise overrides.

Source code in fplx/inference/kalman.py
def set_noise_overrides(
    self,
    process_noise_overrides: dict[int, float],
    observation_noise_overrides: dict[int, float],
):
    """Replace all per-timestep noise overrides with copies of the inputs."""
    # Copy defensively so later caller-side mutation cannot leak in.
    self._process_noise_overrides = {**process_noise_overrides}
    self._observation_noise_overrides = {**observation_noise_overrides}
copy_with_overrides
copy_with_overrides(
    max_timestep: Optional[int] = None,
) -> KalmanFilter

Create a parameter-identical filter with copied noise overrides.

PARAMETER DESCRIPTION
max_timestep

If provided, only overrides for timesteps <= max_timestep are copied.

TYPE: int DEFAULT: None

Source code in fplx/inference/kalman.py
def copy_with_overrides(self, max_timestep: Optional[int] = None) -> "KalmanFilter":
    """Create a parameter-identical filter with copied noise overrides.

    Parameters
    ----------
    max_timestep : int, optional
        When given, only overrides at timesteps <= max_timestep are
        carried over to the copy.
    """
    clone = KalmanFilter(
        process_noise=self.default_process_noise,
        observation_noise=self.default_observation_noise,
        initial_state_mean=self.initial_state_mean,
        initial_state_covariance=self.initial_state_covariance,
    )

    def _select(overrides: dict[int, float]) -> dict[int, float]:
        # No cutoff: copy everything; otherwise keep only early timesteps.
        if max_timestep is None:
            return dict(overrides)
        return {t: v for t, v in overrides.items() if t <= max_timestep}

    clone.set_noise_overrides(
        _select(self._process_noise_overrides),
        _select(self._observation_noise_overrides),
    )
    return clone
filter
filter(observations: ndarray)

Run Kalman filter on observations with per-timestep noise.

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
filtered_state_means

Filtered state estimates (posterior mean).

TYPE: (ndarray, shape(num_timesteps))

filtered_state_covariances

Filtered state uncertainties (posterior variance).

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def filter(self, observations: np.ndarray):
    """
    Run the 1D Kalman filter with per-timestep noise overrides.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    filtered_state_means : np.ndarray, shape (num_timesteps,)
        Filtered state estimates (posterior means).
    filtered_state_covariances : np.ndarray, shape (num_timesteps,)
        Filtered state uncertainties (posterior variances).
    """
    n_steps = len(observations)
    means = np.zeros(n_steps)
    covariances = np.zeros(n_steps)
    gains = np.zeros(n_steps)

    # Prior for t = 0 comes straight from the initial state distribution.
    prior_mean = self.initial_state_mean
    prior_cov = self.initial_state_covariance

    for t in range(n_steps):
        q_t = self._get_process_noise(t)
        r_t = self._get_observation_noise(t)

        # Prediction step (random-walk state model): from t >= 1 the
        # prior is the previous posterior inflated by process noise.
        if t > 0:
            prior_mean = means[t - 1]
            prior_cov = covariances[t - 1] + q_t

        # Measurement update.
        residual = observations[t] - prior_mean
        residual_cov = prior_cov + r_t  # innovation covariance
        gain = prior_cov / residual_cov  # Kalman gain

        means[t] = prior_mean + gain * residual
        covariances[t] = (1 - gain) * prior_cov
        gains[t] = gain

    # Cache the full trajectories for predict_next() and inspection.
    self.filtered_state_means = means
    self.filtered_state_covariances = covariances
    self.kalman_gains = gains

    return means, covariances
predict_next
predict_next() -> tuple[float, float]

Predict next observation with uncertainty.

Returns the predictive distribution for Y_{t+1} (the observation), not X_{t+1} (the latent state). This ensures consistency with the HMM predict_next which also returns observation-level variance.

Var[Y_{t+1}] = Var[X_{t+1}|y_{1:t}] + R = (P_t + Q) + R

Must call filter() first.

RETURNS DESCRIPTION
predicted_mean

E[Y_{t+1} | y_{1:t}].

TYPE: float

predicted_var

Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

TYPE: float

Source code in fplx/inference/kalman.py
def predict_next(self) -> tuple[float, float]:
    """
    Predict the next observation with uncertainty.

    Returns the predictive distribution of Y_{t+1} (the observation),
    not X_{t+1} (the latent state), matching the HMM predict_next
    convention: Var[Y_{t+1}] = (P_t + Q) + R.

    Must call filter() first.

    Returns
    -------
    predicted_mean : float
        E[Y_{t+1} | y_{1:t}].
    predicted_var : float
        Var[Y_{t+1} | y_{1:t}] (observation-level, includes R).

    Raises
    ------
    RuntimeError
        If filter() has not been run yet.
    """
    if self.filtered_state_means is None or self.filtered_state_covariances is None:
        raise RuntimeError("Must call filter() before predict_next().")

    next_t = len(self.filtered_state_means)
    q_next = self._get_process_noise(next_t)
    r_next = self._get_observation_noise(next_t)

    predicted_mean = self.filtered_state_means[-1]
    # One-step state prediction variance (P + Q), plus observation noise R.
    predicted_var = self.filtered_state_covariances[-1] + q_next + r_next

    return predicted_mean, predicted_var
smooth
smooth(observations: ndarray)

Run RTS smoother (backward pass after forward Kalman filter).

PARAMETER DESCRIPTION
observations

TYPE: (ndarray, shape(num_timesteps))

RETURNS DESCRIPTION
smoothed_state_means

Smoothed state estimates.

TYPE: (ndarray, shape(num_timesteps))

smoothed_state_covariances

Smoothed state uncertainties.

TYPE: (ndarray, shape(num_timesteps))

Source code in fplx/inference/kalman.py
def smooth(self, observations: np.ndarray):
    """
    Run the RTS smoother: forward Kalman pass, then a backward sweep.

    Parameters
    ----------
    observations : np.ndarray, shape (num_timesteps,)

    Returns
    -------
    smoothed_state_means : np.ndarray, shape (num_timesteps,)
        Smoothed state estimates.
    smoothed_state_covariances : np.ndarray, shape (num_timesteps,)
        Smoothed state uncertainties.
    """
    means_f, covs_f = self.filter(observations)
    n_steps = len(observations)

    means_s = np.zeros(n_steps)
    covs_s = np.zeros(n_steps)

    # At the final timestep the smoothed and filtered estimates coincide.
    means_s[-1] = means_f[-1]
    covs_s[-1] = covs_f[-1]

    for t in range(n_steps - 2, -1, -1):
        q_next = self._get_process_noise(t + 1)
        cov_pred = covs_f[t] + q_next

        # RTS smoother gain; guard against a degenerate zero variance.
        gain = covs_f[t] / cov_pred if cov_pred > 0 else 0.0

        means_s[t] = means_f[t] + gain * (means_s[t + 1] - means_f[t])
        covs_s[t] = covs_f[t] + gain**2 * (covs_s[t + 1] - cov_pred)

    return means_s, covs_s

multivariate_hmm

Position-aware multivariate-emission HMM for player form inference.

Uses position-specific feature vectors extracted from the full vaastav dataset:

GK: [saves/90, xGC/90, clean_sheet, bonus, mins_frac] DEF: [xG, xA, xGC/90, clean_sheet, influence/100, bonus, mins_frac] MID: [xG, xA, creativity/100, threat/100, bonus, mins_frac] FWD: [xG, xA, threat/100, bonus, mins_frac]

Each state emits a multivariate Gaussian with diagonal covariance. Baum-Welch learns per-player emission parameters from their history.

The minutes_fraction feature (0 or ~1) lets the HMM identify the Injured state from the feature vector alone, without NLP news signals.

MultivariateHMM
MultivariateHMM(
    position: str = "MID",
    transition_matrix: Optional[ndarray] = None,
    initial_dist: Optional[ndarray] = None,
)

Position-aware HMM with multivariate diagonal Gaussian emissions.

PARAMETER DESCRIPTION
position

GK, DEF, MID, FWD. Determines feature set and default emissions.

TYPE: str DEFAULT: 'MID'

Source code in fplx/inference/multivariate_hmm.py
def __init__(
    self,
    position: str = "MID",
    transition_matrix: Optional[np.ndarray] = None,
    initial_dist: Optional[np.ndarray] = None,
):
    """Position-aware HMM with multivariate diagonal Gaussian emissions.

    Parameters
    ----------
    position : str
        GK, DEF, MID, FWD. Selects the feature set and default emissions.
    transition_matrix : np.ndarray, optional
        Prior transition matrix; defaults to DEFAULT_TRANSITION.
    initial_dist : np.ndarray, optional
        Initial state distribution; defaults to DEFAULT_INITIAL.
    """
    self.position = position
    self.means, self.vars = _default_emissions(position)

    # Keep copies of the position-level priors so Baum-Welch can
    # regularize (MAP-style) toward them during fitting.
    self.prior_means = self.means.copy()
    self.prior_vars = self.vars.copy()
    if transition_matrix is None:
        self.prior_A = DEFAULT_TRANSITION.copy()
    else:
        self.prior_A = transition_matrix.copy()

    self.A = self.prior_A.copy()
    self.pi = DEFAULT_INITIAL.copy() if initial_dist is None else initial_dist.copy()
    self.n_states = N_STATES
    self.n_features = self.means.shape[1]
    # timestep -> perturbed transition matrix (news injection).
    self._transition_overrides: dict[int, np.ndarray] = {}
inject_news_perturbation
inject_news_perturbation(
    timestep: int,
    state_boost: dict,
    confidence: float = 1.0,
)

Perturb transition matrix at timestep (same API as scalar HMM).

Source code in fplx/inference/multivariate_hmm.py
def inject_news_perturbation(self, timestep: int, state_boost: dict, confidence: float = 1.0):
    """Store a perturbed transition matrix for one timestep (scalar-HMM API).

    Each boosted target state's inbound probability is scaled by
    1 + confidence * (boost - 1); rows are then renormalized.
    """
    perturbed = self.A.copy()
    for row in range(self.n_states):
        for target, boost in state_boost.items():
            perturbed[row, target] *= 1.0 + confidence * (boost - 1.0)
        row_total = perturbed[row].sum()
        if row_total > 0:
            perturbed[row] /= row_total
    self._transition_overrides[timestep] = perturbed
forward
forward(observations: ndarray)

Forward algorithm. observations: (T, D).

Source code in fplx/inference/multivariate_hmm.py
def forward(self, observations: np.ndarray):
    """Scaled forward algorithm over a (T, D) observation matrix."""
    n_obs = len(observations)
    alpha = np.zeros((n_obs, self.n_states))
    scale = np.zeros(n_obs)

    # t = 0: initial distribution weighted by the emission likelihood.
    alpha[0] = self.pi * self._emission_prob_vector(observations[0])
    scale[0] = alpha[0].sum()
    if scale[0] > 0:
        alpha[0] /= scale[0]

    # t >= 1: propagate through the (possibly overridden) transition matrix.
    for t in range(1, n_obs):
        emission = self._emission_prob_vector(observations[t])
        alpha[t] = (alpha[t - 1] @ self._get_A(t)) * emission
        scale[t] = alpha[t].sum()
        if scale[t] > 0:
            alpha[t] /= scale[t]
    return alpha, scale
forward_backward
forward_backward(observations: ndarray) -> ndarray

Smoothed posteriors P(S_t | y_{1:T}).

Source code in fplx/inference/multivariate_hmm.py
def forward_backward(self, observations: np.ndarray) -> np.ndarray:
    """Compute smoothed state posteriors P(S_t | y_{1:T})."""
    n_obs = len(observations)
    alpha, scale = self.forward(observations)

    # Backward pass, scaled with the same constants as forward().
    beta = np.zeros((n_obs, self.n_states))
    beta[n_obs - 1] = 1.0
    for t in range(n_obs - 2, -1, -1):
        emission_next = self._emission_prob_vector(observations[t + 1])
        beta[t] = self._get_A(t + 1) @ (emission_next * beta[t + 1])
        if scale[t + 1] > 0:
            beta[t] /= scale[t + 1]

    gamma = alpha * beta
    totals = gamma.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0  # guard against all-zero rows
    return gamma / totals
viterbi
viterbi(observations: ndarray) -> ndarray

Most likely state sequence.

Source code in fplx/inference/multivariate_hmm.py
def viterbi(self, observations: np.ndarray) -> np.ndarray:
    """Return the most likely hidden-state sequence (log-space Viterbi)."""
    n_obs = len(observations)
    log_delta = np.zeros((n_obs, self.n_states))
    backpointers = np.zeros((n_obs, self.n_states), dtype=int)

    log_emission_0 = np.array(
        [self._emission_log_prob(observations[0], s) for s in range(self.n_states)]
    )
    log_delta[0] = np.log(self.pi + 1e-300) + log_emission_0

    for t in range(1, n_obs):
        log_A = np.log(self._get_A(t) + 1e-300)
        log_emission = np.array(
            [self._emission_log_prob(observations[t], s) for s in range(self.n_states)]
        )
        for s in range(self.n_states):
            candidates = log_delta[t - 1] + log_A[:, s]
            best = int(np.argmax(candidates))
            backpointers[t, s] = best
            log_delta[t, s] = candidates[best] + log_emission[s]

    # Backtrack from the best terminal state.
    path = np.zeros(n_obs, dtype=int)
    path[n_obs - 1] = np.argmax(log_delta[n_obs - 1])
    for t in range(n_obs - 2, -1, -1):
        path[t] = backpointers[t + 1, path[t + 1]]
    return path
predict_next_features
predict_next_features(observations: ndarray)

Predict next gameweek's feature vector.

Returns mean, var (per feature), and state distribution.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_features(self, observations: np.ndarray):
    """
    Predict next gameweek's feature vector.

    Returns per-feature mean and variance plus the predicted state
    distribution for the next timestep.
    """
    alpha, _ = self.forward(observations)
    state_dist = alpha[-1] @ self._get_A(len(observations))

    mean = state_dist @ self.means
    # Law of total variance, applied per feature; floored for stability.
    var = state_dist @ self.vars + state_dist @ (self.means**2) - mean**2
    return mean, np.maximum(var, 1e-8), state_dist
one_step_point_predictions
one_step_point_predictions(
    observations: ndarray,
) -> ndarray

One-step-ahead point predictions for each historical timestep.

Returns array preds where preds[t] predicts points at timestep t, using information up to t-1 (preds[0] is NaN).

Source code in fplx/inference/multivariate_hmm.py
def one_step_point_predictions(self, observations: np.ndarray) -> np.ndarray:
    """One-step-ahead point predictions for each historical timestep.

    preds[t] is the prediction for timestep t using only data up to
    t - 1; preds[0] is NaN because no prior information exists.
    """
    n_obs = len(observations)
    preds = np.full(n_obs, np.nan)
    if n_obs < 2:
        return preds

    alpha, _ = self.forward(observations)
    for t in range(1, n_obs):
        # Predicted state distribution at t given filtered belief at t-1.
        prior_dist = alpha[t - 1] @ self._get_A(t)
        preds[t] = self._expected_points_from_state_dist(prior_dist)
    return preds
predict_next_points
predict_next_points(
    observations: ndarray,
) -> tuple[float, float]

Convert predicted features → expected FPL points.

Uses FPL scoring rules applied to predicted feature rates.

Source code in fplx/inference/multivariate_hmm.py
def predict_next_points(self, observations: np.ndarray) -> tuple[float, float]:
    """
    Convert predicted feature rates into expected FPL points.

    Reads the structural xPts feature from the predicted feature vector;
    the variance gets a +1.0 residual floor on top of the feature variance.
    """
    feature_mean, feature_var, _ = self.predict_next_features(observations)
    xpts_pos = POSITION_FEATURES[self.position].index("xPts")

    expected = max(0.0, float(feature_mean[xpts_pos]))
    variance = float(max(feature_var[xpts_pos], 1e-6) + 1.0)  # residual floor
    return expected, variance
fit
fit(
    observations: ndarray,
    n_iter: int = 20,
    tol: float = 0.0001,
    prior_weight: float = 0.85,
)

Baum-Welch EM with MAP-style prior interpolation.

PARAMETER DESCRIPTION
observations

Feature matrix with shape (T, D).

TYPE: ndarray

n_iter

Maximum EM iterations.

TYPE: int DEFAULT: 20

tol

Convergence tolerance on log-likelihood.

TYPE: float DEFAULT: 0.0001

prior_weight

Weight on prior parameters in [0, 1]. Higher values increase regularization toward position-level default emissions/transitions.

TYPE: float DEFAULT: 0.85

Source code in fplx/inference/multivariate_hmm.py
def fit(
    self,
    observations: np.ndarray,
    n_iter: int = 20,
    tol: float = 1e-4,
    prior_weight: float = 0.85,
):
    """Baum-Welch EM with MAP-style prior interpolation.

    Each EM update blends the maximum-likelihood estimate with the
    position-level prior (`prior_A`, `prior_means`, `prior_vars`) using
    `prior_weight`, which keeps short player histories from overfitting.

    Parameters
    ----------
    observations : np.ndarray
        Feature matrix with shape (T, D).
    n_iter : int
        Maximum EM iterations.
    tol : float
        Convergence tolerance on log-likelihood.
    prior_weight : float
        Weight on prior parameters in [0, 1]. Higher values increase
        regularization toward position-level default emissions/transitions.

    Returns
    -------
    self
    """
    T = observations.shape[0]
    prev_ll = -np.inf
    prior_weight = float(np.clip(prior_weight, 0.0, 1.0))

    for _ in range(n_iter):
        alpha, scale = self.forward(observations)

        # Backward pass with scaling aligned to forward()
        beta = np.zeros((T, self.n_states))
        beta[T - 1] = 1.0
        for t in range(T - 2, -1, -1):
            b_next = self._emission_prob_vector(observations[t + 1])
            beta[t] = self._get_A(t + 1) @ (b_next * beta[t + 1])
            if scale[t + 1] > 0:
                beta[t] /= scale[t + 1]

        # gamma_t(i) = P(S_t = i | y_{1:T}); renormalize to guard zeros.
        gamma = alpha * beta
        rs = gamma.sum(axis=1, keepdims=True)
        rs[rs == 0] = 1.0
        gamma /= rs

        # M-step: initial
        self.pi = np.maximum(gamma[0], 1e-10)
        self.pi /= self.pi.sum()

        # M-step: transitions
        # xi_t(i, j) = P(S_t = i, S_{t+1} = j | y_{1:T}), normalized per t.
        xi = np.zeros((T - 1, self.n_states, self.n_states))
        for t in range(T - 1):
            b_next = self._emission_prob_vector(observations[t + 1])
            for i in range(self.n_states):
                for j in range(self.n_states):
                    xi[t, i, j] = alpha[t, i] * self._get_A(t + 1)[i, j] * b_next[j] * beta[t + 1, j]
            xs = xi[t].sum()
            if xs > 0:
                xi[t] /= xs
        for i in range(self.n_states):
            d = gamma[:-1, i].sum()
            if d > 1e-10:
                # Blend the MLE row with the prior row (MAP-style shrinkage).
                mle_A = xi[:, i, :].sum(axis=0) / d
                self.A[i] = prior_weight * self.prior_A[i] + (1.0 - prior_weight) * mle_A
            rs = self.A[i].sum()
            if rs > 0:
                self.A[i] /= rs

        # M-step: emissions
        for s in range(self.n_states):
            w = gamma[:, s]
            ws = w.sum()
            if ws > 1e-10:
                # Responsibility-weighted mean/variance, shrunk toward the
                # priors; variances are floored at 1e-4 to keep the
                # diagonal Gaussian densities well-defined.
                mle_mu = np.average(observations, axis=0, weights=w)
                diff = observations - mle_mu
                mle_var = np.average(diff**2, axis=0, weights=w)
                self.means[s] = prior_weight * self.prior_means[s] + (1.0 - prior_weight) * mle_mu
                self.vars[s] = np.maximum(
                    prior_weight * self.prior_vars[s] + (1.0 - prior_weight) * mle_var,
                    1e-4,
                )

        # Sequence log-likelihood from the forward scaling constants.
        ll = np.sum(np.log(scale + 1e-300))
        if abs(ll - prev_ll) < tol:
            break
        prev_ll = ll
    return self
build_feature_matrix
build_feature_matrix(
    timeseries: DataFrame, position: str
) -> ndarray

Extract position-specific feature matrix from player timeseries.

PARAMETER DESCRIPTION
timeseries

Player gameweek history from vaastav dataset.

TYPE: DataFrame

position

GK, DEF, MID, or FWD.

TYPE: str

RETURNS DESCRIPTION
np.ndarray, shape (T, 2): column 0 is a position-specific structural xPts projection, column 1 is the minutes fraction (minutes/90, clipped to [0, 1]).
Source code in fplx/inference/multivariate_hmm.py
def build_feature_matrix(timeseries: pd.DataFrame, position: str) -> np.ndarray:
    """
    Extract a compact per-gameweek feature matrix from player timeseries.

    The current implementation always emits exactly two columns: a
    structural expected-points projection and the minutes fraction. The
    position influences only the xPts projection (via compute_xpoints),
    not the number of features.

    Parameters
    ----------
    timeseries : pd.DataFrame
        Player gameweek history from vaastav dataset.
    position : str
        GK, DEF, MID, or FWD (passed through to compute_xpoints).

    Returns
    -------
    np.ndarray, shape (T, 2)
        Column 0: structural xPts projection; column 1: minutes/90,
        clipped to [0, 1].
    """
    n = len(timeseries)
    features = np.zeros((n, 2))

    # mins_frac (0 or ~1) lets downstream models spot missed gameweeks.
    mins = _safe_col(timeseries, "minutes")
    features[:, 1] = np.clip(mins / 90.0, 0.0, 1.0)  # mins_frac

    # Domain-specific projection from rich event space to structural xPts.
    features[:, 0] = compute_xpoints(timeseries, position)
    return features

pipeline

Per-player inference pipeline orchestrator.

This is the single entry point that FPLModel.fit() calls for each player. It coordinates HMM, Kalman Filter, signal injection, and fusion.

Usage: pipeline = PlayerInferencePipeline() pipeline.ingest_observations(points_array) pipeline.inject_news("Player ruled out for 3 weeks", timestep=20) pipeline.inject_fixture_difficulty(difficulty=4.5, timestep=21) results = pipeline.run() ep_mean, ep_var = pipeline.predict_next()

InferenceResult dataclass
InferenceResult(
    filtered_beliefs: ndarray,
    smoothed_beliefs: ndarray,
    viterbi_path: ndarray,
    hmm_predicted_mean: float = 0.0,
    hmm_predicted_var: float = 0.0,
    kalman_filtered: ndarray = (lambda: array([]))(),
    kalman_uncertainty: ndarray = (lambda: array([]))(),
    kf_predicted_mean: float = 0.0,
    kf_predicted_var: float = 0.0,
    fused_mean: ndarray = (lambda: array([]))(),
    fused_var: ndarray = (lambda: array([]))(),
    fusion_alpha: Optional[float] = None,
    predicted_mean: float = 0.0,
    predicted_var: float = 0.0,
)

Container for inference pipeline outputs.

PlayerInferencePipeline
PlayerInferencePipeline(
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
)

Orchestrates HMM + Kalman inference for a single player.

PARAMETER DESCRIPTION
hmm_params

Override HMM parameters: transition_matrix, emission_params, initial_dist.

TYPE: dict DEFAULT: None

kf_params

Override Kalman parameters: Q, R, x0, P0.

TYPE: dict DEFAULT: None

Source code in fplx/inference/pipeline.py
def __init__(
    self,
    hmm_params: Optional[dict] = None,
    kf_params: Optional[dict] = None,
    hmm_variance_floor: float = 1.0,
    news_params: Optional[dict] = None,
    fusion_mode: str = "precision",
    fusion_params: Optional[dict] = None,
):
    """
    Orchestrate HMM + Kalman inference for a single player.

    Parameters
    ----------
    hmm_params : dict, optional
        Override HMM parameters: transition_matrix, emission_params, initial_dist.
    kf_params : dict, optional
        Override Kalman parameters: process_noise, observation_noise,
        initial_state_mean, initial_state_covariance.
    hmm_variance_floor : float
        Lower bound on the HMM predictive variance (clamped to >= 1e-6).
    news_params : dict, optional
        Overrides merged into DEFAULT_NEWS_PARAMS.
    fusion_mode : str
        Either 'precision' or 'calibrated_alpha'.
    fusion_params : dict, optional
        Overrides merged into DEFAULT_FUSION_PARAMS.

    Raises
    ------
    ValueError
        If `fusion_mode` is not a supported mode.
    """
    # Fail fast: reject a bad fusion_mode BEFORE constructing any
    # components (previously validation ran last, so an invalid config
    # still paid for HMM/Kalman construction before raising).
    if fusion_mode not in {"precision", "calibrated_alpha"}:
        raise ValueError(
            f"Unknown fusion_mode '{fusion_mode}'. Expected one of: 'precision', 'calibrated_alpha'."
        )
    self.fusion_mode = fusion_mode

    hmm_params = hmm_params or {}
    kf_params = kf_params or {}

    self.hmm = HMMInference(
        transition_matrix=hmm_params.get("transition_matrix"),
        emission_params=hmm_params.get("emission_params"),
        initial_dist=hmm_params.get("initial_dist"),
    )
    self.kf = KalmanFilter(
        process_noise=kf_params.get("process_noise", 1.0),
        observation_noise=kf_params.get("observation_noise", 4.0),
        initial_state_mean=kf_params.get("initial_state_mean", 4.0),
        initial_state_covariance=kf_params.get("initial_state_covariance", 2.0),
    )
    self.hmm_variance_floor = max(float(hmm_variance_floor), 1e-6)
    self.news_params = _merge_nested_dicts(DEFAULT_NEWS_PARAMS, news_params or {})
    self.fusion_params = _merge_nested_dicts(DEFAULT_FUSION_PARAMS, fusion_params or {})

    # Populated by ingest_observations() / run().
    self.observations: Optional[np.ndarray] = None
    self._result: Optional[InferenceResult] = None
ingest_observations
ingest_observations(points: ndarray)

Set the player's historical points sequence.

PARAMETER DESCRIPTION
points

Weekly points history.

TYPE: (ndarray, shape(T))

Source code in fplx/inference/pipeline.py
def ingest_observations(self, points: np.ndarray):
    """
    Store the player's weekly points history.

    Parameters
    ----------
    points : np.ndarray, shape (T,)
        Weekly points history; coerced to a float array.
    """
    self.observations = np.asarray(points, dtype=float)
    # Any previously computed inference no longer matches the new data.
    self._result = None
inject_news
inject_news(news_signal: dict, timestep: int)

Inject a news signal into the inference at a specific gameweek.

Bridges from existing NewsSignal.generate_signal() output format.

PARAMETER DESCRIPTION
news_signal

Output from NewsSignal.generate_signal(). Must contain: 'availability', 'minutes_risk', 'confidence'.

TYPE: dict

timestep

The gameweek index to apply the perturbation.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_news(
    self,
    news_signal: dict,
    timestep: int,
):
    """
    Apply a news-derived perturbation to both the HMM and the Kalman filter.

    Bridges from the existing NewsSignal.generate_signal() output format.

    Parameters
    ----------
    news_signal : dict
        Output from NewsSignal.generate_signal(); the keys read are
        'availability', 'minutes_risk', 'confidence'.
    timestep : int
        The gameweek index to apply the perturbation.
    """
    thresholds = self.news_params.get("classification_thresholds")
    category = _classify_news(
        news_signal.get("availability", 1.0),
        news_signal.get("minutes_risk", 0.0),
        thresholds,
    )
    default_conf = float(self.news_params.get("default_confidence", 0.6))
    confidence = news_signal.get("confidence", default_conf)

    pmap = self.news_params.get("perturbation_map", DEFAULT_NEWS_PERTURBATION_MAP)
    fallback = pmap.get("neutral", {"state_boost": {}, "kalman_shock": 1.0})
    perturbation = pmap.get(category, fallback)

    # HMM side: bias the state beliefs at the given gameweek.
    boost = perturbation.get("state_boost", {})
    if boost:
        self.hmm.inject_news_perturbation(
            timestep=timestep,
            state_boost=boost,
            confidence=confidence,
        )

    # Kalman side: only apply a process shock when the multiplier is non-trivial.
    shock = float(perturbation.get("kalman_shock", 1.0))
    if shock != 1.0:
        self.kf.inject_process_shock(
            timestep=timestep,
            multiplier=shock,
        )
inject_fixture_difficulty
inject_fixture_difficulty(difficulty: float, timestep: int)

Inject fixture difficulty into Kalman observation noise.

PARAMETER DESCRIPTION
difficulty

Fixture difficulty score (1-5, from FixtureSignal).

TYPE: float

timestep

The gameweek index.

TYPE: int

Source code in fplx/inference/pipeline.py
def inject_fixture_difficulty(self, difficulty: float, timestep: int):
    """
    Inject fixture difficulty into the Kalman observation noise.

    Parameters
    ----------
    difficulty : float
        Fixture difficulty score (1-5, from FixtureSignal).
    timestep : int
        The gameweek index.
    """
    # Convert difficulty to a noise multiplier, then apply it to the filter.
    factor = _difficulty_to_noise_factor(difficulty)
    self.kf.inject_observation_noise(timestep=timestep, factor=factor)
run
run() -> InferenceResult

Run full inference pipeline: HMM + Kalman + Fusion.

RETURNS DESCRIPTION
InferenceResult

All inference outputs.

Source code in fplx/inference/pipeline.py
def run(self) -> InferenceResult:
    """
    Run full inference pipeline: HMM + Kalman + Fusion.

    Runs the HMM passes (forward, forward-backward, Viterbi, one-step
    prediction) and the Kalman filter over the ingested observations,
    then combines the two according to ``self.fusion_mode``:

    - ``"calibrated_alpha"``: convex combination of the Kalman and HMM
      tracks with weight ``alpha`` estimated from the data.
    - ``"precision"``: precision-weighted fusion of the smoothed HMM
      beliefs and the Kalman track.

    Returns
    -------
    InferenceResult
        All inference outputs. Also cached on ``self._result`` for
        ``predict_next()``.

    Raises
    ------
    RuntimeError
        If ``ingest_observations()`` has not been called.
    """
    if self.observations is None or len(self.observations) == 0:
        raise RuntimeError("No observations ingested. Call ingest_observations().")

    obs = self.observations

    # HMM: filtered (alpha) and smoothed (gamma) state beliefs, the most
    # likely state path, and a one-step-ahead predictive mean/variance.
    alpha, _ = self.hmm.forward(obs)
    gamma = self.hmm.forward_backward(obs)
    viterbi_path = self.hmm.viterbi(obs)
    hmm_pred_mean, hmm_pred_var, _ = self.hmm.predict_next(obs)

    # Kalman: filtered state track plus one-step-ahead prediction.
    kf_x, kf_P = self.kf.filter(obs)
    kf_pred_mean, kf_pred_var = self.kf.predict_next()

    fusion_alpha = None
    if self.fusion_mode == "calibrated_alpha":
        fusion_alpha = self._estimate_fusion_alpha(obs)
        hmm_seq_mean, hmm_seq_var = self._hmm_sequence_moments(gamma)

        # Convex alpha-weighted blend of the two tracks; variances combine
        # with squared weights, each floored to avoid degenerate zeros.
        fused_mean = fusion_alpha * kf_x + (1.0 - fusion_alpha) * hmm_seq_mean
        fused_var = fusion_alpha**2 * np.maximum(kf_P, 1e-6) + (1.0 - fusion_alpha) ** 2 * np.maximum(
            hmm_seq_var, self.hmm_variance_floor
        )

        # Same alpha-weighted blend for the one-step-ahead forecast.
        pred_mean = fusion_alpha * kf_pred_mean + (1.0 - fusion_alpha) * hmm_pred_mean
        pred_var = fusion_alpha**2 * max(kf_pred_var, 1e-6) + (1.0 - fusion_alpha) ** 2 * max(
            hmm_pred_var, self.hmm_variance_floor
        )
    else:
        # Fusion (full sequence, smoothed)
        # Apply an HMM variance floor so HMM does not become unrealistically
        # overconfident and dominate precision-weighted fusion.
        emission_params_for_fusion = {
            s: (mu, max(std, np.sqrt(self.hmm_variance_floor)))
            for s, (mu, std) in self.hmm.emission_params.items()
        }
        fused_mean, fused_var = fuse_sequences(gamma, kf_x, kf_P, emission_params_for_fusion)

        # Fused one-step-ahead prediction
        pred_mean, pred_var = fuse_estimates(
            hmm_pred_mean,
            max(hmm_pred_var, self.hmm_variance_floor),
            kf_pred_mean,
            kf_pred_var,
        )

    # Bundle everything; fusion_alpha stays None in "precision" mode.
    self._result = InferenceResult(
        filtered_beliefs=alpha,
        smoothed_beliefs=gamma,
        viterbi_path=viterbi_path,
        hmm_predicted_mean=hmm_pred_mean,
        hmm_predicted_var=hmm_pred_var,
        kalman_filtered=kf_x,
        kalman_uncertainty=kf_P,
        kf_predicted_mean=kf_pred_mean,
        kf_predicted_var=kf_pred_var,
        fused_mean=fused_mean,
        fused_var=fused_var,
        fusion_alpha=fusion_alpha,
        predicted_mean=pred_mean,
        predicted_var=pred_var,
    )

    return self._result
predict_next
predict_next() -> tuple[float, float]

Get the fused one-step-ahead forecast.

RETURNS DESCRIPTION
expected_points

TYPE: float

variance

TYPE: float

Source code in fplx/inference/pipeline.py
def predict_next(self) -> tuple[float, float]:
    """
    Return the fused one-step-ahead forecast as (expected_points, variance).

    Runs the full inference pipeline first when no cached result exists.
    """
    if self._result is None:
        self.run()
    result = self._result
    return result.predicted_mean, result.predicted_var
learn_parameters
learn_parameters(n_iter: int = 20)

Run Baum-Welch to learn HMM parameters from current observations.

Call this before run() if you want data-driven parameters.

Source code in fplx/inference/pipeline.py
def learn_parameters(self, n_iter: int = 20):
    """
    Learn HMM parameters from the current observations via Baum-Welch.

    Call before run() for data-driven (rather than default) parameters.

    Raises
    ------
    RuntimeError
        If no observations have been ingested yet.
    """
    obs = self.observations
    if obs is None:
        raise RuntimeError("No observations. Call ingest_observations() first.")
    self.hmm.fit(obs, n_iter=n_iter)

tft

Temporal Fusion Transformer (TFT) inference adapter.

This module provides optional deep-learning inference for FPLX using pytorch-forecasting.

TFTQuantilePredictions dataclass
TFTQuantilePredictions(
    p10: dict[int, float],
    p50: dict[int, float],
    p90: dict[int, float],
)

Container for TFT quantile outputs for a single gameweek.

to_optimizer_inputs
to_optimizer_inputs() -> (
    tuple[dict[int, float], dict[int, float]]
)

Map quantiles to objective mean and downside risk.

RETURNS DESCRIPTION
expected_points

Uses q50 as robust expected value proxy.

TYPE: dict[int, float]

downside_risk

Uses q50 - q10 as downside spread.

TYPE: dict[int, float]

Source code in fplx/inference/tft.py
def to_optimizer_inputs(self) -> tuple[dict[int, float], dict[int, float]]:
    """Translate quantile forecasts into optimizer inputs.

    Returns
    -------
    expected_points : dict[int, float]
        The median (q50) as a robust expected-value proxy.
    downside_risk : dict[int, float]
        The non-negative spread q50 - q10 per player (missing q10
        treated as 0.0).
    """
    expected_points: dict[int, float] = {}
    downside_risk: dict[int, float] = {}
    for pid, median in self.p50.items():
        expected_points[pid] = float(median)
        spread = float(median - self.p10.get(pid, 0.0))
        downside_risk[pid] = max(0.0, spread)
    return expected_points, downside_risk
TFTForecaster
TFTForecaster(
    quantiles: tuple[float, float, float] = (0.1, 0.5, 0.9),
    encoder_length: int = 15,
    prediction_length: int = 1,
)

Wrapper around PyTorch Forecasting's TemporalFusionTransformer.

Source code in fplx/inference/tft.py
def __init__(
    self,
    quantiles: tuple[float, float, float] = (0.1, 0.5, 0.9),
    encoder_length: int = 15,
    prediction_length: int = 1,
):
    """Configure forecast quantiles and window lengths.

    The underlying TFT model is built lazily by fit() or load().
    """
    self.quantiles = quantiles
    self.encoder_length = encoder_length
    self.prediction_length = prediction_length
    # Populated by fit() / load().
    self.model = None
    self._trainer = None
fit
fit(
    panel_df: DataFrame,
    training_cutoff: int,
    max_epochs: int = 20,
    batch_size: int = 256,
    learning_rate: float = 0.001,
    hidden_size: int = 32,
    attention_head_size: int = 4,
    dropout: float = 0.1,
)

Train TFT on panel data.

Source code in fplx/inference/tft.py
def fit(
    self,
    panel_df: pd.DataFrame,
    training_cutoff: int,
    max_epochs: int = 20,
    batch_size: int = 256,
    learning_rate: float = 1e-3,
    hidden_size: int = 32,
    attention_head_size: int = 4,
    dropout: float = 0.1,
):
    """Train TFT on panel data.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel data consumed by make_tft_datasets.
    training_cutoff : int
        Time index separating training rows from validation rows.
    max_epochs : int
        Maximum Lightning training epochs.
    batch_size : int
        Dataloader batch size for both train and validation.
    learning_rate, hidden_size, attention_head_size, dropout
        TemporalFusionTransformer hyperparameters.

    Returns
    -------
    TFTForecaster
        self, with ``self.model`` and ``self._trainer`` populated.
    """
    # Deferred import of the optional pytorch-forecasting dependency.
    pl, TemporalFusionTransformer, QuantileLoss = self._imports()

    training, validation = make_tft_datasets(
        panel_df,
        training_cutoff=training_cutoff,
        encoder_length=self.encoder_length,
        prediction_length=self.prediction_length,
    )

    train_loader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_loader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # One output per configured quantile, trained with quantile loss.
    self.model = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=learning_rate,
        hidden_size=hidden_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        loss=QuantileLoss(self.quantiles),
        output_size=len(self.quantiles),
        reduce_on_plateau_patience=4,
    )

    # Quiet trainer: no logger or checkpoints, so fit() leaves no artifacts.
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        accelerator="auto",
        devices=1,
        logger=False,
        enable_checkpointing=False,
        enable_model_summary=False,
    )
    trainer.fit(self.model, train_loader, val_loader)
    self._trainer = trainer
    return self
load
load(checkpoint_path: str | Path)

Load a trained TFT checkpoint.

Source code in fplx/inference/tft.py
def load(self, checkpoint_path: str | Path):
    """Load a trained TFT checkpoint."""
    _, TemporalFusionTransformer, _ = self._imports()
    self.model = TemporalFusionTransformer.load_from_checkpoint(str(checkpoint_path))
    return self
predict_gameweek
predict_gameweek(
    panel_df: DataFrame,
    target_gw: int,
    batch_size: int = 256,
) -> TFTQuantilePredictions

Predict quantiles for one target gameweek across all players.

Source code in fplx/inference/tft.py
def predict_gameweek(
    self,
    panel_df: pd.DataFrame,
    target_gw: int,
    batch_size: int = 256,
) -> TFTQuantilePredictions:
    """Predict quantiles for one target gameweek across all players.

    Parameters
    ----------
    panel_df : pd.DataFrame
        Panel data covering history up to ``target_gw`` (rows with
        ``time_idx`` beyond the target are dropped).
    target_gw : int
        Gameweek (time index) to forecast.
    batch_size : int
        Prediction dataloader batch size.

    Returns
    -------
    TFTQuantilePredictions
        Per-player p10/p50/p90 forecasts for ``target_gw``.

    Raises
    ------
    RuntimeError
        If no model is trained/loaded, the prediction output is empty,
        or the player IDs cannot be recovered from the output.
    """
    if self.model is None:
        raise RuntimeError("Model is not trained/loaded.")

    # Cutoff just before the target so the target gameweek lands in the
    # prediction split.
    training, prediction = make_tft_datasets(
        panel_df[panel_df["time_idx"] <= target_gw].copy(),
        training_cutoff=target_gw - 1,
        encoder_length=self.encoder_length,
        prediction_length=self.prediction_length,
    )

    _ = training  # required for consistent schema creation in from_dataset
    pred_loader = prediction.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

    # Quantile output shape: [n_samples, prediction_length, n_quantiles]
    pred_out = self.model.predict(
        pred_loader,
        mode="quantiles",
        return_x=True,
        return_index=True,
    )

    preds = None
    x = None
    index_df = None

    # Normalize predict() output: accept a named-tuple-like object, a plain
    # tuple, or a bare tensor (the shape varies between library versions).
    if hasattr(pred_out, "output"):
        preds = pred_out.output
        x = getattr(pred_out, "x", None)
        index_df = getattr(pred_out, "index", None)
    elif isinstance(pred_out, tuple):
        if len(pred_out) >= 1:
            preds = pred_out[0]
        if len(pred_out) >= 2:
            x = pred_out[1]
        if len(pred_out) >= 3:
            index_df = pred_out[2]
    else:
        preds = pred_out

    if preds is None:
        raise RuntimeError("TFT prediction output is empty.")

    q = preds.detach().cpu().numpy()
    q = q[:, 0, :]  # one-step forecast

    # Recover sample player ids from prediction index when available.
    if index_df is not None and "group_id" in index_df.columns:
        player_ids = index_df["group_id"].astype(int).to_numpy()
    elif x is not None and "groups" in x:
        groups = x["groups"].detach().cpu().numpy()
        player_ids = groups[:, 0].astype(int)
    else:
        raise RuntimeError("Unable to recover TFT sample player IDs from prediction output.")

    # Deduplicate by keeping last sample for each player in case of overlap.
    p10, p50, p90 = {}, {}, {}
    for pid, row in zip(player_ids, q, strict=False):
        p10[pid] = float(row[0])
        p50[pid] = float(row[1])
        p90[pid] = float(row[2])

    return TFTQuantilePredictions(p10=p10, p50=p50, p90=p90)

models

Machine learning models for FPL prediction.

BaselineModel

BaselineModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaseModel

Baseline model using simple heuristics.

Methods: - Rolling average of points - Weighted recent form - Form-based prediction

Initialize baseline model.

PARAMETER DESCRIPTION
method

Prediction method: 'rolling_mean', 'ewma', 'last_value'

TYPE: str DEFAULT: 'rolling_mean'

window

Window size for rolling calculations

TYPE: int DEFAULT: 5

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """Configure the baseline heuristic.

    Parameters
    ----------
    method : str
        One of 'rolling_mean', 'ewma', 'last_value'.
    window : int
        Rolling-window length.
    """
    self.method = method
    self.window = window
    # Filled by batch_predict(): player id -> predicted points.
    self.predictions = {}
fit
fit(X, y=None)

Fit the model (no-op for baseline).

Source code in fplx/models/baseline.py
def fit(self, X, y=None):
    """No-op: baseline heuristics need no training. Returns self."""
    return self
predict
predict(X: DataFrame) -> float

Predict next gameweek points for a player.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Predict next gameweek points for a player.

    Parameters
    ----------
    X : pd.DataFrame
        Player historical data; must contain a 'points' column.

    Returns
    -------
    float
        Predicted points (0.0 when no usable history).
    """
    if X.empty or "points" not in X.columns:
        return 0.0

    history = X["points"]
    method = self.method

    if method == "ewma":
        return self._ewma(history)
    if method == "last_value":
        return history.iloc[-1]
    if method != "rolling_mean":
        # Unrecognized method: warn and fall back to the default.
        logger.warning(f"Unknown method {self.method}, using rolling_mean")
    return self._rolling_mean(history)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Predict for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of predictions

Source code in fplx/models/baseline.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Predict next-gameweek points for every player.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Mapping of player ID to that player's historical data.

    Returns
    -------
    dict[str, float]
        Per-player predictions; also cached on self.predictions.
    """
    results = {pid: self.predict(frame) for pid, frame in players_data.items()}
    self.predictions = results
    return results

EnsembleModel

EnsembleModel(
    models: list, weights: Optional[list[float]] = None
)

Ensemble combining multiple models with weighted averaging.

PARAMETER DESCRIPTION
models

List of model instances

TYPE: list

weights

Weights for each model (must sum to 1)

TYPE: Optional[list[float]] DEFAULT: None

Source code in fplx/models/ensemble.py
def __init__(self, models: list, weights: Optional[list[float]] = None):
    """Store member models and their (validated) mixing weights.

    Parameters
    ----------
    models : list
        Model instances, each exposing predict(player_data).
    weights : Optional[list[float]]
        Per-model weights summing to 1; equal weights when omitted.

    Raises
    ------
    ValueError
        If the weight count mismatches the models, or weights do not
        sum to 1.
    """
    self.models = models

    if weights is None:
        # Equal weights
        uniform = 1.0 / len(models)
        self.weights = [uniform] * len(models)
        return
    if len(weights) != len(models):
        raise ValueError("Number of weights must match number of models")
    if not np.isclose(sum(weights), 1.0):
        raise ValueError("Weights must sum to 1")
    self.weights = weights
predict
predict(player_data: DataFrame) -> float

Ensemble prediction for a single player.

PARAMETER DESCRIPTION
player_data

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Ensemble prediction

Source code in fplx/models/ensemble.py
def predict(self, player_data: pd.DataFrame) -> float:
    """
    Weighted-average prediction across ensemble members.

    A member that raises contributes 0.0 (and is logged as a warning).

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data.

    Returns
    -------
    float
        Non-negative ensemble prediction.
    """
    member_preds = []
    for model in self.models:
        try:
            member_preds.append(model.predict(player_data))
        except Exception as e:
            logger.warning(f"Model {type(model).__name__} failed: {e}")
            member_preds.append(0.0)

    # Weighted average, clamped at zero.
    weighted = sum(p * w for p, w in zip(member_preds, self.weights))
    return max(0, weighted)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Ensemble predictions for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: Dict[str, DataFrame]

RETURNS DESCRIPTION
Dict[str, float]

Dictionary of ensemble predictions

Source code in fplx/models/ensemble.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Ensemble predictions for every player.

    Parameters
    ----------
    players_data : Dict[str, pd.DataFrame]
        Mapping of player ID to historical data.

    Returns
    -------
    Dict[str, float]
        Per-player ensemble predictions.
    """
    return {pid: self.predict(data) for pid, data in players_data.items()}

RegressionModel

RegressionModel(
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs
)

Bases: BaseModel

Machine learning regression model for FPL predictions.

Adapted from the MLSP project's regressor patterns.

PARAMETER DESCRIPTION
model_type

Type of model: 'ridge', 'xgboost', 'lightgbm'

TYPE: str DEFAULT: 'ridge'

initial_train_size

Size of initial training window

TYPE: int DEFAULT: 10

test_size

Forecast horizon

TYPE: int DEFAULT: 1

step

Rolling window step size

TYPE: int DEFAULT: 1

Source code in fplx/models/regression.py
def __init__(
    self,
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs,
):
    """Build the underlying estimator and rolling-CV splitter.

    Raises
    ------
    ImportError
        If the optional ML dependencies are not installed.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError(
            "sklearn, xgboost, or lightgbm not available. Install with: pip install fplx[ml]"
        )

    self.model_type = model_type
    self.cv = RollingCV(initial_train_size, test_size, step)
    self.model = self._create_model(model_type, **model_kwargs)
    # Rolling-CV artifacts, filled by fit_predict()/evaluate().
    self.predictions = []
    self.true_values = []
    self.feature_importance = None
    self.feature_names_ = None
fit
fit(X, y=None)

Fit the model.

Source code in fplx/models/regression.py
def fit(self, X, y=None):
    """Fit the wrapped estimator on X/y, remembering X's column order.

    The column order is reused by predict() to align inputs.
    """
    self.feature_names_ = list(X.columns)
    self.model.fit(X, y)
    return self
predict
predict(X)

Generate predictions.

Source code in fplx/models/regression.py
def predict(self, X):
    """Predict with the wrapped estimator, aligning X to training columns.

    Columns missing from X are filled with 0; extra columns are dropped.
    When no training columns are recorded, X is used as-is.
    """
    if not self.feature_names_:
        return self.model.predict(X)
    aligned = X.reindex(columns=self.feature_names_, fill_value=0)
    return self.model.predict(aligned)
fit_predict
fit_predict(
    y: Series, X: DataFrame, verbose: bool = False
) -> Series

Fit model and generate predictions using rolling CV.

PARAMETER DESCRIPTION
y

Target time series (points to predict)

TYPE: Series

X

Feature matrix

TYPE: DataFrame

verbose

Print progress

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Series

Predictions aligned with test indices

Source code in fplx/models/regression.py
def fit_predict(self, y: pd.Series, X: pd.DataFrame, verbose: bool = False) -> pd.Series:
    """
    Fit model and generate predictions using rolling CV.

    Each fold refits ``self.model`` on the training window (NaN rows
    dropped) and predicts the test window; predictions from all folds
    are concatenated. Also accumulates ``self.predictions`` and
    ``self.true_values`` for a later evaluate() call.

    Parameters
    ----------
    y : pd.Series
        Target time series (points to predict)
    X : pd.DataFrame
        Feature matrix
    verbose : bool
        Print progress

    Returns
    -------
    pd.Series
        Predictions aligned with test indices
    """
    X_vals = X.values
    y_vals = y.values

    # Reset accumulators so repeated calls don't mix folds.
    self.predictions = []
    self.true_values = []
    pred_indices = []

    for fold, (train_idx, test_idx) in enumerate(self.cv.split(X_vals)):
        X_train, X_test = X_vals[train_idx], X_vals[test_idx]
        y_train, y_test = y_vals[train_idx], y_vals[test_idx]

        # Handle NaN values
        # Keep only rows fully observed in both X and y.
        valid_train = ~np.isnan(X_train).any(axis=1) & ~np.isnan(y_train)
        if valid_train.sum() < 5:
            # Too little clean data to fit a meaningful model this fold.
            if verbose:
                logger.warning(f"Fold {fold}: insufficient valid training data")
            continue

        X_train_clean = X_train[valid_train]
        y_train_clean = y_train[valid_train]

        # Fit model
        self.model.fit(X_train_clean, y_train_clean)

        # Predict
        valid_test = ~np.isnan(X_test).any(axis=1)
        if not valid_test.any():
            continue

        X_test_clean = X_test[valid_test]
        y_pred = self.model.predict(X_test_clean)

        self.predictions.extend(y_pred)
        self.true_values.extend(y_test[valid_test])
        pred_indices.extend(test_idx[valid_test])

        if verbose:
            rmse = np.sqrt(mean_squared_error(y_test[valid_test], y_pred))
            logger.info(f"Fold {fold}: RMSE = {rmse:.3f}")

    return pd.Series(self.predictions, index=pred_indices, name="predicted_points")
predict_next
predict_next(X: DataFrame) -> float

Predict next value given features.

PARAMETER DESCRIPTION
X

Feature matrix (single row for next gameweek)

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/regression.py
def predict_next(self, X: pd.DataFrame) -> float:
    """
    Predict next value given features.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix (single row for next gameweek)

    Returns
    -------
    float
        Predicted points (never negative; 0.0 when X is empty or no
        model is available).
    """
    if X.empty or self.model is None:
        return 0.0

    X_vals = X.values
    if np.isnan(X_vals).any():
        # Impute missing features with 0.0 (the previous comment claimed
        # mean imputation, but nan_to_num substitutes zeros).
        X_vals = np.nan_to_num(X_vals, nan=0.0)

    pred = self.model.predict(X_vals)
    # Clamp at zero and cast so callers get a plain Python float,
    # matching the annotated return type.
    return float(max(0, pred[0]))
get_feature_importance
get_feature_importance(
    feature_names: list[str],
) -> DataFrame

Get feature importance (for tree-based models).

PARAMETER DESCRIPTION
feature_names

Names of features

TYPE: list[str]

RETURNS DESCRIPTION
DataFrame

Feature importance scores

Source code in fplx/models/regression.py
def get_feature_importance(self, feature_names: list[str]) -> pd.DataFrame:
    """
    Feature importance (for tree-based models).

    Parameters
    ----------
    feature_names : list[str]
        Names of features, in training order.

    Returns
    -------
    pd.DataFrame
        Columns 'feature' and 'importance', sorted by importance
        descending; an empty frame for non-tree model types.
    """
    if self.model_type not in ("xgboost", "lightgbm"):
        logger.warning("Feature importance only available for tree-based models")
        return pd.DataFrame()
    scores = self.model.feature_importances_
    table = pd.DataFrame({
        "feature": feature_names,
        "importance": scores,
    })
    return table.sort_values("importance", ascending=False)
evaluate
evaluate() -> dict[str, float]

Evaluate model performance.

RETURNS DESCRIPTION
dict[str, float]

Dictionary of metrics

Source code in fplx/models/regression.py
def evaluate(self) -> dict[str, float]:
    """
    Evaluate rolling-CV performance accumulated by fit_predict().

    Returns
    -------
    dict[str, float]
        'rmse', 'mae', and 'n_predictions'; an empty dict when no
        predictions have been made yet.
    """
    if not self.predictions:
        return {}

    predictions = np.array(self.predictions)
    true_values = np.array(self.true_values)

    errors = true_values - predictions
    # Compute rmse directly with numpy (like mae below) instead of
    # sklearn's mean_squared_error, so evaluation is self-contained and
    # both metrics are derived the same way.
    rmse = float(np.sqrt(np.mean(errors**2)))
    mae = float(np.mean(np.abs(errors)))

    return {
        "rmse": rmse,
        "mae": mae,
        "n_predictions": len(predictions),
    }

RollingCV

RollingCV(
    initial_train_size: int, test_size: int, step: int = 1
)

Generates indices for rolling cross-validation splits.

This is adapted from the MLSP project for time-series validation.

PARAMETER DESCRIPTION
initial_train_size

Size of the initial training set.

TYPE: int

test_size

Size of the test set (forecast horizon).

TYPE: int

step

Step size to move the training window forward.

TYPE: int DEFAULT: 1

Source code in fplx/models/rolling_cv.py
def __init__(self, initial_train_size: int, test_size: int, step: int = 1):
    """Validate and store the rolling-split sizes.

    Raises
    ------
    ValueError
        If any of the sizes is not a positive integer.
    """
    if min(initial_train_size, test_size, step) <= 0:
        raise ValueError(
            "initial_train_size, test_size, and step must be positive integers."
        )
    self.initial_train_size = initial_train_size
    self.test_size = test_size
    self.step = step
split
split(X) -> Generator[tuple[ndarray, ndarray], None, None]

Generate indices to split data into training and test sets.

PARAMETER DESCRIPTION
X

Time series data.

TYPE: array-like

YIELDS DESCRIPTION
train_indices

The training set indices for that split.

TYPE: ndarray

test_indices

The testing set indices for that split.

TYPE: ndarray

Source code in fplx/models/rolling_cv.py
def split(self, X) -> Generator[tuple[np.ndarray, np.ndarray], None, None]:
    """
    Yield (train_indices, test_indices) pairs for rolling validation.

    Parameters
    ----------
    X : array-like
        Time series data.

    Yields
    ------
    train_indices : np.ndarray
        The training set indices for that split.
    test_indices : np.ndarray
        The testing set indices for that split.

    Raises
    ------
    ValueError
        If the data is too short for even one split.
    """
    n_samples = len(X)
    window = self.initial_train_size + self.test_size
    if window > n_samples:
        raise ValueError(
            "initial_train_size + test_size is larger than the number of samples."
        )

    # Slide the window forward by `step` until it no longer fits.
    for start in range(0, n_samples - window + 1, self.step):
        train_stop = start + self.initial_train_size
        yield (
            np.arange(start, train_stop),
            np.arange(train_stop, train_stop + self.test_size),
        )

baseline

Baseline heuristic models for FPL prediction.

BaselineModel
BaselineModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaseModel

Baseline model using simple heuristics.

Methods: - Rolling average of points - Weighted recent form - Form-based prediction

Initialize baseline model.

PARAMETER DESCRIPTION
method

Prediction method: 'rolling_mean', 'ewma', 'last_value'

TYPE: str DEFAULT: 'rolling_mean'

window

Window size for rolling calculations

TYPE: int DEFAULT: 5

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """Set up the prediction heuristic.

    Parameters
    ----------
    method : str
        Prediction method: 'rolling_mean', 'ewma', or 'last_value'.
    window : int
        Window size used by the rolling calculations.
    """
    self.method = method
    self.window = window
    # Cache of the most recent batch_predict() results.
    self.predictions = {}
fit
fit(X, y=None)

Fit the model (no-op for baseline).

Source code in fplx/models/baseline.py
def fit(self, X, y=None):
    """Baselines require no fitting; return self unchanged."""
    return self
predict
predict(X: DataFrame) -> float

Predict next gameweek points for a player.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Predict next gameweek points from historical data.

    Parameters
    ----------
    X : pd.DataFrame
        Player history with a 'points' column.

    Returns
    -------
    float
        Predicted points; 0.0 when the history is empty or unusable.
    """
    if X.empty or "points" not in X.columns:
        return 0.0

    series = X["points"]

    # Dispatch on the configured method; unknown methods fall back to
    # the rolling mean (with a warning).
    handlers = {
        "rolling_mean": self._rolling_mean,
        "ewma": self._ewma,
        "last_value": lambda s: s.iloc[-1],
    }
    handler = handlers.get(self.method)
    if handler is None:
        logger.warning(f"Unknown method {self.method}, using rolling_mean")
        handler = self._rolling_mean
    return handler(series)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Predict for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of predictions

Source code in fplx/models/baseline.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Run predict() for every player and cache the results.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Mapping of player ID to historical data.

    Returns
    -------
    dict[str, float]
        Player ID -> prediction; also kept on self.predictions.
    """
    out: dict[str, float] = {}
    for pid in players_data:
        out[pid] = self.predict(players_data[pid])
    self.predictions = out
    return out
FormBasedModel
FormBasedModel(
    method: str = "rolling_mean", window: int = 5
)

Bases: BaselineModel

Enhanced baseline using form indicators.

Source code in fplx/models/baseline.py
def __init__(self, method: str = "rolling_mean", window: int = 5):
    """Initialize with the same heuristic settings as BaselineModel.

    Parameters
    ----------
    method : str
        'rolling_mean', 'ewma', or 'last_value'.
    window : int
        Rolling-window length.
    """
    self.method = method
    self.window = window
    # Cache of the most recent batch_predict() results.
    self.predictions = {}
predict
predict(X: DataFrame) -> float

Predict based on form with adjustments.

PARAMETER DESCRIPTION
X

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/baseline.py
def predict(self, X: pd.DataFrame) -> float:
    """
    Form-adjusted prediction: the baseline estimate scaled by minutes
    and recent-trend indicators.

    Parameters
    ----------
    X : pd.DataFrame
        Player historical data.

    Returns
    -------
    float
        Non-negative predicted points.
    """
    if X.empty:
        return 0.0

    estimate = super().predict(X)
    recent = X.iloc[-1]

    # Players short of a full hour get a haircut.
    if "minutes" in recent and recent["minutes"] < 60:
        estimate *= 0.7

    # Reward a strong recent points trend, penalize a slump.
    if "points_trend_5" in recent:
        slope = recent["points_trend_5"]
        if slope > 0.5:
            estimate *= 1.1
        elif slope < -0.5:
            estimate *= 0.9

    return max(0, estimate)

ensemble

Ensemble models combining multiple predictors.

EnsembleModel
EnsembleModel(
    models: list, weights: Optional[list[float]] = None
)

Ensemble combining multiple models with weighted averaging.

PARAMETER DESCRIPTION
models

List of model instances

TYPE: list

weights

Weights for each model (must sum to 1)

TYPE: Optional[list[float]] DEFAULT: None

Source code in fplx/models/ensemble.py
def __init__(self, models: list, weights: Optional[list[float]] = None):
    """
    Build an ensemble over *models*.

    When *weights* is omitted, every model receives equal weight.
    Explicit weights must be one-per-model and sum to 1.
    """
    self.models = models

    if weights is not None:
        if len(weights) != len(models):
            raise ValueError("Number of weights must match number of models")
        if not np.isclose(sum(weights), 1.0):
            raise ValueError("Weights must sum to 1")
        self.weights = weights
    else:
        # Equal weights
        self.weights = [1.0 / len(models)] * len(models)
predict
predict(player_data: DataFrame) -> float

Ensemble prediction for a single player.

PARAMETER DESCRIPTION
player_data

Player historical data

TYPE: DataFrame

RETURNS DESCRIPTION
float

Ensemble prediction

Source code in fplx/models/ensemble.py
def predict(self, player_data: pd.DataFrame) -> float:
    """
    Ensemble prediction for a single player.

    Each member model is scored independently; a member that raises is
    logged and contributes 0.0 instead of aborting the ensemble.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data

    Returns
    -------
    float
        Weighted-average prediction, floored at zero
    """
    model_outputs = []
    for model in self.models:
        try:
            model_outputs.append(model.predict(player_data))
        except Exception as e:
            logger.warning(f"Model {type(model).__name__} failed: {e}")
            model_outputs.append(0.0)

    # Combine member outputs with the ensemble weights.
    weighted = sum(out * wt for out, wt in zip(model_outputs, self.weights))
    return max(0, weighted)
batch_predict
batch_predict(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Ensemble predictions for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID to their data

TYPE: Dict[str, DataFrame]

RETURNS DESCRIPTION
Dict[str, float]

Dictionary of ensemble predictions

Source code in fplx/models/ensemble.py
def batch_predict(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Ensemble predictions for multiple players.

    Parameters
    ----------
    players_data : Dict[str, pd.DataFrame]
        Dictionary mapping player ID to their data

    Returns
    -------
    Dict[str, float]
        Dictionary of ensemble predictions, one entry per input player
    """
    # Score every player independently with the single-player path.
    return {
        player_id: self.predict(data)
        for player_id, data in players_data.items()
    }
AdaptiveEnsemble
AdaptiveEnsemble(models: list, learning_rate: float = 0.1)

Bases: EnsembleModel

Adaptive ensemble that adjusts weights based on recent performance.

Source code in fplx/models/ensemble.py
def __init__(self, models: list, learning_rate: float = 0.1):
    # Start from the parent's equal-weight ensemble.
    super().__init__(models)
    # Step size for the smoothed weight update in update_weights().
    self.learning_rate = learning_rate
    # One error history per model; presumably appended to by callers as
    # predictions are scored (not visible here), then consumed by
    # update_weights().
    self.model_errors = [[] for _ in models]
update_weights
update_weights()

Update weights based on recent errors.

Source code in fplx/models/ensemble.py
def update_weights(self):
    """Update weights based on recent errors."""
    # Nothing to do until at least one model has recorded errors.
    if not any(self.model_errors):
        return

    # Average each model's most recent errors (up to the last five);
    # models with no recorded errors get a neutral score of 1.0.
    recent_avg = [
        np.mean(errs[-5:]) if errs else 1.0
        for errs in self.model_errors
    ]

    # Inverse-error weighting: smaller average error -> larger weight.
    inverse = [1.0 / (err + 1e-6) for err in recent_avg]
    norm = sum(inverse)
    target_weights = [val / norm for val in inverse]

    # Move the current weights toward the targets by the learning rate.
    lr = self.learning_rate
    self.weights = [
        (1 - lr) * current + lr * target
        for current, target in zip(self.weights, target_weights)
    ]

    # Renormalize so the weights sum to exactly one.
    total = sum(self.weights)
    self.weights = [w / total for w in self.weights]

regression

ML regression models for FPL prediction.

RegressionModel
RegressionModel(
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs
)

Bases: BaseModel

Machine learning regression model for FPL predictions.

Adapted from the MLSP project's regressor patterns.

PARAMETER DESCRIPTION
model_type

Type of model: 'ridge', 'xgboost', 'lightgbm'

TYPE: str DEFAULT: 'ridge'

initial_train_size

Size of initial training window

TYPE: int DEFAULT: 10

test_size

Forecast horizon

TYPE: int DEFAULT: 1

step

Rolling window step size

TYPE: int DEFAULT: 1

Source code in fplx/models/regression.py
def __init__(
    self,
    model_type: str = "ridge",
    initial_train_size: int = 10,
    test_size: int = 1,
    step: int = 1,
    **model_kwargs,
):
    """
    Initialize the regression model.

    Parameters
    ----------
    model_type : str
        Type of model: 'ridge', 'xgboost', 'lightgbm'.
    initial_train_size : int
        Size of the initial training window for rolling CV.
    test_size : int
        Forecast horizon for rolling CV.
    step : int
        Rolling window step size.
    **model_kwargs
        Extra keyword arguments forwarded to the underlying estimator.

    Raises
    ------
    ImportError
        If the optional ML dependencies are not installed.
    """
    if not SKLEARN_AVAILABLE:
        raise ImportError(
            "sklearn, xgboost, or lightgbm not available. Install with: pip install fplx[ml]"
        )

    self.model_type = model_type
    self.cv = RollingCV(initial_train_size, test_size, step)
    # Underlying estimator built by the factory -- presumably a sklearn /
    # xgboost / lightgbm regressor depending on model_type; see _create_model.
    self.model = self._create_model(model_type, **model_kwargs)
    # Populated by fit_predict during rolling cross-validation.
    self.predictions = []
    self.true_values = []
    self.feature_importance = None
    # Training column order, recorded by fit() and used by predict()
    # to realign incoming frames.
    self.feature_names_ = None
fit
fit(X, y=None)

Fit the model.

Source code in fplx/models/regression.py
def fit(self, X, y=None):
    """Fit the underlying estimator and remember the training columns."""
    # Record column order so predict() can realign incoming frames.
    self.feature_names_ = list(X.columns)
    self.model.fit(X, y)
    return self
predict
predict(X)

Generate predictions.

Source code in fplx/models/regression.py
def predict(self, X):
    """Generate predictions, realigning columns to the training schema."""
    if not self.feature_names_:
        # No recorded training columns; score the frame as-is.
        return self.model.predict(X)
    # Missing training columns are zero-filled; extra columns are dropped.
    aligned = X.reindex(columns=self.feature_names_, fill_value=0)
    return self.model.predict(aligned)
fit_predict
fit_predict(
    y: Series, X: DataFrame, verbose: bool = False
) -> Series

Fit model and generate predictions using rolling CV.

PARAMETER DESCRIPTION
y

Target time series (points to predict)

TYPE: Series

X

Feature matrix

TYPE: DataFrame

verbose

Print progress

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Series

Predictions aligned with test indices

Source code in fplx/models/regression.py
def fit_predict(self, y: pd.Series, X: pd.DataFrame, verbose: bool = False) -> pd.Series:
    """
    Fit model and generate predictions using rolling CV.

    Walks the rolling-window splits produced by ``self.cv``, refitting
    the estimator on each training window and predicting the test window.

    Parameters
    ----------
    y : pd.Series
        Target time series (points to predict)
    X : pd.DataFrame
        Feature matrix
    verbose : bool
        Print progress

    Returns
    -------
    pd.Series
        Predictions aligned with test indices
    """
    feature_matrix = X.values
    target = y.values

    # Reset accumulated state from any prior run.
    self.predictions = []
    self.true_values = []
    pred_indices = []

    for fold, (train_idx, test_idx) in enumerate(self.cv.split(feature_matrix)):
        X_train = feature_matrix[train_idx]
        X_test = feature_matrix[test_idx]
        y_train = target[train_idx]
        y_test = target[test_idx]

        # Drop rows with NaNs in either the features or the target.
        train_mask = ~np.isnan(X_train).any(axis=1) & ~np.isnan(y_train)
        if train_mask.sum() < 5:
            if verbose:
                logger.warning(f"Fold {fold}: insufficient valid training data")
            continue

        # Refit on the clean training window.
        self.model.fit(X_train[train_mask], y_train[train_mask])

        # Only score test rows with complete features.
        test_mask = ~np.isnan(X_test).any(axis=1)
        if not test_mask.any():
            continue

        y_pred = self.model.predict(X_test[test_mask])

        self.predictions.extend(y_pred)
        self.true_values.extend(y_test[test_mask])
        pred_indices.extend(test_idx[test_mask])

        if verbose:
            rmse = np.sqrt(mean_squared_error(y_test[test_mask], y_pred))
            logger.info(f"Fold {fold}: RMSE = {rmse:.3f}")

    return pd.Series(self.predictions, index=pred_indices, name="predicted_points")
predict_next
predict_next(X: DataFrame) -> float

Predict next value given features.

PARAMETER DESCRIPTION
X

Feature matrix (single row for next gameweek)

TYPE: DataFrame

RETURNS DESCRIPTION
float

Predicted points

Source code in fplx/models/regression.py
def predict_next(self, X: pd.DataFrame) -> float:
    """
    Predict next value given features.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix (single row for next gameweek)

    Returns
    -------
    float
        Predicted points, floored at zero
    """
    # Guard clauses: nothing to score without features or a fitted model.
    if X.empty or self.model is None:
        return 0.0

    features = X.values
    # Replace any NaNs with zeros before scoring.
    if np.isnan(features).any():
        features = np.nan_to_num(features, nan=0.0)

    forecast = self.model.predict(features)
    return max(0, forecast[0])
get_feature_importance
get_feature_importance(
    feature_names: list[str],
) -> DataFrame

Get feature importance (for tree-based models).

PARAMETER DESCRIPTION
feature_names

Names of features

TYPE: list[str]

RETURNS DESCRIPTION
DataFrame

Feature importance scores

Source code in fplx/models/regression.py
def get_feature_importance(self, feature_names: list[str]) -> pd.DataFrame:
    """
    Get feature importance (for tree-based models).

    Parameters
    ----------
    feature_names : list[str]
        Names of features

    Returns
    -------
    pd.DataFrame
        Feature importance scores, sorted descending; empty for
        non-tree models
    """
    # Only the tree-based estimators expose feature_importances_.
    if self.model_type not in ("xgboost", "lightgbm"):
        logger.warning("Feature importance only available for tree-based models")
        return pd.DataFrame()

    table = pd.DataFrame({
        "feature": feature_names,
        "importance": self.model.feature_importances_,
    })
    return table.sort_values("importance", ascending=False)
evaluate
evaluate() -> dict[str, float]

Evaluate model performance.

RETURNS DESCRIPTION
dict[str, float]

Dictionary of metrics

Source code in fplx/models/regression.py
def evaluate(self) -> dict[str, float]:
    """
    Evaluate model performance.

    Returns
    -------
    dict[str, float]
        Dictionary of metrics (rmse, mae, n_predictions); empty when no
        predictions have been made yet
    """
    if not self.predictions:
        return {}

    preds = np.array(self.predictions)
    actuals = np.array(self.true_values)

    return {
        "rmse": np.sqrt(mean_squared_error(actuals, preds)),
        "mae": np.mean(np.abs(actuals - preds)),
        "n_predictions": len(preds),
    }

rolling_cv

Rolling cross-validation for time-series models.

RollingCV
RollingCV(
    initial_train_size: int, test_size: int, step: int = 1
)

Generates indices for rolling cross-validation splits.

This is adapted from the MLSP project for time-series validation.

PARAMETER DESCRIPTION
initial_train_size

Size of the initial training set.

TYPE: int

test_size

Size of the test set (forecast horizon).

TYPE: int

step

Step size to move the training window forward.

TYPE: int DEFAULT: 1

Source code in fplx/models/rolling_cv.py
def __init__(self, initial_train_size: int, test_size: int, step: int = 1):
    """Validate and store the rolling-window parameters."""
    # Reject any non-positive parameter up front.
    for value in (initial_train_size, test_size, step):
        if value <= 0:
            raise ValueError(
                "initial_train_size, test_size, and step must be positive integers."
            )
    self.initial_train_size = initial_train_size
    self.test_size = test_size
    self.step = step
split
split(X) -> Generator[tuple[ndarray, ndarray], None, None]

Generate indices to split data into training and test sets.

PARAMETER DESCRIPTION
X

Time series data.

TYPE: array-like

YIELDS DESCRIPTION
train_indices

The training set indices for that split.

TYPE: ndarray

test_indices

The testing set indices for that split.

TYPE: ndarray

Source code in fplx/models/rolling_cv.py
def split(self, X) -> Generator[tuple[np.ndarray, np.ndarray], None, None]:
    """
    Generate indices to split data into training and test sets.

    Each yielded pair is a fixed-size training window followed
    immediately by the test window; the window then slides forward by
    ``step`` samples.

    Parameters
    ----------
    X : array-like
        Time series data.

    Yields
    ------
    train_indices : np.ndarray
        The training set indices for that split.
    test_indices : np.ndarray
        The testing set indices for that split.
    """
    n_samples = len(X)
    if self.initial_train_size + self.test_size > n_samples:
        raise ValueError(
            "initial_train_size + test_size is larger than the number of samples."
        )

    window_span = self.initial_train_size + self.test_size
    window_start = 0
    while window_start + window_span <= n_samples:
        split_point = window_start + self.initial_train_size
        yield (
            np.arange(window_start, split_point),
            np.arange(split_point, split_point + self.test_size),
        )
        window_start += self.step

selection

Squad selection and optimization.

BudgetConstraint

BudgetConstraint(max_budget: float = 100.0)

Budget constraint for FPL squad (applied to 15-player squad).

Source code in fplx/selection/constraints.py
def __init__(self, max_budget: float = 100.0):
    # Total purse available for the full 15-player squad, in FPL millions.
    self.max_budget = max_budget

FormationConstraints

Formation constraints for FPL squad.

Rules: - Exactly 11 players - 1 GK - 3-5 DEF - 2-5 MID - 1-3 FWD

validate classmethod
validate(players: list[Player]) -> bool

Check if squad satisfies formation constraints.

PARAMETER DESCRIPTION
players

List of players in squad

TYPE: list[Player]

RETURNS DESCRIPTION
bool

True if valid formation

Source code in fplx/selection/constraints.py
@classmethod
def validate(cls, players: list[Player]) -> bool:
    """
    Check if squad satisfies formation constraints.

    Parameters
    ----------
    players : list[Player]
        List of players in squad

    Returns
    -------
    bool
        True if valid formation
    """
    if len(players) != cls.TOTAL_PLAYERS:
        return False
    # Tally players per position, then check each count against its
    # configured (min, max) band.
    tally = {"GK": 0, "DEF": 0, "MID": 0, "FWD": 0}
    for member in players:
        tally[member.position] += 1
    return all(
        low <= tally[position] <= high
        for position, (low, high) in cls.POSITION_LIMITS.items()
    )
get_valid_formations classmethod
get_valid_formations() -> list[str]

Get list of valid formation strings.

RETURNS DESCRIPTION
List[str]

Valid formations (e.g., "3-4-3", "4-3-3")

Source code in fplx/selection/constraints.py
@classmethod
def get_valid_formations(cls) -> list[str]:
    """
    Get list of valid formation strings.

    Returns
    -------
    List[str]
        Valid formations (e.g., "3-4-3", "4-3-3")
    """
    # Enumerate every (DEF, MID, FWD) combination within the position
    # bands that totals the 10 outfield players.
    return [
        f"{d}-{m}-{f}"
        for d in range(3, 6)
        for m in range(2, 6)
        for f in range(1, 4)
        if d + m + f == 10
    ]

SquadQuotas

Position quotas for the 15-player FPL squad.

Rules: - 2 GK, 5 DEF, 5 MID, 3 FWD (exactly). - Total = 15 players.

TeamDiversityConstraint

TeamDiversityConstraint(max_from_team: int = 3)

Max players from same real-world team (default 3).

Source code in fplx/selection/constraints.py
def __init__(self, max_from_team: int = 3):
    # Cap on players drawn from a single real-world club (FPL rule: 3).
    self.max_from_team = max_from_team

LagrangianOptimizer

LagrangianOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
)

Lagrangian relaxation for the FPL squad selection ILP.

Relaxes the budget constraint into the objective:

L(lambda) = max_{x in X} sum_i (mu_i - lambda * c_i) * x_i + lambda * B

where X encodes squad size, position quotas, and team caps. The inner maximization decomposes: for each position, select the top-k players by modified score (mu_i - lambda * c_i).

The dual problem min_{lambda >= 0} L(lambda) is solved via subgradient ascent.

PARAMETER DESCRIPTION
budget

Total budget (default 100.0).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

max_iter

Maximum subgradient iterations.

TYPE: int DEFAULT: 200

tol

Convergence tolerance on duality gap.

TYPE: float DEFAULT: 0.01

risk_aversion

Mean-variance penalty (same as ILP).

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/lagrangian.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
):
    """
    Configure the Lagrangian relaxation solver.

    Parameters
    ----------
    budget : float
        Total budget; relaxed into the objective by solve().
    max_from_team : int
        Maximum players from the same club.
    max_iter : int
        Maximum subgradient iterations.
    tol : float
        Convergence tolerance on the relative duality gap.
    risk_aversion : float
        Mean-variance penalty coefficient (0 = risk-neutral).
    """
    self.budget = budget
    self.max_from_team = max_from_team
    self.max_iter = max_iter
    self.tol = tol
    self.risk_aversion = risk_aversion
solve
solve(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult

Solve via Lagrangian relaxation with subgradient ascent.

PARAMETER DESCRIPTION
players

TYPE: list[Player]

expected_points

TYPE: dict[int, float]

expected_variance

TYPE: dict[int, float] DEFAULT: None

best_known_primal

Best known primal objective (e.g., from ILP). Used for better step size computation.

TYPE: float DEFAULT: None

RETURNS DESCRIPTION
LagrangianResult
Source code in fplx/selection/lagrangian.py
def solve(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult:
    """
    Solve via Lagrangian relaxation with subgradient ascent.

    The budget constraint is relaxed into the objective with multiplier
    ``lam``; each iteration solves the decomposed inner problem, updates
    the best dual bound and best feasible primal solution, and adjusts
    ``lam`` with a Polyak-style step.

    Parameters
    ----------
    players : list[Player]
    expected_points : dict[int, float]
    expected_variance : dict[int, float], optional
    best_known_primal : float, optional
        Best known primal objective (e.g., from ILP).
        Used for better step size computation.

    Returns
    -------
    LagrangianResult
    """
    start_time = time.perf_counter()

    # Initialize lambda
    lam = 0.5  # initial budget multiplier
    best_dual = np.inf
    best_primal = -np.inf
    best_squad = None
    best_lineup = None

    # Step size parameters (Polyak-style)
    theta = 2.0
    theta_decay = 0.95
    no_improve_count = 0

    result = LagrangianResult()

    # Fix: `k` must exist even when max_iter == 0, since the
    # n_iterations bookkeeping below reads `k + 1` after the loop.
    k = -1
    for k in range(self.max_iter):
        # Compute modified scores
        scores = self._compute_modified_scores(players, expected_points, expected_variance, lam)

        # Solve inner problem
        squad, lineup = self._solve_inner(players, scores)

        # Dual objective: L(lambda) = sum scores*x + lambda*B
        inner_value = sum(scores[p.id] for p in lineup)
        dual_obj = inner_value + lam * self.budget

        # Primal objective (original, without lambda penalty)
        primal_obj = sum(expected_points.get(p.id, 0.0) for p in lineup)
        if self.risk_aversion > 0 and expected_variance:
            for p in lineup:
                primal_obj -= self.risk_aversion * np.sqrt(max(expected_variance.get(p.id, 0.0), 0.0))

        # Budget slack (subgradient)
        squad_cost = sum(p.price for p in squad)
        budget_slack = squad_cost - self.budget  # positive = over budget

        # Track best
        if dual_obj < best_dual:
            best_dual = dual_obj
            no_improve_count = 0
        else:
            no_improve_count += 1

        # Only count as feasible primal if budget satisfied
        if squad_cost <= self.budget + 0.01 and primal_obj > best_primal:
            best_primal = primal_obj
            best_squad = squad
            best_lineup = lineup

        # Record history
        result.dual_history.append(float(dual_obj))
        result.primal_history.append(float(primal_obj))
        result.lambda_history.append(float(lam))
        result.budget_slack_history.append(float(budget_slack))

        # Convergence check
        gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
        if gap < self.tol and best_primal > -np.inf:
            result.converged = True
            break

        # Step size (Polyak with target).
        # Fix: compare against None explicitly -- a caller-supplied
        # best_known_primal of 0.0 is a legitimate target and must not
        # be discarded as falsy.
        target = best_known_primal if best_known_primal is not None else best_primal
        step = 0.0 if abs(budget_slack) < 1e-08 else theta * (dual_obj - target) / budget_slack**2

        # Update lambda (projected onto lambda >= 0)
        lam = max(0.0, lam + step * budget_slack)

        # Decay step size if no improvement
        if no_improve_count >= 5:
            theta *= theta_decay
            no_improve_count = 0

    elapsed = time.perf_counter() - start_time

    # Build FullSquad from best feasible solution
    if best_squad and best_lineup and len(best_squad) == 15 and len(best_lineup) == 11:
        pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
        for p in best_lineup:
            if p.position in pos_counts:
                pos_counts[p.position] += 1
        formation = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

        ep_lineup = sum(expected_points.get(p.id, 0.0) for p in best_lineup)
        # Captain = highest expected points in the lineup.
        captain = max(best_lineup, key=lambda p: expected_points.get(p.id, 0.0))

        lineup_obj = Squad(
            players=best_lineup,
            formation=formation,
            total_cost=sum(p.price for p in best_lineup),
            expected_points=ep_lineup,
            captain=captain,
        )
        result.full_squad = FullSquad(squad_players=best_squad, lineup=lineup_obj)

    result.primal_objective = best_primal
    result.dual_bound = best_dual
    result.duality_gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
    result.n_iterations = k + 1
    result.solve_time = elapsed

    logger.info(
        "Lagrangian: %d iters, primal=%.1f, dual=%.1f, gap=%.2f%%, time=%.3fs",
        result.n_iterations,
        best_primal,
        best_dual,
        result.duality_gap * 100,
        elapsed,
    )

    return result

LagrangianResult dataclass

LagrangianResult(
    full_squad: Optional[FullSquad] = None,
    primal_objective: float = 0.0,
    dual_bound: float = 0.0,
    duality_gap: float = 0.0,
    n_iterations: int = 0,
    converged: bool = False,
    solve_time: float = 0.0,
    dual_history: list[float] = list(),
    primal_history: list[float] = list(),
    lambda_history: list[float] = list(),
    budget_slack_history: list[float] = list(),
)

Convergence diagnostics for the Lagrangian solver.

GreedyOptimizer

GreedyOptimizer(
    budget: float = 100.0, max_from_team: int = 3
)

Bases: BaseOptimizer

Greedy baseline: select best-value players per position.

Fast heuristic for comparison. Selects 15-player squad, then picks best 11 as lineup.

Source code in fplx/selection/optimizer.py
def __init__(self, budget: float = 100.0, max_from_team: int = 3):
    # Total purse for the 15-player squad, in FPL millions.
    self.budget = budget
    # Cap on players drawn from a single real-world club.
    self.max_from_team = max_from_team
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Greedy squad + lineup selection.

Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """Greedy squad + lineup selection."""
    # Annotate each player with expected points and a value ratio
    # (EP per price unit; price floored at 0.1 to avoid division by zero).
    for candidate in players:
        ep = expected_points.get(candidate.id, 0.0)
        candidate.expected_points = ep
        candidate._value = ep / max(candidate.price, 0.1)

    # Bucket candidates by position, best value first.
    pools: dict[str, list[Player]] = {"GK": [], "DEF": [], "MID": [], "FWD": []}
    for candidate in players:
        pools[candidate.position].append(candidate)
    for bucket in pools.values():
        bucket.sort(key=lambda p: p._value, reverse=True)

    # Fill the 15-player squad greedily, respecting the budget and the
    # per-club cap.
    quotas = {"GK": 2, "DEF": 5, "MID": 5, "FWD": 3}
    selected_squad: list[Player] = []
    team_counts: dict[str, int] = {}
    remaining = self.budget

    for pos in ("GK", "DEF", "MID", "FWD"):
        taken = 0
        for candidate in pools[pos]:
            if taken >= quotas[pos]:
                break
            if team_counts.get(candidate.team, 0) >= self.max_from_team:
                continue
            if candidate.price > remaining:
                continue
            selected_squad.append(candidate)
            team_counts[candidate.team] = team_counts.get(candidate.team, 0) + 1
            remaining -= candidate.price
            taken += 1

    if len(selected_squad) != 15:
        logger.warning("Greedy only picked %d squad players.", len(selected_squad))
        # Pad if needed (shouldn't happen with 600+ players)
        return self._fallback(selected_squad, expected_points)

    # Select best 11 from the 15
    lineup = self._select_lineup(selected_squad, expected_points, formation)
    return FullSquad(squad_players=selected_squad, lineup=lineup)

OptimizationResult dataclass

OptimizationResult(
    full_squad: FullSquad,
    objective_value: float = 0.0,
    solve_time: float = 0.0,
    lp_objective: Optional[float] = None,
    integrality_gap: Optional[float] = None,
    shadow_prices: dict = dict(),
    binding_constraints: list = list(),
)

Container for optimization outputs including duality analysis.

TwoLevelILPOptimizer

TwoLevelILPOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
)

Bases: BaseOptimizer

Two-level ILP: select 15-player squad then 11-player lineup jointly.

Supports risk-neutral and risk-averse (mean-variance) objectives. Also exposes LP relaxation for shadow price extraction.

PARAMETER DESCRIPTION
budget

Maximum total squad budget (applied to 15 players).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

risk_aversion

Lambda for mean-variance penalty. 0 = risk-neutral.

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/optimizer.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
):
    """
    Configure the two-level ILP optimizer.

    Parameters
    ----------
    budget : float
        Maximum total squad budget (applied to the 15-player squad).
    max_from_team : int
        Maximum players from the same club.
    risk_aversion : float
        Lambda for the mean-variance penalty. 0 = risk-neutral.

    Raises
    ------
    ImportError
        If the optional `pulp` solver package is not installed.
    """
    self.budget = budget
    self.max_from_team = max_from_team
    self.risk_aversion = risk_aversion

    # pulp is an optional dependency; import lazily here so the rest of
    # the package can be used without it installed.
    try:
        import pulp

        self.pulp = pulp
    except ImportError:
        raise ImportError("pulp required for ILP optimization. Install with: pip install pulp")
solve
solve(players, **kwargs)

Solve the optimization problem.

Source code in fplx/selection/optimizer.py
def solve(self, players, **kwargs):
    """Solve the optimization problem.

    Thin alias for optimize(); all keyword arguments are forwarded
    unchanged.
    """
    return self.optimize(players, **kwargs)
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Solve the two-level ILP.

PARAMETER DESCRIPTION
players

Available player pool.

TYPE: list[Player]

expected_points

E[P_i] per player.

TYPE: dict[int, float]

expected_variance

Var[P_i] per player.

TYPE: dict[int, float] DEFAULT: None

downside_risk

Downside spread per player. If provided, risk penalty uses this directly (instead of sqrt(variance)).

TYPE: dict[int, float] DEFAULT: None

formation

Not used (formation is optimized automatically).

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
FullSquad
Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """
    Solve the two-level ILP.

    Parameters
    ----------
    players : list[Player]
        Available player pool.
    expected_points : dict[int, float]
        E[P_i] per player.
    expected_variance : dict[int, float], optional
        Var[P_i] per player.
    downside_risk : dict[int, float], optional
        Downside spread per player. If provided, risk penalty uses this
        directly (instead of sqrt(variance)).
    formation : Optional[str]
        Not used (formation is optimized automatically).

    Returns
    -------
    FullSquad
    """
    import time

    start = time.perf_counter()
    # Build the joint squad (s) / lineup (x) integer program.
    prob, s_vars, x_vars = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=False,
    )
    # CBC with msg=0 suppresses solver output.
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    elapsed = time.perf_counter() - start

    # PuLP status 1 == Optimal; anything else is logged but we still
    # extract whatever (possibly partial) solution the solver holds.
    if prob.status != 1:
        logger.error("ILP solver did not find optimal solution (status=%d).", prob.status)

    # Extract solution: a binary variable is "selected" when its value
    # rounds to 1 (> 0.5 guards against solver tolerance noise).
    squad_players = [p for p in players if s_vars[p.id].varValue and s_vars[p.id].varValue > 0.5]
    lineup_players = [p for p in players if x_vars[p.id].varValue and x_vars[p.id].varValue > 0.5]

    # Determine formation from the lineup's outfield position counts.
    pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
    for p in lineup_players:
        if p.position in pos_counts:
            pos_counts[p.position] += 1
    formation_str = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

    # Captain = highest expected points
    for p in lineup_players:
        p.expected_points = expected_points.get(p.id, 0.0)
    captain = (
        max(lineup_players, key=lambda p: expected_points.get(p.id, 0.0)) if lineup_players else None
    )

    total_ep = sum(expected_points.get(p.id, 0.0) for p in lineup_players)
    lineup_cost = sum(p.price for p in lineup_players)

    lineup = Squad(
        players=lineup_players,
        formation=formation_str,
        total_cost=lineup_cost,
        expected_points=total_ep,
        captain=captain,
    )
    full_squad = FullSquad(squad_players=squad_players, lineup=lineup)

    logger.info("ILP solved in %.3fs. Formation: %s. EP: %.2f", elapsed, formation_str, total_ep)
    return full_squad
solve_lp_relaxation
solve_lp_relaxation(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult

Solve the LP relaxation and extract shadow prices.

RETURNS DESCRIPTION
OptimizationResult

Contains LP objective, shadow prices, binding constraints.

Source code in fplx/selection/optimizer.py
def solve_lp_relaxation(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult:
    """
    Solve the LP relaxation and extract shadow prices.

    Returns
    -------
    OptimizationResult
        Contains LP objective, shadow prices, binding constraints.
    """
    import time

    start = time.perf_counter()
    # Same model as optimize(), but with relax=True the binary
    # variables become continuous, making duals well-defined.
    prob, s_vars, x_vars = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=True,
    )
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    elapsed = time.perf_counter() - start

    lp_obj = self.pulp.value(prob.objective)

    # Extract shadow prices from constraints
    shadow_prices = {}
    binding = []
    for name, constraint in prob.constraints.items():
        slack = constraint.slack
        # PuLP: pi attribute gives the dual value for LP
        dual = constraint.pi if constraint.pi is not None else 0.0
        shadow_prices[name] = {
            "dual_value": dual,
            "slack": slack,
            # A constraint is binding when it holds with (near) equality.
            "binding": abs(slack) < 1e-6,
        }
        if abs(slack) < 1e-6:
            binding.append(name)

    # Also solve ILP to compute integrality gap.
    # NOTE(review): solve_time below covers only the LP solve, not this
    # extra ILP solve -- confirm that is intended.
    full_squad = self.optimize(players, expected_points, expected_variance, downside_risk)
    # NOTE(review): the ILP "objective" here is the lineup's expected
    # points, which may differ from the solver objective when
    # risk_aversion > 0 -- the reported gap would then be approximate.
    ilp_obj = full_squad.lineup.expected_points
    gap = (lp_obj - ilp_obj) / lp_obj if lp_obj > 0 else 0.0

    return OptimizationResult(
        full_squad=full_squad,
        objective_value=ilp_obj,
        solve_time=elapsed,
        lp_objective=lp_obj,
        integrality_gap=gap,
        shadow_prices=shadow_prices,
        binding_constraints=binding,
    )

constraints

Constraints for squad selection.

SquadQuotas

Position quotas for the 15-player FPL squad.

Rules: - 2 GK, 5 DEF, 5 MID, 3 FWD (exactly). - Total = 15 players.

FormationConstraints

Formation constraints for FPL squad.

Rules: - Exactly 11 players - 1 GK - 3-5 DEF - 2-5 MID - 1-3 FWD

validate classmethod
validate(players: list[Player]) -> bool

Check if squad satisfies formation constraints.

PARAMETER DESCRIPTION
players

List of players in squad

TYPE: list[Player]

RETURNS DESCRIPTION
bool

True if valid formation

Source code in fplx/selection/constraints.py
@classmethod
def validate(cls, players: list[Player]) -> bool:
    """
    Check if squad satisfies formation constraints.

    Parameters
    ----------
    players : list[Player]
        List of players in squad

    Returns
    -------
    bool
        True if valid formation
    """
    if len(players) != cls.TOTAL_PLAYERS:
        return False
    # Tally players per position, then check each count against its
    # configured (min, max) band.
    tally = {"GK": 0, "DEF": 0, "MID": 0, "FWD": 0}
    for member in players:
        tally[member.position] += 1
    return all(
        low <= tally[position] <= high
        for position, (low, high) in cls.POSITION_LIMITS.items()
    )
get_valid_formations classmethod
get_valid_formations() -> list[str]

Get list of valid formation strings.

RETURNS DESCRIPTION
List[str]

Valid formations (e.g., "3-4-3", "4-3-3")

Source code in fplx/selection/constraints.py
@classmethod
def get_valid_formations(cls) -> list[str]:
    """
    Get list of valid formation strings.

    Returns
    -------
    List[str]
        Valid formations (e.g., "3-4-3", "4-3-3")
    """
    # Enumerate every DEF/MID/FWD split within the positional bounds;
    # the ten outfield players (keeper excluded) must sum to exactly 10.
    return [
        f"{n_def}-{n_mid}-{n_fwd}"
        for n_def in range(3, 6)
        for n_mid in range(2, 6)
        for n_fwd in range(1, 4)
        if n_def + n_mid + n_fwd == 10
    ]
BudgetConstraint
BudgetConstraint(max_budget: float = 100.0)

Budget constraint for FPL squad (applied to 15-player squad).

Source code in fplx/selection/constraints.py
def __init__(self, max_budget: float = 100.0):
    """Store the maximum total price allowed for the 15-player squad."""
    self.max_budget = max_budget
TeamDiversityConstraint
TeamDiversityConstraint(max_from_team: int = 3)

Max players from same real-world team (default 3).

Source code in fplx/selection/constraints.py
def __init__(self, max_from_team: int = 3):
    """Store the maximum number of players allowed from one real-world club."""
    self.max_from_team = max_from_team

lagrangian

Lagrangian dual decomposition for FPL squad selection.

Relaxes the budget constraint into the objective and solves via subgradient ascent. The inner problem decomposes into per-position sorting problems, each solvable in O(n log n).

This provides: - A dual upper bound on the ILP optimum - A near-optimal primal solution via rounding - Convergence diagnostics for the 18-660 report

LagrangianResult dataclass
LagrangianResult(
    full_squad: Optional[FullSquad] = None,
    primal_objective: float = 0.0,
    dual_bound: float = 0.0,
    duality_gap: float = 0.0,
    n_iterations: int = 0,
    converged: bool = False,
    solve_time: float = 0.0,
    dual_history: list[float] = list(),
    primal_history: list[float] = list(),
    lambda_history: list[float] = list(),
    budget_slack_history: list[float] = list(),
)

Convergence diagnostics for the Lagrangian solver.

LagrangianOptimizer
LagrangianOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
)

Lagrangian relaxation for the FPL squad selection ILP.

Relaxes the budget constraint into the objective:

L(lambda) = max_{x in X} sum_i (mu_i - lambda * c_i) * x_i + lambda * B

where X encodes squad size, position quotas, and team caps. The inner maximization decomposes: for each position, select the top-k players by modified score (mu_i - lambda * c_i).

The dual problem min_{lambda >= 0} L(lambda) is solved with a projected subgradient method (lambda is kept non-negative after each update).

PARAMETER DESCRIPTION
budget

Total budget (default 100.0).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

max_iter

Maximum subgradient iterations.

TYPE: int DEFAULT: 200

tol

Convergence tolerance on duality gap.

TYPE: float DEFAULT: 0.01

risk_aversion

Mean-variance penalty (same as ILP).

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/lagrangian.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    max_iter: int = 200,
    tol: float = 0.01,
    risk_aversion: float = 0.0,
):
    """Store solver settings; see the class docstring for parameter details."""
    self.budget = budget  # total budget B in the relaxed constraint
    self.max_from_team = max_from_team  # per-club cap enforced by the inner problem
    self.max_iter = max_iter  # subgradient iteration limit
    self.tol = tol  # relative duality-gap stopping tolerance
    self.risk_aversion = risk_aversion  # mean-variance penalty weight
solve
solve(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult

Solve via Lagrangian relaxation with subgradient ascent.

PARAMETER DESCRIPTION
players

TYPE: list[Player]

expected_points

TYPE: dict[int, float]

expected_variance

TYPE: dict[int, float] DEFAULT: None

best_known_primal

Best known primal objective (e.g., from ILP). Used for better step size computation.

TYPE: float DEFAULT: None

RETURNS DESCRIPTION
LagrangianResult
Source code in fplx/selection/lagrangian.py
def solve(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    best_known_primal: Optional[float] = None,
) -> LagrangianResult:
    """
    Solve via Lagrangian relaxation with subgradient ascent.

    Parameters
    ----------
    players : list[Player]
    expected_points : dict[int, float]
    expected_variance : dict[int, float], optional
    best_known_primal : float, optional
        Best known primal objective (e.g., from ILP).
        Used for better step size computation.

    Returns
    -------
    LagrangianResult
    """
    start_time = time.perf_counter()

    # Initialize lambda
    lam = 0.5  # initial budget multiplier
    best_dual = np.inf  # dual is minimized, so track the smallest L(lambda) seen
    best_primal = -np.inf  # best budget-feasible objective found so far
    best_squad = None
    best_lineup = None

    # Step size parameters (Polyak-style)
    theta = 2.0
    theta_decay = 0.95
    no_improve_count = 0

    result = LagrangianResult()

    # NOTE(review): requires max_iter >= 1 — `k` is read after the loop
    # and would be unbound if the loop body never runs.
    for k in range(self.max_iter):
        # Compute modified scores
        scores = self._compute_modified_scores(players, expected_points, expected_variance, lam)

        # Solve inner problem
        squad, lineup = self._solve_inner(players, scores)

        # Dual objective: L(lambda) = sum scores*x + lambda*B
        # NOTE(review): the inner value sums modified scores over the 11-man
        # lineup, while the relaxed budget below is the 15-man squad cost —
        # confirm _solve_inner prices these consistently.
        inner_value = sum(scores[p.id] for p in lineup)
        dual_obj = inner_value + lam * self.budget

        # Primal objective (original, without lambda penalty)
        primal_obj = sum(expected_points.get(p.id, 0.0) for p in lineup)
        if self.risk_aversion > 0 and expected_variance:
            for p in lineup:
                primal_obj -= self.risk_aversion * np.sqrt(max(expected_variance.get(p.id, 0.0), 0.0))

        # Budget slack (subgradient)
        squad_cost = sum(p.price for p in squad)
        budget_slack = squad_cost - self.budget  # positive = over budget

        # Track best
        if dual_obj < best_dual:
            best_dual = dual_obj
            no_improve_count = 0
        else:
            no_improve_count += 1

        # Only count as feasible primal if budget satisfied
        if squad_cost <= self.budget + 0.01 and primal_obj > best_primal:
            best_primal = primal_obj
            best_squad = squad
            best_lineup = lineup

        # Record history
        result.dual_history.append(float(dual_obj))
        result.primal_history.append(float(primal_obj))
        result.lambda_history.append(float(lam))
        result.budget_slack_history.append(float(budget_slack))

        # Convergence check
        gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
        if gap < self.tol and best_primal > -np.inf:
            result.converged = True
            break

        # Step size (Polyak with target)
        # NOTE(review): until a feasible primal is found, best_primal is -inf;
        # if best_known_primal is not supplied, the Polyak step below becomes
        # infinite — confirm callers always pass best_known_primal or that a
        # feasible iterate appears before the step is taken.
        target = best_known_primal if best_known_primal else best_primal
        step = 0.0 if abs(budget_slack) < 1e-08 else theta * (dual_obj - target) / budget_slack**2

        # Update lambda
        # Projection onto lambda >= 0 keeps the multiplier dual-feasible.
        lam = max(0.0, lam + step * budget_slack)

        # Decay step size if no improvement
        if no_improve_count >= 5:
            theta *= theta_decay
            no_improve_count = 0

    elapsed = time.perf_counter() - start_time

    # Build FullSquad from best feasible solution
    if best_squad and best_lineup and len(best_squad) == 15 and len(best_lineup) == 11:
        pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
        for p in best_lineup:
            if p.position in pos_counts:
                pos_counts[p.position] += 1
        formation = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

        ep_lineup = sum(expected_points.get(p.id, 0.0) for p in best_lineup)
        captain = max(best_lineup, key=lambda p: expected_points.get(p.id, 0.0))

        lineup_obj = Squad(
            players=best_lineup,
            formation=formation,
            total_cost=sum(p.price for p in best_lineup),
            expected_points=ep_lineup,
            captain=captain,
        )
        result.full_squad = FullSquad(squad_players=best_squad, lineup=lineup_obj)

    result.primal_objective = best_primal
    result.dual_bound = best_dual
    result.duality_gap = (best_dual - best_primal) / max(abs(best_dual), 1e-6)
    result.n_iterations = k + 1
    result.solve_time = elapsed

    logger.info(
        "Lagrangian: %d iters, primal=%.1f, dual=%.1f, gap=%.2f%%, time=%.3fs",
        result.n_iterations,
        best_primal,
        best_dual,
        result.duality_gap * 100,
        elapsed,
    )

    return result

optimizer

Squad optimization: two-level ILP, mean-variance, LP relaxation.

OptimizationResult dataclass
OptimizationResult(
    full_squad: FullSquad,
    objective_value: float = 0.0,
    solve_time: float = 0.0,
    lp_objective: Optional[float] = None,
    integrality_gap: Optional[float] = None,
    shadow_prices: dict = dict(),
    binding_constraints: list = list(),
)

Container for optimization outputs including duality analysis.

TwoLevelILPOptimizer
TwoLevelILPOptimizer(
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
)

Bases: BaseOptimizer

Two-level ILP: select 15-player squad then 11-player lineup jointly.

Supports risk-neutral and risk-averse (mean-variance) objectives. Also exposes LP relaxation for shadow price extraction.

PARAMETER DESCRIPTION
budget

Maximum total squad budget (applied to 15 players).

TYPE: float DEFAULT: 100.0

max_from_team

Maximum players from same club.

TYPE: int DEFAULT: 3

risk_aversion

Lambda for mean-variance penalty. 0 = risk-neutral.

TYPE: float DEFAULT: 0.0

Source code in fplx/selection/optimizer.py
def __init__(
    self,
    budget: float = 100.0,
    max_from_team: int = 3,
    risk_aversion: float = 0.0,
):
    self.budget = budget
    self.max_from_team = max_from_team
    self.risk_aversion = risk_aversion

    try:
        import pulp

        self.pulp = pulp
    except ImportError:
        raise ImportError("pulp required for ILP optimization. Install with: pip install pulp")
solve
solve(players, **kwargs)

Solve the optimization problem.

Source code in fplx/selection/optimizer.py
def solve(self, players, **kwargs):
    """Solve the optimization problem.

    Thin adapter for the BaseOptimizer interface: forwards all arguments
    unchanged to :meth:`optimize`.
    """
    return self.optimize(players, **kwargs)
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Solve the two-level ILP.

PARAMETER DESCRIPTION
players

Available player pool.

TYPE: list[Player]

expected_points

E[P_i] per player.

TYPE: dict[int, float]

expected_variance

Var[P_i] per player.

TYPE: dict[int, float] DEFAULT: None

downside_risk

Downside spread per player. If provided, risk penalty uses this directly (instead of sqrt(variance)).

TYPE: dict[int, float] DEFAULT: None

formation

Not used (formation is optimized automatically).

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
FullSquad
Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """
    Solve the two-level ILP.

    Parameters
    ----------
    players : list[Player]
        Available player pool.
    expected_points : dict[int, float]
        E[P_i] per player.
    expected_variance : dict[int, float], optional
        Var[P_i] per player.
    downside_risk : dict[int, float], optional
        Downside spread per player. If provided, risk penalty uses this
        directly (instead of sqrt(variance)).
    formation : Optional[str]
        Not used (formation is optimized automatically).

    Returns
    -------
    FullSquad
    """
    import time

    start = time.perf_counter()
    prob, s_vars, x_vars = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=False,
    )
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    elapsed = time.perf_counter() - start

    # Compare against the named PuLP status constant rather than the
    # magic number 1 (they are equal, but the name documents intent).
    if prob.status != self.pulp.LpStatusOptimal:
        logger.error("ILP solver did not find optimal solution (status=%d).", prob.status)

    # Extract solution: varValue can be None (unsolved) or carry float
    # noise, so guard with a 0.5 threshold on the binary variables.
    squad_players = [p for p in players if s_vars[p.id].varValue and s_vars[p.id].varValue > 0.5]
    lineup_players = [p for p in players if x_vars[p.id].varValue and x_vars[p.id].varValue > 0.5]

    # Derive the formation string from the lineup's outfield counts.
    pos_counts = {"DEF": 0, "MID": 0, "FWD": 0}
    for p in lineup_players:
        if p.position in pos_counts:
            pos_counts[p.position] += 1
    formation_str = f"{pos_counts['DEF']}-{pos_counts['MID']}-{pos_counts['FWD']}"

    # Captain = highest expected points (EP is also annotated onto each
    # lineup player for downstream display).
    for p in lineup_players:
        p.expected_points = expected_points.get(p.id, 0.0)
    captain = (
        max(lineup_players, key=lambda p: expected_points.get(p.id, 0.0)) if lineup_players else None
    )

    total_ep = sum(expected_points.get(p.id, 0.0) for p in lineup_players)
    lineup_cost = sum(p.price for p in lineup_players)

    lineup = Squad(
        players=lineup_players,
        formation=formation_str,
        total_cost=lineup_cost,
        expected_points=total_ep,
        captain=captain,
    )
    full_squad = FullSquad(squad_players=squad_players, lineup=lineup)

    logger.info("ILP solved in %.3fs. Formation: %s. EP: %.2f", elapsed, formation_str, total_ep)
    return full_squad
solve_lp_relaxation
solve_lp_relaxation(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult

Solve the LP relaxation and extract shadow prices.

RETURNS DESCRIPTION
OptimizationResult

Contains LP objective, shadow prices, binding constraints.

Source code in fplx/selection/optimizer.py
def solve_lp_relaxation(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    downside_risk: Optional[dict[int, float]] = None,
) -> OptimizationResult:
    """
    Solve the LP relaxation and extract shadow prices.

    Returns
    -------
    OptimizationResult
        Contains LP objective, shadow prices, binding constraints.
    """
    import time

    t0 = time.perf_counter()
    prob, _, _ = self._build_problem(
        players,
        expected_points,
        expected_variance,
        downside_risk,
        relax=True,
    )
    prob.solve(self.pulp.PULP_CBC_CMD(msg=0))
    lp_time = time.perf_counter() - t0

    lp_obj = self.pulp.value(prob.objective)

    # Shadow prices (constraint.pi) from the relaxed LP; a constraint is
    # binding when its slack is numerically zero.
    shadow_prices = {}
    binding = []
    for name, constraint in prob.constraints.items():
        slack = constraint.slack
        is_binding = abs(slack) < 1e-6
        shadow_prices[name] = {
            "dual_value": constraint.pi if constraint.pi is not None else 0.0,
            "slack": slack,
            "binding": is_binding,
        }
        if is_binding:
            binding.append(name)

    # Re-solve the integer program to measure the integrality gap.
    full_squad = self.optimize(players, expected_points, expected_variance, downside_risk)
    ilp_obj = full_squad.lineup.expected_points
    gap = (lp_obj - ilp_obj) / lp_obj if lp_obj > 0 else 0.0

    return OptimizationResult(
        full_squad=full_squad,
        objective_value=ilp_obj,
        solve_time=lp_time,
        lp_objective=lp_obj,
        integrality_gap=gap,
        shadow_prices=shadow_prices,
        binding_constraints=binding,
    )
GreedyOptimizer
GreedyOptimizer(
    budget: float = 100.0, max_from_team: int = 3
)

Bases: BaseOptimizer

Greedy baseline: select best-value players per position.

Fast heuristic for comparison. Selects 15-player squad, then picks best 11 as lineup.

Source code in fplx/selection/optimizer.py
def __init__(self, budget: float = 100.0, max_from_team: int = 3):
    """Store the squad budget and per-club cap used by the greedy fill."""
    self.budget = budget
    self.max_from_team = max_from_team
optimize
optimize(
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad

Greedy squad + lineup selection.

Source code in fplx/selection/optimizer.py
def optimize(
    self,
    players: list[Player],
    expected_points: dict[int, float],
    expected_variance: Optional[dict[int, float]] = None,
    formation: Optional[str] = None,
) -> FullSquad:
    """Greedy squad + lineup selection (value = expected points / price)."""
    # Annotate every player with EP and a points-per-cost value ratio.
    for player in players:
        pts = expected_points.get(player.id, 0.0)
        player.expected_points = pts
        player._value = pts / max(player.price, 0.1)

    # Bucket players by position, best value first.
    buckets: dict[str, list[Player]] = {"GK": [], "DEF": [], "MID": [], "FWD": []}
    for player in players:
        buckets[player.position].append(player)
    for bucket in buckets.values():
        bucket.sort(key=lambda pl: pl._value, reverse=True)

    # Fill the 15-man squad greedily, respecting the per-position quota,
    # the per-club cap, and the remaining budget.
    quotas = {"GK": 2, "DEF": 5, "MID": 5, "FWD": 3}
    squad: list[Player] = []
    per_team: dict[str, int] = {}
    budget_left = self.budget

    for pos in ("GK", "DEF", "MID", "FWD"):
        taken = 0
        for cand in buckets[pos]:
            if taken >= quotas[pos]:
                break
            if per_team.get(cand.team, 0) >= self.max_from_team:
                continue
            if cand.price > budget_left:
                continue
            squad.append(cand)
            per_team[cand.team] = per_team.get(cand.team, 0) + 1
            budget_left -= cand.price
            taken += 1

    if len(squad) != 15:
        logger.warning("Greedy only picked %d squad players.", len(squad))
        # Pad if needed (shouldn't happen with 600+ players)
        return self._fallback(squad, expected_points)

    # Select best 11 from the 15
    lineup = self._select_lineup(squad, expected_points, formation)
    return FullSquad(squad_players=squad, lineup=lineup)

signals

Signal generation modules for player scoring.

FixtureSignal

FixtureSignal(
    difficulty_ratings: Optional[dict[str, int]] = None,
)

Bases: BaseSignal

Generate signals based on fixture difficulty and schedule.

Initialize with team difficulty ratings.

PARAMETER DESCRIPTION
difficulty_ratings

Team strength ratings (1-5, higher = harder opponent)

TYPE: Optional[dict[str, int]] DEFAULT: None

Source code in fplx/signals/fixtures.py
def __init__(self, difficulty_ratings: Optional[dict[str, int]] = None):
    """
    Initialize with team difficulty ratings.

    Parameters
    ----------
    difficulty_ratings : Optional[dict[str, int]]
        Team strength ratings (1-5, higher = harder opponent)
    """
    # Falsy input (None or {}) normalizes to an empty mapping; lookups on
    # unknown teams later fall back to a neutral rating of 3.
    self.difficulty_ratings = difficulty_ratings or {}
generate_signal
generate_signal(data)

Generate fixture-based signal.

Source code in fplx/signals/fixtures.py
def generate_signal(self, data):
    """Generate fixture-based signal.

    Placeholder implementation: delegates to compute_fixture_advantage
    using the ``team``, ``upcoming_opponents`` and ``is_home`` entries
    of *data*.
    """
    team = data["team"]
    opponents = data["upcoming_opponents"]
    home_flags = data["is_home"]
    return self.compute_fixture_advantage(team, opponents, home_flags)
set_difficulty_ratings
set_difficulty_ratings(ratings: dict[str, int])

Set or update difficulty ratings.

PARAMETER DESCRIPTION
ratings

Team strength ratings

TYPE: Dict[str, int]

Source code in fplx/signals/fixtures.py
def set_difficulty_ratings(self, ratings: dict[str, int]):
    """
    Set or update difficulty ratings.

    Parameters
    ----------
    ratings : Dict[str, int]
        Team strength ratings
    """
    # Wholesale replacement — existing ratings are discarded, not merged.
    self.difficulty_ratings = ratings
compute_fixture_difficulty
compute_fixture_difficulty(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture difficulty score for upcoming games.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Difficulty score (lower = easier fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_difficulty(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture difficulty score for upcoming games.

    Parameters
    ----------
    team : str
        Player's team
    upcoming_opponents : list[str]
        List of upcoming opponent teams
    is_home : list[bool]
        Whether each fixture is home

    Returns
    -------
    float
        Difficulty score (lower = easier fixtures)
    """
    if not upcoming_opponents:
        return 3.0  # neutral when no fixtures are known

    adjusted = []
    for opponent, at_home in zip(upcoming_opponents, is_home):
        # Unknown opponents default to a neutral rating of 3.
        rating = self.difficulty_ratings.get(opponent, 3)

        # Home fixtures are slightly easier, away slightly harder,
        # clamped to the 1-5 rating scale.
        rating = max(1, rating - 0.5) if at_home else min(5, rating + 0.5)
        adjusted.append(rating)

    # Mean difficulty over the (opponent, venue) pairs.
    return sum(adjusted) / len(adjusted)
compute_fixture_advantage
compute_fixture_advantage(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture advantage (inverse of difficulty).

Higher score = easier fixtures = better for player.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Advantage score (0-1, higher = better fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_advantage(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture advantage (inverse of difficulty).

    Higher score = easier fixtures = better for player.

    Parameters
    ----------
    team : str
        Player's team
    upcoming_opponents : list[str]
        List of upcoming opponent teams
    is_home : list[bool]
        Whether each fixture is home

    Returns
    -------
    float
        Advantage score (0-1, higher = better fixtures)
    """
    difficulty = self.compute_fixture_difficulty(team, upcoming_opponents, is_home)

    # Map difficulty 1 (easiest) .. 5 (hardest) onto advantage
    # 1 (best) .. 0 (worst), then clamp to [0, 1].
    raw = (6 - difficulty) / 5
    return max(0, min(1, raw))
compute_fixture_congestion
compute_fixture_congestion(
    fixtures: DataFrame, team: str, days_window: int = 14
) -> float

Compute fixture congestion (number of games in short period).

PARAMETER DESCRIPTION
fixtures

Fixtures dataframe

TYPE: DataFrame

team

Team name

TYPE: str

days_window

Days to look ahead

TYPE: int DEFAULT: 14

RETURNS DESCRIPTION
float

Congestion score (0-1, higher = more congested)

Source code in fplx/signals/fixtures.py
def compute_fixture_congestion(
    self, fixtures: pd.DataFrame, team: str, days_window: int = 14
) -> float:
    """
    Compute fixture congestion (number of games in short period).

    Parameters
    ----------
    fixtures : pd.DataFrame
        Fixtures dataframe
    team : str
        Team name
    days_window : int
        Days to look ahead

    Returns
    -------
    float
        Congestion score (0-1, higher = more congested)
    """
    # Filter fixtures for the team (either the home or the away side).
    team_fixtures = fixtures[
        (fixtures["team_h"] == team) | (fixtures["team_a"] == team)
    ]

    if team_fixtures.empty:
        return 0.0

    # Count fixtures in window
    # NOTE(review): no date filtering happens here — every fixture for the
    # team in the frame is counted, so `days_window` only rescales the
    # normalization. Confirm callers pre-filter `fixtures` to the window.
    num_fixtures = len(team_fixtures)

    # Normalize: 1 game/week = 0, 3+ games/week = 1
    games_per_week = num_fixtures / (days_window / 7)
    congestion = min(1.0, (games_per_week - 1) / 2)

    # Clamp below at zero (may return the int 0 rather than 0.0).
    return max(0, congestion)
batch_compute_advantages
batch_compute_advantages(
    players_teams: dict[str, str],
    fixtures_data: dict[str, tuple],
) -> dict[str, float]

Compute fixture advantages for multiple players.

PARAMETER DESCRIPTION
players_teams

Mapping of player ID to team

TYPE: dict[str, str]

fixtures_data

Mapping of team to (opponents, is_home) tuples

TYPE: dict[str, tuple]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player fixture advantage scores

Source code in fplx/signals/fixtures.py
def batch_compute_advantages(
    self, players_teams: dict[str, str], fixtures_data: dict[str, tuple]
) -> dict[str, float]:
    """
    Compute fixture advantages for multiple players.

    Parameters
    ----------
    players_teams : dict[str, str]
        Mapping of player ID to team
    fixtures_data : dict[str, tuple]
        Mapping of team to (opponents, is_home) tuples

    Returns
    -------
    dict[str, float]
        Dictionary of player fixture advantage scores
    """
    scores: dict[str, float] = {}
    for player_id, team in players_teams.items():
        # Players whose team has no fixture data get a neutral 0.5.
        if team not in fixtures_data:
            scores[player_id] = 0.5
            continue
        opponents, home_flags = fixtures_data[team]
        scores[player_id] = self.compute_fixture_advantage(team, opponents, home_flags)
    return scores

NewsParser

Parse and interpret FPL news text into structured signals.

parse_availability
parse_availability(news_text: str) -> float

Parse availability from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Availability score (0-1)

Source code in fplx/signals/news.py
def parse_availability(self, news_text: str) -> float:
    """
    Parse availability from news text.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    float
        Availability score (0-1)
    """
    # No news is good news: fully available.
    if not news_text or not news_text.strip():
        return 1.0

    text = news_text.lower()

    # Ordered checks: hard absences first, then doubts, then good news.
    if any(re.search(pattern, text) for pattern in self.UNAVAILABLE_PATTERNS):
        return 0.0
    if any(re.search(pattern, text) for pattern in self.DOUBTFUL_PATTERNS):
        return 0.5
    if any(re.search(pattern, text) for pattern in self.POSITIVE_PATTERNS):
        return 0.9

    # Default: assume available if no negative signals
    return 1.0
parse_minutes_risk
parse_minutes_risk(news_text: str) -> float

Parse minutes risk from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Minutes risk score (0-1, higher = more risk)

Source code in fplx/signals/news.py
def parse_minutes_risk(self, news_text: str) -> float:
    """
    Parse minutes risk from news text.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    float
        Minutes risk score (0-1, higher = more risk)
    """
    # No news implies no known rotation or fitness concern.
    if not news_text or not news_text.strip():
        return 0.0

    text = news_text.lower()

    # Rotation talk is the strongest minutes-risk indicator.
    if any(re.search(pattern, text) for pattern in self.ROTATION_PATTERNS):
        return 0.7
    # A fitness doubt implies moderate risk of reduced minutes.
    if any(re.search(pattern, text) for pattern in self.DOUBTFUL_PATTERNS):
        return 0.3

    return 0.0
parse_confidence
parse_confidence(news_text: str) -> float

Estimate confidence in the parsed signal.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Confidence score (0-1)

Source code in fplx/signals/news.py
def parse_confidence(self, news_text: str) -> float:
    """
    Estimate confidence in the parsed signal.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    float
        Confidence score (0-1)
    """
    if not news_text or not news_text.strip():
        return 1.0  # High confidence when no news

    text = news_text.lower()

    # Map clarity cues to confidence tiers, most certain wording first.
    tiers = (
        (("ruled out", "confirmed", "definitely"), 0.9),
        (("likely", "expected", "should"), 0.7),
        (("maybe", "possible", "unclear"), 0.4),
    )
    for cues, score in tiers:
        if any(re.search(cue, text) for cue in cues):
            return score

    return 0.6  # Default medium confidence

NewsSignal

NewsSignal()

Bases: BaseSignal

Generate structured news signals for players.

Source code in fplx/signals/news.py
def __init__(self):
    # Single parser instance reused for every generate_signal call.
    self.parser = NewsParser()
generate_signal
generate_signal(news_text: str) -> dict[str, float]

Generate signal from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
dict[str, float]

Dictionary with availability, minutes_risk, confidence

Source code in fplx/signals/news.py
def generate_signal(self, news_text: str) -> dict[str, float]:
    """Generate signal from news text.

    Parameters
    ----------
    news_text : str
        News text

    Returns
    -------
    dict[str, float]
        Dictionary with availability, minutes_risk, confidence
    """
    parser = self.parser
    availability = parser.parse_availability(news_text)
    risk = parser.parse_minutes_risk(news_text)

    return {
        "availability": availability,
        "minutes_risk": risk,
        "confidence": parser.parse_confidence(news_text),
        # Net multiplier applied to expected points downstream.
        "adjustment_factor": availability * (1 - risk),
    }
batch_generate
batch_generate(
    news_dict: dict[str, str],
) -> dict[str, dict[str, float]]

Generate signals for multiple players.

PARAMETER DESCRIPTION
news_dict

Dictionary mapping player ID to news text

TYPE: dict[str, str]

RETURNS DESCRIPTION
dict[str, dict[str, float]]

Dictionary of player signals

Source code in fplx/signals/news.py
def batch_generate(self, news_dict: dict[str, str]) -> dict[str, dict[str, float]]:
    """
    Generate signals for multiple players.

    Parameters
    ----------
    news_dict : dict[str, str]
        Dictionary mapping player ID to news text

    Returns
    -------
    dict[str, dict[str, float]]
        Dictionary of player signals
    """
    # One structured signal per player, keyed identically to the input.
    return {
        player_id: self.generate_signal(news_text)
        for player_id, news_text in news_dict.items()
    }

StatsSignal

StatsSignal(weights: Optional[dict[str, float]] = None)

Generate performance signals from statistical data.

Combines multiple statistical indicators into a unified score.

Initialize with custom weights for different stats.

PARAMETER DESCRIPTION
weights

Weights for different statistics

TYPE: Optional[dict[str, float]] DEFAULT: None

Source code in fplx/signals/stats.py
def __init__(self, weights: Optional[dict[str, float]] = None):
    """
    Initialize with custom weights for different stats.

    Parameters
    ----------
    weights : Optional[dict[str, float]]
        Weights for different statistics; the built-in defaults are
        used when omitted or empty.
    """
    # Default weighting reflects the relative importance of recent
    # form, expected goal involvement, minutes reliability, and trend.
    default_weights = {
        "points_mean": 0.3,
        "xG_mean": 0.15,
        "xA_mean": 0.15,
        "minutes_consistency": 0.2,
        "form_trend": 0.2,
    }
    self.weights = weights if weights else default_weights
compute_signal
compute_signal(player_data: DataFrame) -> float

Compute aggregated signal score from player statistics.

PARAMETER DESCRIPTION
player_data

Player historical data with engineered features

TYPE: DataFrame

RETURNS DESCRIPTION
float

Aggregated signal score (0-100)

Source code in fplx/signals/stats.py
def compute_signal(self, player_data: pd.DataFrame) -> float:
    """
    Compute aggregated signal score from player statistics.

    The score is a weighted sum over the most recent row's engineered
    features (form, expected goal involvement, minutes reliability,
    and trend), floored at zero. Missing feature columns simply
    contribute nothing.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data with engineered features.

    Returns
    -------
    float
        Aggregated signal score (0-100).
    """
    if player_data.empty:
        return 0.0

    # Only the most recent observation feeds the score.
    latest = player_data.iloc[-1]
    weights = self.weights

    components = []

    # Recent points form, weighted directly.
    if "points_rolling_5_mean" in latest:
        components.append(
            latest["points_rolling_5_mean"] * weights["points_mean"]
        )

    # Expected goals, scaled up to roughly match the points range.
    if "xG_rolling_5_mean" in latest:
        components.append(latest["xG_rolling_5_mean"] * 10 * weights["xG_mean"])

    # Expected assists, same scaling as xG.
    if "xA_rolling_5_mean" in latest:
        components.append(latest["xA_rolling_5_mean"] * 10 * weights["xA_mean"])

    # Minutes reliability: more variation -> smaller contribution.
    if "minutes_consistency_5" in latest:
        steadiness = 1.0 / (1.0 + latest["minutes_consistency_5"])
        components.append(steadiness * 10 * weights["minutes_consistency"])

    # Only an upward form trend is rewarded; declines add nothing.
    if "points_trend_5" in latest:
        components.append(
            max(0, latest["points_trend_5"]) * 5 * weights["form_trend"]
        )

    return max(0, sum(components))
batch_compute
batch_compute(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Compute signals for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID/name to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player signals

Source code in fplx/signals/stats.py
def batch_compute(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Compute signals for multiple players.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Dictionary mapping player ID/name to their data.

    Returns
    -------
    dict[str, float]
        Signal score per player ID.
    """
    return {
        player_id: self.compute_signal(frame)
        for player_id, frame in players_data.items()
    }

fixtures

Fixture difficulty signals.

FixtureSignal
FixtureSignal(
    difficulty_ratings: Optional[dict[str, int]] = None,
)

Bases: BaseSignal

Generate signals based on fixture difficulty and schedule.

Initialize with team difficulty ratings.

PARAMETER DESCRIPTION
difficulty_ratings

Team strength ratings (1-5, higher = harder opponent)

TYPE: Optional[dict[str, int]] DEFAULT: None

Source code in fplx/signals/fixtures.py
def __init__(self, difficulty_ratings: Optional[dict[str, int]] = None):
    """
    Initialize with team difficulty ratings.

    Parameters
    ----------
    difficulty_ratings : Optional[dict[str, int]]
        Team strength ratings (1-5, higher = harder opponent); an
        empty mapping is stored when omitted.
    """
    # Unknown teams later fall back to a neutral rating of 3.
    if difficulty_ratings:
        self.difficulty_ratings = difficulty_ratings
    else:
        self.difficulty_ratings = {}
generate_signal
generate_signal(data)

Generate fixture-based signal.

Source code in fplx/signals/fixtures.py
def generate_signal(self, data):
    """Generate fixture-based signal.

    ``data`` must provide ``team``, ``upcoming_opponents`` and
    ``is_home`` entries; the signal is the fixture advantage score.
    """
    # This is a placeholder. The actual implementation would take
    # fixture data and compute a signal.
    team = data["team"]
    opponents = data["upcoming_opponents"]
    home_flags = data["is_home"]
    return self.compute_fixture_advantage(team, opponents, home_flags)
set_difficulty_ratings
set_difficulty_ratings(ratings: dict[str, int])

Set or update difficulty ratings.

PARAMETER DESCRIPTION
ratings

Team strength ratings

TYPE: Dict[str, int]

Source code in fplx/signals/fixtures.py
def set_difficulty_ratings(self, ratings: dict[str, int]):
    """
    Set or update difficulty ratings.

    Replaces any previously stored ratings wholesale (no merging);
    the mapping is stored by reference, not copied.

    Parameters
    ----------
    ratings : dict[str, int]
        Team strength ratings (1-5, higher = harder opponent).
    """
    self.difficulty_ratings = ratings
compute_fixture_difficulty
compute_fixture_difficulty(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture difficulty score for upcoming games.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Difficulty score (lower = easier fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_difficulty(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture difficulty score for upcoming games.

    Each opponent contributes its strength rating (neutral 3 when
    unknown), shifted half a point easier at home and half a point
    harder away (clamped to the 1-5 scale), then averaged.

    Parameters
    ----------
    team : str
        Player's team (not used in the calculation itself).
    upcoming_opponents : list[str]
        List of upcoming opponent teams.
    is_home : list[bool]
        Whether each fixture is home.

    Returns
    -------
    float
        Difficulty score (lower = easier fixtures).
    """
    # No fixtures known: report neutral difficulty.
    if not upcoming_opponents:
        return 3.0

    per_fixture = []
    for opponent, at_home in zip(upcoming_opponents, is_home):
        rating = self.difficulty_ratings.get(opponent, 3)

        # Home fixtures are slightly easier, away slightly harder.
        if at_home:
            rating = max(1, rating - 0.5)
        else:
            rating = min(5, rating + 0.5)

        per_fixture.append(rating)

    return sum(per_fixture) / len(per_fixture)
compute_fixture_advantage
compute_fixture_advantage(
    team: str,
    upcoming_opponents: list[str],
    is_home: list[bool],
) -> float

Compute fixture advantage (inverse of difficulty).

Higher score = easier fixtures = better for player.

PARAMETER DESCRIPTION
team

Player's team

TYPE: str

upcoming_opponents

List of upcoming opponent teams

TYPE: list[str]

is_home

Whether each fixture is home

TYPE: list[bool]

RETURNS DESCRIPTION
float

Advantage score (0-1, higher = better fixtures)

Source code in fplx/signals/fixtures.py
def compute_fixture_advantage(
    self, team: str, upcoming_opponents: list[str], is_home: list[bool]
) -> float:
    """
    Compute fixture advantage (inverse of difficulty).

    Higher score = easier fixtures = better for player.

    Parameters
    ----------
    team : str
        Player's team.
    upcoming_opponents : list[str]
        List of upcoming opponent teams.
    is_home : list[bool]
        Whether each fixture is home.

    Returns
    -------
    float
        Advantage score (0-1, higher = better fixtures).
    """
    difficulty = self.compute_fixture_difficulty(team, upcoming_opponents, is_home)

    # Map difficulty 1 (easiest) -> 1.0 down to difficulty 5 -> 0.2,
    # then clamp the result into [0, 1].
    raw_advantage = (6 - difficulty) / 5
    return min(1, max(0, raw_advantage))
compute_fixture_congestion
compute_fixture_congestion(
    fixtures: DataFrame, team: str, days_window: int = 14
) -> float

Compute fixture congestion (number of games in short period).

PARAMETER DESCRIPTION
fixtures

Fixtures dataframe

TYPE: DataFrame

team

Team name

TYPE: str

days_window

Days to look ahead

TYPE: int DEFAULT: 14

RETURNS DESCRIPTION
float

Congestion score (0-1, higher = more congested)

Source code in fplx/signals/fixtures.py
def compute_fixture_congestion(
    self, fixtures: pd.DataFrame, team: str, days_window: int = 14
) -> float:
    """
    Compute fixture congestion (number of games in short period).

    NOTE(review): the fixtures frame is filtered by team only —
    ``days_window`` is used purely as the normalization denominator,
    so callers must pre-filter ``fixtures`` to the window of interest.

    Parameters
    ----------
    fixtures : pd.DataFrame
        Fixtures dataframe with ``team_h`` / ``team_a`` columns.
    team : str
        Team name.
    days_window : int
        Days to look ahead.

    Returns
    -------
    float
        Congestion score (0-1, higher = more congested).
    """
    # Keep only fixtures where the team plays, home or away.
    involves_team = (fixtures["team_h"] == team) | (fixtures["team_a"] == team)
    team_fixtures = fixtures[involves_team]

    if team_fixtures.empty:
        return 0.0

    # Normalize: 1 game/week -> 0, 3+ games/week -> 1.
    weeks = days_window / 7
    games_per_week = len(team_fixtures) / weeks
    congestion = (games_per_week - 1) / 2

    return max(0, min(1.0, congestion))
batch_compute_advantages
batch_compute_advantages(
    players_teams: dict[str, str],
    fixtures_data: dict[str, tuple],
) -> dict[str, float]

Compute fixture advantages for multiple players.

PARAMETER DESCRIPTION
players_teams

Mapping of player ID to team

TYPE: dict[str, str]

fixtures_data

Mapping of team to (opponents, is_home) tuples

TYPE: dict[str, tuple]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player fixture advantage scores

Source code in fplx/signals/fixtures.py
def batch_compute_advantages(
    self, players_teams: dict[str, str], fixtures_data: dict[str, tuple]
) -> dict[str, float]:
    """
    Compute fixture advantages for multiple players.

    Players whose team has no entry in *fixtures_data* receive a
    neutral 0.5.

    Parameters
    ----------
    players_teams : dict[str, str]
        Mapping of player ID to team.
    fixtures_data : dict[str, tuple]
        Mapping of team to (opponents, is_home) tuples.

    Returns
    -------
    dict[str, float]
        Dictionary of player fixture advantage scores.
    """
    advantages = {}

    for player_id, team in players_teams.items():
        if team not in fixtures_data:
            # No schedule known for this team: assume neutral fixtures.
            advantages[player_id] = 0.5
            continue

        opponents, is_home = fixtures_data[team]
        advantages[player_id] = self.compute_fixture_advantage(
            team, opponents, is_home
        )

    return advantages

news

News and injury signal processing.

NewsParser

Parse and interpret FPL news text into structured signals.

parse_availability
parse_availability(news_text: str) -> float

Parse availability from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Availability score (0-1)

Source code in fplx/signals/news.py
def parse_availability(self, news_text: str) -> float:
    """
    Parse availability from news text.

    Pattern groups are checked in priority order: unavailable (0.0),
    doubtful (0.5), positive (0.9). Blank news or no match at all
    means fully available (1.0).

    Parameters
    ----------
    news_text : str
        News text.

    Returns
    -------
    float
        Availability score (0-1).
    """
    # Blank news means no concerns were reported.
    if not news_text or not news_text.strip():
        return 1.0

    text_lower = news_text.lower()

    # Most-severe patterns win; stop at the first matching group.
    graded_patterns = (
        (self.UNAVAILABLE_PATTERNS, 0.0),
        (self.DOUBTFUL_PATTERNS, 0.5),
        (self.POSITIVE_PATTERNS, 0.9),
    )
    for patterns, score in graded_patterns:
        if any(re.search(p, text_lower) for p in patterns):
            return score

    # Default: assume available if no negative signals.
    return 1.0
parse_minutes_risk
parse_minutes_risk(news_text: str) -> float

Parse minutes risk from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Minutes risk score (0-1, higher = more risk)

Source code in fplx/signals/news.py
def parse_minutes_risk(self, news_text: str) -> float:
    """
    Parse minutes risk from news text.

    Rotation warnings score 0.7, doubtful wording 0.3; blank or
    unremarkable news scores 0.0.

    Parameters
    ----------
    news_text : str
        News text.

    Returns
    -------
    float
        Minutes risk score (0-1, higher = more risk).
    """
    if not news_text or not news_text.strip():
        return 0.0

    text_lower = news_text.lower()

    # Rotation threat outranks mere doubtfulness.
    tiers = (
        (self.ROTATION_PATTERNS, 0.7),
        (self.DOUBTFUL_PATTERNS, 0.3),
    )
    for patterns, risk in tiers:
        if any(re.search(p, text_lower) for p in patterns):
            return risk

    return 0.0
parse_confidence
parse_confidence(news_text: str) -> float

Estimate confidence in the parsed signal.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
float

Confidence score (0-1)

Source code in fplx/signals/news.py
def parse_confidence(self, news_text: str) -> float:
    """
    Estimate confidence in the parsed signal.

    Definitive wording scores 0.9, probable wording 0.7, speculative
    wording 0.4; anything else gets a medium 0.6. Absent news is
    high confidence (1.0) that nothing is wrong.

    Parameters
    ----------
    news_text : str
        News text.

    Returns
    -------
    float
        Confidence score (0-1).
    """
    if not news_text or not news_text.strip():
        return 1.0  # High confidence when no news

    text_lower = news_text.lower()

    # Tiers ordered from strongest to weakest wording.
    confidence_tiers = (
        (("ruled out", "confirmed", "definitely"), 0.9),
        (("likely", "expected", "should"), 0.7),
        (("maybe", "possible", "unclear"), 0.4),
    )
    for phrases, score in confidence_tiers:
        if any(re.search(p, text_lower) for p in phrases):
            return score

    return 0.6  # Default medium confidence
NewsSignal
NewsSignal()

Bases: BaseSignal

Generate structured news signals for players.

Source code in fplx/signals/news.py
def __init__(self):
    """Create the signal generator with its own news parser."""
    # Single NewsParser instance reused for every generate_signal call.
    self.parser = NewsParser()
generate_signal
generate_signal(news_text: str) -> dict[str, float]

Generate signal from news text.

PARAMETER DESCRIPTION
news_text

News text

TYPE: str

RETURNS DESCRIPTION
dict[str, float]

Dictionary with availability, minutes_risk, confidence

Source code in fplx/signals/news.py
def generate_signal(self, news_text: str) -> dict[str, float]:
    """Turn a raw news snippet into a structured player signal.

    Parameters
    ----------
    news_text : str
        Raw news text for a single player.

    Returns
    -------
    dict[str, float]
        Keys ``availability``, ``minutes_risk``, ``confidence``, and
        ``adjustment_factor`` (availability discounted by minutes risk).
    """
    parser = self.parser
    signal = {
        "availability": parser.parse_availability(news_text),
        "minutes_risk": parser.parse_minutes_risk(news_text),
        "confidence": parser.parse_confidence(news_text),
    }
    # Combined multiplier applied to expected points downstream.
    signal["adjustment_factor"] = signal["availability"] * (
        1 - signal["minutes_risk"]
    )
    return signal
batch_generate
batch_generate(
    news_dict: dict[str, str],
) -> dict[str, dict[str, float]]

Generate signals for multiple players.

PARAMETER DESCRIPTION
news_dict

Dictionary mapping player ID to news text

TYPE: dict[str, str]

RETURNS DESCRIPTION
dict[str, dict[str, float]]

Dictionary of player signals

Source code in fplx/signals/news.py
def batch_generate(self, news_dict: dict[str, str]) -> dict[str, dict[str, float]]:
    """Generate a news signal for every player in *news_dict*.

    Parameters
    ----------
    news_dict : dict[str, str]
        Maps player ID to that player's news text.

    Returns
    -------
    dict[str, dict[str, float]]
        Per-player signal dictionaries keyed by player ID.
    """
    return {
        player_id: self.generate_signal(text)
        for player_id, text in news_dict.items()
    }

stats

Statistical performance signals.

StatsSignal
StatsSignal(weights: Optional[dict[str, float]] = None)

Generate performance signals from statistical data.

Combines multiple statistical indicators into a unified score.

Initialize with custom weights for different stats.

PARAMETER DESCRIPTION
weights

Weights for different statistics

TYPE: Optional[dict[str, float]] DEFAULT: None

Source code in fplx/signals/stats.py
def __init__(self, weights: Optional[dict[str, float]] = None):
    """
    Initialize with custom weights for different stats.

    Parameters
    ----------
    weights : Optional[dict[str, float]]
        Weights for different statistics; the built-in defaults are
        used when omitted or empty.
    """
    # Default weighting reflects the relative importance of recent
    # form, expected goal involvement, minutes reliability, and trend.
    default_weights = {
        "points_mean": 0.3,
        "xG_mean": 0.15,
        "xA_mean": 0.15,
        "minutes_consistency": 0.2,
        "form_trend": 0.2,
    }
    self.weights = weights if weights else default_weights
compute_signal
compute_signal(player_data: DataFrame) -> float

Compute aggregated signal score from player statistics.

PARAMETER DESCRIPTION
player_data

Player historical data with engineered features

TYPE: DataFrame

RETURNS DESCRIPTION
float

Aggregated signal score (0-100)

Source code in fplx/signals/stats.py
def compute_signal(self, player_data: pd.DataFrame) -> float:
    """
    Compute aggregated signal score from player statistics.

    The score is a weighted sum over the most recent row's engineered
    features (form, expected goal involvement, minutes reliability,
    and trend), floored at zero. Missing feature columns simply
    contribute nothing.

    Parameters
    ----------
    player_data : pd.DataFrame
        Player historical data with engineered features.

    Returns
    -------
    float
        Aggregated signal score (0-100).
    """
    if player_data.empty:
        return 0.0

    # Only the most recent observation feeds the score.
    latest = player_data.iloc[-1]
    weights = self.weights

    components = []

    # Recent points form, weighted directly.
    if "points_rolling_5_mean" in latest:
        components.append(
            latest["points_rolling_5_mean"] * weights["points_mean"]
        )

    # Expected goals, scaled up to roughly match the points range.
    if "xG_rolling_5_mean" in latest:
        components.append(latest["xG_rolling_5_mean"] * 10 * weights["xG_mean"])

    # Expected assists, same scaling as xG.
    if "xA_rolling_5_mean" in latest:
        components.append(latest["xA_rolling_5_mean"] * 10 * weights["xA_mean"])

    # Minutes reliability: more variation -> smaller contribution.
    if "minutes_consistency_5" in latest:
        steadiness = 1.0 / (1.0 + latest["minutes_consistency_5"])
        components.append(steadiness * 10 * weights["minutes_consistency"])

    # Only an upward form trend is rewarded; declines add nothing.
    if "points_trend_5" in latest:
        components.append(
            max(0, latest["points_trend_5"]) * 5 * weights["form_trend"]
        )

    return max(0, sum(components))
batch_compute
batch_compute(
    players_data: dict[str, DataFrame],
) -> dict[str, float]

Compute signals for multiple players.

PARAMETER DESCRIPTION
players_data

Dictionary mapping player ID/name to their data

TYPE: dict[str, DataFrame]

RETURNS DESCRIPTION
dict[str, float]

Dictionary of player signals

Source code in fplx/signals/stats.py
def batch_compute(self, players_data: dict[str, pd.DataFrame]) -> dict[str, float]:
    """
    Compute signals for multiple players.

    Parameters
    ----------
    players_data : dict[str, pd.DataFrame]
        Dictionary mapping player ID/name to their data.

    Returns
    -------
    dict[str, float]
        Signal score per player ID.
    """
    return {
        player_id: self.compute_signal(frame)
        for player_id, frame in players_data.items()
    }

timeseries

Time-series feature engineering and transformations.

FeatureEngineer

FeatureEngineer(config: Optional[dict] = None)

Feature engineering pipeline for player time-series data.

PARAMETER DESCRIPTION
config

Feature configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/timeseries/features.py
def __init__(self, config: Optional[dict] = None):
    """Build the feature-engineering configuration.

    Parameters
    ----------
    config : Optional[dict]
        Overrides merged on top of ``DEFAULT_CONFIG``; keys supplied
        here win over the class defaults.
    """
    merged = dict(self.DEFAULT_CONFIG)
    merged.update(config or {})
    self.config = merged
fit_transform
fit_transform(df: DataFrame) -> DataFrame

Apply all feature engineering transformations.

PARAMETER DESCRIPTION
df

Input player timeseries data

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

Transformed data with engineered features

Source code in fplx/timeseries/features.py
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply all feature engineering transformations.

    Layers rolling, lag, EWMA, trend, first-difference, and
    consistency features on top of the configured key columns.
    The input frame is never mutated.

    Parameters
    ----------
    df : pd.DataFrame
        Input player timeseries data

    Returns
    -------
    pd.DataFrame
        Transformed data with engineered features
    """
    df = df.copy()

    # Identify available columns and ensure they are numeric
    key_cols = [c for c in self.config["key_columns"] if c in df.columns]

    # Coerce to numeric; unparseable entries become 0 rather than NaN.
    for col in key_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

    # Nothing to engineer: return the (copied) frame unchanged.
    if not key_cols:
        logger.warning("No key columns found for feature engineering")
        return df

    # Apply transformations. Each helper returns a new frame with
    # extra columns appended; the base columns are left untouched.
    logger.info("Adding rolling features...")
    df = add_rolling_features(
        df,
        columns=key_cols,
        windows=self.config["rolling_windows"],
        agg_funcs=["mean", "std"],
    )

    logger.info("Adding lag features...")
    df = add_lag_features(df, columns=key_cols, lags=self.config["lag_periods"])

    logger.info("Adding EWMA features...")
    df = add_ewma_features(df, columns=key_cols, alphas=self.config["ewma_alphas"])

    logger.info("Adding trend features...")
    df = add_trend_features(
        df, columns=key_cols, windows=self.config["trend_windows"]
    )

    logger.info("Adding difference features...")
    df = add_diff_features(df, columns=key_cols, periods=[1])

    # Consistency is always computed over minutes/points, regardless
    # of which key columns were present.
    logger.info("Adding consistency features...")
    df = add_consistency_features(df, columns=["minutes", "points"], window=5)

    return df
get_feature_names
get_feature_names(base_columns: list[str]) -> list[str]

Get list of all generated feature names.

PARAMETER DESCRIPTION
base_columns

Base column names

TYPE: list[str]

RETURNS DESCRIPTION
list[str]

Generated feature names

Source code in fplx/timeseries/features.py
def get_feature_names(self, base_columns: list[str]) -> list[str]:
    """
    Get list of all generated feature names.

    Mirrors the columns produced by ``fit_transform`` for the given
    base columns, plus the fixed consistency features.

    Parameters
    ----------
    base_columns : list[str]
        Base column names.

    Returns
    -------
    list[str]
        Generated feature names.
    """
    cfg = self.config
    features: list[str] = []

    for col in base_columns:
        # Rolling mean/std per window.
        for window in cfg["rolling_windows"]:
            features.append(f"{col}_rolling_{window}_mean")
            features.append(f"{col}_rolling_{window}_std")

        # One lagged copy per configured lag.
        features += [f"{col}_lag_{lag}" for lag in cfg["lag_periods"]]

        # EWMA columns are tagged with alpha as a percentage.
        features += [
            f"{col}_ewma_{int(alpha * 100)}" for alpha in cfg["ewma_alphas"]
        ]

        # Linear-trend slope per window.
        features += [f"{col}_trend_{window}" for window in cfg["trend_windows"]]

        # Single first-difference column.
        features.append(f"{col}_diff_1")

    # Fixed consistency features, added unconditionally.
    features += ["minutes_consistency_5", "points_consistency_5"]

    return features
create_future_features
create_future_features(
    df: DataFrame, horizon: int
) -> DataFrame

Create features for future predictions.

This method extends the historical data by horizon periods, applies the full feature engineering pipeline, and returns the newly created future feature set.

PARAMETER DESCRIPTION
df

Historical data

TYPE: DataFrame

horizon

Number of future gameweeks to predict

TYPE: int

RETURNS DESCRIPTION
DataFrame

DataFrame with features for future gameweeks

Source code in fplx/timeseries/features.py
def create_future_features(self, df: pd.DataFrame, horizon: int) -> pd.DataFrame:
    """
    Create features for future predictions.

    This method extends the historical data by `horizon` periods,
    applies the full feature engineering pipeline, and returns
    the newly created future feature set. Future rows are naive
    copies of the last observed row, so engineered features for them
    reflect a "no change" assumption.

    Parameters
    ----------
    df : pd.DataFrame
        Historical data
    horizon : int
        Number of future gameweeks to predict

    Returns
    -------
    pd.DataFrame
        DataFrame with features for future gameweeks
    """
    if df.empty:
        return pd.DataFrame()

    # Create future placeholders by repeating the last known data point
    last_row = df.iloc[-1:].copy()

    # Avoid duplicating index if it's a timestamp or gameweek
    is_numeric_index = pd.api.types.is_numeric_dtype(df.index)
    if isinstance(df.index, pd.DatetimeIndex) or is_numeric_index:
        last_index = df.index[-1]
        # NOTE(review): for a DatetimeIndex this adds an integer to a
        # Timestamp, which modern pandas rejects — confirm this branch
        # is ever taken with datetime-indexed frames.
        future_index = pd.RangeIndex(
            start=last_index + 1, stop=last_index + 1 + horizon
        )
        last_row.index = [future_index[0]]  # Temporarily align for concat
    else:
        # Fall back to positional indexing for non-numeric indexes.
        future_index = pd.RangeIndex(start=len(df), stop=len(df) + horizon)

    # ignore_index=True here; the intended index is re-applied below.
    future_rows = pd.concat([last_row] * horizon, ignore_index=True)
    if isinstance(df.index, pd.DatetimeIndex) or is_numeric_index:
        future_rows.index = future_index

    # Combine historical and future data
    combined_df = pd.concat([df, future_rows])

    # Run the full feature engineering pipeline on the combined data
    # This ensures that rolling/lag features are calculated correctly
    # based on the historical context.
    engineered_df = self.fit_transform(combined_df)

    # Return only the future part
    return engineered_df.tail(horizon)

add_ewma_features

add_ewma_features(
    df: DataFrame,
    columns: list[str],
    alphas: list[float] = [0.3, 0.5, 0.7],
) -> DataFrame

Add exponentially weighted moving average features.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute EWMA for

TYPE: list[str]

alphas

Smoothing factors (0 < alpha < 1)

TYPE: list[float] DEFAULT: [0.3, 0.5, 0.7]

RETURNS DESCRIPTION
DataFrame

DataFrame with EWMA features

Source code in fplx/timeseries/transforms.py
def add_ewma_features(
    df: pd.DataFrame,
    columns: list[str],
    alphas: tuple[float, ...] = (0.3, 0.5, 0.7),
) -> pd.DataFrame:
    """
    Add exponentially weighted moving average features.

    Creates one ``{col}_ewma_{int(alpha*100)}`` column per requested
    column/alpha pair; columns absent from *df* are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list[str]
        Columns to compute EWMA for.
    alphas : tuple[float, ...]
        Smoothing factors (0 < alpha < 1). Default is immutable to
        avoid the shared-mutable-default-argument pitfall.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with EWMA features appended.
    """
    df = df.copy()  # never mutate the caller's frame

    for col in columns:
        if col not in df.columns:
            continue

        for alpha in alphas:
            feature_name = f"{col}_ewma_{int(alpha * 100)}"
            # adjust=False gives the standard recursive EWMA form.
            df[feature_name] = df[col].ewm(alpha=alpha, adjust=False).mean()

    return df

add_lag_features

add_lag_features(
    df: DataFrame,
    columns: list[str],
    lags: list[int] = [1, 2, 3, 7],
) -> DataFrame

Add lagged features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to create lags for

TYPE: list[str]

lags

Lag periods

TYPE: list[int] DEFAULT: [1, 2, 3, 7]

RETURNS DESCRIPTION
DataFrame

DataFrame with lagged features

Source code in fplx/timeseries/transforms.py
def add_lag_features(
    df: pd.DataFrame,
    columns: list[str],
    lags: tuple[int, ...] = (1, 2, 3, 7),
) -> pd.DataFrame:
    """
    Add lagged features to dataframe.

    Creates one ``{col}_lag_{lag}`` column per requested column/lag
    pair; the first *lag* rows of each new column are NaN. Columns
    absent from *df* are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list[str]
        Columns to create lags for.
    lags : tuple[int, ...]
        Lag periods. Default is immutable to avoid the
        shared-mutable-default-argument pitfall.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with lagged features appended.
    """
    df = df.copy()  # never mutate the caller's frame

    for col in columns:
        if col not in df.columns:
            continue

        for lag in lags:
            feature_name = f"{col}_lag_{lag}"
            df[feature_name] = df[col].shift(lag)

    return df

add_rolling_features

add_rolling_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [3, 5, 10],
    agg_funcs: list[str] = ["mean", "std"],
    min_periods: int = 1,
) -> DataFrame

Add rolling window features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe with time-series data

TYPE: DataFrame

columns

Columns to compute rolling features for

TYPE: list[str]

windows

Window sizes for rolling computation

TYPE: list[int] DEFAULT: [3, 5, 10]

agg_funcs

Aggregation functions ('mean', 'std', 'min', 'max', 'sum')

TYPE: list[str] DEFAULT: ['mean', 'std']

min_periods

Minimum observations in window

TYPE: int DEFAULT: 1

RETURNS DESCRIPTION
DataFrame

DataFrame with added rolling features

Source code in fplx/timeseries/transforms.py
def add_rolling_features(
    df: pd.DataFrame,
    columns: list[str],
    windows: tuple[int, ...] = (3, 5, 10),
    agg_funcs: tuple[str, ...] = ("mean", "std"),
    min_periods: int = 1,
) -> pd.DataFrame:
    """
    Add rolling window features to dataframe.

    Creates one ``{col}_rolling_{window}_{func}`` column per
    column/window/aggregation combination; columns absent from *df*
    are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe with time-series data.
    columns : list[str]
        Columns to compute rolling features for.
    windows : tuple[int, ...]
        Window sizes for rolling computation. Default is immutable to
        avoid the shared-mutable-default-argument pitfall.
    agg_funcs : tuple[str, ...]
        Aggregation functions ('mean', 'std', 'min', 'max', 'sum').
    min_periods : int
        Minimum observations in window.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with added rolling features.
    """
    df = df.copy()  # never mutate the caller's frame

    for col in columns:
        if col not in df.columns:
            continue

        for window in windows:
            for func in agg_funcs:
                feature_name = f"{col}_rolling_{window}_{func}"
                df[feature_name] = (
                    df[col].rolling(window=window, min_periods=min_periods).agg(func)
                )

    return df

add_trend_features

add_trend_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [5, 10],
) -> DataFrame

Add trend features (slope) using linear regression.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute trends for

TYPE: list[str]

windows

Window sizes for trend calculation

TYPE: list[int] DEFAULT: [5, 10]

RETURNS DESCRIPTION
DataFrame

DataFrame with trend features

Source code in fplx/timeseries/transforms.py
def add_trend_features(
    df: pd.DataFrame,
    columns: list[str],
    windows: tuple[int, ...] = (5, 10),
) -> pd.DataFrame:
    """
    Add trend features (slope) using linear regression.

    For each column/window pair, fits a least-squares line over the
    rolling window and stores its slope as ``{col}_trend_{window}``.
    Windows with fewer than two finite points yield NaN.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe.
    columns : list[str]
        Columns to compute trends for.
    windows : tuple[int, ...]
        Window sizes for trend calculation. Default is immutable to
        avoid the shared-mutable-default-argument pitfall.

    Returns
    -------
    pd.DataFrame
        Copy of *df* with trend features appended.
    """
    df = df.copy()  # never mutate the caller's frame

    def calculate_slope(series):
        """Calculate slope of linear fit over the window, ignoring NaNs."""
        if len(series) < 2 or series.isna().all():
            return np.nan
        x = np.arange(len(series))
        y = series.values
        mask = ~np.isnan(y)
        # A line needs at least two finite points.
        if mask.sum() < 2:
            return np.nan
        slope = np.polyfit(x[mask], y[mask], 1)[0]
        return slope

    for col in columns:
        if col not in df.columns:
            continue

        for window in windows:
            feature_name = f"{col}_trend_{window}"
            df[feature_name] = (
                df[col]
                .rolling(window=window, min_periods=2)
                .apply(calculate_slope, raw=False)
            )

    return df

features

Feature engineering pipeline for FPL time-series data.

FeatureEngineer
FeatureEngineer(config: Optional[dict] = None)

Feature engineering pipeline for player time-series data.

PARAMETER DESCRIPTION
config

Feature configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/timeseries/features.py
def __init__(self, config: Optional[dict] = None):
    """Build the feature-engineering configuration.

    Parameters
    ----------
    config : Optional[dict]
        Overrides merged on top of ``DEFAULT_CONFIG``; keys supplied
        here win over the class defaults.
    """
    merged = dict(self.DEFAULT_CONFIG)
    merged.update(config or {})
    self.config = merged
fit_transform
fit_transform(df: DataFrame) -> DataFrame

Apply all feature engineering transformations.

PARAMETER DESCRIPTION
df

Input player timeseries data

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

Transformed data with engineered features

Source code in fplx/timeseries/features.py
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply all feature engineering transformations.

    Rolling, lag, EWMA, trend, difference, and consistency features are
    appended in sequence.  Configured key columns are coerced to numeric
    (unparseable values become 0) before any transformation runs.

    Parameters
    ----------
    df : pd.DataFrame
        Input player timeseries data

    Returns
    -------
    pd.DataFrame
        Transformed data with engineered features
    """
    df = df.copy()

    # Restrict the pipeline to configured columns that actually exist.
    key_cols = [c for c in self.config["key_columns"] if c in df.columns]
    if not key_cols:
        logger.warning("No key columns found for feature engineering")
        return df

    # Force numeric dtype so downstream rolling/EWMA math cannot fail.
    for col in key_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

    logger.info("Adding rolling features...")
    df = add_rolling_features(
        df,
        columns=key_cols,
        windows=self.config["rolling_windows"],
        agg_funcs=["mean", "std"],
    )

    logger.info("Adding lag features...")
    df = add_lag_features(df, columns=key_cols, lags=self.config["lag_periods"])

    logger.info("Adding EWMA features...")
    df = add_ewma_features(df, columns=key_cols, alphas=self.config["ewma_alphas"])

    logger.info("Adding trend features...")
    df = add_trend_features(
        df, columns=key_cols, windows=self.config["trend_windows"]
    )

    logger.info("Adding difference features...")
    df = add_diff_features(df, columns=key_cols, periods=[1])

    logger.info("Adding consistency features...")
    df = add_consistency_features(df, columns=["minutes", "points"], window=5)

    return df
get_feature_names
get_feature_names(base_columns: list[str]) -> list[str]

Get list of all generated feature names.

PARAMETER DESCRIPTION
base_columns

Base column names

TYPE: list[str]

RETURNS DESCRIPTION
list[str]

Generated feature names

Source code in fplx/timeseries/features.py
def get_feature_names(self, base_columns: list[str]) -> list[str]:
    """
    Enumerate every feature name the pipeline would generate.

    Parameters
    ----------
    base_columns : list[str]
        Base column names

    Returns
    -------
    list[str]
        Generated feature names, in pipeline order (rolling, lag, EWMA,
        trend, diff per column; consistency features last).
    """
    names: list[str] = []

    for col in base_columns:
        # Rolling aggregates (mean + std per window).
        for window in self.config["rolling_windows"]:
            names.append(f"{col}_rolling_{window}_mean")
            names.append(f"{col}_rolling_{window}_std")

        names.extend(f"{col}_lag_{lag}" for lag in self.config["lag_periods"])
        names.extend(
            f"{col}_ewma_{int(alpha * 100)}" for alpha in self.config["ewma_alphas"]
        )
        names.extend(f"{col}_trend_{window}" for window in self.config["trend_windows"])
        names.append(f"{col}_diff_1")

    # Consistency features are fixed, regardless of base_columns.
    names.extend(["minutes_consistency_5", "points_consistency_5"])

    return names
create_future_features
create_future_features(
    df: DataFrame, horizon: int
) -> DataFrame

Create features for future predictions.

This method extends the historical data by horizon periods, applies the full feature engineering pipeline, and returns the newly created future feature set.

PARAMETER DESCRIPTION
df

Historical data

TYPE: DataFrame

horizon

Number of future gameweeks to predict

TYPE: int

RETURNS DESCRIPTION
DataFrame

DataFrame with features for future gameweeks

Source code in fplx/timeseries/features.py
def create_future_features(self, df: pd.DataFrame, horizon: int) -> pd.DataFrame:
    """
    Create features for future predictions.

    This method extends the historical data by `horizon` rows (each a
    copy of the last observation), applies the full feature engineering
    pipeline to the combined frame so rolling/lag features see the real
    history, and returns only the newly created future rows.

    Parameters
    ----------
    df : pd.DataFrame
        Historical data
    horizon : int
        Number of future gameweeks to predict

    Returns
    -------
    pd.DataFrame
        DataFrame with features for future gameweeks
    """
    if df.empty:
        return pd.DataFrame()

    # Create future placeholders by repeating the last known data point.
    last_row = df.iloc[-1:].copy()

    # Build an index for the future rows that continues the existing one.
    # BUG FIX: the old code routed DatetimeIndex through the numeric
    # branch, where `Timestamp + 1` and `pd.RangeIndex(start=Timestamp)`
    # raise TypeError.  Datetime indexes are now extended via their
    # (inferred) frequency when available, otherwise positionally.
    # BUG FIX: the old code also computed `future_index` in the fallback
    # branch but never assigned it, leaving future rows indexed 0..h-1.
    if isinstance(df.index, pd.DatetimeIndex):
        freq = df.index.freq or pd.infer_freq(df.index)
        if freq is not None:
            future_index = pd.date_range(
                df.index[-1], periods=horizon + 1, freq=freq
            )[1:]
        else:
            future_index = pd.RangeIndex(start=len(df), stop=len(df) + horizon)
    elif pd.api.types.is_numeric_dtype(df.index):
        last_index = df.index[-1]
        future_index = pd.RangeIndex(
            start=last_index + 1, stop=last_index + 1 + horizon
        )
    else:
        future_index = pd.RangeIndex(start=len(df), stop=len(df) + horizon)

    future_rows = pd.concat([last_row] * horizon, ignore_index=True)
    future_rows.index = future_index

    # Combine historical and future data.
    combined_df = pd.concat([df, future_rows])

    # Run the full feature engineering pipeline on the combined data
    # so rolling/lag features are calculated with historical context.
    engineered_df = self.fit_transform(combined_df)

    # Return only the future part.
    return engineered_df.tail(horizon)

transforms

Time-series transformations for FPL data.

add_rolling_features
add_rolling_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [3, 5, 10],
    agg_funcs: list[str] = ["mean", "std"],
    min_periods: int = 1,
) -> DataFrame

Add rolling window features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe with time-series data

TYPE: DataFrame

columns

Columns to compute rolling features for

TYPE: list[str]

windows

Window sizes for rolling computation

TYPE: list[int] DEFAULT: [3, 5, 10]

agg_funcs

Aggregation functions ('mean', 'std', 'min', 'max', 'sum')

TYPE: list[str] DEFAULT: ['mean', 'std']

min_periods

Minimum observations in window

TYPE: int DEFAULT: 1

RETURNS DESCRIPTION
DataFrame

DataFrame with added rolling features

Source code in fplx/timeseries/transforms.py
def add_rolling_features(
    df: pd.DataFrame,
    columns: list[str],
    windows: list[int] = [3, 5, 10],
    agg_funcs: list[str] = ["mean", "std"],
    min_periods: int = 1,
) -> pd.DataFrame:
    """
    Append rolling-window aggregate columns to a dataframe.

    Each generated column is named ``{col}_rolling_{window}_{func}``.
    Requested columns absent from ``df`` are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe with time-series data
    columns : list[str]
        Columns to compute rolling features for
    windows : list[int]
        Window sizes for rolling computation
    agg_funcs : list[str]
        Aggregation functions ('mean', 'std', 'min', 'max', 'sum')
    min_periods : int
        Minimum observations in window

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the rolling feature columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for window in windows:
            # One rolling object per (column, window); reused per aggregate.
            roller = out[col].rolling(window=window, min_periods=min_periods)
            for func in agg_funcs:
                out[f"{col}_rolling_{window}_{func}"] = roller.agg(func)

    return out
add_lag_features
add_lag_features(
    df: DataFrame,
    columns: list[str],
    lags: list[int] = [1, 2, 3, 7],
) -> DataFrame

Add lagged features to dataframe.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to create lags for

TYPE: list[str]

lags

Lag periods

TYPE: list[int] DEFAULT: [1, 2, 3, 7]

RETURNS DESCRIPTION
DataFrame

DataFrame with lagged features

Source code in fplx/timeseries/transforms.py
def add_lag_features(
    df: pd.DataFrame, columns: list[str], lags: list[int] = [1, 2, 3, 7]
) -> pd.DataFrame:
    """
    Append lagged copies of the given columns.

    Each generated column is named ``{col}_lag_{lag}``; the first ``lag``
    rows of a lagged column are NaN.  Requested columns absent from
    ``df`` are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to create lags for
    lags : list[int]
        Lag periods

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the lag columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for lag in lags:
            out[f"{col}_lag_{lag}"] = out[col].shift(lag)

    return out
add_ewma_features
add_ewma_features(
    df: DataFrame,
    columns: list[str],
    alphas: list[float] = [0.3, 0.5, 0.7],
) -> DataFrame

Add exponentially weighted moving average features.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute EWMA for

TYPE: list[str]

alphas

Smoothing factors (0 < alpha < 1)

TYPE: list[float] DEFAULT: [0.3, 0.5, 0.7]

RETURNS DESCRIPTION
DataFrame

DataFrame with EWMA features

Source code in fplx/timeseries/transforms.py
def add_ewma_features(
    df: pd.DataFrame, columns: list[str], alphas: list[float] = [0.3, 0.5, 0.7]
) -> pd.DataFrame:
    """
    Append exponentially weighted moving average columns.

    Each generated column is named ``{col}_ewma_{alpha*100}`` (alpha
    encoded as an integer percentage) and uses ``adjust=False``
    (recursive EWMA).  Requested columns absent from ``df`` are skipped.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to compute EWMA for
    alphas : list[float]
        Smoothing factors (0 < alpha < 1)

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the EWMA columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for alpha in alphas:
            out[f"{col}_ewma_{int(alpha * 100)}"] = (
                out[col].ewm(alpha=alpha, adjust=False).mean()
            )

    return out
add_trend_features
add_trend_features(
    df: DataFrame,
    columns: list[str],
    windows: list[int] = [5, 10],
) -> DataFrame

Add trend features (slope) using linear regression.

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute trends for

TYPE: list[str]

windows

Window sizes for trend calculation

TYPE: list[int] DEFAULT: [5, 10]

RETURNS DESCRIPTION
DataFrame

DataFrame with trend features

Source code in fplx/timeseries/transforms.py
def add_trend_features(
    df: pd.DataFrame, columns: list[str], windows: list[int] = [5, 10]
) -> pd.DataFrame:
    """
    Add linear-trend (slope) features over rolling windows.

    For each requested column and window size, a first-degree polynomial
    is fitted to the windowed values and its slope is stored in a new
    column named ``{col}_trend_{window}``.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to compute trends for
    windows : list[int]
        Window sizes for trend calculation

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the trend columns appended
    """
    out = df.copy()

    def _window_slope(window_values):
        """Slope of the least-squares line through the window; NaN if <2 valid points."""
        if len(window_values) < 2 or window_values.isna().all():
            return np.nan
        y = window_values.values
        valid = ~np.isnan(y)
        if valid.sum() < 2:
            return np.nan
        x = np.arange(len(window_values))
        return np.polyfit(x[valid], y[valid], 1)[0]

    # Silently skip requested columns that are absent from the frame.
    for col in (c for c in columns if c in out.columns):
        for window in windows:
            out[f"{col}_trend_{window}"] = (
                out[col]
                .rolling(window=window, min_periods=2)
                .apply(_window_slope, raw=False)
            )

    return out
add_diff_features
add_diff_features(
    df: DataFrame,
    columns: list[str],
    periods: list[int] = [1, 2],
) -> DataFrame

Add difference features (current - previous).

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to compute differences for

TYPE: list[str]

periods

Difference periods

TYPE: list[int] DEFAULT: [1, 2]

RETURNS DESCRIPTION
DataFrame

DataFrame with difference features

Source code in fplx/timeseries/transforms.py
def add_diff_features(
    df: pd.DataFrame, columns: list[str], periods: list[int] = [1, 2]
) -> pd.DataFrame:
    """
    Append difference columns (current value minus a previous value).

    Each generated column is named ``{col}_diff_{period}``; the first
    ``period`` rows are NaN.  Requested columns absent from ``df`` are
    skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to compute differences for
    periods : list[int]
        Difference periods

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the difference columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        for period in periods:
            out[f"{col}_diff_{period}"] = out[col].diff(periods=period)

    return out
add_consistency_features
add_consistency_features(
    df: DataFrame, columns: list[str], window: int = 5
) -> DataFrame

Add consistency measures (coefficient of variation).

PARAMETER DESCRIPTION
df

Input dataframe

TYPE: DataFrame

columns

Columns to measure consistency for

TYPE: list[str]

window

Window size

TYPE: int DEFAULT: 5

RETURNS DESCRIPTION
DataFrame

DataFrame with consistency features

Source code in fplx/timeseries/transforms.py
def add_consistency_features(
    df: pd.DataFrame, columns: list[str], window: int = 5
) -> pd.DataFrame:
    """
    Append rolling coefficient-of-variation columns (lower = more consistent).

    Each generated column is named ``{col}_consistency_{window}`` and
    holds rolling std divided by rolling mean.  Requested columns absent
    from ``df`` are skipped silently.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe
    columns : list[str]
        Columns to measure consistency for
    window : int
        Window size

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the consistency columns added
    """
    out = df.copy()

    for col in (c for c in columns if c in out.columns):
        windowed = out[col].rolling(window=window, min_periods=1)
        # Epsilon keeps the ratio finite when the rolling mean is zero.
        out[f"{col}_consistency_{window}"] = windowed.std() / (
            windowed.mean() + 1e-6
        )

    return out

utils

Utility modules.

Config

Config(config: Optional[dict] = None)

Configuration manager for FPLX.

PARAMETER DESCRIPTION
config

Configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/utils/config.py
def __init__(self, config: Optional[dict] = None):
    """Initialize from DEFAULT_CONFIG, then merge any user overrides via _update_nested."""
    # Shallow-copy the class-level defaults so instances never mutate
    # DEFAULT_CONFIG itself.
    self.config = {**self.DEFAULT_CONFIG}
    if config:
        self._update_nested(self.config, config)
get
get(key: str, default: Any = None) -> Any

Get configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

default

Default value if key not found

TYPE: Any DEFAULT: None

RETURNS DESCRIPTION
Any

Configuration value

Source code in fplx/utils/config.py
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up a configuration value by (possibly dotted) key.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` walks nested dictionaries.
    default : Any
        Value returned when the key path does not resolve.

    Returns
    -------
    Any
        The configured value, or ``default`` if any path segment is
        missing or a non-dict is encountered mid-path.
    """
    node = self.config

    for part in key.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]

    return node
set
set(key: str, value: Any)

Set configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

value

Value to set

TYPE: Any

Source code in fplx/utils/config.py
def set(self, key: str, value: Any):
    """
    Store a configuration value, creating intermediate dicts as needed.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` writes into nested dictionaries.
    value : Any
        Value to set
    """
    *parents, leaf = key.split(".")
    node = self.config

    # Walk/create the nested dictionaries down to the leaf's parent.
    for part in parents:
        node = node.setdefault(part, {})

    node[leaf] = value
load_from_file
load_from_file(filepath: Path)

Load configuration from JSON file.

PARAMETER DESCRIPTION
filepath

Path to configuration file

TYPE: Path

Source code in fplx/utils/config.py
def load_from_file(self, filepath: Path):
    """
    Load configuration from JSON file.

    Loaded values are merged into the current configuration via
    ``_update_nested`` rather than replacing it wholesale.

    Parameters
    ----------
    filepath : Path
        Path to configuration file
    """
    with open(filepath) as f:
        file_config = json.load(f)

    self._update_nested(self.config, file_config)
save_to_file
save_to_file(filepath: Path)

Save configuration to JSON file.

PARAMETER DESCRIPTION
filepath

Path to save configuration

TYPE: Path

Source code in fplx/utils/config.py
def save_to_file(self, filepath: Path):
    """
    Write the current configuration to ``filepath`` as pretty-printed JSON.

    Parameters
    ----------
    filepath : Path
        Path to save configuration
    """
    with open(filepath, "w") as out:
        json.dump(self.config, out, indent=2)
to_dict
to_dict() -> dict

Get configuration as dictionary.

RETURNS DESCRIPTION
Dict

Configuration dictionary

Source code in fplx/utils/config.py
def to_dict(self) -> dict:
    """
    Return a shallow copy of the configuration dictionary.

    Returns
    -------
    Dict
        Copy of the configuration; top-level mutations do not affect
        this Config, but nested dict values are still shared.
    """
    return dict(self.config)

validate_data

validate_data(
    df: DataFrame, required_columns: list[str]
) -> bool

Validate that dataframe has required columns.

PARAMETER DESCRIPTION
df

Dataframe to validate

TYPE: DataFrame

required_columns

Required column names

TYPE: list[str]

RETURNS DESCRIPTION
bool

True if valid

Source code in fplx/utils/validation.py
def validate_data(df: pd.DataFrame, required_columns: list[str]) -> bool:
    """
    Validate that dataframe has required columns.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe to validate
    required_columns : list[str]
        Required column names

    Returns
    -------
    bool
        True if every required column is present; otherwise the missing
        set is logged as an error and False is returned.
    """
    missing = set(required_columns).difference(df.columns)

    if not missing:
        return True

    logger.error(f"Missing required columns: {missing}")
    return False

config

Configuration management.

Config
Config(config: Optional[dict] = None)

Configuration manager for FPLX.

PARAMETER DESCRIPTION
config

Configuration dictionary

TYPE: Optional[Dict] DEFAULT: None

Source code in fplx/utils/config.py
def __init__(self, config: Optional[dict] = None):
    """Initialize from DEFAULT_CONFIG, then merge any user overrides via _update_nested."""
    # Shallow-copy the class-level defaults so instances never mutate
    # DEFAULT_CONFIG itself.
    self.config = {**self.DEFAULT_CONFIG}
    if config:
        self._update_nested(self.config, config)
get
get(key: str, default: Any = None) -> Any

Get configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

default

Default value if key not found

TYPE: Any DEFAULT: None

RETURNS DESCRIPTION
Any

Configuration value

Source code in fplx/utils/config.py
def get(self, key: str, default: Any = None) -> Any:
    """
    Look up a configuration value by (possibly dotted) key.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` walks nested dictionaries.
    default : Any
        Value returned when the key path does not resolve.

    Returns
    -------
    Any
        The configured value, or ``default`` if any path segment is
        missing or a non-dict is encountered mid-path.
    """
    node = self.config

    for part in key.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]

    return node
set
set(key: str, value: Any)

Set configuration value.

PARAMETER DESCRIPTION
key

Configuration key (supports nested keys with '.')

TYPE: str

value

Value to set

TYPE: Any

Source code in fplx/utils/config.py
def set(self, key: str, value: Any):
    """
    Store a configuration value, creating intermediate dicts as needed.

    Parameters
    ----------
    key : str
        Configuration key; ``"a.b.c"`` writes into nested dictionaries.
    value : Any
        Value to set
    """
    *parents, leaf = key.split(".")
    node = self.config

    # Walk/create the nested dictionaries down to the leaf's parent.
    for part in parents:
        node = node.setdefault(part, {})

    node[leaf] = value
load_from_file
load_from_file(filepath: Path)

Load configuration from JSON file.

PARAMETER DESCRIPTION
filepath

Path to configuration file

TYPE: Path

Source code in fplx/utils/config.py
def load_from_file(self, filepath: Path):
    """
    Load configuration from JSON file.

    Loaded values are merged into the current configuration via
    ``_update_nested`` rather than replacing it wholesale.

    Parameters
    ----------
    filepath : Path
        Path to configuration file
    """
    with open(filepath) as f:
        file_config = json.load(f)

    self._update_nested(self.config, file_config)
save_to_file
save_to_file(filepath: Path)

Save configuration to JSON file.

PARAMETER DESCRIPTION
filepath

Path to save configuration

TYPE: Path

Source code in fplx/utils/config.py
def save_to_file(self, filepath: Path):
    """
    Write the current configuration to ``filepath`` as pretty-printed JSON.

    Parameters
    ----------
    filepath : Path
        Path to save configuration
    """
    with open(filepath, "w") as out:
        json.dump(self.config, out, indent=2)
to_dict
to_dict() -> dict

Get configuration as dictionary.

RETURNS DESCRIPTION
Dict

Configuration dictionary

Source code in fplx/utils/config.py
def to_dict(self) -> dict:
    """
    Return a shallow copy of the configuration dictionary.

    Returns
    -------
    Dict
        Copy of the configuration; top-level mutations do not affect
        this Config, but nested dict values are still shared.
    """
    return dict(self.config)

validation

Data validation utilities.

validate_data
validate_data(
    df: DataFrame, required_columns: list[str]
) -> bool

Validate that dataframe has required columns.

PARAMETER DESCRIPTION
df

Dataframe to validate

TYPE: DataFrame

required_columns

Required column names

TYPE: list[str]

RETURNS DESCRIPTION
bool

True if valid

Source code in fplx/utils/validation.py
def validate_data(df: pd.DataFrame, required_columns: list[str]) -> bool:
    """
    Validate that dataframe has required columns.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe to validate
    required_columns : list[str]
        Required column names

    Returns
    -------
    bool
        True if every required column is present; otherwise the missing
        set is logged as an error and False is returned.
    """
    missing = set(required_columns).difference(df.columns)

    if not missing:
        return True

    logger.error(f"Missing required columns: {missing}")
    return False
check_data_quality
check_data_quality(
    df: DataFrame, max_missing_pct: float = 0.3
) -> dict[str, float]

Check data quality and report issues.

PARAMETER DESCRIPTION
df

Data to check

TYPE: DataFrame

max_missing_pct

Maximum acceptable percentage of missing values

TYPE: float DEFAULT: 0.3

RETURNS DESCRIPTION
Dict[str, float]

Quality metrics

Source code in fplx/utils/validation.py
def check_data_quality(
    df: pd.DataFrame, max_missing_pct: float = 0.3
) -> dict:
    """
    Check data quality and report issues.

    Logs a warning when the overall missing-value fraction exceeds
    ``max_missing_pct``, and another listing columns that individually
    exceed it.

    Parameters
    ----------
    df : pd.DataFrame
        Data to check
    max_missing_pct : float
        Maximum acceptable fraction (0-1) of missing values

    Returns
    -------
    dict
        Quality metrics with keys ``total_rows`` (int),
        ``total_columns`` (int), ``missing_percentage`` (float, 0-100),
        and ``problematic_columns`` (list of column labels).
        Note: the previous ``dict[str, float]`` annotation was wrong —
        the values are of mixed types.
    """
    total_cells = df.shape[0] * df.shape[1]
    missing_cells = df.isna().sum().sum()
    # Guard against an empty frame to avoid division by zero.
    missing_pct = missing_cells / total_cells if total_cells > 0 else 0

    # Per-column missing fraction.
    col_missing = df.isna().mean()
    problematic_cols = col_missing[col_missing > max_missing_pct].index.tolist()

    metrics = {
        "total_rows": df.shape[0],
        "total_columns": df.shape[1],
        "missing_percentage": missing_pct * 100,
        "problematic_columns": problematic_cols,
    }

    if missing_pct > max_missing_pct:
        logger.warning(f"High missing data: {missing_pct * 100:.2f}%")

    if problematic_cols:
        logger.warning(f"Columns with high missing data: {problematic_cols}")

    return metrics
impute_missing
impute_missing(
    df: DataFrame, strategy: str = "mean"
) -> DataFrame

Impute missing values.

PARAMETER DESCRIPTION
df

Data with missing values

TYPE: DataFrame

strategy

Imputation strategy: 'mean', 'median', 'forward_fill', 'zero'

TYPE: str DEFAULT: 'mean'

RETURNS DESCRIPTION
DataFrame

Data with imputed values

Source code in fplx/utils/validation.py
def impute_missing(df: pd.DataFrame, strategy: str = "mean") -> pd.DataFrame:
    """
    Impute missing values.

    Parameters
    ----------
    df : pd.DataFrame
        Data with missing values
    strategy : str
        Imputation strategy: 'mean', 'median', 'forward_fill', 'zero'.
        Unknown strategies fall back to 'mean' with a warning.

    Returns
    -------
    pd.DataFrame
        Data with imputed values (input frame is not modified)
    """
    df = df.copy()

    if strategy == "mean":
        df = df.fillna(df.mean())
    elif strategy == "median":
        df = df.fillna(df.median())
    elif strategy == "forward_fill":
        # BUG FIX: fillna(method="ffill") is deprecated since pandas 2.1
        # and removed in pandas 3.0; DataFrame.ffill() is the supported API.
        df = df.ffill()
    elif strategy == "zero":
        df = df.fillna(0)
    else:
        logger.warning(f"Unknown strategy {strategy}, using mean")
        df = df.fillna(df.mean())

    return df

Subpackages