readme

f42429f6 · bailuo · f42429f6 · f42429f6 · f42429f6 · f42429f6
Commit f42429f6 authored Nov 19, 2025 by bailuo
20 changed files
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/neural.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/neural.py
+import os
+
+import pandas as pd
+from neuralforecast import NeuralForecast
+from neuralforecast.auto import (
+    AutoNHITS as _AutoNHITS,
+    AutoTFT as _AutoTFT,
+)
+from neuralforecast.common._base_model import BaseModel as NeuralForecastModel
+from ray import tune
+
+from ..utils.forecaster import Forecaster
+
+# Set at import time, before any forecast is produced.
+# NOTE(review): presumably tells the Nixtla libraries to return the series id
+# as a regular column instead of the index - confirm against neuralforecast docs.
+os.environ["NIXTLA_ID_AS_COL"] = "true"
+
+
+def run_neuralforecast_model(
+    model: NeuralForecastModel,
+    df: pd.DataFrame,
+    freq: str,
+) -> pd.DataFrame:
+    """Fit one NeuralForecast model on ``df`` and return its predictions.
+
+    Args:
+        model: Model instance wrapped in a single-model ``NeuralForecast``.
+        df: Training frame, passed unchanged to ``NeuralForecast.fit``.
+        freq: Frequency string handed to ``NeuralForecast``.
+
+    Returns:
+        The frame produced by ``NeuralForecast.predict()`` after fitting.
+    """
+    forecaster = NeuralForecast(models=[model], freq=freq)
+    forecaster.fit(df=df)
+    return forecaster.predict()
+
+
+class AutoNHITS(Forecaster):
+    """Forecaster wrapper around neuralforecast's ``AutoNHITS``."""
+
+    def __init__(
+        self,
+        alias: str = "AutoNHITS",
+        num_samples: int = 10,
+        backend: str = "optuna",
+    ):
+        # Stored verbatim and forwarded to the underlying model in `forecast`.
+        self.alias = alias
+        self.num_samples = num_samples
+        self.backend = backend
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Tune and fit ``AutoNHITS`` on ``df`` and forecast ``h`` steps.
+
+        The search space starts from the library's default ray config, with
+        ``scaler_type`` restricted to ``"robust"``; it is converted to an
+        optuna config when ``self.backend == "optuna"``.
+        """
+        search_space = _AutoNHITS.get_default_config(h=h, backend="ray")
+        search_space["scaler_type"] = tune.choice(["robust"])
+        if self.backend == "optuna":
+            search_space = _AutoNHITS._ray_config_to_optuna(search_space)
+
+        auto_model = _AutoNHITS(
+            h=h,
+            alias=self.alias,
+            num_samples=self.num_samples,
+            backend=self.backend,
+            config=search_space,
+        )
+        return run_neuralforecast_model(model=auto_model, df=df, freq=freq)
+
+
+class AutoTFT(Forecaster):
+    """Forecaster wrapper around neuralforecast's ``AutoTFT``."""
+
+    def __init__(
+        self,
+        alias: str = "AutoTFT",
+        num_samples: int = 10,
+        backend: str = "optuna",
+    ):
+        # Stored verbatim and forwarded to the underlying model in `forecast`.
+        self.alias = alias
+        self.num_samples = num_samples
+        self.backend = backend
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Tune and fit ``AutoTFT`` on ``df`` and forecast ``h`` steps.
+
+        The search space starts from the library's default ray config, with
+        ``scaler_type`` restricted to ``"robust"``; it is converted to an
+        optuna config when ``self.backend == "optuna"``.
+        """
+        search_space = _AutoTFT.get_default_config(h=h, backend="ray")
+        search_space["scaler_type"] = tune.choice(["robust"])
+        if self.backend == "optuna":
+            search_space = _AutoTFT._ray_config_to_optuna(search_space)
+
+        auto_model = _AutoTFT(
+            h=h,
+            alias=self.alias,
+            num_samples=self.num_samples,
+            backend=self.backend,
+            config=search_space,
+        )
+        return run_neuralforecast_model(model=auto_model, df=df, freq=freq)
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/prophet.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/prophet.py
+from copy import deepcopy
+from typing import List
+from threadpoolctl import threadpool_limits
+
+import pandas as pd
+from prophet import Prophet
+
+from ..utils.parallel_forecaster import ParallelForecaster
+from ..utils.forecaster import Forecaster
+
+
+class NixtlaProphet(Prophet, ParallelForecaster, Forecaster):
+    """Prophet model exposed through the per-series ``ParallelForecaster`` mixin."""
+
+    def __init__(
+        self,
+        alias: str = "Prophet",
+        *args,
+        **kwargs,
+    ):
+        # Remaining arguments are forwarded unchanged up the MRO.
+        super().__init__(*args, **kwargs)
+        self.alias = alias
+
+    def __local_forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+        quantiles: List[float] | None = None,
+    ) -> pd.DataFrame:
+        """Fit a copy of this model on one series and predict ``h`` periods.
+
+        Returns a frame with columns ``ds`` and ``self.alias``.
+
+        Raises:
+            NotImplementedError: if ``quantiles`` is not None.
+        """
+        if quantiles is not None:
+            raise NotImplementedError
+        # Fit a deep copy so this instance itself is never fitted.
+        fitted = deepcopy(self)
+        fitted.fit(df=df)
+        horizon_df = fitted.make_future_dataframe(
+            periods=h,
+            include_history=False,
+            freq=freq,
+        )
+        preds = fitted.predict(horizon_df).rename({"yhat": self.alias}, axis=1)
+        return preds[["ds", self.alias]]
+
+    def _local_forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+        quantiles: List[float] | None = None,
+    ) -> pd.DataFrame:
+        """Run the single-series forecast with thread pools limited to 1."""
+        with threadpool_limits(limits=1):
+            result = self.__local_forecast(
+                df=df,
+                h=h,
+                freq=freq,
+                quantiles=quantiles,
+            )
+        return result
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/stats.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/benchmarks/stats.py
+import os
+
+import pandas as pd
+from statsforecast import StatsForecast
+from statsforecast.models import (
+    _TS as StatsForecastModel,
+    ADIDA as _ADIDA,
+    AutoARIMA as _AutoARIMA,
+    AutoCES as _AutoCES,
+    AutoETS as _AutoETS,
+    CrostonClassic as _CrostonClassic,
+    DynamicOptimizedTheta as _DOTheta,
+    HistoricAverage as _HistoricAverage,
+    IMAPA as _IMAPA,
+    SeasonalNaive as _SeasonalNaive,
+    Theta as _Theta,
+    ZeroModel as _ZeroModel,
+)
+
+from ..utils.forecaster import Forecaster, get_seasonality
+
+# Set at import time, before any forecast is produced.
+# NOTE(review): presumably tells the Nixtla libraries to return the series id
+# as a regular column instead of the index - confirm against statsforecast docs.
+os.environ["NIXTLA_ID_AS_COL"] = "true"
+
+
+def run_statsforecast_model(
+    model: StatsForecastModel,
+    df: pd.DataFrame,
+    h: int,
+    freq: str,
+) -> pd.DataFrame:
+    """Forecast ``h`` steps with a single StatsForecast model.
+
+    The model runs with ``n_jobs=-1`` and a ``SeasonalNaive`` fallback whose
+    season length is derived from ``freq`` via ``get_seasonality``.
+
+    Returns:
+        The frame produced by ``StatsForecast.forecast``.
+    """
+    fallback = _SeasonalNaive(season_length=get_seasonality(freq))
+    engine = StatsForecast(
+        models=[model],
+        freq=freq,
+        n_jobs=-1,
+        fallback_model=fallback,
+    )
+    return engine.forecast(df=df, h=h)
+
+
+class ADIDA(Forecaster):
+    """Forecaster wrapper around statsforecast's ``ADIDA`` model."""
+
+    def __init__(self, alias: str = "ADIDA"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps for ``df`` via ``run_statsforecast_model``."""
+        wrapped = _ADIDA(alias=self.alias)
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class AutoARIMA(Forecaster):
+    """Forecaster wrapper around statsforecast's ``AutoARIMA`` model."""
+
+    def __init__(self, alias: str = "AutoARIMA"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps, with the season length derived from ``freq``."""
+        wrapped = _AutoARIMA(
+            season_length=get_seasonality(freq),
+            alias=self.alias,
+        )
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class AutoCES(Forecaster):
+    """Forecaster wrapper around statsforecast's ``AutoCES`` model."""
+
+    def __init__(self, alias: str = "AutoCES"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps, with the season length derived from ``freq``."""
+        wrapped = _AutoCES(
+            season_length=get_seasonality(freq),
+            alias=self.alias,
+        )
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class AutoETS(Forecaster):
+    """Forecaster wrapper around statsforecast's ``AutoETS`` model."""
+
+    def __init__(self, alias: str = "AutoETS"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps, with the season length derived from ``freq``."""
+        wrapped = _AutoETS(
+            season_length=get_seasonality(freq),
+            alias=self.alias,
+        )
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class CrostonClassic(Forecaster):
+    """Forecaster wrapper around statsforecast's ``CrostonClassic`` model."""
+
+    def __init__(self, alias: str = "CrostonClassic"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps for ``df`` via ``run_statsforecast_model``."""
+        wrapped = _CrostonClassic(alias=self.alias)
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class DOTheta(Forecaster):
+    """Forecaster wrapper around statsforecast's ``DynamicOptimizedTheta``."""
+
+    def __init__(self, alias: str = "DOTheta"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps, with the season length derived from ``freq``."""
+        wrapped = _DOTheta(
+            season_length=get_seasonality(freq),
+            alias=self.alias,
+        )
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class HistoricAverage(Forecaster):
+    """Forecaster wrapper around statsforecast's ``HistoricAverage`` model."""
+
+    def __init__(self, alias: str = "HistoricAverage"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps for ``df`` via ``run_statsforecast_model``."""
+        wrapped = _HistoricAverage(alias=self.alias)
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class IMAPA(Forecaster):
+    """Forecaster wrapper around statsforecast's ``IMAPA`` model."""
+
+    def __init__(self, alias: str = "IMAPA"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps for ``df`` via ``run_statsforecast_model``."""
+        wrapped = _IMAPA(alias=self.alias)
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class SeasonalNaive(Forecaster):
+    """Forecaster wrapper around statsforecast's ``SeasonalNaive`` model."""
+
+    def __init__(self, alias: str = "SeasonalNaive"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps, with the season length derived from ``freq``."""
+        wrapped = _SeasonalNaive(
+            season_length=get_seasonality(freq),
+            alias=self.alias,
+        )
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class Theta(Forecaster):
+    """Forecaster wrapper around statsforecast's ``Theta`` model."""
+
+    def __init__(self, alias: str = "Theta"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps, with the season length derived from ``freq``."""
+        wrapped = _Theta(
+            season_length=get_seasonality(freq),
+            alias=self.alias,
+        )
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
+
+
+class ZeroModel(Forecaster):
+    """Forecaster wrapper around statsforecast's ``ZeroModel``."""
+
+    def __init__(self, alias: str = "ZeroModel"):
+        self.alias = alias
+
+    def forecast(self, df: pd.DataFrame, h: int, freq: str) -> pd.DataFrame:
+        """Forecast ``h`` steps for ``df`` via ``run_statsforecast_model``."""
+        wrapped = _ZeroModel(alias=self.alias)
+        return run_statsforecast_model(model=wrapped, df=df, h=h, freq=freq)
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/__init__.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/__init__.py
+from .chronos import Chronos
+from .lagllama import LagLlama
+from .moirai import Moirai
+from .timegpt import TimeGPT
+from .timesfm import TimesFM
+
+__all__ = [
+    "Chronos",
+    "LagLlama",
+    "Moirai",
+    "TimeGPT",
+    "TimesFM",
+]
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py
+from typing import Iterable, List
+
+import numpy as np
+import pandas as pd
+import torch
+from chronos import ChronosPipeline
+from tqdm import tqdm
+from utilsforecast.processing import make_future_dataframe
+
+from ..utils.forecaster import Forecaster
+
+
+class TimeSeriesDataset:
+    """Batched iterator over a 2-D tensor holding one series per row.
+
+    Iterating yields consecutive row slices of ``data`` of at most
+    ``batch_size`` rows; starting a new iteration resets the position.
+    """
+
+    def __init__(
+        self,
+        data: torch.Tensor,
+        uids: Iterable,
+        last_times: Iterable,
+        batch_size: int,
+    ):
+        self.data = data
+        self.uids = uids
+        self.last_times = last_times
+        self.batch_size = batch_size
+        # Ceiling division: a trailing partial batch still counts as a batch.
+        whole, leftover = divmod(len(data), self.batch_size)
+        self.n_batches = whole + (1 if leftover != 0 else 0)
+        self.current_batch = 0
+
+    @classmethod
+    def from_df(cls, df: pd.DataFrame, batch_size: int):
+        """Build a dataset from a long frame with ``unique_id``, ``ds``, ``y``.
+
+        Each series becomes one bfloat16 row, right-aligned and left-padded
+        with NaN up to the length of the longest series. Rows follow the
+        order of ``unique_id`` after sorting by (``unique_id``, ``ds``).
+        """
+        n_series = df["unique_id"].nunique()
+        longest = df["unique_id"].value_counts().max()
+        values = torch.full(
+            size=(n_series, longest),
+            fill_value=torch.nan,
+            dtype=torch.bfloat16,
+        )  # type: ignore
+        ordered = df.sort_values(by=["unique_id", "ds"])
+        for row, (_, series) in enumerate(ordered.groupby("unique_id")):
+            # Right-align: write into the last len(series) slots of the row.
+            values[row, -len(series) :] = torch.tensor(
+                series["y"].values,
+                dtype=torch.bfloat16,
+            )
+        series_ids = ordered["unique_id"].unique()
+        final_stamps = ordered.groupby("unique_id")["ds"].tail(1)
+        return cls(values, series_ids, final_stamps, batch_size)
+
+    def __len__(self):
+        """Return the number of batches."""
+        return self.n_batches
+
+    def make_future_dataframe(self, h: int, freq: str) -> pd.DataFrame:
+        """Build the ``h``-step future frame from the stored ids and last times."""
+        stamps = pd.to_datetime(self.last_times)
+        return make_future_dataframe(
+            uids=self.uids,
+            last_times=stamps,
+            h=h,
+            freq=freq,
+        )  # type: ignore
+
+    def __iter__(self):
+        # Rewind so the dataset can be iterated more than once.
+        self.current_batch = 0
+        return self
+
+    def __next__(self):
+        if self.current_batch >= self.n_batches:
+            raise StopIteration
+        lo = self.current_batch * self.batch_size
+        self.current_batch += 1
+        return self.data[lo : lo + self.batch_size]
+
+
+class Chronos(Forecaster):
+    """Forecaster backed by a pretrained ``ChronosPipeline``."""
+
+    def __init__(
+        self,
+        repo_id: str = "amazon/chronos-t5-large",
+        batch_size: int = 16,
+        alias: str = "Chronos",
+    ):
+        self.repo_id = repo_id
+        self.batch_size = batch_size
+        self.alias = alias
+        # The pipeline is loaded eagerly, at construction time.
+        self.model = ChronosPipeline.from_pretrained(
+            repo_id,
+            device_map="auto",
+            torch_dtype=torch.bfloat16,
+        )
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Predict ``h`` steps for every series in ``df``, batch by batch.
+
+        The point forecast stored under ``self.alias`` is the mean of the
+        predictions over axis 1 (presumably the sample axis - confirm
+        against the ChronosPipeline.predict output shape).
+        """
+        dataset = TimeSeriesDataset.from_df(df, batch_size=self.batch_size)
+        batch_fcsts = []
+        for batch in tqdm(dataset):
+            batch_fcsts.append(self.model.predict(batch, prediction_length=h))
+        samples = torch.cat(batch_fcsts).numpy()
+        fcst_df = dataset.make_future_dataframe(h=h, freq=freq)
+        fcst_df[self.alias] = np.mean(samples, axis=1).reshape(-1, 1)
+        return fcst_df
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/lagllama.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/lagllama.py
+from gluonts.torch.model.predictor import PyTorchPredictor
+from lag_llama.gluon.estimator import LagLlamaEstimator
+
+from ..utils.gluonts_forecaster import GluonTSForecaster
+
+
+class LagLlama(GluonTSForecaster):
+    """GluonTS-based forecaster for the Lag-Llama checkpoint."""
+
+    def __init__(
+        self,
+        repo_id: str = "time-series-foundation-models/Lag-Llama",
+        filename: str = "lag-llama.ckpt",
+        alias: str = "LagLlama",
+    ):
+        super().__init__(repo_id=repo_id, filename=filename, alias=alias)
+
+    def get_predictor(self, prediction_length: int) -> PyTorchPredictor:
+        """Build a predictor from the checkpoint's stored model arguments."""
+        hparams = self.load()["hyper_parameters"]["model_kwargs"]
+        # Estimator arguments copied straight from the checkpoint.
+        forwarded = (
+            "input_size",
+            "n_layer",
+            "n_embd_per_head",
+            "n_head",
+            "scaling",
+            "time_feat",
+        )
+        estimator = LagLlamaEstimator(
+            ckpt_path=self.checkpoint_path,
+            prediction_length=prediction_length,
+            # this context length is reported in the paper
+            context_length=32,
+            **{name: hparams[name] for name in forwarded},
+        )
+        lightning_module = estimator.create_lightning_module()
+        transformation = estimator.create_transformation()
+        return estimator.create_predictor(transformation, lightning_module)
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/moirai.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/moirai.py
+from gluonts.torch.model.predictor import PyTorchPredictor
+from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
+
+from ..utils.gluonts_forecaster import GluonTSForecaster
+
+
+class Moirai(GluonTSForecaster):
+    """GluonTS-based forecaster for the Moirai pretrained module."""
+
+    def __init__(
+        self,
+        repo_id: str = "Salesforce/moirai-1.0-R-large",
+        filename: str = "model.ckpt",
+        alias: str = "Moirai",
+    ):
+        super().__init__(repo_id=repo_id, filename=filename, alias=alias)
+
+    def get_predictor(self, prediction_length: int) -> PyTorchPredictor:
+        """Create a predictor for ``prediction_length`` steps.
+
+        Uses a context length of 200, 100 samples, a univariate target with
+        no dynamic real features, and a predictor batch size of 32.
+        """
+        pretrained = MoiraiModule.from_pretrained(self.repo_id)
+        forecast_model = MoiraiForecast(
+            module=pretrained,
+            prediction_length=prediction_length,
+            context_length=200,
+            patch_size="auto",
+            num_samples=100,
+            target_dim=1,
+            feat_dynamic_real_dim=0,
+            past_feat_dynamic_real_dim=0,
+        )
+        return forecast_model.create_predictor(batch_size=32)
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timegpt.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timegpt.py
+import os
+
+import pandas as pd
+from dotenv import load_dotenv
+from nixtla import NixtlaClient
+from typing import Optional
+from ..utils.forecaster import Forecaster
+
+# Runs at import time. NOTE(review): presumably loads NIXTLA_API_KEY (read in
+# TimeGPT._get_client) from a local .env file - confirm.
+load_dotenv()
+
+
+class TimeGPT(Forecaster):
+    """Forecaster that delegates to the Nixtla API through ``NixtlaClient``."""
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: Optional[str] = None,
+        max_retries: int = 1,
+        model: str = "timegpt-1",
+        alias: str = "TimeGPT",
+    ):
+        self.api_key = api_key
+        self.base_url = base_url
+        self.max_retries = max_retries
+        self.model = model
+        self.alias = alias
+
+    def _get_client(self) -> NixtlaClient:
+        """Create a client, reading ``NIXTLA_API_KEY`` when no key was given.
+
+        Raises:
+            KeyError: if no key was passed and the variable is not set.
+        """
+        api_key = (
+            self.api_key
+            if self.api_key is not None
+            else os.environ["NIXTLA_API_KEY"]
+        )
+        return NixtlaClient(
+            api_key=api_key,
+            base_url=self.base_url,
+            max_retries=self.max_retries,
+        )
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Request an ``h``-step forecast and rename ``TimeGPT`` to the alias.
+
+        The ``ds`` column of the response is converted to datetime.
+        """
+        response = self._get_client().forecast(
+            df=df,
+            h=h,
+            freq=freq,
+            model=self.model,
+        )
+        response["ds"] = pd.to_datetime(response["ds"])
+        return response.rename(columns={"TimeGPT": self.alias})
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timesfm.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/timesfm.py
+import pandas as pd
+import timesfm
+import torch
+from paxml import checkpoints
+
+from ..utils.forecaster import Forecaster
+
+
+class TimesFM(Forecaster):
+    """Forecaster backed by the ``timesfm.TimesFm`` model."""
+
+    def __init__(
+        self,
+        repo_id: str = "google/timesfm-1.0-200m",
+        context_length: int = 512,
+        batch_size: int = 64,
+        alias: str = "TimesFM",
+    ):
+        self.repo_id = repo_id
+        self.context_length = context_length
+        self.batch_size = batch_size
+        self.alias = alias
+
+    def get_predictor(
+        self,
+        prediction_length: int,
+    ) -> timesfm.TimesFm:
+        """Build a ``TimesFm`` model and load the checkpoint from ``repo_id``.
+
+        The backend is ``"gpu"`` when CUDA is available, otherwise ``"cpu"``.
+        """
+        if torch.cuda.is_available():
+            backend = "gpu"
+        else:
+            backend = "cpu"
+        predictor = timesfm.TimesFm(
+            context_len=self.context_length,
+            horizon_len=prediction_length,
+            input_patch_len=32,
+            output_patch_len=128,
+            num_layers=20,
+            model_dims=1280,
+            backend=backend,
+            per_core_batch_size=self.batch_size,
+        )
+        predictor.load_from_checkpoint(repo_id=self.repo_id)
+        return predictor
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Forecast ``h`` steps and keep ``unique_id``, ``ds`` and the alias column."""
+        raw = self.get_predictor(prediction_length=h).forecast_on_df(
+            inputs=df,
+            freq=freq,
+            value_name="y",
+            model_name=self.alias,
+            num_jobs=1,
+        )
+        return raw[["unique_id", "ds", self.alias]]
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/__init__.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/__init__.py
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/forecaster.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/forecaster.py
+from typing import List
+
+import pandas as pd
+from gluonts.time_feature.seasonality import get_seasonality as _get_seasonality
+from tqdm import tqdm
+from utilsforecast.processing import (
+    backtest_splits,
+    drop_index_if_pandas,
+    join,
+    maybe_compute_sort_indices,
+    take_rows,
+    vertical_concat,
+)
+
+
+def get_seasonality(freq: str) -> int:
+    """Return the GluonTS seasonality for ``freq``, overriding daily data to 7."""
+    overrides = {"D": 7}
+    return _get_seasonality(freq, seasonalities=overrides)
+
+
+def maybe_convert_col_to_datetime(df: pd.DataFrame, col_name: str) -> pd.DataFrame:
+    """Return ``df`` with ``col_name`` as a datetime column.
+
+    The input frame is returned untouched when the column is already
+    datetime-typed; otherwise a copy with the converted column is returned.
+    """
+    if pd.api.types.is_datetime64_any_dtype(df[col_name]):
+        return df
+    converted = df.copy()
+    converted[col_name] = pd.to_datetime(converted[col_name])
+    return converted
+
+
+class Forecaster:
+    """Base class: subclasses implement ``forecast``; ``cross_validation`` reuses it."""
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Forecast ``h`` steps for the series in ``df``; must be overridden."""
+        raise NotImplementedError
+
+    def cross_validation(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+        n_windows: int = 1,
+        step_size: int | None = None,
+    ) -> pd.DataFrame:
+        """Backtest ``forecast`` over ``n_windows`` windows of length ``h``.
+
+        Args:
+            df: Long frame with ``unique_id``, ``ds`` and ``y`` columns.
+            h: Forecast horizon of each window.
+            freq: Pandas frequency string of the series.
+            n_windows: Number of backtest windows.
+            step_size: Step between windows; defaults to ``h``.
+
+        Returns:
+            Frame whose first columns are ``unique_id``, ``ds``, ``cutoff``
+            and ``y``, followed by the forecast columns.
+
+        Raises:
+            NotImplementedError: if a validation split has more than 3 columns.
+            ValueError: if a window yields fewer rows than its validation split.
+        """
+        df = maybe_convert_col_to_datetime(df, "ds")
+        # mlforecast cv code
+        order = maybe_compute_sort_indices(df, "unique_id", "ds")
+        if order is not None:
+            df = take_rows(df, order)
+        stride = step_size if step_size is not None else h
+        windows = backtest_splits(
+            df,
+            n_windows=n_windows,
+            h=h,
+            id_col="unique_id",
+            time_col="ds",
+            freq=pd.tseries.frequencies.to_offset(freq),
+            step_size=stride,
+        )
+        fold_frames = []
+        for _, (cutoffs, train, valid) in tqdm(enumerate(windows)):
+            if len(valid.columns) > 3:
+                raise NotImplementedError(
+                    "Cross validation with exogenous variables is not yet supported."
+                )
+            preds = self.forecast(df=train, h=h, freq=freq)
+            # Attach each series' cutoff to its predictions.
+            preds = join(preds, cutoffs, on="unique_id", how="left")
+            fold = join(
+                valid[["unique_id", "ds", "y"]],
+                preds,
+                on=["unique_id", "ds"],
+            )
+            if fold.shape[0] < valid.shape[0]:
+                raise ValueError(
+                    "Cross validation result produced less results than expected. "
+                    "Please verify that the frequency parameter (freq) matches your series' "
+                    "and that there aren't any missing periods."
+                )
+            fold_frames.append(fold)
+        combined = drop_index_if_pandas(vertical_concat(fold_frames))
+        leading = ["unique_id", "ds", "cutoff", "y"]
+        trailing = [col for col in combined.columns if col not in leading]
+        return combined[leading + trailing]
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/gluonts_forecaster.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/gluonts_forecaster.py
+from typing import Iterable, List, Any
+
+import pandas as pd
+import torch
+from gluonts.dataset.pandas import PandasDataset
+from gluonts.model.forecast import Forecast
+from gluonts.torch.model.predictor import PyTorchPredictor
+from huggingface_hub import hf_hub_download
+from tqdm import tqdm
+
+from .forecaster import Forecaster
+
+
+def fix_freq(freq: str) -> str:
+    """Drop a trailing ``S`` from multi-character frequency strings.
+
+    Single-character frequencies (including ``"S"`` itself) are returned
+    unchanged.
+    """
+    # see https://github.com/awslabs/gluonts/pull/2462/files
+    has_start_suffix = len(freq) > 1 and freq.endswith("S")
+    return freq[:-1] if has_start_suffix else freq
+
+
+def maybe_convert_col_to_float32(df: pd.DataFrame, col_name: str) -> pd.DataFrame:
+    """Return ``df`` with ``col_name`` stored as float32.
+
+    The input frame is returned untouched when the column is already
+    float32; otherwise a copy with the cast column is returned.
+    """
+    if df[col_name].dtype == "float32":
+        return df
+    casted = df.copy()
+    casted[col_name] = casted[col_name].astype("float32")
+    return casted
+
+
+class GluonTSForecaster(Forecaster):
+    """Base for forecasters built on a GluonTS ``PyTorchPredictor``.
+
+    Subclasses implement ``get_predictor``; this class handles checkpoint
+    download, dataset conversion and turning forecasts into a long frame.
+    """
+
+    def __init__(self, repo_id: str, filename: str, alias: str):
+        self.repo_id = repo_id
+        self.filename = filename
+        self.alias = alias
+
+    @property
+    def checkpoint_path(self) -> str:
+        """Path returned by ``hf_hub_download`` for ``repo_id``/``filename``."""
+        return hf_hub_download(repo_id=self.repo_id, filename=self.filename)
+
+    @property
+    def map_location(self) -> str:
+        """``"cuda:0"`` when CUDA is available, otherwise ``"cpu"``."""
+        return "cuda:0" if torch.cuda.is_available() else "cpu"
+
+    def load(self) -> Any:
+        """Load the checkpoint with ``torch.load`` onto ``map_location``."""
+        path = self.checkpoint_path
+        return torch.load(path, map_location=self.map_location)
+
+    def get_predictor(self, prediction_length: int) -> PyTorchPredictor:
+        """Return a predictor for ``prediction_length`` steps; must be overridden."""
+        raise NotImplementedError
+
+    def gluonts_instance_fcst_to_df(
+        self,
+        fcst: Forecast,
+        freq: str,
+        model_name: str,
+    ) -> pd.DataFrame:
+        """Convert one forecast into a frame with ``ds``, ``unique_id`` and ``model_name``.
+
+        The point forecast is ``fcst.mean``; dates start at the forecast's
+        start date and advance by ``freq``.
+        """
+        mean_path = fcst.mean
+        stamps = pd.date_range(
+            fcst.start_date.to_timestamp(),
+            freq=freq,
+            periods=len(mean_path),
+        )
+        columns = {
+            "ds": stamps,
+            "unique_id": fcst.item_id,
+            model_name: mean_path,
+        }
+        return pd.DataFrame(columns)
+
+    def gluonts_fcsts_to_df(
+        self,
+        fcsts: Iterable[Forecast],
+        freq: str,
+        model_name: str,
+    ) -> pd.DataFrame:
+        """Convert every forecast and stack the results with a fresh index."""
+        frames = [
+            self.gluonts_instance_fcst_to_df(
+                fcst=fcst,
+                freq=freq,
+                model_name=model_name,
+            )
+            for fcst in tqdm(fcsts)
+        ]
+        return pd.concat(frames).reset_index(drop=True)
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+    ) -> pd.DataFrame:
+        """Forecast ``h`` steps for every series in ``df``.
+
+        ``y`` is cast to float32 and the frame is wrapped in a
+        ``PandasDataset`` (with ``fix_freq`` applied to ``freq``) before
+        predicting with 100 samples.
+        """
+        df = maybe_convert_col_to_float32(df, "y")
+        dataset = PandasDataset.from_long_dataframe(
+            df,
+            target="y",
+            item_id="unique_id",
+            timestamp="ds",
+            freq=fix_freq(freq),
+        )
+        predictor = self.get_predictor(prediction_length=h)
+        raw_fcsts = predictor.predict(dataset, num_samples=100)
+        return self.gluonts_fcsts_to_df(
+            raw_fcsts,
+            freq=freq,
+            model_name=self.alias,
+        )
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/parallel_forecaster.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/utils/parallel_forecaster.py
+import os
+from multiprocessing import Pool
+from typing import Callable, List
+
+import pandas as pd
+
+
+class ParallelForecaster:
+    """Mixin that forecasts each ``unique_id`` group in a worker process.
+
+    Subclasses implement ``_local_forecast`` for a single series;
+    ``forecast`` fans the groups out over a ``multiprocessing.Pool`` and
+    concatenates the per-series results.
+    """
+
+    @staticmethod
+    def _pool_size() -> int:
+        """Return the worker count: all CPUs but one, never fewer than one."""
+        # os.cpu_count() may return None, and on a single-CPU host
+        # `cpu_count() - 1` is 0, which multiprocessing.Pool rejects.
+        return max(1, (os.cpu_count() or 1) - 1)
+
+    def _process_group(
+        self,
+        df: pd.DataFrame,
+        func: Callable,
+        **kwargs,
+    ) -> pd.DataFrame:
+        """Apply ``func`` to one group and tag the result with its id.
+
+        ``func`` receives the group without its ``unique_id`` column; the id
+        (taken from the group's first row) is inserted back as the first
+        column of the returned frame.
+        """
+        uid = df["unique_id"].iloc[0]
+        _df = df.drop("unique_id", axis=1)
+        res_df = func(_df, **kwargs)
+        res_df.insert(0, "unique_id", uid)
+        return res_df
+
+    def _apply_parallel(
+        self,
+        df_grouped: pd.DataFrame,
+        func: Callable,
+        **kwargs,
+    ) -> pd.DataFrame:
+        """Run ``func`` on every group in a process pool and concatenate.
+
+        ``df_grouped`` is iterated as ``(key, group_frame)`` pairs.
+        """
+        with Pool(self._pool_size()) as executor:
+            futures = [
+                executor.apply_async(
+                    self._process_group,
+                    args=(df, func),
+                    kwds=kwargs,
+                )
+                for _, df in df_grouped
+            ]
+            # Collect inside the `with` block: leaving it terminates the pool.
+            results = [future.get() for future in futures]
+        return pd.concat(results)
+
+    def _local_forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+        quantiles: List[float] | None = None,
+    ) -> pd.DataFrame:
+        """Forecast a single series; must be overridden by subclasses."""
+        raise NotImplementedError
+
+    def forecast(
+        self,
+        df: pd.DataFrame,
+        h: int,
+        freq: str,
+        quantiles: List[float] | None = None,
+    ) -> pd.DataFrame:
+        """Forecast ``h`` steps for every ``unique_id`` in ``df`` in parallel."""
+        fcst_df = self._apply_parallel(
+            df.groupby("unique_id"),
+            self._local_forecast,
+            h=h,
+            freq=freq,
+            quantiles=quantiles,
+        )
+        return fcst_df
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/utils/download_data.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/utils/download_data.py
+import logging
+from concurrent.futures import ProcessPoolExecutor
+
+import pandas as pd
+
+logging.basicConfig(level=logging.INFO)
+main_logger = logging.getLogger(__name__)
+
+
+def read_parquet_and_assign(uid, url):
+    """Read one parquet file and return its ``unique_id``, ``ds``, ``y`` columns.
+
+    ``unique_id`` is set to ``uid`` on every row and ``ds`` is cast to ``str``.
+    """
+    frame = pd.read_parquet(url)
+    frame["unique_id"] = uid
+    frame["ds"] = frame["ds"].astype(str)
+    wanted = ["unique_id", "ds", "y"]
+    return frame[wanted]
+
+
+def download_data():
+    """Download the 'moirai' test series listed in the catalogue to parquet.
+
+    Reads ``./data/series_catalogue_hourly.csv``, keeps the rows with
+    ``dataset == 'moirai'`` and ``split == 'test'``, reads every series URL
+    in parallel and writes the concatenated data, joined with the catalogue
+    metadata, to ``./data/filtered_datasets/moirai-data.parquet``.
+    """
+    catalogue_splits = pd.read_csv("./data/series_catalogue_hourly.csv")
+    keep_cols = [
+        "unique_id",
+        "frequency",
+        "url",
+        "pandas_frequency",
+        "seasonality",
+        "horizon",
+    ]
+    # assign() builds a new frame, so no column is written into a query()
+    # result (in-place assignment there triggers SettingWithCopyWarning).
+    catalogue_df = (
+        catalogue_splits.query("dataset == 'moirai'")
+        .assign(pandas_frequency="H", seasonality=24, horizon=24)
+        .query("split == 'test'")[keep_cols]
+    )
+    grouped_df = catalogue_df.groupby(["frequency", "pandas_frequency"])
+    for (frequency, pandas_frequency), df in grouped_df:
+        uids, urls = df["unique_id"].values, df["url"].values
+        main_logger.info(
+            f"frequency: {frequency}, pandas_frequency: {pandas_frequency}"
+        )
+        n_uids = len(uids)
+        main_logger.info(f"number of uids: {n_uids}")
+        # At most 10 reader processes, and never more than there are series.
+        max_workers = min(10, n_uids)
+        with ProcessPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(read_parquet_and_assign, uid, url)
+                for uid, url in zip(uids, urls)
+            ]
+            results = [future.result() for future in futures]
+        main_logger.info("dataset read")
+        Y_df = pd.concat(results)
+        Y_df = Y_df.merge(
+            df.drop(columns="url"),
+            on="unique_id",
+            how="left",
+        )
+        # NOTE: the output path does not depend on the group, so if the
+        # catalogue ever yields more than one (frequency, pandas_frequency)
+        # group, each group overwrites the previous one's file.
+        Y_df.to_parquet("./data/filtered_datasets/moirai-data.parquet")
+        del Y_df
+        main_logger.info("dataset saved")
+
+
+if __name__ == "__main__":  # run the download only when executed as a script
+    download_data()
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/utils/experiment_handler.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/utils/experiment_handler.py
+import warnings
+from dataclasses import dataclass, asdict
+from functools import partial
+from pathlib import Path
+from typing import Any, Callable, List
+
+import pandas as pd
+from utilsforecast.evaluation import evaluate
+from utilsforecast.losses import mae, _zero_to_nan
+
+from .logger_config import setup_logger
+
+warnings.simplefilter(
+    action="ignore",
+    category=FutureWarning,
+)  # suppress every FutureWarning raised after this module is imported
+main_logger = setup_logger(__name__)  # module-level logger built by the local setup_logger helper
+
+
+def mase(
+    df: pd.DataFrame,
+    models: List[str],
+    seasonality: int,
+    train_df: pd.DataFrame,
+    id_col: str = "unique_id",
+    target_col: str = "y",
+) -> pd.DataFrame:
+    mean_abs_err = mae(df, models, id_col, target_col)
+    mean_abs_err = mean_abs_err.set_index(id_col)
+    # assume train_df is sorted
+    lagged = train_df.groupby(id_col, observed=True)[target_col].shift(seasonality)
+    scale = train_df[target_col].sub(lagged).abs()
+    scale = scale.groupby(train_df[id_col], observed=True).mean()
+    scale[scale < 1e-2] = 0.0
+    res = mean_abs_err.div(_zero_to_nan(scale), axis=0).fillna(0)
+    res.index.name = id_col
+    res = res.reset_index()
+    return res
+
+
+def generate_train_cv_splits(
+    df: pd.DataFrame,
+    cutoffs: pd.DataFrame,
+) -> pd.DataFrame:
+    """
+    based on `cutoffs` (columns `unique_id`, `cutoffs`)
+    generates train cv splits using `df`
+    """
+    df = df.merge(cutoffs, on="unique_id", how="outer")
+    df = df.query("ds <= cutoff")
+    df = df.reset_index(drop=True)
+    return df
+
+
+@dataclass
+class DatasetParams:
+    frequency: str
+    pandas_frequency: str
+    horizon: int
+    seasonality: int
+
+    @staticmethod
+    def _get_value_from_df_col(
+        df: pd.DataFrame,
+        col: str,
+        dtype: Callable | None = None,
+    ) -> Any:
+        col_values = df[col].unique()
+        if len(col_values) > 1:
+            raise ValueError(f"{col} is not unique: {col_values}")
+        value = col_values[0]
+        if dtype is not None:
+            value = dtype(value)
+        return value
+
+    @classmethod
+    def from_df(cls, df: pd.DataFrame) -> "DatasetParams":
+        dataset_params = {}
+        dataset_params_cols = [
+            "frequency",
+            "pandas_frequency",
+            "horizon",
+            "seasonality",
+        ]
+        dataset_params_cols_dtypes = [str, str, int, int]
+        for col, dtype in zip(dataset_params_cols, dataset_params_cols_dtypes):
+            dataset_params[col] = cls._get_value_from_df_col(df, col, dtype=dtype)
+        return cls(**dataset_params)
+
+
+@dataclass
+class ExperimentDataset(DatasetParams):
+    df: pd.DataFrame
+
+    @classmethod
+    def from_df(cls, df: pd.DataFrame) -> "ExperimentDataset":
+        """
+        Parameters
+        ----------
+        df : pd.DataFrame
+            df should have columns:
+            unique_id, ds, y, frequency, pandas_frequency, horizon, seasonality
+        """
+        ds_params = DatasetParams.from_df(df=df)
+        df = df[["unique_id", "ds", "y"]]  # type: ignore
+        return cls(
+            df=df,
+            **asdict(ds_params),
+        )
+
+    @classmethod
+    def from_parquet(
+        cls,
+        parquet_path: str | Path,
+    ) -> "ExperimentDataset":
+        df = pd.read_parquet(parquet_path)
+        return cls.from_df(df=df)
+
+    def evaluate_forecast_df(
+        self,
+        forecast_df: pd.DataFrame,
+        models: List[str],
+    ) -> pd.DataFrame:
+        """
+        Parameters
+        ----------
+        forecast_df : pd.DataFrame
+            df should have columns: unique_id, ds, cutoff, y, and models
+        """
+        for model in models:
+            if forecast_df[model].isna().sum() > 0:
+                print(forecast_df.loc[forecast_df[model].isna()]["unique_id"].unique())
+                raise ValueError(f"model {model} has NaN values")
+        cutoffs = forecast_df[["unique_id", "cutoff"]].drop_duplicates()
+        train_cv_splits = generate_train_cv_splits(df=self.df, cutoffs=cutoffs)
+
+        def add_id_cutoff(df: pd.DataFrame):
+            df["id_cutoff"] = (
+                df["unique_id"].astype(str) + "-" + df["cutoff"].astype(str)
+            )
+
+        for df in [cutoffs, train_cv_splits, forecast_df]:
+            add_id_cutoff(df)
+        partial_mase = partial(mase, seasonality=self.seasonality)
+        eval_df = evaluate(
+            df=forecast_df,
+            train_df=train_cv_splits,
+            metrics=[partial_mase],
+            models=models,
+            id_col="id_cutoff",
+        )
+        eval_df = eval_df.merge(cutoffs, on=["id_cutoff"])
+        eval_df = eval_df.drop(columns=["id_cutoff"])
+        eval_df = eval_df[["unique_id", "cutoff", "metric"] + models]
+        return eval_df
+
+
+@dataclass
+class ForecastDataset:
+    forecast_df: pd.DataFrame
+    time_df: pd.DataFrame
+
+    @classmethod
+    def from_dir(cls, dir: str | Path):
+        dir_ = Path(dir)
+        forecast_df = pd.read_parquet(dir_ / "forecast_df.parquet")
+        time_df = pd.read_parquet(dir_ / "time_df.parquet")
+        return cls(forecast_df=forecast_df, time_df=time_df)
+
+    @staticmethod
+    def is_forecast_ready(dir: str | Path):
+        dir_ = Path(dir)
+        forecast_path = dir_ / "forecast_df.parquet"
+        time_path = dir_ / "time_df.parquet"
+        return forecast_path.exists() and time_path.exists()
+
+    def save_to_dir(self, dir: str | Path):
+        dir_ = Path(dir)
+        dir_.mkdir(parents=True, exist_ok=True)
+        self.forecast_df.to_parquet(dir_ / "forecast_df.parquet")
+        self.time_df.to_parquet(dir_ / "time_df.parquet")
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/utils/filter_data.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/utils/filter_data.py
+"""
+this module takes Nixtla's benchmarking data 
+and filters it to prevent azureml from crashing
+in the following cases:
+- too short series, see https://learn.microsoft.com/en-us/azure/machine-learning/concept-automl-forecasting-methods?view=azureml-api-2#data-length-requirements
+"""
+import logging
+from pathlib import Path
+
+import fire
+import numpy as np
+import pandas as pd
+
+main_logger = logging.getLogger(__name__)  # module-level logger used by filter_and_clean_dataset
+main_logger.setLevel(logging.INFO)  # lets INFO records through this logger; no handler is attached here
+
+
+def get_min_size_per_series(dataset_path: str) -> int:
+    if "Daily" in dataset_path or "Hourly" in dataset_path:
+        return 1_000
+    elif "Monthly" in dataset_path:
+        return 10 * 12
+    else:
+        return 1_000 // 7
+
+
+def filter_and_clean_dataset(
+    dataset_path: str,
+    max_series: int = 1_000,
+    random_seed: int = 420,
+):
+    main_logger.info(f"Processing dataset {dataset_path}")
+    df = pd.read_parquet(dataset_path)
+    df = df.drop_duplicates(["unique_id", "ds"])  # type: ignore
+    df = df.sort_values(["unique_id", "ds"])
+    min_size_per_series = get_min_size_per_series(dataset_path)
+    df = (
+        df.groupby("unique_id")
+        .filter(lambda x: len(x) >= min_size_per_series)
+        .reset_index(drop=True)
+    )
+    uids = df["unique_id"].unique()  # type: ignore
+    if len(uids) > max_series:
+        np.random.seed(random_seed)
+        uids = np.random.choice(uids, max_series, replace=False)  # type: ignore
+        df = df.query("unique_id in @uids")  # type: ignore
+        main_logger.info(f"Filtering out {len(uids) - max_series} series")
+    n_series = len(df["unique_id"].unique())  # type: ignore
+    main_logger.info(f"Number of series: {n_series}")
+    if n_series == 0:
+        raise ValueError("No series left after filtering")
+    # finally we clean some strange dates
+    mask = df["ds"].str.endswith(":01")  # type: ignore
+    df.loc[mask, "ds"] = df.loc[mask, "ds"].str[:-3] + ":00"
+    # save the dataset
+    dataset_path = Path(dataset_path)  # type: ignore
+    filtered_dataset_path = dataset_path.parent / "filtered_datasets" / dataset_path.name  # type: ignore
+    filtered_dataset_path.parent.mkdir(exist_ok=True, parents=True)
+    df.to_parquet(filtered_dataset_path)
+    main_logger.info(f"Filtered dataset saved to {filtered_dataset_path}")
+
+
+if __name__ == "__main__":
+    fire.Fire(filter_and_clean_dataset)
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/utils/logger_config.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/utils/logger_config.py
+import logging
+
+
+def setup_logger(logger_name, log_file=None):
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(logging.INFO)
+    formatter = logging.Formatter(
+        "%(asctime)s,%(levelname)s,%(module)s,%(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    return logger
--- a/experiments/lag-llama/Makefile
+++ b/experiments/lag-llama/Makefile
+.PHONY: download_lag_llama_code
+# Clone the upstream lag-llama repository into a scratch directory, copy the
+# pieces the experiment needs into the current directory, then remove the clone.
+# The cp sources carry no trailing slash: BSD/macOS cp copies the *contents*
+# of a directory named with a trailing slash instead of the directory itself.
+download_lag_llama_code:
+	@rm -rf tempdir
+	@git clone https://github.com/time-series-foundation-models/lag-llama tempdir
+	@cp -R tempdir/data .
+	@cp -R tempdir/gluon_utils .
+	@cp -R tempdir/lag_llama .
+	@cp tempdir/requirements.txt lag-llama-requirements.txt
+	@rm -rf tempdir
+
+# Download the lag-llama.ckpt checkpoint of time-series-foundation-models/Lag-Llama into ./models/.
+download_lag_llama_model:
+	@huggingface-cli download time-series-foundation-models/Lag-Llama lag-llama.ckpt --local-dir ./models/
--- a/experiments/lag-llama/README.md
+++ b/experiments/lag-llama/README.md
+# LagLlama is 40% less accurate than a simple SeasonalNaive and 1,000x slower.
+
+We present a fully reproducible experiment showing that SeasonalNaive significantly outperforms LagLlama, a recently introduced open-source foundational model for time series forecasting (a deep learning architecture pre-trained on time series datasets). Specifically, **SeasonalNaive achieves 42%, 24%, and 16% better performance** in terms of MASE, MAPE, and CRPS respectively, and boasts **a 1,000x speed advantage**. These findings are based on an extensive analysis covering 105,289 unique time series from the M1, M3, M4, and Tourism datasets, which were omitted in the original LagLlama paper.
+
+# Introduction
+
+In the field of time series forecasting, recent developments have introduced foundational models such as LagLlama, which uses deep learning and extensive data for pretraining, aiming to enhance predictive performance and model complexity. LagLlama is to be praised as one of the first open-source foundational models. However, contrary to expectations, our analysis indicates that the traditional SeasonalNaive model, known for its straightforward approach of extending past seasonal trends into future predictions, outperforms LagLlama in terms of both accuracy and computational efficiency.
+
+## Empirical Evaluation
+
+The original paper uses 3,113 time series to assess the model performance. The original paper only reports CRPS and omits point forecast error metrics widely used in academia and industry, e.g. MASE and MAPE.
+
+Our evaluation encompasses 105,289 unique time series from different datasets, including M1, M3, M4, and Tourism, covering yearly, quarterly, monthly, weekly, daily, and hourly frequencies. This diverse dataset selection allows for a robust assessment of the models across various time series characteristics and forecasting horizons. We also reproduce results for Pedestrian Counts and Weather originally included in the paper/code to show that we are running LagLlama correctly. 
+
+## Results
+
+The results are summarized in the following table, highlighting the performance metrics of MASE, MAPE, CRPS, and TIME (measured in seconds). The best results are indicated in **bold** for easy reference.
+
+<img width="953" alt="image" src="https://github.com/Nixtla/nixtla/assets/10517170/8e65338d-930e-4837-8bf5-2e7aeddad5cc">
+
+
+## Reproducibility
+
+To ensure the reproducibility of our findings, the experiments were conducted on an AWS g5.4xlarge GPU instance equipped with 16 vCPUs, 64 GiB of RAM, and an NVIDIA A10G Tensor Core GPU (24 GiB). The complete code can be found in this repo.
+
+### Instructions
+
+1. Create a python environment using:
+```
+mamba env create -f environment.yml
+conda activate lag-llama
+```
+
+2. Add lag-llama code to your environment
+
+```
+make download_lag_llama_code
+```
+
+3. Download lag-llama model
+
+```
+make download_lag_llama_model
+```
+
+4. Install lag-llama requirements
+
+```
+pip install -r lag-llama-requirements.txt
+```
+
+5. Run complete experiments reported in the table
+
+```
+python -m src.main
+```
+
+### References
+- **Lag-Llama Paper**: [Towards Foundation Models for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2310.08278)
+- **SeasonalNaive Implementation**: [GitHub Repository](https://github.com/nixtla/statsforecast/)
+- **CRPS Replication Note**: The CRPS performance for `LagLlama` is replicated from the model's publicly available [Colab notebook](https://colab.research.google.com/drive/13HHKYL_HflHBKxDWycXgIUAHSeHRR5eo?usp=sharing), ensuring a fair comparison.
--- a/experiments/lag-llama/environment.yml
+++ b/experiments/lag-llama/environment.yml
+name: lag-llama
+channels:
+  - conda-forge
+  - defaults
+  - anaconda
+dependencies:
+  - jupyterlab
+  - pip
+  - python=3.10
+  - pip:
+    - datasetsforecast
+    - fire
+    - huggingface_hub[cli]
+    - neuralforecast
+    - orjson
+    - statsforecast
+    - utilsforecast
+