Commit e3f7f7b3 authored by chenzk

v1.0
project:
output-dir: _docs
website:
title: "neuralforecast"
site-url: "https://Nixtla.github.io/neuralforecast/"
description: "Time series forecasting suite using deep learning models"
repo-branch: main
repo-url: "https://github.com/Nixtla/neuralforecast/"
.cell-output pre {
margin-left: 0.8rem;
margin-top: 0;
background: none;
border-left: 2px solid lightsalmon;
border-top-left-radius: 0;
border-top-right-radius: 0;
}
.cell-output .sourceCode {
background: none;
margin-top: 0;
}
.cell > .sourceCode {
margin-bottom: 0;
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "524620c1",
"metadata": {},
"outputs": [],
"source": [
"#| default_exp tsdataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15392f6f",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"id": "12fa25a4",
"metadata": {},
"source": [
"# PyTorch Dataset/Loader\n",
"> Torch Dataset for Time Series\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2508f7a9-1433-4ad8-8f2f-0078c6ed6c3c",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"from fastcore.test import test_eq\n",
"from nbdev.showdoc import show_doc\n",
"from neuralforecast.utils import generate_series"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44065066-e72a-431f-938f-1528adef9fe8",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"import warnings\n",
"from collections.abc import Mapping\n",
"from typing import List, Optional, Union\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pytorch_lightning as pl\n",
"import torch\n",
"import utilsforecast.processing as ufp\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from utilsforecast.compat import DataFrame, pl_Series"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "323a7a6e-38c3-496d-8f1e-cad05f643d41",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class TimeSeriesLoader(DataLoader):\n",
" \"\"\"TimeSeriesLoader DataLoader.\n",
" [Source code](https://github.com/Nixtla/neuralforecast1/blob/main/neuralforecast/tsdataset.py).\n",
"\n",
" Small change to PyTorch's Data loader. \n",
" Combines a dataset and a sampler, and provides an iterable over the given dataset.\n",
"\n",
" The class `~torch.utils.data.DataLoader` supports both map-style and\n",
" iterable-style datasets with single- or multi-process loading, customizing\n",
" loading order and optional automatic batching (collation) and memory pinning. \n",
" \n",
" **Parameters:**<br>\n",
" `batch_size`: (int, optional): how many samples per batch to load (default: 1).<br>\n",
" `shuffle`: (bool, optional): set to `True` to have the data reshuffled at every epoch (default: `False`).<br>\n",
" `sampler`: (Sampler or Iterable, optional): defines the strategy to draw samples from the dataset.<br>\n",
" Can be any `Iterable` with `__len__` implemented. If specified, `shuffle` must not be specified.<br>\n",
" \"\"\"\n",
" def __init__(self, dataset, **kwargs):\n",
" if 'collate_fn' in kwargs:\n",
" kwargs.pop('collate_fn')\n",
" kwargs_ = {**kwargs, **dict(collate_fn=self._collate_fn)}\n",
" DataLoader.__init__(self, dataset=dataset, **kwargs_)\n",
" \n",
" def _collate_fn(self, batch):\n",
" elem = batch[0]\n",
" elem_type = type(elem)\n",
"\n",
" if isinstance(elem, torch.Tensor):\n",
" out = None\n",
" if torch.utils.data.get_worker_info() is not None:\n",
" # If we're in a background process, concatenate directly into a\n",
" # shared memory tensor to avoid an extra copy\n",
" numel = sum(x.numel() for x in batch)\n",
" storage = elem.storage()._new_shared(numel, device=elem.device)\n",
" out = elem.new(storage).resize_(len(batch), *list(elem.size()))\n",
" return torch.stack(batch, 0, out=out)\n",
"\n",
" elif isinstance(elem, Mapping):\n",
" if elem['static'] is None:\n",
" return dict(temporal=self.collate_fn([d['temporal'] for d in batch]),\n",
" temporal_cols = elem['temporal_cols'],\n",
" y_idx=elem['y_idx'])\n",
" \n",
" return dict(static=self.collate_fn([d['static'] for d in batch]),\n",
" static_cols = elem['static_cols'],\n",
" temporal=self.collate_fn([d['temporal'] for d in batch]),\n",
" temporal_cols = elem['temporal_cols'],\n",
" y_idx=elem['y_idx'])\n",
"\n",
" raise TypeError(f'Unknown {elem_type}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93e94050-0290-43ad-9a73-c4626bba9541",
"metadata": {},
"outputs": [],
"source": [
"show_doc(TimeSeriesLoader)"
]
},
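{
"cell_type": "code",
"execution_count": null,
"id": "9b7d4e1a",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch (not exported; the `toy_*` names are only for this demo):\n",
"# TimeSeriesLoader only swaps in its own collate function, so any map-style\n",
"# dataset of tensors (or of the dicts produced by TimeSeriesDataset below)\n",
"# gets stacked into batches.\n",
"toy_data = [torch.arange(4, dtype=torch.float32) + i for i in range(6)]\n",
"toy_loader = TimeSeriesLoader(toy_data, batch_size=3, shuffle=False)\n",
"next(iter(toy_loader)).shape"
]
},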
{
"cell_type": "code",
"execution_count": null,
"id": "05687429-c139-44c0-adb9-097c616908cc",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class TimeSeriesDataset(Dataset):\n",
"\n",
" def __init__(self,\n",
" temporal,\n",
" temporal_cols,\n",
" indptr,\n",
" max_size: int,\n",
" min_size: int,\n",
" y_idx: int,\n",
" static=None,\n",
" static_cols=None,\n",
" sorted=False,\n",
" ):\n",
" super().__init__()\n",
" self.temporal = self._as_torch_copy(temporal)\n",
" self.temporal_cols = pd.Index(list(temporal_cols))\n",
"\n",
" if static is not None:\n",
" self.static = self._as_torch_copy(static)\n",
" self.static_cols = static_cols\n",
" else:\n",
" self.static = static\n",
" self.static_cols = static_cols\n",
"\n",
" self.indptr = indptr\n",
" self.n_groups = self.indptr.size - 1\n",
" self.max_size = max_size\n",
" self.min_size = min_size\n",
" self.y_idx = y_idx\n",
"\n",
" # Upadated flag. To protect consistency, dataset can only be updated once\n",
" self.updated = False\n",
" self.sorted = sorted\n",
"\n",
" def __getitem__(self, idx):\n",
" if isinstance(idx, int):\n",
" # Parse temporal data and pad its left\n",
" temporal = torch.zeros(size=(len(self.temporal_cols), self.max_size),\n",
" dtype=torch.float32)\n",
" ts = self.temporal[self.indptr[idx] : self.indptr[idx + 1], :]\n",
" temporal[:len(self.temporal_cols), -len(ts):] = ts.permute(1, 0)\n",
"\n",
" # Add static data if available\n",
" static = None if self.static is None else self.static[idx,:]\n",
"\n",
" item = dict(temporal=temporal, temporal_cols=self.temporal_cols,\n",
" static=static, static_cols=self.static_cols,\n",
" y_idx=self.y_idx)\n",
"\n",
" return item\n",
" raise ValueError(f'idx must be int, got {type(idx)}')\n",
"\n",
" def __len__(self):\n",
" return self.n_groups\n",
"\n",
" def __repr__(self):\n",
" return f'TimeSeriesDataset(n_data={self.temporal.shape[0]:,}, n_groups={self.n_groups:,})'\n",
"\n",
" def __eq__(self, other):\n",
" if not hasattr(other, 'data') or not hasattr(other, 'indptr'):\n",
" return False\n",
" return np.allclose(self.data, other.data) and np.array_equal(self.indptr, other.indptr)\n",
"\n",
" def _as_torch_copy(\n",
" self,\n",
" x: Union[np.ndarray, torch.Tensor],\n",
" dtype: torch.dtype = torch.float32,\n",
" ) -> torch.Tensor:\n",
" if isinstance(x, np.ndarray):\n",
" x = torch.from_numpy(x)\n",
" return x.to(dtype, copy=False).clone()\n",
"\n",
" def align(self, df: DataFrame, id_col: str, time_col: str, target_col: str) -> 'TimeSeriesDataset':\n",
" # Protect consistency\n",
" df = ufp.copy_if_pandas(df, deep=False)\n",
"\n",
" # Add Nones to missing columns (without available_mask)\n",
" temporal_cols = self.temporal_cols.copy()\n",
" for col in temporal_cols:\n",
" if col not in df.columns:\n",
" df = ufp.assign_columns(df, col, np.nan)\n",
" if col == 'available_mask':\n",
" df = ufp.assign_columns(df, col, 1.0)\n",
" \n",
" # Sort columns to match self.temporal_cols (without available_mask)\n",
" df = df[ [id_col, time_col] + temporal_cols.tolist() ]\n",
"\n",
" # Process future_df\n",
" dataset, *_ = TimeSeriesDataset.from_df(\n",
" df=df,\n",
" sort_df=self.sorted,\n",
" id_col=id_col,\n",
" time_col=time_col,\n",
" target_col=target_col,\n",
" )\n",
" return dataset\n",
"\n",
" def append(self, futr_dataset: 'TimeSeriesDataset') -> 'TimeSeriesDataset':\n",
" \"\"\"Add future observations to the dataset. Returns a copy\"\"\"\n",
" if self.indptr.size != futr_dataset.indptr.size:\n",
" raise ValueError('Cannot append `futr_dataset` with different number of groups.')\n",
" # Define and fill new temporal with updated information\n",
" len_temporal, col_temporal = self.temporal.shape\n",
" len_futr = futr_dataset.temporal.shape[0]\n",
" new_temporal = torch.empty(size=(len_temporal + len_futr, col_temporal))\n",
" new_sizes = np.diff(self.indptr) + np.diff(futr_dataset.indptr)\n",
" new_indptr = np.append(0, new_sizes.cumsum()).astype(np.int32)\n",
" new_max_size = np.max(new_sizes)\n",
"\n",
" for i in range(self.n_groups):\n",
" curr_slice = slice(self.indptr[i], self.indptr[i + 1])\n",
" curr_size = curr_slice.stop - curr_slice.start\n",
" futr_slice = slice(futr_dataset.indptr[i], futr_dataset.indptr[i + 1])\n",
" new_temporal[new_indptr[i] : new_indptr[i] + curr_size] = self.temporal[curr_slice]\n",
" new_temporal[new_indptr[i] + curr_size : new_indptr[i + 1]] = futr_dataset.temporal[futr_slice]\n",
" \n",
" # Define new dataset\n",
" updated_dataset = TimeSeriesDataset(temporal=new_temporal,\n",
" temporal_cols=self.temporal_cols.copy(),\n",
" indptr=new_indptr,\n",
" max_size=new_max_size,\n",
" min_size=self.min_size,\n",
" static=self.static,\n",
" y_idx=self.y_idx,\n",
" static_cols=self.static_cols,\n",
" sorted=self.sorted)\n",
"\n",
" return updated_dataset\n",
"\n",
" @staticmethod\n",
" def update_dataset(dataset, futr_df, id_col='unique_id', time_col='ds', target_col='y'):\n",
" futr_dataset = dataset.align(\n",
" futr_df, id_col=id_col, time_col=time_col, target_col=target_col\n",
" )\n",
" return dataset.append(futr_dataset)\n",
" \n",
" @staticmethod\n",
" def trim_dataset(dataset, left_trim: int = 0, right_trim: int = 0):\n",
" \"\"\"\n",
" Trim temporal information from a dataset.\n",
" Returns temporal indexes [t+left:t-right] for all series.\n",
" \"\"\"\n",
" if dataset.min_size <= left_trim + right_trim:\n",
" raise Exception(f'left_trim + right_trim ({left_trim} + {right_trim}) \\\n",
" must be lower than the shorter time series ({dataset.min_size})')\n",
"\n",
" # Define and fill new temporal with trimmed information \n",
" len_temporal, col_temporal = dataset.temporal.shape\n",
" total_trim = (left_trim + right_trim) * dataset.n_groups\n",
" new_temporal = torch.zeros(size=(len_temporal-total_trim, col_temporal))\n",
" new_indptr = [0]\n",
"\n",
" acum = 0\n",
" for i in range(dataset.n_groups):\n",
" series_length = dataset.indptr[i + 1] - dataset.indptr[i]\n",
" new_length = series_length - left_trim - right_trim\n",
" new_temporal[acum:(acum+new_length), :] = dataset.temporal[dataset.indptr[i]+left_trim : \\\n",
" dataset.indptr[i + 1]-right_trim, :]\n",
" acum += new_length\n",
" new_indptr.append(acum)\n",
"\n",
" new_max_size = dataset.max_size-left_trim-right_trim\n",
" new_min_size = dataset.min_size-left_trim-right_trim\n",
" \n",
" # Define new dataset\n",
" updated_dataset = TimeSeriesDataset(temporal=new_temporal,\n",
" temporal_cols= dataset.temporal_cols.copy(),\n",
" indptr=np.array(new_indptr, dtype=np.int32),\n",
" max_size=new_max_size,\n",
" min_size=new_min_size,\n",
" y_idx=dataset.y_idx,\n",
" static=dataset.static,\n",
" static_cols=dataset.static_cols,\n",
" sorted=dataset.sorted)\n",
"\n",
" return updated_dataset\n",
"\n",
" @staticmethod\n",
" def from_df(df, static_df=None, sort_df=False, id_col='unique_id', time_col='ds', target_col='y'):\n",
" # TODO: protect on equality of static_df + df indexes\n",
" if isinstance(df, pd.DataFrame) and df.index.name == id_col:\n",
" warnings.warn(\n",
" \"Passing the id as index is deprecated, please provide it as a column instead.\",\n",
" FutureWarning,\n",
" )\n",
" df = df.reset_index(id_col)\n",
" # Define indexes if not given\n",
" if static_df is not None:\n",
" if isinstance(static_df, pd.DataFrame) and static_df.index.name == id_col:\n",
" warnings.warn(\n",
" \"Passing the id as index is deprecated, please provide it as a column instead.\",\n",
" FutureWarning,\n",
" )\n",
" if sort_df:\n",
" static_df = ufp.sort(static_df, by=id_col)\n",
"\n",
" ids, times, data, indptr, sort_idxs = ufp.process_df(df, id_col, time_col, target_col)\n",
" # processor sets y as the first column\n",
" temporal_cols = pd.Index(\n",
" [target_col] + [c for c in df.columns if c not in (id_col, time_col, target_col)]\n",
" )\n",
" temporal = data.astype(np.float32, copy=False)\n",
" indices = ids\n",
" if isinstance(df, pd.DataFrame):\n",
" dates = pd.Index(times, name=time_col)\n",
" else:\n",
" dates = pl_Series(time_col, times)\n",
" sizes = np.diff(indptr)\n",
" max_size = max(sizes)\n",
" min_size = min(sizes)\n",
"\n",
" # Add Available mask efficiently (without adding column to df)\n",
" if 'available_mask' not in df.columns:\n",
" available_mask = np.ones((len(temporal),1), dtype=np.float32)\n",
" temporal = np.append(temporal, available_mask, axis=1)\n",
" temporal_cols = temporal_cols.append(pd.Index(['available_mask']))\n",
"\n",
" # Static features\n",
" if static_df is not None:\n",
" static_cols = [col for col in static_df.columns if col != id_col]\n",
" static = ufp.to_numpy(static_df[static_cols])\n",
" static_cols = pd.Index(static_cols)\n",
" else:\n",
" static = None\n",
" static_cols = None\n",
"\n",
" dataset = TimeSeriesDataset(\n",
" temporal=temporal,\n",
" temporal_cols=temporal_cols,\n",
" static=static,\n",
" static_cols=static_cols,\n",
" indptr=indptr,\n",
" max_size=max_size,\n",
" min_size=min_size,\n",
" sorted=sort_df,\n",
" y_idx=0,\n",
" )\n",
" ds = df[time_col].to_numpy()\n",
" if sort_idxs is not None:\n",
" ds = ds[sort_idxs]\n",
" return dataset, indices, dates, ds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61a818bf-28d2-4561-8036-475f6fe78d0a",
"metadata": {},
"outputs": [],
"source": [
"show_doc(TimeSeriesDataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52c07552-b6fa-4d10-8792-71743dcdfd1d",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"# Testing sort_df=True functionality\n",
"temporal_df = generate_series(n_series=1000, \n",
" n_temporal_features=0, equal_ends=False)\n",
"sorted_temporal_df = temporal_df.sort_values(['unique_id', 'ds'])\n",
"unsorted_temporal_df = sorted_temporal_df.sample(frac=1.0)\n",
"dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=unsorted_temporal_df,\n",
" sort_df=True)\n",
"\n",
"np.testing.assert_allclose(dataset.temporal[:,:-1], \n",
" sorted_temporal_df.drop(columns=['unique_id', 'ds']).values)\n",
"test_eq(indices, pd.Series(sorted_temporal_df['unique_id'].unique()))\n",
"test_eq(dates, temporal_df.groupby('unique_id')['ds'].max().values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24e51cf3",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class _FilesDataset:\n",
" def __init__(\n",
" self,\n",
" files: List[str],\n",
" temporal_cols: List[str],\n",
" static_cols: Optional[List[str]],\n",
" id_col: str,\n",
" time_col: str,\n",
" target_col: str,\n",
" min_size: int,\n",
" ):\n",
" self.files = files\n",
" self.temporal_cols = pd.Index(temporal_cols)\n",
" self.static_cols = pd.Index(static_cols) if static_cols is not None else None\n",
" self.id_col = id_col\n",
" self.time_col = time_col\n",
" self.target_col = target_col\n",
" self.min_size = min_size"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4dae43c-4d11-4bbc-a431-ac33b004859a",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class TimeSeriesDataModule(pl.LightningDataModule):\n",
" \n",
" def __init__(\n",
" self, \n",
" dataset: TimeSeriesDataset,\n",
" batch_size=32, \n",
" valid_batch_size=1024,\n",
" num_workers=0,\n",
" drop_last=False,\n",
" shuffle_train=True,\n",
" ):\n",
" super().__init__()\n",
" self.dataset = dataset\n",
" self.batch_size = batch_size\n",
" self.valid_batch_size = valid_batch_size\n",
" self.num_workers = num_workers\n",
" self.drop_last = drop_last\n",
" self.shuffle_train = shuffle_train\n",
" \n",
" def train_dataloader(self):\n",
" loader = TimeSeriesLoader(\n",
" self.dataset,\n",
" batch_size=self.batch_size, \n",
" num_workers=self.num_workers,\n",
" shuffle=self.shuffle_train,\n",
" drop_last=self.drop_last\n",
" )\n",
" return loader\n",
" \n",
" def val_dataloader(self):\n",
" loader = TimeSeriesLoader(\n",
" self.dataset, \n",
" batch_size=self.valid_batch_size, \n",
" num_workers=self.num_workers,\n",
" shuffle=False,\n",
" drop_last=self.drop_last\n",
" )\n",
" return loader\n",
" \n",
" def predict_dataloader(self):\n",
" loader = TimeSeriesLoader(\n",
" self.dataset,\n",
" batch_size=self.valid_batch_size, \n",
" num_workers=self.num_workers,\n",
" shuffle=False\n",
" )\n",
" return loader"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8535a15f-b5cf-4ca1-bfa2-e53a9e8c3bc0",
"metadata": {},
"outputs": [],
"source": [
"show_doc(TimeSeriesDataModule)"
]
},
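{
"cell_type": "code",
"execution_count": null,
"id": "3c2f8d71",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch (not exported; the `example_*` names are only for this demo):\n",
"# build a dataset with `from_df`, wrap it in the LightningDataModule and take\n",
"# one training batch. The padded temporal tensor has shape\n",
"# [batch_size, n_temporal_cols, max_size].\n",
"example_df = generate_series(n_series=8, n_temporal_features=0)\n",
"example_dataset, *_ = TimeSeriesDataset.from_df(df=example_df, sort_df=True)\n",
"example_module = TimeSeriesDataModule(dataset=example_dataset, batch_size=4)\n",
"example_batch = next(iter(example_module.train_dataloader()))\n",
"example_batch['temporal'].shape, list(example_batch['temporal_cols'])"
]
},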
{
"cell_type": "code",
"execution_count": null,
"id": "b534d29d-eecc-43ba-8468-c23305fa24a2",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"batch_size = 128\n",
"data = TimeSeriesDataModule(dataset=dataset, \n",
" batch_size=batch_size, drop_last=True)\n",
"for batch in data.train_dataloader():\n",
" test_eq(batch['temporal'].shape, (batch_size, 2, 500))\n",
" test_eq(batch['temporal_cols'], ['y', 'available_mask'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4481272a-ea3a-4b63-8f14-9445d8f41338",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"batch_size = 128\n",
"n_static_features = 2\n",
"n_temporal_features = 4\n",
"temporal_df, static_df = generate_series(n_series=1000,\n",
" n_static_features=n_static_features,\n",
" n_temporal_features=n_temporal_features, \n",
" equal_ends=False)\n",
"\n",
"dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=temporal_df,\n",
" static_df=static_df,\n",
" sort_df=True)\n",
"data = TimeSeriesDataModule(dataset=dataset,\n",
" batch_size=batch_size, drop_last=True)\n",
"\n",
"for batch in data.train_dataloader():\n",
" test_eq(batch['temporal'].shape, (batch_size, n_temporal_features + 2, 500))\n",
" test_eq(batch['temporal_cols'],\n",
" ['y'] + [f'temporal_{i}' for i in range(n_temporal_features)] + ['available_mask'])\n",
" \n",
" test_eq(batch['static'].shape, (batch_size, n_static_features))\n",
" test_eq(batch['static_cols'], [f'static_{i}' for i in range(n_static_features)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "252b59f6",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"# Testing sort_df=True functionality\n",
"temporal_df = generate_series(n_series=2,\n",
" n_temporal_features=2, equal_ends=True)\n",
"temporal_df = temporal_df.groupby('unique_id').tail(10)\n",
"temporal_df = temporal_df.reset_index()\n",
"temporal_full_df = temporal_df.sort_values(['unique_id', 'ds']).reset_index(drop=True)\n",
"temporal_full_df.loc[temporal_full_df.ds > '2001-05-11', ['y', 'temporal_0']] = None\n",
"\n",
"split1_df = temporal_full_df.loc[temporal_full_df.ds <= '2001-05-11']\n",
"split2_df = temporal_full_df.loc[temporal_full_df.ds > '2001-05-11']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6eab7367",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"# Testing available mask\n",
"temporal_df_w_mask = temporal_df.copy()\n",
"temporal_df_w_mask['available_mask'] = 1\n",
"\n",
"# Mask with all 1's\n",
"dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=temporal_df_w_mask,\n",
" sort_df=True)\n",
"mask_average = dataset.temporal[:, -1].mean()\n",
"np.testing.assert_almost_equal(mask_average, 1.0000)\n",
"\n",
"# Add 0's to available mask\n",
"temporal_df_w_mask.loc[temporal_df_w_mask.ds > '2001-05-11', 'available_mask'] = 0\n",
"dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=temporal_df_w_mask,\n",
" sort_df=True)\n",
"mask_average = dataset.temporal[:, -1].mean()\n",
"np.testing.assert_almost_equal(mask_average, 0.7000)\n",
"\n",
"# Available mask not in last column\n",
"temporal_df_w_mask = temporal_df_w_mask[['unique_id','ds','y','available_mask', 'temporal_0','temporal_1']]\n",
"dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=temporal_df_w_mask,\n",
" sort_df=True)\n",
"mask_average = dataset.temporal[:, 1].mean()\n",
"np.testing.assert_almost_equal(mask_average, 0.7000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0d23f1a",
"metadata": {},
"outputs": [],
"source": [
"# To test correct future_df wrangling of the `update_df` method\n",
"# We are checking that we are able to recover the AirPassengers dataset\n",
"# using the dataframe or splitting it into parts and initializing."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39f999c2",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"# FULL DATASET\n",
"dataset_full, indices_full, dates_full, ds_full = TimeSeriesDataset.from_df(df=temporal_full_df,\n",
" sort_df=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30f927e2",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"# SPLIT_1 DATASET\n",
"dataset_1, indices_1, dates_1, ds_1 = TimeSeriesDataset.from_df(df=split1_df,\n",
" sort_df=False)\n",
"dataset_1 = dataset_1.update_dataset(dataset_1, split2_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "468a6879",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"np.testing.assert_almost_equal(dataset_full.temporal.numpy(), dataset_1.temporal.numpy())\n",
"test_eq(dataset_full.max_size, dataset_1.max_size)\n",
"test_eq(dataset_full.indptr, dataset_1.indptr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "556f852c",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"\n",
"# Testing trim_dataset functionality\n",
"n_static_features = 0\n",
"n_temporal_features = 2\n",
"temporal_df = generate_series(n_series=100,\n",
" min_length=50,\n",
" max_length=100,\n",
" n_static_features=n_static_features,\n",
" n_temporal_features=n_temporal_features, \n",
" equal_ends=False)\n",
"dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=temporal_df,\n",
" static_df=static_df,\n",
" sort_df=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db7b1a51",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"left_trim = 10\n",
"right_trim = 20\n",
"dataset_trimmed = dataset.trim_dataset(dataset, left_trim=left_trim, right_trim=right_trim)\n",
"\n",
"np.testing.assert_almost_equal(dataset.temporal[dataset.indptr[50]+left_trim:dataset.indptr[51]-right_trim].numpy(),\n",
" dataset_trimmed.temporal[dataset_trimmed.indptr[50]:dataset_trimmed.indptr[51]].numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "624a3fbb-cb78-4440-a645-54699fd82660",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"#| polars\n",
"import polars"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1bdd479-b4c7-4a40-93eb-2b7c9b969a80",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"#| polars\n",
"temporal_df2 = temporal_df.copy()\n",
"for col in ('unique_id', 'temporal_0', 'temporal_1'):\n",
" temporal_df2[col] = temporal_df2[col].cat.codes\n",
"temporal_pl = polars.from_pandas(temporal_df2).sample(fraction=1.0)\n",
"static_pl = polars.from_pandas(static_df.assign(unique_id=lambda df: df['unique_id'].astype('int64')))\n",
"dataset_pl, indices_pl, dates_pl, ds_pl = TimeSeriesDataset.from_df(df=temporal_pl, static_df=static_df, sort_df=True)\n",
"for attr in ('static_cols', 'temporal_cols', 'min_size', 'max_size', 'n_groups'):\n",
" test_eq(getattr(dataset, attr), getattr(dataset_pl, attr))\n",
"torch.testing.assert_allclose(dataset.temporal, dataset_pl.temporal)\n",
"torch.testing.assert_allclose(dataset.static, dataset_pl.static)\n",
"pd.testing.assert_series_equal(indices.astype('int64'), indices_pl.to_pandas().astype('int64'))\n",
"pd.testing.assert_index_equal(dates, pd.Index(dates_pl, name='ds'))\n",
"np.testing.assert_array_equal(ds, ds_pl)\n",
"np.testing.assert_array_equal(dataset.indptr, dataset_pl.indptr)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "959ea63c",
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class _DistributedTimeSeriesDataModule(TimeSeriesDataModule):\n",
" def __init__(\n",
" self,\n",
" dataset: _FilesDataset,\n",
" batch_size=32,\n",
" valid_batch_size=1024,\n",
" num_workers=0,\n",
" drop_last=False,\n",
" shuffle_train=True,\n",
" ):\n",
" super(TimeSeriesDataModule, self).__init__()\n",
" self.files_ds = dataset\n",
" self.batch_size = batch_size\n",
" self.valid_batch_size = valid_batch_size\n",
" self.num_workers = num_workers\n",
" self.drop_last = drop_last\n",
" self.shuffle_train = shuffle_train\n",
"\n",
" def setup(self, stage):\n",
" import torch.distributed as dist\n",
"\n",
" df = pd.read_parquet(self.files_ds.files[dist.get_rank()])\n",
" if self.files_ds.static_cols is not None:\n",
" static_df = (\n",
" df[[self.files_ds.id_col] + self.files_ds.static_cols.tolist()]\n",
" .groupby(self.files_ds.id_col, observed=True)\n",
" .head(1)\n",
" )\n",
" df = df.drop(columns=self.files_ds.static_cols)\n",
" else:\n",
" static_df = None\n",
" self.dataset, *_ = TimeSeriesDataset.from_df(\n",
" df=df,\n",
" static_df=static_df,\n",
" sort_df=True,\n",
" id_col=self.files_ds.id_col,\n",
" time_col=self.files_ds.time_col,\n",
" target_col=self.files_ds.target_col,\n",
" )"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "python3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| default_exp utils"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Example Data\n",
"\n",
"> The `core.NeuralForecast` class allows you to efficiently fit multiple `NeuralForecast` models for large sets of time series. It operates with pandas DataFrame `df` that identifies individual series and datestamps with the `unique_id` and `ds` columns, and the `y` column denotes the target time series variable. To assist development, we declare useful datasets that we use throughout all `NeuralForecast`'s unit tests.<br><br>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"import random\n",
"from itertools import chain\n",
"from typing import List\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from nbdev.showdoc import add_docs, show_doc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Synthetic Panel Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"def generate_series(n_series: int,\n",
" freq: str = 'D',\n",
" min_length: int = 50,\n",
" max_length: int = 500,\n",
" n_temporal_features: int = 0,\n",
" n_static_features: int = 0,\n",
" equal_ends: bool = False,\n",
" seed: int = 0) -> pd.DataFrame:\n",
" \"\"\"Generate Synthetic Panel Series.\n",
"\n",
" Generates `n_series` of frequency `freq` of different lengths in the interval [`min_length`, `max_length`].\n",
" If `n_temporal_features > 0`, then each serie gets temporal features with random values.\n",
" If `n_static_features > 0`, then a static dataframe is returned along the temporal dataframe.\n",
" If `equal_ends == True` then all series end at the same date.\n",
"\n",
" **Parameters:**<br>\n",
" `n_series`: int, number of series for synthetic panel.<br>\n",
" `min_length`: int, minimal length of synthetic panel's series.<br>\n",
" `max_length`: int, minimal length of synthetic panel's series.<br>\n",
" `n_temporal_features`: int, default=0, number of temporal exogenous variables for synthetic panel's series.<br>\n",
" `n_static_features`: int, default=0, number of static exogenous variables for synthetic panel's series.<br>\n",
" `equal_ends`: bool, if True, series finish in the same date stamp `ds`.<br>\n",
" `freq`: str, frequency of the data, [panda's available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).<br>\n",
"\n",
" **Returns:**<br>\n",
" `freq`: pandas.DataFrame, synthetic panel with columns [`unique_id`, `ds`, `y`] and exogenous.\n",
" \"\"\"\n",
" seasonalities = {'D': 7, 'M': 12}\n",
" season = seasonalities[freq]\n",
"\n",
" rng = np.random.RandomState(seed)\n",
" series_lengths = rng.randint(min_length, max_length + 1, n_series)\n",
" total_length = series_lengths.sum()\n",
"\n",
" dates = pd.date_range('2000-01-01', periods=max_length, freq=freq).values\n",
" uids = [\n",
" np.repeat(i, serie_length) for i, serie_length in enumerate(series_lengths)\n",
" ]\n",
" if equal_ends:\n",
" ds = [dates[-serie_length:] for serie_length in series_lengths]\n",
" else:\n",
" ds = [dates[:serie_length] for serie_length in series_lengths]\n",
"\n",
" y = np.arange(total_length) % season + rng.rand(total_length) * 0.5\n",
" temporal_df = pd.DataFrame(dict(unique_id=chain.from_iterable(uids),\n",
" ds=chain.from_iterable(ds),\n",
" y=y))\n",
"\n",
" random.seed(seed)\n",
" for i in range(n_temporal_features):\n",
" random.seed(seed)\n",
" temporal_values = [\n",
" [random.randint(0, 100)] * serie_length for serie_length in series_lengths\n",
" ]\n",
" temporal_df[f'temporal_{i}'] = np.hstack(temporal_values)\n",
" temporal_df[f'temporal_{i}'] = temporal_df[f'temporal_{i}'].astype('category')\n",
" if i == 0:\n",
" temporal_df['y'] = temporal_df['y'] * \\\n",
" (1 + temporal_df[f'temporal_{i}'].cat.codes)\n",
"\n",
" temporal_df['unique_id'] = temporal_df['unique_id'].astype('category')\n",
" temporal_df['unique_id'] = temporal_df['unique_id'].cat.as_ordered()\n",
"\n",
" if n_static_features > 0:\n",
" static_features = np.random.uniform(low=0.0, high=1.0, \n",
" size=(n_series, n_static_features))\n",
" static_df = pd.DataFrame.from_records(static_features, \n",
" columns = [f'static_{i}'for i in range(n_static_features)])\n",
" \n",
" static_df['unique_id'] = np.arange(n_series)\n",
" static_df['unique_id'] = static_df['unique_id'].astype('category')\n",
" static_df['unique_id'] = static_df['unique_id'].cat.as_ordered()\n",
"\n",
" return temporal_df, static_df\n",
"\n",
" return temporal_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"show_doc(generate_series, title_level=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"synthetic_panel = generate_series(n_series=2)\n",
"synthetic_panel.groupby('unique_id').head(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"temporal_df, static_df = generate_series(n_series=1000, n_static_features=2,\n",
" n_temporal_features=4, equal_ends=False)\n",
"static_df.head(2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. AirPassengers Data\n",
"\n",
"The classic Box & Jenkins airline data. Monthly totals of international airline passengers, 1949 to 1960.\n",
"\n",
"It has been used as a reference on several forecasting libraries, since it is a series that shows clear trends and seasonalities it offers a nice opportunity to quickly showcase a model's predictions performance."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"AirPassengers = np.array([112., 118., 132., 129., 121., 135., 148., 148., 136., 119., 104.,\n",
" 118., 115., 126., 141., 135., 125., 149., 170., 170., 158., 133.,\n",
" 114., 140., 145., 150., 178., 163., 172., 178., 199., 199., 184.,\n",
" 162., 146., 166., 171., 180., 193., 181., 183., 218., 230., 242.,\n",
" 209., 191., 172., 194., 196., 196., 236., 235., 229., 243., 264.,\n",
" 272., 237., 211., 180., 201., 204., 188., 235., 227., 234., 264.,\n",
" 302., 293., 259., 229., 203., 229., 242., 233., 267., 269., 270.,\n",
" 315., 364., 347., 312., 274., 237., 278., 284., 277., 317., 313.,\n",
" 318., 374., 413., 405., 355., 306., 271., 306., 315., 301., 356.,\n",
" 348., 355., 422., 465., 467., 404., 347., 305., 336., 340., 318.,\n",
" 362., 348., 363., 435., 491., 505., 404., 359., 310., 337., 360.,\n",
" 342., 406., 396., 420., 472., 548., 559., 463., 407., 362., 405.,\n",
" 417., 391., 419., 461., 472., 535., 622., 606., 508., 461., 390.,\n",
" 432.], dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"AirPassengersDF = pd.DataFrame({'unique_id': np.ones(len(AirPassengers)),\n",
" 'ds': pd.date_range(start='1949-01-01',\n",
" periods=len(AirPassengers), freq=pd.offsets.MonthEnd()),\n",
" 'y': AirPassengers})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"AirPassengersDF.head(12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#We are going to plot the ARIMA predictions, and the prediction intervals.\n",
"fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n",
"plot_df = AirPassengersDF.set_index('ds')\n",
"\n",
"plot_df[['y']].plot(ax=ax, linewidth=2)\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Timestamp [t]', fontsize=20)\n",
"ax.legend(prop={'size': 15})\n",
"ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_static_features = 3\n",
"n_series = 5\n",
"\n",
"static_features = np.random.uniform(low=0.0, high=1.0, \n",
" size=(n_series, n_static_features))\n",
"static_df = pd.DataFrame.from_records(static_features, \n",
" columns = [f'static_{i}'for i in range(n_static_features)])\n",
"static_df['unique_id'] = np.arange(n_series)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"static_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. Panel AirPassengers Data\n",
"\n",
"Extension to classic Box & Jenkins airline data. Monthly totals of international airline passengers, 1949 to 1960.\n",
"\n",
"It includes two series with static, temporal and future exogenous variables, that can help to explore the performance of models like `NBEATSx` and `TFT`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"\n",
"# Declare Panel Data\n",
"unique_id = np.concatenate([['Airline1']*len(AirPassengers), ['Airline2']*len(AirPassengers)])\n",
"ds = np.tile(\n",
" pd.date_range(\n",
" start='1949-01-01', periods=len(AirPassengers), freq=pd.offsets.MonthEnd()\n",
" ).to_numpy(), \n",
" 2,\n",
")\n",
"y = np.concatenate([AirPassengers, AirPassengers+300])\n",
"\n",
"AirPassengersPanel = pd.DataFrame({'unique_id': unique_id, 'ds': ds, 'y': y})\n",
"\n",
"# For future exogenous variables\n",
"# Declare SeasonalNaive12 and fill first 12 values with y\n",
"snaive = AirPassengersPanel.groupby('unique_id')['y'].shift(periods=12).reset_index(drop=True)\n",
"AirPassengersPanel['trend'] = range(len(AirPassengersPanel))\n",
"AirPassengersPanel['y_[lag12]'] = snaive.fillna(AirPassengersPanel['y'])\n",
"\n",
"# Declare Static Data\n",
"unique_id = np.array(['Airline1', 'Airline2'])\n",
"airline1_dummy = [0, 1]\n",
"airline2_dummy = [1, 0]\n",
"AirPassengersStatic = pd.DataFrame({'unique_id': unique_id,\n",
" 'airline1': airline1_dummy,\n",
" 'airline2': airline2_dummy})\n",
"\n",
"AirPassengersPanel.groupby('unique_id').tail(4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n",
"plot_df = AirPassengersPanel.set_index('ds')\n",
"\n",
"plot_df.groupby('unique_id')['y'].plot(legend=True)\n",
"ax.set_title('AirPassengers Panel Data', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Timestamp [t]', fontsize=20)\n",
"ax.legend(title='unique_id', prop={'size': 15})\n",
"ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n",
"plot_df = AirPassengersPanel[AirPassengersPanel.unique_id=='Airline1'].set_index('ds')\n",
"\n",
"plot_df[['y', 'trend', 'y_[lag12]']].plot(ax=ax, linewidth=2)\n",
"ax.set_title('Box-Cox AirPassengers Data', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Timestamp [t]', fontsize=20)\n",
"ax.legend(prop={'size': 15})\n",
"ax.grid()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# 4. Time Features"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We have developed a utility that generates normalized calendar features for use as absolute positional embeddings in Transformer-based models. These embeddings capture seasonal patterns in time series data and can be easily incorporated into the model architecture. Additionally, the features can be used as exogenous variables in other models to inform them of calendar patterns in the data."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"**References**<br>\n",
"- [Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, Wancai Zhang. \"Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting\"](https://arxiv.org/abs/2012.07436)<br>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"class TimeFeature:\n",
" def __init__(self):\n",
" pass\n",
"\n",
" def __call__(self, index: pd.DatetimeIndex):\n",
" return print('Overwrite with corresponding feature')\n",
"\n",
" def __repr__(self):\n",
" return self.__class__.__name__ + \"()\"\n",
"\n",
"class SecondOfMinute(TimeFeature):\n",
" \"\"\"Minute of hour encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return index.second / 59.0 - 0.5\n",
"\n",
"class MinuteOfHour(TimeFeature):\n",
" \"\"\"Minute of hour encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return index.minute / 59.0 - 0.5\n",
"\n",
"class HourOfDay(TimeFeature):\n",
" \"\"\"Hour of day encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return index.hour / 23.0 - 0.5\n",
"\n",
"class DayOfWeek(TimeFeature):\n",
" \"\"\"Hour of day encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return index.dayofweek / 6.0 - 0.5\n",
"\n",
"class DayOfMonth(TimeFeature):\n",
" \"\"\"Day of month encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return (index.day - 1) / 30.0 - 0.5\n",
"\n",
"class DayOfYear(TimeFeature):\n",
" \"\"\"Day of year encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return (index.dayofyear - 1) / 365.0 - 0.5\n",
"\n",
"class MonthOfYear(TimeFeature):\n",
" \"\"\"Month of year encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return (index.month - 1) / 11.0 - 0.5\n",
"\n",
"class WeekOfYear(TimeFeature):\n",
" \"\"\"Week of year encoded as value between [-0.5, 0.5]\"\"\"\n",
" def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:\n",
" return (index.week - 1) / 52.0 - 0.5\n",
"\n",
"def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:\n",
" \"\"\"\n",
" Returns a list of time features that will be appropriate for the given frequency string.\n",
" Parameters\n",
" ----------\n",
" freq_str\n",
" Frequency string of the form [multiple][granularity] such as \"12H\", \"5min\", \"1D\" etc.\n",
" \"\"\"\n",
"\n",
" if freq_str not in ['Q', 'M', 'MS', 'W', 'D', 'B', 'H', 'T', 'S']:\n",
" raise Exception('Frequency not supported')\n",
" \n",
" if freq_str in ['Q','M', 'MS']:\n",
" return [cls() for cls in [MonthOfYear]]\n",
" elif freq_str == 'W':\n",
" return [cls() for cls in [DayOfMonth, WeekOfYear]]\n",
" elif freq_str in ['D','B']:\n",
" return [cls() for cls in [DayOfWeek, DayOfMonth, DayOfYear]]\n",
" elif freq_str == 'H':\n",
" return [cls() for cls in [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear]]\n",
" elif freq_str == 'T':\n",
" return [cls() for cls in [MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear]]\n",
" else:\n",
" return [cls() for cls in [SecondOfMinute, MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear]]\n",
"\n",
"def augment_calendar_df(df, freq='H'):\n",
" \"\"\"\n",
" > * Q - [month]\n",
" > * M - [month]\n",
" > * W - [Day of month, week of year]\n",
" > * D - [Day of week, day of month, day of year]\n",
" > * B - [Day of week, day of month, day of year]\n",
" > * H - [Hour of day, day of week, day of month, day of year]\n",
" > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]\n",
" > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]\n",
" *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.\n",
" \"\"\"\n",
" df = df.copy()\n",
"\n",
" freq_map = {\n",
" 'Q':['month'],\n",
" 'M':['month'],\n",
" 'MS':['month'],\n",
" 'W':['monthday', 'yearweek'],\n",
" 'D':['weekday','monthday','yearday'],\n",
" 'B':['weekday','monthday','yearday'],\n",
" 'H':['dayhour','weekday','monthday','yearday'],\n",
" 'T':['hourminute','dayhour','weekday','monthday','yearday'],\n",
" 'S':['minutesecond','hourminute','dayhour','weekday','monthday','yearday']\n",
" }\n",
"\n",
" ds_col = pd.to_datetime(df.ds.values)\n",
" ds_data = np.vstack([feat(ds_col) for feat in time_features_from_frequency_str(freq)]).transpose(1,0)\n",
" ds_data = pd.DataFrame(ds_data, columns=freq_map[freq])\n",
" \n",
" return pd.concat([df, ds_data], axis=1), freq_map[freq]"
]
},
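{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch: the features selected for a daily frequency and the\n",
"# normalized values they produce on a small DatetimeIndex (`example_index`\n",
"# is only a demo name).\n",
"example_index = pd.date_range('2000-01-01', periods=5, freq='D')\n",
"daily_features = time_features_from_frequency_str('D')\n",
"daily_features, [feat(example_index) for feat in daily_features]"
]
},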
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"AirPassengerPanelCalendar, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
"AirPassengerPanelCalendar.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_df = AirPassengerPanelCalendar[AirPassengerPanelCalendar.unique_id=='Airline1'].set_index('ds')\n",
"plt.plot(plot_df['month'])\n",
"plt.grid()\n",
"plt.xlabel('Datestamp')\n",
"plt.ylabel('Normalized Month')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"def get_indexer_raise_missing(idx: pd.Index, vals: List[str]) -> List[int]:\n",
" idxs = idx.get_indexer(vals)\n",
" missing = [v for i, v in zip(idxs, vals) if i == -1]\n",
" if missing:\n",
" raise ValueError(f'The following values are missing from the index: {missing}')\n",
" return idxs"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "python3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
__version__ = "1.7.1"
__all__ = ['NeuralForecast']
from .core import NeuralForecast
from .common._base_model import DistributedConfig # noqa: F401
# Autogenerated by nbdev
d = { 'settings': { 'branch': 'main',
'doc_baseurl': '/neuralforecast/',
'doc_host': 'https://Nixtla.github.io',
'git_url': 'https://github.com/Nixtla/neuralforecast/',
'lib_path': 'neuralforecast'},
'syms': { 'neuralforecast.auto': { 'neuralforecast.auto.AutoAutoformer': ('models.html#autoautoformer', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoAutoformer.__init__': ( 'models.html#autoautoformer.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoAutoformer.get_default_config': ( 'models.html#autoautoformer.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoBiTCN': ('models.html#autobitcn', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoBiTCN.__init__': ('models.html#autobitcn.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoBiTCN.get_default_config': ( 'models.html#autobitcn.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDLinear': ('models.html#autodlinear', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDLinear.__init__': ( 'models.html#autodlinear.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDLinear.get_default_config': ( 'models.html#autodlinear.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDeepAR': ('models.html#autodeepar', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDeepAR.__init__': ( 'models.html#autodeepar.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDeepAR.get_default_config': ( 'models.html#autodeepar.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDilatedRNN': ('models.html#autodilatedrnn', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDilatedRNN.__init__': ( 'models.html#autodilatedrnn.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoDilatedRNN.get_default_config': ( 'models.html#autodilatedrnn.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoFEDformer': ('models.html#autofedformer', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoFEDformer.__init__': ( 'models.html#autofedformer.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoFEDformer.get_default_config': ( 'models.html#autofedformer.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoGRU': ('models.html#autogru', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoGRU.__init__': ('models.html#autogru.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoGRU.get_default_config': ( 'models.html#autogru.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoHINT': ('models.html#autohint', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoHINT.__init__': ('models.html#autohint.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoHINT._fit_model': ( 'models.html#autohint._fit_model',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoHINT.get_default_config': ( 'models.html#autohint.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoInformer': ('models.html#autoinformer', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoInformer.__init__': ( 'models.html#autoinformer.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoInformer.get_default_config': ( 'models.html#autoinformer.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoLSTM': ('models.html#autolstm', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoLSTM.__init__': ('models.html#autolstm.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoLSTM.get_default_config': ( 'models.html#autolstm.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoMLP': ('models.html#automlp', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoMLP.__init__': ('models.html#automlp.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoMLP.get_default_config': ( 'models.html#automlp.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoMLPMultivariate': ( 'models.html#automlpmultivariate',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoMLPMultivariate.__init__': ( 'models.html#automlpmultivariate.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoMLPMultivariate.get_default_config': ( 'models.html#automlpmultivariate.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNBEATS': ('models.html#autonbeats', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNBEATS.__init__': ( 'models.html#autonbeats.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNBEATS.get_default_config': ( 'models.html#autonbeats.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNBEATSx': ('models.html#autonbeatsx', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNBEATSx.__init__': ( 'models.html#autonbeatsx.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNBEATSx.get_default_config': ( 'models.html#autonbeatsx.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNHITS': ('models.html#autonhits', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNHITS.__init__': ('models.html#autonhits.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNHITS.get_default_config': ( 'models.html#autonhits.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNLinear': ('models.html#autonlinear', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNLinear.__init__': ( 'models.html#autonlinear.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoNLinear.get_default_config': ( 'models.html#autonlinear.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoPatchTST': ('models.html#autopatchtst', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoPatchTST.__init__': ( 'models.html#autopatchtst.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoPatchTST.get_default_config': ( 'models.html#autopatchtst.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoRNN': ('models.html#autornn', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoRNN.__init__': ('models.html#autornn.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoRNN.get_default_config': ( 'models.html#autornn.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoStemGNN': ('models.html#autostemgnn', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoStemGNN.__init__': ( 'models.html#autostemgnn.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoStemGNN.get_default_config': ( 'models.html#autostemgnn.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTCN': ('models.html#autotcn', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTCN.__init__': ('models.html#autotcn.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTCN.get_default_config': ( 'models.html#autotcn.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTFT': ('models.html#autotft', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTFT.__init__': ('models.html#autotft.__init__', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTFT.get_default_config': ( 'models.html#autotft.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTSMixer': ('models.html#autotsmixer', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTSMixer.__init__': ( 'models.html#autotsmixer.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTSMixer.get_default_config': ( 'models.html#autotsmixer.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTSMixerx': ('models.html#autotsmixerx', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTSMixerx.__init__': ( 'models.html#autotsmixerx.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTSMixerx.get_default_config': ( 'models.html#autotsmixerx.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTimesNet': ('models.html#autotimesnet', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTimesNet.__init__': ( 'models.html#autotimesnet.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoTimesNet.get_default_config': ( 'models.html#autotimesnet.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoVanillaTransformer': ( 'models.html#autovanillatransformer',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoVanillaTransformer.__init__': ( 'models.html#autovanillatransformer.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoVanillaTransformer.get_default_config': ( 'models.html#autovanillatransformer.get_default_config',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoiTransformer': ('models.html#autoitransformer', 'neuralforecast/auto.py'),
'neuralforecast.auto.AutoiTransformer.__init__': ( 'models.html#autoitransformer.__init__',
'neuralforecast/auto.py'),
'neuralforecast.auto.AutoiTransformer.get_default_config': ( 'models.html#autoitransformer.get_default_config',
'neuralforecast/auto.py')},
'neuralforecast.compat': {},
'neuralforecast.core': { 'neuralforecast.core.NeuralForecast': ('core.html#neuralforecast', 'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.__init__': ( 'core.html#neuralforecast.__init__',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._check_nan': ( 'core.html#neuralforecast._check_nan',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._get_model_names': ( 'core.html#neuralforecast._get_model_names',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._get_needed_futr_exog': ( 'core.html#neuralforecast._get_needed_futr_exog',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._no_refit_cross_validation': ( 'core.html#neuralforecast._no_refit_cross_validation',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._prepare_fit': ( 'core.html#neuralforecast._prepare_fit',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._reset_models': ( 'core.html#neuralforecast._reset_models',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._scalers_fit_transform': ( 'core.html#neuralforecast._scalers_fit_transform',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._scalers_target_inverse_transform': ( 'core.html#neuralforecast._scalers_target_inverse_transform',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._scalers_transform': ( 'core.html#neuralforecast._scalers_transform',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.cross_validation': ( 'core.html#neuralforecast.cross_validation',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.fit': ('core.html#neuralforecast.fit', 'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.get_missing_future': ( 'core.html#neuralforecast.get_missing_future',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.load': ('core.html#neuralforecast.load', 'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.make_future_dataframe': ( 'core.html#neuralforecast.make_future_dataframe',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.predict': ( 'core.html#neuralforecast.predict',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.predict_insample': ( 'core.html#neuralforecast.predict_insample',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast.save': ('core.html#neuralforecast.save', 'neuralforecast/core.py'),
'neuralforecast.core._id_as_idx': ('core.html#_id_as_idx', 'neuralforecast/core.py'),
'neuralforecast.core._insample_times': ('core.html#_insample_times', 'neuralforecast/core.py'),
'neuralforecast.core._warn_id_as_idx': ('core.html#_warn_id_as_idx', 'neuralforecast/core.py')},
'neuralforecast.losses.numpy': { 'neuralforecast.losses.numpy._divide_no_nan': ( 'losses.numpy.html#_divide_no_nan',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy._metric_protections': ( 'losses.numpy.html#_metric_protections',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.mae': ('losses.numpy.html#mae', 'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.mape': ( 'losses.numpy.html#mape',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.mase': ( 'losses.numpy.html#mase',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.mqloss': ( 'losses.numpy.html#mqloss',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.mse': ('losses.numpy.html#mse', 'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.quantile_loss': ( 'losses.numpy.html#quantile_loss',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.rmae': ( 'losses.numpy.html#rmae',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.rmse': ( 'losses.numpy.html#rmse',
'neuralforecast/losses/numpy.py'),
'neuralforecast.losses.numpy.smape': ( 'losses.numpy.html#smape',
'neuralforecast/losses/numpy.py')},
'neuralforecast.losses.pytorch': { 'neuralforecast.losses.pytorch.Accuracy': ( 'losses.pytorch.html#accuracy',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Accuracy.__call__': ( 'losses.pytorch.html#accuracy.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Accuracy.__init__': ( 'losses.pytorch.html#accuracy.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Accuracy.domain_map': ( 'losses.pytorch.html#accuracy.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.BasePointLoss': ( 'losses.pytorch.html#basepointloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.BasePointLoss.__init__': ( 'losses.pytorch.html#basepointloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.BasePointLoss._compute_weights': ( 'losses.pytorch.html#basepointloss._compute_weights',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.BasePointLoss.domain_map': ( 'losses.pytorch.html#basepointloss.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss': ( 'losses.pytorch.html#distributionloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss.__call__': ( 'losses.pytorch.html#distributionloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss.__init__': ( 'losses.pytorch.html#distributionloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss.get_distribution': ( 'losses.pytorch.html#distributionloss.get_distribution',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss.sample': ( 'losses.pytorch.html#distributionloss.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM': ( 'losses.pytorch.html#gmm',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.__call__': ( 'losses.pytorch.html#gmm.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.__init__': ( 'losses.pytorch.html#gmm.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.domain_map': ( 'losses.pytorch.html#gmm.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.neglog_likelihood': ( 'losses.pytorch.html#gmm.neglog_likelihood',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.sample': ( 'losses.pytorch.html#gmm.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.scale_decouple': ( 'losses.pytorch.html#gmm.scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberLoss': ( 'losses.pytorch.html#huberloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberLoss.__call__': ( 'losses.pytorch.html#huberloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberLoss.__init__': ( 'losses.pytorch.html#huberloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberMQLoss': ( 'losses.pytorch.html#hubermqloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberMQLoss.__call__': ( 'losses.pytorch.html#hubermqloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberMQLoss.__init__': ( 'losses.pytorch.html#hubermqloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberMQLoss._compute_weights': ( 'losses.pytorch.html#hubermqloss._compute_weights',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberMQLoss.domain_map': ( 'losses.pytorch.html#hubermqloss.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberQLoss': ( 'losses.pytorch.html#huberqloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberQLoss.__call__': ( 'losses.pytorch.html#huberqloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberQLoss.__init__': ( 'losses.pytorch.html#huberqloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAE': ( 'losses.pytorch.html#mae',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAE.__call__': ( 'losses.pytorch.html#mae.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAE.__init__': ( 'losses.pytorch.html#mae.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAPE': ( 'losses.pytorch.html#mape',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAPE.__call__': ( 'losses.pytorch.html#mape.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAPE.__init__': ( 'losses.pytorch.html#mape.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MASE': ( 'losses.pytorch.html#mase',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MASE.__call__': ( 'losses.pytorch.html#mase.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MASE.__init__': ( 'losses.pytorch.html#mase.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MQLoss': ( 'losses.pytorch.html#mqloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MQLoss.__call__': ( 'losses.pytorch.html#mqloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MQLoss.__init__': ( 'losses.pytorch.html#mqloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MQLoss._compute_weights': ( 'losses.pytorch.html#mqloss._compute_weights',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MQLoss.domain_map': ( 'losses.pytorch.html#mqloss.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MSE': ( 'losses.pytorch.html#mse',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MSE.__call__': ( 'losses.pytorch.html#mse.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MSE.__init__': ( 'losses.pytorch.html#mse.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM': ( 'losses.pytorch.html#nbmm',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.__call__': ( 'losses.pytorch.html#nbmm.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.__init__': ( 'losses.pytorch.html#nbmm.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.domain_map': ( 'losses.pytorch.html#nbmm.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.neglog_likelihood': ( 'losses.pytorch.html#nbmm.neglog_likelihood',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.sample': ( 'losses.pytorch.html#nbmm.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.scale_decouple': ( 'losses.pytorch.html#nbmm.scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM': ( 'losses.pytorch.html#pmm',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.__call__': ( 'losses.pytorch.html#pmm.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.__init__': ( 'losses.pytorch.html#pmm.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.domain_map': ( 'losses.pytorch.html#pmm.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.neglog_likelihood': ( 'losses.pytorch.html#pmm.neglog_likelihood',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.sample': ( 'losses.pytorch.html#pmm.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.scale_decouple': ( 'losses.pytorch.html#pmm.scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.QuantileLoss': ( 'losses.pytorch.html#quantileloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.QuantileLoss.__call__': ( 'losses.pytorch.html#quantileloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.QuantileLoss.__init__': ( 'losses.pytorch.html#quantileloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.RMSE': ( 'losses.pytorch.html#rmse',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.RMSE.__call__': ( 'losses.pytorch.html#rmse.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.RMSE.__init__': ( 'losses.pytorch.html#rmse.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.SMAPE': ( 'losses.pytorch.html#smape',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.SMAPE.__call__': ( 'losses.pytorch.html#smape.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.SMAPE.__init__': ( 'losses.pytorch.html#smape.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.TukeyLoss': ( 'losses.pytorch.html#tukeyloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.TukeyLoss.__call__': ( 'losses.pytorch.html#tukeyloss.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.TukeyLoss.__init__': ( 'losses.pytorch.html#tukeyloss.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.TukeyLoss.domain_map': ( 'losses.pytorch.html#tukeyloss.domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.TukeyLoss.masked_mean': ( 'losses.pytorch.html#tukeyloss.masked_mean',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Tweedie': ( 'losses.pytorch.html#tweedie',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Tweedie.__init__': ( 'losses.pytorch.html#tweedie.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Tweedie.log_prob': ( 'losses.pytorch.html#tweedie.log_prob',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Tweedie.mean': ( 'losses.pytorch.html#tweedie.mean',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Tweedie.sample': ( 'losses.pytorch.html#tweedie.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.Tweedie.variance': ( 'losses.pytorch.html#tweedie.variance',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch._divide_no_nan': ( 'losses.pytorch.html#_divide_no_nan',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch._weighted_mean': ( 'losses.pytorch.html#_weighted_mean',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.bernoulli_domain_map': ( 'losses.pytorch.html#bernoulli_domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.bernoulli_scale_decouple': ( 'losses.pytorch.html#bernoulli_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.est_alpha': ( 'losses.pytorch.html#est_alpha',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.est_beta': ( 'losses.pytorch.html#est_beta',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.est_lambda': ( 'losses.pytorch.html#est_lambda',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.level_to_outputs': ( 'losses.pytorch.html#level_to_outputs',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.nbinomial_domain_map': ( 'losses.pytorch.html#nbinomial_domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.nbinomial_scale_decouple': ( 'losses.pytorch.html#nbinomial_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.normal_domain_map': ( 'losses.pytorch.html#normal_domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.normal_scale_decouple': ( 'losses.pytorch.html#normal_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.poisson_domain_map': ( 'losses.pytorch.html#poisson_domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.poisson_scale_decouple': ( 'losses.pytorch.html#poisson_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.quantiles_to_outputs': ( 'losses.pytorch.html#quantiles_to_outputs',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.relMSE': ( 'losses.pytorch.html#relmse',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.relMSE.__call__': ( 'losses.pytorch.html#relmse.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.relMSE.__init__': ( 'losses.pytorch.html#relmse.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.sCRPS': ( 'losses.pytorch.html#scrps',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.sCRPS.__call__': ( 'losses.pytorch.html#scrps.__call__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.sCRPS.__init__': ( 'losses.pytorch.html#scrps.__init__',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.student_domain_map': ( 'losses.pytorch.html#student_domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.student_scale_decouple': ( 'losses.pytorch.html#student_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.tweedie_domain_map': ( 'losses.pytorch.html#tweedie_domain_map',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.tweedie_scale_decouple': ( 'losses.pytorch.html#tweedie_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.weighted_average': ( 'losses.pytorch.html#weighted_average',
'neuralforecast/losses/pytorch.py')},
'neuralforecast.models.autoformer': { 'neuralforecast.models.autoformer.AutoCorrelation': ( 'models.autoformer.html#autocorrelation',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelation.__init__': ( 'models.autoformer.html#autocorrelation.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelation.forward': ( 'models.autoformer.html#autocorrelation.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelation.time_delay_agg_full': ( 'models.autoformer.html#autocorrelation.time_delay_agg_full',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelation.time_delay_agg_inference': ( 'models.autoformer.html#autocorrelation.time_delay_agg_inference',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelation.time_delay_agg_training': ( 'models.autoformer.html#autocorrelation.time_delay_agg_training',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelationLayer': ( 'models.autoformer.html#autocorrelationlayer',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelationLayer.__init__': ( 'models.autoformer.html#autocorrelationlayer.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.AutoCorrelationLayer.forward': ( 'models.autoformer.html#autocorrelationlayer.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Autoformer': ( 'models.autoformer.html#autoformer',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Autoformer.__init__': ( 'models.autoformer.html#autoformer.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Autoformer.forward': ( 'models.autoformer.html#autoformer.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Decoder': ( 'models.autoformer.html#decoder',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Decoder.__init__': ( 'models.autoformer.html#decoder.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Decoder.forward': ( 'models.autoformer.html#decoder.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.DecoderLayer': ( 'models.autoformer.html#decoderlayer',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.DecoderLayer.__init__': ( 'models.autoformer.html#decoderlayer.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.DecoderLayer.forward': ( 'models.autoformer.html#decoderlayer.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Encoder': ( 'models.autoformer.html#encoder',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Encoder.__init__': ( 'models.autoformer.html#encoder.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.Encoder.forward': ( 'models.autoformer.html#encoder.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.EncoderLayer': ( 'models.autoformer.html#encoderlayer',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.EncoderLayer.__init__': ( 'models.autoformer.html#encoderlayer.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.EncoderLayer.forward': ( 'models.autoformer.html#encoderlayer.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.LayerNorm': ( 'models.autoformer.html#layernorm',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.LayerNorm.__init__': ( 'models.autoformer.html#layernorm.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.LayerNorm.forward': ( 'models.autoformer.html#layernorm.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.MovingAvg': ( 'models.autoformer.html#movingavg',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.MovingAvg.__init__': ( 'models.autoformer.html#movingavg.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.MovingAvg.forward': ( 'models.autoformer.html#movingavg.forward',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.SeriesDecomp': ( 'models.autoformer.html#seriesdecomp',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.SeriesDecomp.__init__': ( 'models.autoformer.html#seriesdecomp.__init__',
'neuralforecast/models/autoformer.py'),
'neuralforecast.models.autoformer.SeriesDecomp.forward': ( 'models.autoformer.html#seriesdecomp.forward',
'neuralforecast/models/autoformer.py')},
'neuralforecast.models.bitcn': { 'neuralforecast.models.bitcn.BiTCN': ( 'models.bitcn.html#bitcn',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.BiTCN.__init__': ( 'models.bitcn.html#bitcn.__init__',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.BiTCN.forward': ( 'models.bitcn.html#bitcn.forward',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.CustomConv1d': ( 'models.bitcn.html#customconv1d',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.CustomConv1d.__init__': ( 'models.bitcn.html#customconv1d.__init__',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.CustomConv1d.forward': ( 'models.bitcn.html#customconv1d.forward',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.TCNCell': ( 'models.bitcn.html#tcncell',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.TCNCell.__init__': ( 'models.bitcn.html#tcncell.__init__',
'neuralforecast/models/bitcn.py'),
'neuralforecast.models.bitcn.TCNCell.forward': ( 'models.bitcn.html#tcncell.forward',
'neuralforecast/models/bitcn.py')},
'neuralforecast.models.deepar': { 'neuralforecast.models.deepar.Decoder': ( 'models.deepar.html#decoder',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.Decoder.__init__': ( 'models.deepar.html#decoder.__init__',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.Decoder.forward': ( 'models.deepar.html#decoder.forward',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR': ( 'models.deepar.html#deepar',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.__init__': ( 'models.deepar.html#deepar.__init__',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.forward': ( 'models.deepar.html#deepar.forward',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.predict_step': ( 'models.deepar.html#deepar.predict_step',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.train_forward': ( 'models.deepar.html#deepar.train_forward',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.training_step': ( 'models.deepar.html#deepar.training_step',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.validation_step': ( 'models.deepar.html#deepar.validation_step',
'neuralforecast/models/deepar.py')},
'neuralforecast.models.dilated_rnn': { 'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer': ( 'models.dilated_rnn.html#attentivelstmlayer',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer.__init__': ( 'models.dilated_rnn.html#attentivelstmlayer.__init__',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.AttentiveLSTMLayer.forward': ( 'models.dilated_rnn.html#attentivelstmlayer.forward',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN': ( 'models.dilated_rnn.html#drnn',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN.__init__': ( 'models.dilated_rnn.html#drnn.__init__',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN._apply_cell': ( 'models.dilated_rnn.html#drnn._apply_cell',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN._pad_inputs': ( 'models.dilated_rnn.html#drnn._pad_inputs',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN._prepare_inputs': ( 'models.dilated_rnn.html#drnn._prepare_inputs',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN._split_outputs': ( 'models.dilated_rnn.html#drnn._split_outputs',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN._unpad_outputs': ( 'models.dilated_rnn.html#drnn._unpad_outputs',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN.drnn_layer': ( 'models.dilated_rnn.html#drnn.drnn_layer',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DRNN.forward': ( 'models.dilated_rnn.html#drnn.forward',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DilatedRNN': ( 'models.dilated_rnn.html#dilatedrnn',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DilatedRNN.__init__': ( 'models.dilated_rnn.html#dilatedrnn.__init__',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.DilatedRNN.forward': ( 'models.dilated_rnn.html#dilatedrnn.forward',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.LSTMCell': ( 'models.dilated_rnn.html#lstmcell',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.LSTMCell.__init__': ( 'models.dilated_rnn.html#lstmcell.__init__',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.LSTMCell.forward': ( 'models.dilated_rnn.html#lstmcell.forward',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.ResLSTMCell': ( 'models.dilated_rnn.html#reslstmcell',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.ResLSTMCell.__init__': ( 'models.dilated_rnn.html#reslstmcell.__init__',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.ResLSTMCell.forward': ( 'models.dilated_rnn.html#reslstmcell.forward',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.ResLSTMLayer': ( 'models.dilated_rnn.html#reslstmlayer',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.ResLSTMLayer.__init__': ( 'models.dilated_rnn.html#reslstmlayer.__init__',
'neuralforecast/models/dilated_rnn.py'),
'neuralforecast.models.dilated_rnn.ResLSTMLayer.forward': ( 'models.dilated_rnn.html#reslstmlayer.forward',
'neuralforecast/models/dilated_rnn.py')},
'neuralforecast.models.dlinear': { 'neuralforecast.models.dlinear.DLinear': ( 'models.dlinear.html#dlinear',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.DLinear.__init__': ( 'models.dlinear.html#dlinear.__init__',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.DLinear.forward': ( 'models.dlinear.html#dlinear.forward',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.MovingAvg': ( 'models.dlinear.html#movingavg',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.MovingAvg.__init__': ( 'models.dlinear.html#movingavg.__init__',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.MovingAvg.forward': ( 'models.dlinear.html#movingavg.forward',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.SeriesDecomp': ( 'models.dlinear.html#seriesdecomp',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.SeriesDecomp.__init__': ( 'models.dlinear.html#seriesdecomp.__init__',
'neuralforecast/models/dlinear.py'),
'neuralforecast.models.dlinear.SeriesDecomp.forward': ( 'models.dlinear.html#seriesdecomp.forward',
'neuralforecast/models/dlinear.py')},
'neuralforecast.models.fedformer': { 'neuralforecast.models.fedformer.AutoCorrelationLayer': ( 'models.fedformer.html#autocorrelationlayer',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.AutoCorrelationLayer.__init__': ( 'models.fedformer.html#autocorrelationlayer.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.AutoCorrelationLayer.forward': ( 'models.fedformer.html#autocorrelationlayer.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.Decoder': ( 'models.fedformer.html#decoder',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.Decoder.__init__': ( 'models.fedformer.html#decoder.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.Decoder.forward': ( 'models.fedformer.html#decoder.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.DecoderLayer': ( 'models.fedformer.html#decoderlayer',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.DecoderLayer.__init__': ( 'models.fedformer.html#decoderlayer.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.DecoderLayer.forward': ( 'models.fedformer.html#decoderlayer.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.Encoder': ( 'models.fedformer.html#encoder',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.Encoder.__init__': ( 'models.fedformer.html#encoder.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.Encoder.forward': ( 'models.fedformer.html#encoder.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.EncoderLayer': ( 'models.fedformer.html#encoderlayer',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.EncoderLayer.__init__': ( 'models.fedformer.html#encoderlayer.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.EncoderLayer.forward': ( 'models.fedformer.html#encoderlayer.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FEDformer': ( 'models.fedformer.html#fedformer',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FEDformer.__init__': ( 'models.fedformer.html#fedformer.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FEDformer.forward': ( 'models.fedformer.html#fedformer.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierBlock': ( 'models.fedformer.html#fourierblock',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierBlock.__init__': ( 'models.fedformer.html#fourierblock.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierBlock.compl_mul1d': ( 'models.fedformer.html#fourierblock.compl_mul1d',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierBlock.forward': ( 'models.fedformer.html#fourierblock.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierCrossAttention': ( 'models.fedformer.html#fouriercrossattention',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierCrossAttention.__init__': ( 'models.fedformer.html#fouriercrossattention.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierCrossAttention.compl_mul1d': ( 'models.fedformer.html#fouriercrossattention.compl_mul1d',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.FourierCrossAttention.forward': ( 'models.fedformer.html#fouriercrossattention.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.LayerNorm': ( 'models.fedformer.html#layernorm',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.LayerNorm.__init__': ( 'models.fedformer.html#layernorm.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.LayerNorm.forward': ( 'models.fedformer.html#layernorm.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.MovingAvg': ( 'models.fedformer.html#movingavg',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.MovingAvg.__init__': ( 'models.fedformer.html#movingavg.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.MovingAvg.forward': ( 'models.fedformer.html#movingavg.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.SeriesDecomp': ( 'models.fedformer.html#seriesdecomp',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.SeriesDecomp.__init__': ( 'models.fedformer.html#seriesdecomp.__init__',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.SeriesDecomp.forward': ( 'models.fedformer.html#seriesdecomp.forward',
'neuralforecast/models/fedformer.py'),
'neuralforecast.models.fedformer.get_frequency_modes': ( 'models.fedformer.html#get_frequency_modes',
'neuralforecast/models/fedformer.py')},
'neuralforecast.models.gru': { 'neuralforecast.models.gru.GRU': ('models.gru.html#gru', 'neuralforecast/models/gru.py'),
'neuralforecast.models.gru.GRU.__init__': ( 'models.gru.html#gru.__init__',
'neuralforecast/models/gru.py'),
'neuralforecast.models.gru.GRU.forward': ( 'models.gru.html#gru.forward',
'neuralforecast/models/gru.py')},
'neuralforecast.models.hint': { 'neuralforecast.models.hint.HINT': ('models.hint.html#hint', 'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.__init__': ( 'models.hint.html#hint.__init__',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.__repr__': ( 'models.hint.html#hint.__repr__',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.fit': ( 'models.hint.html#hint.fit',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.get_test_size': ( 'models.hint.html#hint.get_test_size',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.predict': ( 'models.hint.html#hint.predict',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.save': ( 'models.hint.html#hint.save',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.HINT.set_test_size': ( 'models.hint.html#hint.set_test_size',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.get_bottomup_P': ( 'models.hint.html#get_bottomup_p',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.get_identity_P': ( 'models.hint.html#get_identity_p',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.get_mintrace_ols_P': ( 'models.hint.html#get_mintrace_ols_p',
'neuralforecast/models/hint.py'),
'neuralforecast.models.hint.get_mintrace_wls_P': ( 'models.hint.html#get_mintrace_wls_p',
'neuralforecast/models/hint.py')},
'neuralforecast.models.informer': { 'neuralforecast.models.informer.ConvLayer': ( 'models.informer.html#convlayer',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ConvLayer.__init__': ( 'models.informer.html#convlayer.__init__',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ConvLayer.forward': ( 'models.informer.html#convlayer.forward',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.Informer': ( 'models.informer.html#informer',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.Informer.__init__': ( 'models.informer.html#informer.__init__',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.Informer.forward': ( 'models.informer.html#informer.forward',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbAttention': ( 'models.informer.html#probattention',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbAttention.__init__': ( 'models.informer.html#probattention.__init__',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbAttention._get_initial_context': ( 'models.informer.html#probattention._get_initial_context',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbAttention._prob_QK': ( 'models.informer.html#probattention._prob_qk',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbAttention._update_context': ( 'models.informer.html#probattention._update_context',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbAttention.forward': ( 'models.informer.html#probattention.forward',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbMask': ( 'models.informer.html#probmask',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbMask.__init__': ( 'models.informer.html#probmask.__init__',
'neuralforecast/models/informer.py'),
'neuralforecast.models.informer.ProbMask.mask': ( 'models.informer.html#probmask.mask',
'neuralforecast/models/informer.py')},
'neuralforecast.models.itransformer': { 'neuralforecast.models.itransformer.DataEmbedding_inverted': ( 'models.itransformer.html#dataembedding_inverted',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.DataEmbedding_inverted.__init__': ( 'models.itransformer.html#dataembedding_inverted.__init__',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.DataEmbedding_inverted.forward': ( 'models.itransformer.html#dataembedding_inverted.forward',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.FullAttention': ( 'models.itransformer.html#fullattention',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.FullAttention.__init__': ( 'models.itransformer.html#fullattention.__init__',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.FullAttention.forward': ( 'models.itransformer.html#fullattention.forward',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.TriangularCausalMask': ( 'models.itransformer.html#triangularcausalmask',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.TriangularCausalMask.__init__': ( 'models.itransformer.html#triangularcausalmask.__init__',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.TriangularCausalMask.mask': ( 'models.itransformer.html#triangularcausalmask.mask',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.iTransformer': ( 'models.itransformer.html#itransformer',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.iTransformer.__init__': ( 'models.itransformer.html#itransformer.__init__',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.iTransformer.forecast': ( 'models.itransformer.html#itransformer.forecast',
'neuralforecast/models/itransformer.py'),
'neuralforecast.models.itransformer.iTransformer.forward': ( 'models.itransformer.html#itransformer.forward',
'neuralforecast/models/itransformer.py')},
'neuralforecast.models.lstm': { 'neuralforecast.models.lstm.LSTM': ('models.lstm.html#lstm', 'neuralforecast/models/lstm.py'),
'neuralforecast.models.lstm.LSTM.__init__': ( 'models.lstm.html#lstm.__init__',
'neuralforecast/models/lstm.py'),
'neuralforecast.models.lstm.LSTM.forward': ( 'models.lstm.html#lstm.forward',
'neuralforecast/models/lstm.py')},
'neuralforecast.models.mlp': { 'neuralforecast.models.mlp.MLP': ('models.mlp.html#mlp', 'neuralforecast/models/mlp.py'),
'neuralforecast.models.mlp.MLP.__init__': ( 'models.mlp.html#mlp.__init__',
'neuralforecast/models/mlp.py'),
'neuralforecast.models.mlp.MLP.forward': ( 'models.mlp.html#mlp.forward',
'neuralforecast/models/mlp.py')},
'neuralforecast.models.mlpmultivariate': { 'neuralforecast.models.mlpmultivariate.MLPMultivariate': ( 'models.mlpmultivariate.html#mlpmultivariate',
'neuralforecast/models/mlpmultivariate.py'),
'neuralforecast.models.mlpmultivariate.MLPMultivariate.__init__': ( 'models.mlpmultivariate.html#mlpmultivariate.__init__',
'neuralforecast/models/mlpmultivariate.py'),
'neuralforecast.models.mlpmultivariate.MLPMultivariate.forward': ( 'models.mlpmultivariate.html#mlpmultivariate.forward',
'neuralforecast/models/mlpmultivariate.py')},
'neuralforecast.models.nbeats': { 'neuralforecast.models.nbeats.IdentityBasis': ( 'models.nbeats.html#identitybasis',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.IdentityBasis.__init__': ( 'models.nbeats.html#identitybasis.__init__',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.IdentityBasis.forward': ( 'models.nbeats.html#identitybasis.forward',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATS': ( 'models.nbeats.html#nbeats',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATS.__init__': ( 'models.nbeats.html#nbeats.__init__',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATS.create_stack': ( 'models.nbeats.html#nbeats.create_stack',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATS.forward': ( 'models.nbeats.html#nbeats.forward',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATSBlock': ( 'models.nbeats.html#nbeatsblock',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATSBlock.__init__': ( 'models.nbeats.html#nbeatsblock.__init__',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.NBEATSBlock.forward': ( 'models.nbeats.html#nbeatsblock.forward',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.SeasonalityBasis': ( 'models.nbeats.html#seasonalitybasis',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.SeasonalityBasis.__init__': ( 'models.nbeats.html#seasonalitybasis.__init__',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.SeasonalityBasis.forward': ( 'models.nbeats.html#seasonalitybasis.forward',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.TrendBasis': ( 'models.nbeats.html#trendbasis',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.TrendBasis.__init__': ( 'models.nbeats.html#trendbasis.__init__',
'neuralforecast/models/nbeats.py'),
'neuralforecast.models.nbeats.TrendBasis.forward': ( 'models.nbeats.html#trendbasis.forward',
'neuralforecast/models/nbeats.py')},
'neuralforecast.models.nbeatsx': { 'neuralforecast.models.nbeatsx.ExogenousBasis': ( 'models.nbeatsx.html#exogenousbasis',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.ExogenousBasis.__init__': ( 'models.nbeatsx.html#exogenousbasis.__init__',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.ExogenousBasis.forward': ( 'models.nbeatsx.html#exogenousbasis.forward',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.IdentityBasis': ( 'models.nbeatsx.html#identitybasis',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.IdentityBasis.__init__': ( 'models.nbeatsx.html#identitybasis.__init__',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.IdentityBasis.forward': ( 'models.nbeatsx.html#identitybasis.forward',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSBlock': ( 'models.nbeatsx.html#nbeatsblock',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSBlock.__init__': ( 'models.nbeatsx.html#nbeatsblock.__init__',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSBlock.forward': ( 'models.nbeatsx.html#nbeatsblock.forward',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSx': ( 'models.nbeatsx.html#nbeatsx',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSx.__init__': ( 'models.nbeatsx.html#nbeatsx.__init__',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSx.create_stack': ( 'models.nbeatsx.html#nbeatsx.create_stack',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.NBEATSx.forward': ( 'models.nbeatsx.html#nbeatsx.forward',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.SeasonalityBasis': ( 'models.nbeatsx.html#seasonalitybasis',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.SeasonalityBasis.__init__': ( 'models.nbeatsx.html#seasonalitybasis.__init__',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.SeasonalityBasis.forward': ( 'models.nbeatsx.html#seasonalitybasis.forward',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.TrendBasis': ( 'models.nbeatsx.html#trendbasis',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.TrendBasis.__init__': ( 'models.nbeatsx.html#trendbasis.__init__',
'neuralforecast/models/nbeatsx.py'),
'neuralforecast.models.nbeatsx.TrendBasis.forward': ( 'models.nbeatsx.html#trendbasis.forward',
'neuralforecast/models/nbeatsx.py')},
'neuralforecast.models.nhits': { 'neuralforecast.models.nhits.NHITS': ( 'models.nhits.html#nhits',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits.NHITS.__init__': ( 'models.nhits.html#nhits.__init__',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits.NHITS.create_stack': ( 'models.nhits.html#nhits.create_stack',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits.NHITS.forward': ( 'models.nhits.html#nhits.forward',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits.NHITSBlock': ( 'models.nhits.html#nhitsblock',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits.NHITSBlock.__init__': ( 'models.nhits.html#nhitsblock.__init__',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits.NHITSBlock.forward': ( 'models.nhits.html#nhitsblock.forward',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits._IdentityBasis': ( 'models.nhits.html#_identitybasis',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits._IdentityBasis.__init__': ( 'models.nhits.html#_identitybasis.__init__',
'neuralforecast/models/nhits.py'),
'neuralforecast.models.nhits._IdentityBasis.forward': ( 'models.nhits.html#_identitybasis.forward',
'neuralforecast/models/nhits.py')},
'neuralforecast.models.nlinear': { 'neuralforecast.models.nlinear.NLinear': ( 'models.nlinear.html#nlinear',
'neuralforecast/models/nlinear.py'),
'neuralforecast.models.nlinear.NLinear.__init__': ( 'models.nlinear.html#nlinear.__init__',
'neuralforecast/models/nlinear.py'),
'neuralforecast.models.nlinear.NLinear.forward': ( 'models.nlinear.html#nlinear.forward',
'neuralforecast/models/nlinear.py')},
'neuralforecast.models.patchtst': { 'neuralforecast.models.patchtst.Coord1dPosEncoding': ( 'models.patchtst.html#coord1dposencoding',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Coord2dPosEncoding': ( 'models.patchtst.html#coord2dposencoding',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Flatten_Head': ( 'models.patchtst.html#flatten_head',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Flatten_Head.__init__': ( 'models.patchtst.html#flatten_head.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Flatten_Head.forward': ( 'models.patchtst.html#flatten_head.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST': ( 'models.patchtst.html#patchtst',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST.__init__': ( 'models.patchtst.html#patchtst.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST.forward': ( 'models.patchtst.html#patchtst.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST_backbone': ( 'models.patchtst.html#patchtst_backbone',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST_backbone.__init__': ( 'models.patchtst.html#patchtst_backbone.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST_backbone.create_pretrain_head': ( 'models.patchtst.html#patchtst_backbone.create_pretrain_head',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PatchTST_backbone.forward': ( 'models.patchtst.html#patchtst_backbone.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.PositionalEncoding': ( 'models.patchtst.html#positionalencoding',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN': ( 'models.patchtst.html#revin',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN.__init__': ( 'models.patchtst.html#revin.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN._denormalize': ( 'models.patchtst.html#revin._denormalize',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN._get_statistics': ( 'models.patchtst.html#revin._get_statistics',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN._init_params': ( 'models.patchtst.html#revin._init_params',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN._normalize': ( 'models.patchtst.html#revin._normalize',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.RevIN.forward': ( 'models.patchtst.html#revin.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTEncoder': ( 'models.patchtst.html#tstencoder',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTEncoder.__init__': ( 'models.patchtst.html#tstencoder.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTEncoder.forward': ( 'models.patchtst.html#tstencoder.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTEncoderLayer': ( 'models.patchtst.html#tstencoderlayer',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTEncoderLayer.__init__': ( 'models.patchtst.html#tstencoderlayer.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTEncoderLayer.forward': ( 'models.patchtst.html#tstencoderlayer.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTiEncoder': ( 'models.patchtst.html#tstiencoder',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTiEncoder.__init__': ( 'models.patchtst.html#tstiencoder.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.TSTiEncoder.forward': ( 'models.patchtst.html#tstiencoder.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Transpose': ( 'models.patchtst.html#transpose',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Transpose.__init__': ( 'models.patchtst.html#transpose.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.Transpose.forward': ( 'models.patchtst.html#transpose.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst._MultiheadAttention': ( 'models.patchtst.html#_multiheadattention',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst._MultiheadAttention.__init__': ( 'models.patchtst.html#_multiheadattention.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst._MultiheadAttention.forward': ( 'models.patchtst.html#_multiheadattention.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst._ScaledDotProductAttention': ( 'models.patchtst.html#_scaleddotproductattention',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst._ScaledDotProductAttention.__init__': ( 'models.patchtst.html#_scaleddotproductattention.__init__',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst._ScaledDotProductAttention.forward': ( 'models.patchtst.html#_scaleddotproductattention.forward',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.get_activation_fn': ( 'models.patchtst.html#get_activation_fn',
'neuralforecast/models/patchtst.py'),
'neuralforecast.models.patchtst.positional_encoding': ( 'models.patchtst.html#positional_encoding',
'neuralforecast/models/patchtst.py')},
'neuralforecast.models.rnn': { 'neuralforecast.models.rnn.RNN': ('models.rnn.html#rnn', 'neuralforecast/models/rnn.py'),
'neuralforecast.models.rnn.RNN.__init__': ( 'models.rnn.html#rnn.__init__',
'neuralforecast/models/rnn.py'),
'neuralforecast.models.rnn.RNN.forward': ( 'models.rnn.html#rnn.forward',
'neuralforecast/models/rnn.py')},
'neuralforecast.models.stemgnn': { 'neuralforecast.models.stemgnn.GLU': ( 'models.stemgnn.html#glu',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.GLU.__init__': ( 'models.stemgnn.html#glu.__init__',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.GLU.forward': ( 'models.stemgnn.html#glu.forward',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN': ( 'models.stemgnn.html#stemgnn',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.__init__': ( 'models.stemgnn.html#stemgnn.__init__',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.cheb_polynomial': ( 'models.stemgnn.html#stemgnn.cheb_polynomial',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.forward': ( 'models.stemgnn.html#stemgnn.forward',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.get_laplacian': ( 'models.stemgnn.html#stemgnn.get_laplacian',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.graph_fft': ( 'models.stemgnn.html#stemgnn.graph_fft',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.latent_correlation_layer': ( 'models.stemgnn.html#stemgnn.latent_correlation_layer',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StemGNN.self_graph_attention': ( 'models.stemgnn.html#stemgnn.self_graph_attention',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StockBlockLayer': ( 'models.stemgnn.html#stockblocklayer',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StockBlockLayer.__init__': ( 'models.stemgnn.html#stockblocklayer.__init__',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StockBlockLayer.forward': ( 'models.stemgnn.html#stockblocklayer.forward',
'neuralforecast/models/stemgnn.py'),
'neuralforecast.models.stemgnn.StockBlockLayer.spe_seq_cell': ( 'models.stemgnn.html#stockblocklayer.spe_seq_cell',
'neuralforecast/models/stemgnn.py')},
'neuralforecast.models.tcn': { 'neuralforecast.models.tcn.TCN': ('models.tcn.html#tcn', 'neuralforecast/models/tcn.py'),
'neuralforecast.models.tcn.TCN.__init__': ( 'models.tcn.html#tcn.__init__',
'neuralforecast/models/tcn.py'),
'neuralforecast.models.tcn.TCN.forward': ( 'models.tcn.html#tcn.forward',
'neuralforecast/models/tcn.py')},
'neuralforecast.models.tft': { 'neuralforecast.models.tft.GLU': ('models.tft.html#glu', 'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.GLU.__init__': ( 'models.tft.html#glu.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.GLU.forward': ( 'models.tft.html#glu.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.GRN': ('models.tft.html#grn', 'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.GRN.__init__': ( 'models.tft.html#grn.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.GRN.forward': ( 'models.tft.html#grn.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.InterpretableMultiHeadAttention': ( 'models.tft.html#interpretablemultiheadattention',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.InterpretableMultiHeadAttention.__init__': ( 'models.tft.html#interpretablemultiheadattention.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.InterpretableMultiHeadAttention.forward': ( 'models.tft.html#interpretablemultiheadattention.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.MaybeLayerNorm': ( 'models.tft.html#maybelayernorm',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.MaybeLayerNorm.__init__': ( 'models.tft.html#maybelayernorm.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.MaybeLayerNorm.forward': ( 'models.tft.html#maybelayernorm.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.StaticCovariateEncoder': ( 'models.tft.html#staticcovariateencoder',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.StaticCovariateEncoder.__init__': ( 'models.tft.html#staticcovariateencoder.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.StaticCovariateEncoder.forward': ( 'models.tft.html#staticcovariateencoder.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFT': ('models.tft.html#tft', 'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFT.__init__': ( 'models.tft.html#tft.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFT.forward': ( 'models.tft.html#tft.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFTEmbedding': ( 'models.tft.html#tftembedding',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFTEmbedding.__init__': ( 'models.tft.html#tftembedding.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFTEmbedding._apply_embedding': ( 'models.tft.html#tftembedding._apply_embedding',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TFTEmbedding.forward': ( 'models.tft.html#tftembedding.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TemporalCovariateEncoder': ( 'models.tft.html#temporalcovariateencoder',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TemporalCovariateEncoder.__init__': ( 'models.tft.html#temporalcovariateencoder.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TemporalCovariateEncoder.forward': ( 'models.tft.html#temporalcovariateencoder.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TemporalFusionDecoder': ( 'models.tft.html#temporalfusiondecoder',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TemporalFusionDecoder.__init__': ( 'models.tft.html#temporalfusiondecoder.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.TemporalFusionDecoder.forward': ( 'models.tft.html#temporalfusiondecoder.forward',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.VariableSelectionNetwork': ( 'models.tft.html#variableselectionnetwork',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.VariableSelectionNetwork.__init__': ( 'models.tft.html#variableselectionnetwork.__init__',
'neuralforecast/models/tft.py'),
'neuralforecast.models.tft.VariableSelectionNetwork.forward': ( 'models.tft.html#variableselectionnetwork.forward',
'neuralforecast/models/tft.py')},
'neuralforecast.models.timellm': { 'neuralforecast.models.timellm.FlattenHead': ( 'models.timellm.html#flattenhead',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.FlattenHead.__init__': ( 'models.timellm.html#flattenhead.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.FlattenHead.forward': ( 'models.timellm.html#flattenhead.forward',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize': ( 'models.timellm.html#normalize',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize.__init__': ( 'models.timellm.html#normalize.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize._denormalize': ( 'models.timellm.html#normalize._denormalize',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize._get_statistics': ( 'models.timellm.html#normalize._get_statistics',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize._init_params': ( 'models.timellm.html#normalize._init_params',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize._normalize': ( 'models.timellm.html#normalize._normalize',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.Normalize.forward': ( 'models.timellm.html#normalize.forward',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.PatchEmbedding': ( 'models.timellm.html#patchembedding',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.PatchEmbedding.__init__': ( 'models.timellm.html#patchembedding.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.PatchEmbedding.forward': ( 'models.timellm.html#patchembedding.forward',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReplicationPad1d': ( 'models.timellm.html#replicationpad1d',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReplicationPad1d.__init__': ( 'models.timellm.html#replicationpad1d.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReplicationPad1d.forward': ( 'models.timellm.html#replicationpad1d.forward',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReprogrammingLayer': ( 'models.timellm.html#reprogramminglayer',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReprogrammingLayer.__init__': ( 'models.timellm.html#reprogramminglayer.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReprogrammingLayer.forward': ( 'models.timellm.html#reprogramminglayer.forward',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.ReprogrammingLayer.reprogramming': ( 'models.timellm.html#reprogramminglayer.reprogramming',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TimeLLM': ( 'models.timellm.html#timellm',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TimeLLM.__init__': ( 'models.timellm.html#timellm.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TimeLLM.calcute_lags': ( 'models.timellm.html#timellm.calcute_lags',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TimeLLM.forecast': ( 'models.timellm.html#timellm.forecast',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TimeLLM.forward': ( 'models.timellm.html#timellm.forward',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TokenEmbedding': ( 'models.timellm.html#tokenembedding',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TokenEmbedding.__init__': ( 'models.timellm.html#tokenembedding.__init__',
'neuralforecast/models/timellm.py'),
'neuralforecast.models.timellm.TokenEmbedding.forward': ( 'models.timellm.html#tokenembedding.forward',
'neuralforecast/models/timellm.py')},
'neuralforecast.models.timesnet': { 'neuralforecast.models.timesnet.FFT_for_Period': ( 'models.timesnet.html#fft_for_period',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.Inception_Block_V1': ( 'models.timesnet.html#inception_block_v1',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.Inception_Block_V1.__init__': ( 'models.timesnet.html#inception_block_v1.__init__',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.Inception_Block_V1._initialize_weights': ( 'models.timesnet.html#inception_block_v1._initialize_weights',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.Inception_Block_V1.forward': ( 'models.timesnet.html#inception_block_v1.forward',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.TimesBlock': ( 'models.timesnet.html#timesblock',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.TimesBlock.__init__': ( 'models.timesnet.html#timesblock.__init__',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.TimesBlock.forward': ( 'models.timesnet.html#timesblock.forward',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.TimesNet': ( 'models.timesnet.html#timesnet',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.TimesNet.__init__': ( 'models.timesnet.html#timesnet.__init__',
'neuralforecast/models/timesnet.py'),
'neuralforecast.models.timesnet.TimesNet.forward': ( 'models.timesnet.html#timesnet.forward',
'neuralforecast/models/timesnet.py')},
'neuralforecast.models.tsmixer': { 'neuralforecast.models.tsmixer.FeatureMixing': ( 'models.tsmixer.html#featuremixing',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.FeatureMixing.__init__': ( 'models.tsmixer.html#featuremixing.__init__',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.FeatureMixing.forward': ( 'models.tsmixer.html#featuremixing.forward',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.MixingLayer': ( 'models.tsmixer.html#mixinglayer',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.MixingLayer.__init__': ( 'models.tsmixer.html#mixinglayer.__init__',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.MixingLayer.forward': ( 'models.tsmixer.html#mixinglayer.forward',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d': ( 'models.tsmixer.html#reversibleinstancenorm1d',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d.__init__': ( 'models.tsmixer.html#reversibleinstancenorm1d.__init__',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d.forward': ( 'models.tsmixer.html#reversibleinstancenorm1d.forward',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d.reverse': ( 'models.tsmixer.html#reversibleinstancenorm1d.reverse',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TSMixer': ( 'models.tsmixer.html#tsmixer',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TSMixer.__init__': ( 'models.tsmixer.html#tsmixer.__init__',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TSMixer.forward': ( 'models.tsmixer.html#tsmixer.forward',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TemporalMixing': ( 'models.tsmixer.html#temporalmixing',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TemporalMixing.__init__': ( 'models.tsmixer.html#temporalmixing.__init__',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TemporalMixing.forward': ( 'models.tsmixer.html#temporalmixing.forward',
'neuralforecast/models/tsmixer.py')},
'neuralforecast.models.tsmixerx': { 'neuralforecast.models.tsmixerx.FeatureMixing': ( 'models.tsmixerx.html#featuremixing',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.FeatureMixing.__init__': ( 'models.tsmixerx.html#featuremixing.__init__',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.FeatureMixing.forward': ( 'models.tsmixerx.html#featuremixing.forward',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.MixingLayer': ( 'models.tsmixerx.html#mixinglayer',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.MixingLayer.__init__': ( 'models.tsmixerx.html#mixinglayer.__init__',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.MixingLayer.forward': ( 'models.tsmixerx.html#mixinglayer.forward',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.MixingLayerWithStaticExogenous': ( 'models.tsmixerx.html#mixinglayerwithstaticexogenous',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.MixingLayerWithStaticExogenous.__init__': ( 'models.tsmixerx.html#mixinglayerwithstaticexogenous.__init__',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.MixingLayerWithStaticExogenous.forward': ( 'models.tsmixerx.html#mixinglayerwithstaticexogenous.forward',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.ReversibleInstanceNorm1d': ( 'models.tsmixerx.html#reversibleinstancenorm1d',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.ReversibleInstanceNorm1d.__init__': ( 'models.tsmixerx.html#reversibleinstancenorm1d.__init__',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.ReversibleInstanceNorm1d.forward': ( 'models.tsmixerx.html#reversibleinstancenorm1d.forward',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.ReversibleInstanceNorm1d.reverse': ( 'models.tsmixerx.html#reversibleinstancenorm1d.reverse',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.TSMixerx': ( 'models.tsmixerx.html#tsmixerx',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.TSMixerx.__init__': ( 'models.tsmixerx.html#tsmixerx.__init__',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.TSMixerx.forward': ( 'models.tsmixerx.html#tsmixerx.forward',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.TemporalMixing': ( 'models.tsmixerx.html#temporalmixing',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.TemporalMixing.__init__': ( 'models.tsmixerx.html#temporalmixing.__init__',
'neuralforecast/models/tsmixerx.py'),
'neuralforecast.models.tsmixerx.TemporalMixing.forward': ( 'models.tsmixerx.html#temporalmixing.forward',
'neuralforecast/models/tsmixerx.py')},
'neuralforecast.models.vanillatransformer': { 'neuralforecast.models.vanillatransformer.FullAttention': ( 'models.vanillatransformer.html#fullattention',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.FullAttention.__init__': ( 'models.vanillatransformer.html#fullattention.__init__',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.FullAttention.forward': ( 'models.vanillatransformer.html#fullattention.forward',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.TriangularCausalMask': ( 'models.vanillatransformer.html#triangularcausalmask',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.TriangularCausalMask.__init__': ( 'models.vanillatransformer.html#triangularcausalmask.__init__',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.TriangularCausalMask.mask': ( 'models.vanillatransformer.html#triangularcausalmask.mask',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.VanillaTransformer': ( 'models.vanillatransformer.html#vanillatransformer',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.VanillaTransformer.__init__': ( 'models.vanillatransformer.html#vanillatransformer.__init__',
'neuralforecast/models/vanillatransformer.py'),
'neuralforecast.models.vanillatransformer.VanillaTransformer.forward': ( 'models.vanillatransformer.html#vanillatransformer.forward',
'neuralforecast/models/vanillatransformer.py')},
'neuralforecast.tsdataset': { 'neuralforecast.tsdataset.TimeSeriesDataModule': ( 'tsdataset.html#timeseriesdatamodule',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataModule.__init__': ( 'tsdataset.html#timeseriesdatamodule.__init__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataModule.predict_dataloader': ( 'tsdataset.html#timeseriesdatamodule.predict_dataloader',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataModule.train_dataloader': ( 'tsdataset.html#timeseriesdatamodule.train_dataloader',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataModule.val_dataloader': ( 'tsdataset.html#timeseriesdatamodule.val_dataloader',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset': ( 'tsdataset.html#timeseriesdataset',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.__eq__': ( 'tsdataset.html#timeseriesdataset.__eq__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.__getitem__': ( 'tsdataset.html#timeseriesdataset.__getitem__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.__init__': ( 'tsdataset.html#timeseriesdataset.__init__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.__len__': ( 'tsdataset.html#timeseriesdataset.__len__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.__repr__': ( 'tsdataset.html#timeseriesdataset.__repr__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset._as_torch_copy': ( 'tsdataset.html#timeseriesdataset._as_torch_copy',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.align': ( 'tsdataset.html#timeseriesdataset.align',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.append': ( 'tsdataset.html#timeseriesdataset.append',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.from_df': ( 'tsdataset.html#timeseriesdataset.from_df',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.trim_dataset': ( 'tsdataset.html#timeseriesdataset.trim_dataset',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesDataset.update_dataset': ( 'tsdataset.html#timeseriesdataset.update_dataset',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesLoader': ( 'tsdataset.html#timeseriesloader',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesLoader.__init__': ( 'tsdataset.html#timeseriesloader.__init__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset.TimeSeriesLoader._collate_fn': ( 'tsdataset.html#timeseriesloader._collate_fn',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset._DistributedTimeSeriesDataModule': ( 'tsdataset.html#_distributedtimeseriesdatamodule',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset._DistributedTimeSeriesDataModule.__init__': ( 'tsdataset.html#_distributedtimeseriesdatamodule.__init__',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset._DistributedTimeSeriesDataModule.setup': ( 'tsdataset.html#_distributedtimeseriesdatamodule.setup',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset._FilesDataset': ( 'tsdataset.html#_filesdataset',
'neuralforecast/tsdataset.py'),
'neuralforecast.tsdataset._FilesDataset.__init__': ( 'tsdataset.html#_filesdataset.__init__',
'neuralforecast/tsdataset.py')},
'neuralforecast.utils': { 'neuralforecast.utils.DayOfMonth': ('utils.html#dayofmonth', 'neuralforecast/utils.py'),
'neuralforecast.utils.DayOfMonth.__call__': ( 'utils.html#dayofmonth.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.DayOfWeek': ('utils.html#dayofweek', 'neuralforecast/utils.py'),
'neuralforecast.utils.DayOfWeek.__call__': ( 'utils.html#dayofweek.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.DayOfYear': ('utils.html#dayofyear', 'neuralforecast/utils.py'),
'neuralforecast.utils.DayOfYear.__call__': ( 'utils.html#dayofyear.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.HourOfDay': ('utils.html#hourofday', 'neuralforecast/utils.py'),
'neuralforecast.utils.HourOfDay.__call__': ( 'utils.html#hourofday.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.MinuteOfHour': ('utils.html#minuteofhour', 'neuralforecast/utils.py'),
'neuralforecast.utils.MinuteOfHour.__call__': ( 'utils.html#minuteofhour.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.MonthOfYear': ('utils.html#monthofyear', 'neuralforecast/utils.py'),
'neuralforecast.utils.MonthOfYear.__call__': ( 'utils.html#monthofyear.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.SecondOfMinute': ('utils.html#secondofminute', 'neuralforecast/utils.py'),
'neuralforecast.utils.SecondOfMinute.__call__': ( 'utils.html#secondofminute.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.TimeFeature': ('utils.html#timefeature', 'neuralforecast/utils.py'),
'neuralforecast.utils.TimeFeature.__call__': ( 'utils.html#timefeature.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.TimeFeature.__init__': ( 'utils.html#timefeature.__init__',
'neuralforecast/utils.py'),
'neuralforecast.utils.TimeFeature.__repr__': ( 'utils.html#timefeature.__repr__',
'neuralforecast/utils.py'),
'neuralforecast.utils.WeekOfYear': ('utils.html#weekofyear', 'neuralforecast/utils.py'),
'neuralforecast.utils.WeekOfYear.__call__': ( 'utils.html#weekofyear.__call__',
'neuralforecast/utils.py'),
'neuralforecast.utils.augment_calendar_df': ( 'utils.html#augment_calendar_df',
'neuralforecast/utils.py'),
'neuralforecast.utils.generate_series': ('utils.html#generate_series', 'neuralforecast/utils.py'),
'neuralforecast.utils.get_indexer_raise_missing': ( 'utils.html#get_indexer_raise_missing',
'neuralforecast/utils.py'),
'neuralforecast.utils.time_features_from_frequency_str': ( 'utils.html#time_features_from_frequency_str',
'neuralforecast/utils.py')}}}
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/models.ipynb.
# %% auto 0
__all__ = ['AutoRNN', 'AutoLSTM', 'AutoGRU', 'AutoTCN', 'AutoDeepAR', 'AutoDilatedRNN', 'AutoBiTCN', 'AutoMLP', 'AutoNBEATS',
'AutoNBEATSx', 'AutoNHITS', 'AutoDLinear', 'AutoNLinear', 'AutoTFT', 'AutoVanillaTransformer',
'AutoInformer', 'AutoAutoformer', 'AutoFEDformer', 'AutoPatchTST', 'AutoiTransformer', 'AutoTimesNet',
'AutoStemGNN', 'AutoHINT', 'AutoTSMixer', 'AutoTSMixerx', 'AutoMLPMultivariate']
# %% ../nbs/models.ipynb 2
from os import cpu_count
import torch
from ray import tune
from ray.tune.search.basic_variant import BasicVariantGenerator
from .common._base_auto import BaseAuto
from .common._base_auto import MockTrial
from .models.rnn import RNN
from .models.gru import GRU
from .models.tcn import TCN
from .models.lstm import LSTM
from .models.deepar import DeepAR
from .models.dilated_rnn import DilatedRNN
from .models.bitcn import BiTCN
from .models.mlp import MLP
from .models.nbeats import NBEATS
from .models.nbeatsx import NBEATSx
from .models.nhits import NHITS
from .models.dlinear import DLinear
from .models.nlinear import NLinear
from .models.tft import TFT
from .models.vanillatransformer import VanillaTransformer
from .models.informer import Informer
from .models.autoformer import Autoformer
from .models.fedformer import FEDformer
from .models.patchtst import PatchTST
from .models.timesnet import TimesNet
from .models.itransformer import iTransformer
from .models.stemgnn import StemGNN
from .models.hint import HINT
from .models.tsmixer import TSMixer
from .models.tsmixerx import TSMixerx
from .models.mlpmultivariate import MLPMultivariate
from .losses.pytorch import MAE, MQLoss, DistributionLoss
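# The Auto* classes below pair a forecasting model with a `default_config`
# search space written with Ray Tune samplers (`tune.choice`, `tune.loguniform`,
# `tune.randint`, ...). `get_default_config` resolves horizon-dependent entries
# such as `input_size` and, for the "optuna" backend, converts the Ray space
# through `BaseAuto._ray_config_to_optuna`.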
# %% ../nbs/models.ipynb 13
class AutoRNN(BaseAuto):
default_config = {
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
"encoder_hidden_size": tune.choice([50, 100, 200, 300]),
"encoder_n_layers": tune.randint(1, 4),
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
"""Auto RNN
**Parameters:**<br>
"""
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoRNN, self).__init__(
cls_model=RNN,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["inference_input_size"] = tune.choice(
[h * x for x in config["inference_input_size_multiplier"]]
)
del config["input_size_multiplier"], config["inference_input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
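# Illustrative sketch (not generated from the notebook): an Auto* model is used
# like any other neuralforecast model, with the search running inside `fit`:
#   from neuralforecast import NeuralForecast
#   nf = NeuralForecast(models=[AutoRNN(h=12, num_samples=20)], freq="M")
#   nf.fit(df)        # runs the hyperparameter search and keeps the best configuration
#   preds = nf.predict()
# `df` is assumed to be a long-format frame with unique_id/ds/y columns.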
# %% ../nbs/models.ipynb 17
class AutoLSTM(BaseAuto):
default_config = {
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
"encoder_hidden_size": tune.choice([50, 100, 200, 300]),
"encoder_n_layers": tune.randint(1, 4),
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoLSTM, self).__init__(
cls_model=LSTM,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["inference_input_size"] = tune.choice(
[h * x for x in config["inference_input_size_multiplier"]]
)
del config["input_size_multiplier"], config["inference_input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 21
class AutoGRU(BaseAuto):
default_config = {
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
"encoder_hidden_size": tune.choice([50, 100, 200, 300]),
"encoder_n_layers": tune.randint(1, 4),
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoGRU, self).__init__(
cls_model=GRU,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["inference_input_size"] = tune.choice(
[h * x for x in config["inference_input_size_multiplier"]]
)
del config["input_size_multiplier"], config["inference_input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 25
class AutoTCN(BaseAuto):
default_config = {
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
"encoder_hidden_size": tune.choice([50, 100, 200, 300]),
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoTCN, self).__init__(
cls_model=TCN,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["inference_input_size"] = tune.choice(
[h * x for x in config["inference_input_size_multiplier"]]
)
del config["input_size_multiplier"], config["inference_input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 29
class AutoDeepAR(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"lstm_hidden_size": tune.choice([32, 64, 128, 256]),
"lstm_n_layers": tune.randint(1, 4),
"lstm_dropout": tune.uniform(0.0, 0.5),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice(["robust", "minmax1"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=DistributionLoss(
distribution="StudentT", level=[80, 90], return_params=False
),
valid_loss=MQLoss(level=[80, 90]),
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoDeepAR, self).__init__(
cls_model=DeepAR,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
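# Note: unlike most Auto* classes in this module, AutoDeepAR defaults to a
# probabilistic objective (DistributionLoss with a StudentT distribution and
# levels 80/90) and validates with MQLoss(level=[80, 90]), so its default
# forecasts include prediction intervals.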
# %% ../nbs/models.ipynb 33
class AutoDilatedRNN(BaseAuto):
default_config = {
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
"cell_type": tune.choice(["LSTM", "GRU"]),
"encoder_hidden_size": tune.choice([50, 100, 200, 300]),
"dilations": tune.choice([[[1, 2], [4, 8]], [[1, 2, 4, 8]]]),
"context_size": tune.choice([5, 10, 50]),
"decoder_hidden_size": tune.choice([64, 128, 256, 512]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoDilatedRNN, self).__init__(
cls_model=DilatedRNN,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["inference_input_size"] = tune.choice(
[h * x for x in config["inference_input_size_multiplier"]]
)
del config["input_size_multiplier"], config["inference_input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 37
class AutoBiTCN(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([16, 32]),
"dropout": tune.uniform(0.0, 0.99),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoBiTCN, self).__init__(
cls_model=BiTCN,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 42
class AutoMLP(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([256, 512, 1024]),
"num_layers": tune.randint(2, 6),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoMLP, self).__init__(
cls_model=MLP,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 46
class AutoNBEATS(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoNBEATS, self).__init__(
cls_model=NBEATS,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 50
class AutoNBEATSx(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoNBEATSx, self).__init__(
cls_model=NBEATSx,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 54
class AutoNHITS(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"n_pool_kernel_size": tune.choice(
[[2, 2, 1], 3 * [1], 3 * [2], 3 * [4], [8, 4, 1], [16, 8, 1]]
),
"n_freq_downsample": tune.choice(
[
[168, 24, 1],
[24, 12, 1],
[180, 60, 1],
[60, 8, 1],
[40, 20, 1],
[1, 1, 1],
]
),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.quniform(lower=500, upper=1500, q=100),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(lower=1, upper=20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoNHITS, self).__init__(
cls_model=NHITS,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 58
class AutoDLinear(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"moving_avg_window": tune.choice([11, 25, 51]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.quniform(lower=500, upper=1500, q=100),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(lower=1, upper=20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoDLinear, self).__init__(
cls_model=DLinear,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 62
class AutoNLinear(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.quniform(lower=500, upper=1500, q=100),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(lower=1, upper=20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoNLinear, self).__init__(
cls_model=NLinear,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 67
class AutoTFT(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([64, 128, 256]),
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoTFT, self).__init__(
cls_model=TFT,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 71
class AutoVanillaTransformer(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([64, 128, 256]),
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoVanillaTransformer, self).__init__(
cls_model=VanillaTransformer,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 75
class AutoInformer(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([64, 128, 256]),
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoInformer, self).__init__(
cls_model=Informer,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 79
class AutoAutoformer(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([64, 128, 256]),
"n_head": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoAutoformer, self).__init__(
cls_model=Autoformer,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 83
class AutoFEDformer(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([64, 128, 256]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoFEDformer, self).__init__(
cls_model=FEDformer,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 87
class AutoPatchTST(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3],
"h": None,
"hidden_size": tune.choice([16, 128, 256]),
"n_heads": tune.choice([4, 16]),
"patch_len": tune.choice([16, 24]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"revin": tune.choice([False, True]),
"max_steps": tune.choice([500, 1000, 5000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"windows_batch_size": tune.choice([128, 256, 512, 1024]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoPatchTST, self).__init__(
cls_model=PatchTST,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 91
class AutoiTransformer(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"n_series": None,
"hidden_size": tune.choice([64, 128, 256]),
"n_heads": tune.choice([4, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
n_series,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend, n_series=n_series)
        # Always take n_series from the constructor; with Optuna the config is a
        # callable we cannot modify, so raise if it is missing or inconsistent
if backend == "ray":
config["n_series"] = n_series
elif backend == "optuna":
mock_trial = MockTrial()
if (
"n_series" in config(mock_trial)
and config(mock_trial)["n_series"] != n_series
) or ("n_series" not in config(mock_trial)):
raise Exception(f"config needs 'n_series': {n_series}")
super(AutoiTransformer, self).__init__(
cls_model=iTransformer,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
# Rolling windows with step_size=1 or step_size=h
# See `BaseWindows` and `BaseRNN`'s create_windows
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
# Always use n_series from parameters
config["n_series"] = n_series
config = cls._ray_config_to_optuna(config)
return config
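# Illustrative sketch: the multivariate Auto* classes (AutoiTransformer,
# AutoStemGNN, AutoTSMixer, AutoTSMixerx, AutoMLPMultivariate) additionally
# require `n_series`, the number of series modeled jointly, e.g.
#   model = AutoiTransformer(h=12, n_series=7, num_samples=20)
# With the "ray" backend `n_series` is written into the config automatically;
# with "optuna" the user-supplied config callable must already return it.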
# %% ../nbs/models.ipynb 96
class AutoTimesNet(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"hidden_size": tune.choice([32, 64, 128]),
"conv_hidden_size": tune.choice([32, 64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice(["robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128]),
"windows_batch_size": tune.choice([32, 64, 128, 256]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend)
super(AutoTimesNet, self).__init__(
cls_model=TimesNet,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series=None):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 101
class AutoStemGNN(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4],
"h": None,
"n_series": None,
"n_stacks": tune.choice([2]),
"multi_layer": tune.choice([3, 5, 7]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
n_series,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend, n_series=n_series)
        # Always take n_series from the constructor; with Optuna the config is a
        # callable we cannot modify, so raise if it is missing or inconsistent
if backend == "ray":
config["n_series"] = n_series
elif backend == "optuna":
mock_trial = MockTrial()
if (
"n_series" in config(mock_trial)
and config(mock_trial)["n_series"] != n_series
) or ("n_series" not in config(mock_trial)):
raise Exception(f"config needs 'n_series': {n_series}")
super(AutoStemGNN, self).__init__(
cls_model=StemGNN,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
# Rolling windows with step_size=1 or step_size=h
# See `BaseWindows` and `BaseRNN`'s create_windows
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
# Always use n_series from parameters
config["n_series"] = n_series
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 105
class AutoHINT(BaseAuto):
def __init__(
self,
cls_model,
h,
loss,
valid_loss,
S,
config,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
refit_with_val=False,
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
super(AutoHINT, self).__init__(
cls_model=cls_model,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
if backend == "optuna":
raise Exception("Optuna is not supported for AutoHINT.")
# Validate presence of reconciliation strategy
# parameter in configuration space
if not ("reconciliation" in config.keys()):
raise Exception(
"config needs reconciliation, \
try tune.choice(['BottomUp', 'MinTraceOLS', 'MinTraceWLS'])"
)
self.S = S
def _fit_model(
self, cls_model, config, dataset, val_size, test_size, distributed_config=None
):
# Overwrite _fit_model for HINT two-stage instantiation
reconciliation = config.pop("reconciliation")
base_model = cls_model(**config)
model = HINT(
h=base_model.h, model=base_model, S=self.S, reconciliation=reconciliation
)
model.test_size = test_size
model = model.fit(
dataset,
val_size=val_size,
test_size=test_size,
distributed_config=distributed_config,
)
return model
@classmethod
def get_default_config(cls, h, backend, n_series=None):
raise Exception("AutoHINT has no default configuration.")
# %% ../nbs/models.ipynb 110
class AutoTSMixer(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4],
"h": None,
"n_series": None,
"n_block": tune.choice([1, 2, 4, 6, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-2),
"ff_dim": tune.choice([32, 64, 128]),
"scaler_type": tune.choice(["identity", "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"dropout": tune.uniform(0.0, 0.99),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
n_series,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend, n_series=n_series)
        # Always take n_series from the constructor; with Optuna the config is a
        # callable we cannot modify, so raise if it is missing or inconsistent
if backend == "ray":
config["n_series"] = n_series
elif backend == "optuna":
mock_trial = MockTrial()
if (
"n_series" in config(mock_trial)
and config(mock_trial)["n_series"] != n_series
) or ("n_series" not in config(mock_trial)):
raise Exception(f"config needs 'n_series': {n_series}")
super(AutoTSMixer, self).__init__(
cls_model=TSMixer,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
# Rolling windows with step_size=1 or step_size=h
# See `BaseWindows` and `BaseRNN`'s create_windows
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
# Always use n_series from parameters
config["n_series"] = n_series
config = cls._ray_config_to_optuna(config)
return config
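# Default-config sketch (illustrative): `get_default_config` turns the class-level
# `input_size_multiplier` into an `input_size` choice over multiples of the horizon,
# adds a `step_size` choice between 1 and `h`, and, for the Optuna backend, fixes
# `n_series` and converts the Ray samplers into a callable over a trial.
#
#   ray_space = AutoTSMixer.get_default_config(h=12, backend="ray", n_series=7)
#   # ray_space["input_size"] samples from [12, 24, 36, 48]
#   # ray_space["step_size"] samples from [1, 12]
#   optuna_space = AutoTSMixer.get_default_config(h=12, backend="optuna", n_series=7)
#   # optuna_space is a function of an optuna trial that returns a plain dict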
# %% ../nbs/models.ipynb 114
class AutoTSMixerx(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4],
"h": None,
"n_series": None,
"n_block": tune.choice([1, 2, 4, 6, 8]),
"learning_rate": tune.loguniform(1e-4, 1e-2),
"ff_dim": tune.choice([32, 64, 128]),
"scaler_type": tune.choice(["identity", "robust", "standard"]),
"max_steps": tune.choice([500, 1000, 2000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"dropout": tune.uniform(0.0, 0.99),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
n_series,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend, n_series=n_series)
# Always use n_series from parameters, raise exception with Optuna because we can't enforce it
if backend == "ray":
config["n_series"] = n_series
elif backend == "optuna":
mock_trial = MockTrial()
if (
"n_series" in config(mock_trial)
and config(mock_trial)["n_series"] != n_series
) or ("n_series" not in config(mock_trial)):
raise Exception(f"config needs 'n_series': {n_series}")
super(AutoTSMixerx, self).__init__(
cls_model=TSMixerx,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
# Rolling windows with step_size=1 or step_size=h
# See `BaseWindows` and `BaseRNN`'s create_windows
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
# Always use n_series from parameters
config["n_series"] = n_series
config = cls._ray_config_to_optuna(config)
return config
# %% ../nbs/models.ipynb 118
class AutoMLPMultivariate(BaseAuto):
default_config = {
"input_size_multiplier": [1, 2, 3, 4, 5],
"h": None,
"n_series": None,
"hidden_size": tune.choice([256, 512, 1024]),
"num_layers": tune.randint(2, 6),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"scaler_type": tune.choice([None, "robust", "standard"]),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([32, 64, 128, 256]),
"loss": None,
"random_seed": tune.randint(1, 20),
}
def __init__(
self,
h,
n_series,
loss=MAE(),
valid_loss=None,
config=None,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
refit_with_val=False,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
# Define search space, input/output sizes
if config is None:
config = self.get_default_config(h=h, backend=backend, n_series=n_series)
# Always use n_series from parameters, raise exception with Optuna because we can't enforce it
if backend == "ray":
config["n_series"] = n_series
elif backend == "optuna":
mock_trial = MockTrial()
if (
"n_series" in config(mock_trial)
and config(mock_trial)["n_series"] != n_series
) or ("n_series" not in config(mock_trial)):
raise Exception(f"config needs 'n_series': {n_series}")
super(AutoMLPMultivariate, self).__init__(
cls_model=MLPMultivariate,
h=h,
loss=loss,
valid_loss=valid_loss,
config=config,
search_alg=search_alg,
num_samples=num_samples,
refit_with_val=refit_with_val,
cpus=cpus,
gpus=gpus,
verbose=verbose,
alias=alias,
backend=backend,
callbacks=callbacks,
)
@classmethod
def get_default_config(cls, h, backend, n_series):
config = cls.default_config.copy()
config["input_size"] = tune.choice(
[h * x for x in config["input_size_multiplier"]]
)
# Rolling windows with step_size=1 or step_size=h
# See `BaseWindows` and `BaseRNN`'s create_windows
config["step_size"] = tune.choice([1, h])
del config["input_size_multiplier"]
if backend == "optuna":
# Always use n_series from parameters
config["n_series"] = n_series
config = cls._ray_config_to_optuna(config)
return config
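# Backend sketch (illustrative; the values below are assumptions): BaseAuto accepts
# either a Ray dict search space or an Optuna callable, and the multivariate Auto
# classes additionally enforce `n_series`.
#
#   from ray import tune
#   # Ray backend: plain dict of tune samplers / constants
#   config_ray = {"input_size": 24, "hidden_size": tune.choice([256, 512])}
#   model = AutoMLPMultivariate(h=12, n_series=7, backend="ray", config=config_ray)
#
#   # Optuna backend: callable over a trial; must return 'n_series' == 7 here
#   def config_optuna(trial):
#       return {"input_size": 24, "n_series": 7,
#               "hidden_size": trial.suggest_categorical("hidden_size", [256, 512])}
#   model = AutoMLPMultivariate(h=12, n_series=7, backend="optuna", config=config_optuna)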
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_auto.ipynb.
# %% auto 0
__all__ = ['BaseAuto']
# %% ../../nbs/common.base_auto.ipynb 5
from copy import deepcopy
from os import cpu_count
import torch
import pytorch_lightning as pl
from ray import air, tune
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from ray.tune.search.basic_variant import BasicVariantGenerator
# %% ../../nbs/common.base_auto.ipynb 6
class MockTrial:
def suggest_int(*args, **kwargs):
return "int"
def suggest_categorical(*args, **kwargs):
return "categorical"
def suggest_uniform(*args, **kwargs):
return "uniform"
def suggest_loguniform(*args, **kwargs):
return "loguniform"
def suggest_float(*args, **kwargs):
if "log" in kwargs:
return "quantized_log"
elif "step" in kwargs:
return "quantized_loguniform"
return "float"
# %% ../../nbs/common.base_auto.ipynb 7
class BaseAuto(pl.LightningModule):
"""
Class for automatic hyperparameter optimization. It builds on top of `ray` to
give access to a wide variety of hyperparameter optimization tools, ranging
from classic grid search to Bayesian optimization and the HyperBand algorithm.
The validation loss to be optimized is defined by the `config['loss']` dictionary
value; the config also contains the rest of the hyperparameter search space.
It is important to note that the success of this hyperparameter optimization
heavily relies on a strong correlation between the validation and test periods.
Parameters
----------
cls_model : PyTorch/PyTorchLightning model
See `neuralforecast.models` [collection here](https://nixtla.github.io/neuralforecast/models.html).
h : int
Forecast horizon
loss : PyTorch module
Instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
valid_loss : PyTorch module
Instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
config : dict or callable
Dictionary with ray.tune defined search space or function that takes an optuna trial and returns a configuration dict.
search_alg : ray.tune.search variant or optuna.sampler
For ray see https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
For optuna see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html.
num_samples : int
Number of hyperparameter optimization steps/samples.
cpus : int (default=os.cpu_count())
Number of cpus to use during optimization. Only used with ray tune.
gpus : int (default=torch.cuda.device_count())
Number of gpus to use during optimization, default all available. Only used with ray tune.
refit_with_val : bool
Whether the refit of the best model should preserve `val_size`.
verbose : bool
Track progress.
alias : str, optional (default=None)
Custom name of the model.
backend : str (default='ray')
Backend to use for searching the hyperparameter space, can be either 'ray' or 'optuna'.
callbacks : list of callable, optional (default=None)
List of functions to call during the optimization process.
ray reference: https://docs.ray.io/en/latest/tune/tutorials/tune-metrics.html
optuna reference: https://optuna.readthedocs.io/en/stable/tutorial/20_recipes/007_optuna_callback.html
"""
def __init__(
self,
cls_model,
h,
loss,
valid_loss,
config,
search_alg=BasicVariantGenerator(random_state=1),
num_samples=10,
cpus=cpu_count(),
gpus=torch.cuda.device_count(),
refit_with_val=False,
verbose=False,
alias=None,
backend="ray",
callbacks=None,
):
super(BaseAuto, self).__init__()
self.save_hyperparameters() # Allows instantiation from a checkpoint from class
if backend == "ray":
if not isinstance(config, dict):
raise ValueError(
"You have to provide a dict as `config` when using `backend='ray'`"
)
config_base = deepcopy(config)
elif backend == "optuna":
if not callable(config):
raise ValueError(
"You have to provide a function that takes a trial and returns a dict as `config` when using `backend='optuna'`"
)
# extract constant values from the config fn for validations
config_base = config(MockTrial())
else:
raise ValueError(
f"Unknown backend {backend}. The supported backends are 'ray' and 'optuna'."
)
if config_base.get("h", None) is not None:
raise Exception("Please use `h` init argument instead of `config['h']`.")
if config_base.get("loss", None) is not None:
raise Exception(
"Please use `loss` init argument instead of `config['loss']`."
)
if config_base.get("valid_loss", None) is not None:
raise Exception(
"Please use `valid_loss` init argument instead of `config['valid_loss']`."
)
# This attribute helps to protect
# model and dataset interactions
if "early_stop_patience_steps" in config_base.keys():
self.early_stop_patience_steps = 1
else:
self.early_stop_patience_steps = -1
if callable(config):
# reset config_base here to save params to override in the config fn
config_base = {}
# Add losses to config and protect valid_loss default
config_base["h"] = h
config_base["loss"] = loss
if valid_loss is None:
valid_loss = loss
config_base["valid_loss"] = valid_loss
if isinstance(config, dict):
self.config = config_base
else:
def config_f(trial):
return {**config(trial), **config_base}
self.config = config_f
self.h = h
self.cls_model = cls_model
self.loss = loss
self.valid_loss = valid_loss
self.num_samples = num_samples
self.search_alg = search_alg
self.cpus = cpus
self.gpus = gpus
self.refit_with_val = refit_with_val
self.verbose = verbose
self.alias = alias
self.backend = backend
self.callbacks = callbacks
# Base Class attributes
self.SAMPLING_TYPE = cls_model.SAMPLING_TYPE
def __repr__(self):
return type(self).__name__ if self.alias is None else self.alias
def _train_tune(self, config_step, cls_model, dataset, val_size, test_size):
"""BaseAuto._train_tune
Internal function that instantiates a NF model and reports the validation
loss (ptl/val_loss) on which the hyperparameter
exploration is based.
**Parameters:**<br>
`config_step`: Dict, initialization parameters of a NF model.<br>
`cls_model`: NeuralForecast model class, yet to be instantiated.<br>
`dataset`: NeuralForecast dataset, to fit the model.<br>
`val_size`: int, validation size for temporal cross-validation.<br>
`test_size`: int, test size for temporal cross-validation.<br>
"""
metrics = {"loss": "ptl/val_loss", "train_loss": "train_loss"}
callbacks = [TuneReportCallback(metrics, on="validation_end")]
if "callbacks" in config_step.keys():
callbacks.extend(config_step["callbacks"])
config_step = {**config_step, **{"callbacks": callbacks}}
# Protect dtypes from tune samplers
if "batch_size" in config_step.keys():
config_step["batch_size"] = int(config_step["batch_size"])
if "windows_batch_size" in config_step.keys():
config_step["windows_batch_size"] = int(config_step["windows_batch_size"])
# Tune session receives validation signal
# from the specialized PL TuneReportCallback
_ = self._fit_model(
cls_model=cls_model,
config=config_step,
dataset=dataset,
val_size=val_size,
test_size=test_size,
)
def _tune_model(
self,
cls_model,
dataset,
val_size,
test_size,
cpus,
gpus,
verbose,
num_samples,
search_alg,
config,
):
train_fn_with_parameters = tune.with_parameters(
self._train_tune,
cls_model=cls_model,
dataset=dataset,
val_size=val_size,
test_size=test_size,
)
# Device
if gpus > 0:
device_dict = {"gpu": gpus}
else:
device_dict = {"cpu": cpus}
# on Windows, prevent long trial directory names
import platform
trial_dirname_creator = (
(lambda trial: f"{trial.trainable_name}_{trial.trial_id}")
if platform.system() == "Windows"
else None
)
tuner = tune.Tuner(
tune.with_resources(train_fn_with_parameters, device_dict),
run_config=air.RunConfig(callbacks=self.callbacks, verbose=verbose),
tune_config=tune.TuneConfig(
metric="loss",
mode="min",
num_samples=num_samples,
search_alg=search_alg,
trial_dirname_creator=trial_dirname_creator,
),
param_space=config,
)
results = tuner.fit()
return results
@staticmethod
def _ray_config_to_optuna(ray_config):
def optuna_config(trial):
out = {}
for k, v in ray_config.items():
if hasattr(v, "sampler"):
sampler = v.sampler
if isinstance(
sampler, tune.search.sample.Integer.default_sampler_cls
):
v = trial.suggest_int(k, v.lower, v.upper)
elif isinstance(
sampler, tune.search.sample.Categorical.default_sampler_cls
):
v = trial.suggest_categorical(k, v.categories)
elif isinstance(sampler, tune.search.sample.Uniform):
v = trial.suggest_uniform(k, v.lower, v.upper)
elif isinstance(sampler, tune.search.sample.LogUniform):
v = trial.suggest_loguniform(k, v.lower, v.upper)
elif isinstance(sampler, tune.search.sample.Quantized):
if isinstance(
sampler.get_sampler(), tune.search.sample.Float._LogUniform
):
v = trial.suggest_float(k, v.lower, v.upper, log=True)
elif isinstance(
sampler.get_sampler(), tune.search.sample.Float._Uniform
):
v = trial.suggest_float(k, v.lower, v.upper, step=sampler.q)
else:
raise ValueError(f"Couldn't translate {type(v)} to optuna.")
out[k] = v
return out
return optuna_config
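# Translation sketch (illustrative): each Ray sampler maps to the matching
# optuna suggest_* call, for example
#   tune.randint(1, 20)          -> trial.suggest_int(k, 1, 20)
#   tune.choice([32, 64])        -> trial.suggest_categorical(k, [32, 64])
#   tune.loguniform(1e-4, 1e-1)  -> trial.suggest_loguniform(k, 1e-4, 1e-1)
# Plain constants (entries without a sampler) are passed through unchanged.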
def _optuna_tune_model(
self,
cls_model,
dataset,
val_size,
test_size,
verbose,
num_samples,
search_alg,
config,
distributed_config,
):
import optuna
def objective(trial):
user_cfg = config(trial)
cfg = deepcopy(user_cfg)
model = self._fit_model(
cls_model=cls_model,
config=cfg,
dataset=dataset,
val_size=val_size,
test_size=test_size,
distributed_config=distributed_config,
)
trial.set_user_attr("ALL_PARAMS", user_cfg)
metrics = model.metrics
trial.set_user_attr(
"METRICS",
{
"loss": metrics["ptl/val_loss"],
"train_loss": metrics["train_loss"],
},
)
return trial.user_attrs["METRICS"]["loss"]
if isinstance(search_alg, optuna.samplers.BaseSampler):
sampler = search_alg
else:
sampler = None
study = optuna.create_study(sampler=sampler, direction="minimize")
study.optimize(
objective,
n_trials=num_samples,
show_progress_bar=verbose,
callbacks=self.callbacks,
)
return study
def _fit_model(
self, cls_model, config, dataset, val_size, test_size, distributed_config=None
):
model = cls_model(**config)
model = model.fit(
dataset,
val_size=val_size,
test_size=test_size,
distributed_config=distributed_config,
)
return model
def fit(
self,
dataset,
val_size=0,
test_size=0,
random_seed=None,
distributed_config=None,
):
"""BaseAuto.fit
Perform the hyperparameter optimization as specified by the BaseAuto configuration
dictionary `config`.
The optimization is performed on the `TimeSeriesDataset` using temporal cross validation with
the validation set that sequentially precedes the test set.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset` see details [here](https://nixtla.github.io/neuralforecast/tsdataset.html)<br>
`val_size`: int, size of temporal validation set (needs to be bigger than 0).<br>
`test_size`: int, size of temporal test set (default 0).<br>
`random_seed`: int=None, random_seed for hyperparameter exploration algorithms, not yet implemented.<br>
**Returns:**<br>
`self`: fitted instance of `BaseAuto` with best hyperparameters and results.<br>
"""
# we need val_size > 0 to perform
# hyperparameter selection.
search_alg = deepcopy(self.search_alg)
val_size = val_size if val_size > 0 else self.h
if self.backend == "ray":
if distributed_config is not None:
raise ValueError(
"distributed training is not supported for the ray backend."
)
results = self._tune_model(
cls_model=self.cls_model,
dataset=dataset,
val_size=val_size,
test_size=test_size,
cpus=self.cpus,
gpus=self.gpus,
verbose=self.verbose,
num_samples=self.num_samples,
search_alg=search_alg,
config=self.config,
)
best_config = results.get_best_result().config
else:
results = self._optuna_tune_model(
cls_model=self.cls_model,
dataset=dataset,
val_size=val_size,
test_size=test_size,
verbose=self.verbose,
num_samples=self.num_samples,
search_alg=search_alg,
config=self.config,
distributed_config=distributed_config,
)
best_config = results.best_trial.user_attrs["ALL_PARAMS"]
self.model = self._fit_model(
cls_model=self.cls_model,
config=best_config,
dataset=dataset,
val_size=val_size * (1 - self.refit_with_val),
test_size=test_size,
distributed_config=distributed_config,
)
self.results = results
# Added attributes for compatibility with NeuralForecast core
self.futr_exog_list = self.model.futr_exog_list
self.hist_exog_list = self.model.hist_exog_list
self.stat_exog_list = self.model.stat_exog_list
return self
def predict(self, dataset, step_size=1, **data_kwargs):
"""BaseAuto.predict
Predictions of the best performing model on validation.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset` see details [here](https://nixtla.github.io/neuralforecast/tsdataset.html)<br>
`step_size`: int, steps between sequential predictions, (default 1).<br>
`**data_kwargs`: additional parameters for the dataset module.<br>
**Returns:**<br>
`y_hat`: numpy predictions of the `NeuralForecast` model.<br>
"""
return self.model.predict(dataset=dataset, step_size=step_size, **data_kwargs)
def set_test_size(self, test_size):
self.model.set_test_size(test_size)
def get_test_size(self):
return self.model.test_size
def save(self, path):
"""BaseAuto.save
Save the fitted model to disk.
**Parameters:**<br>
`path`: str, path to save the model.<br>
"""
self.model.save(path)
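# Workflow sketch (illustrative; `AutoTSMixer` and the helper functions are
# assumptions, not part of this module): every Auto model built on BaseAuto
# follows the same fit/predict cycle on a TimeSeriesDataset.
#
#   from neuralforecast.tsdataset import TimeSeriesDataset
#   from neuralforecast.utils import generate_series
#
#   Y_df = generate_series(n_series=3)
#   dataset, *_ = TimeSeriesDataset.from_df(Y_df)
#   auto = AutoTSMixer(h=12, n_series=3, num_samples=2)
#   auto = auto.fit(dataset=dataset, val_size=24)   # runs the HPO study
#   y_hat = auto.predict(dataset=dataset)           # forecasts of the best model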
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_model.ipynb.
# %% auto 0
__all__ = ['DistributedConfig', 'BaseModel']
# %% ../../nbs/common.base_model.ipynb 2
import inspect
import random
import warnings
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
import fsspec
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from neuralforecast.tsdataset import (
TimeSeriesDataModule,
TimeSeriesDataset,
_DistributedTimeSeriesDataModule,
)
# %% ../../nbs/common.base_model.ipynb 3
@dataclass
class DistributedConfig:
partitions_path: str
num_nodes: int
devices: int
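# Sketch (illustrative values): a DistributedConfig describes where the Spark data
# partitions live and how many nodes/devices the TorchDistributor should use.
#
#   dist_cfg = DistributedConfig(
#       partitions_path="/tmp/partitions",  # hypothetical path
#       num_nodes=2,
#       devices=1,
#   )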
# %% ../../nbs/common.base_model.ipynb 4
@contextmanager
def _disable_torch_init():
"""Context manager used to disable pytorch's weight initialization.
This is especially useful when loading saved models: when a model is
instantiated its weights are initialized following some method
(e.g. kaiming uniform), and that time is wasted since we'll override them with
the saved weights."""
def noop(*args, **kwargs):
return
kaiming_uniform = nn.init.kaiming_uniform_
kaiming_normal = nn.init.kaiming_normal_
xavier_uniform = nn.init.xavier_uniform_
xavier_normal = nn.init.xavier_normal_
nn.init.kaiming_uniform_ = noop
nn.init.kaiming_normal_ = noop
nn.init.xavier_uniform_ = noop
nn.init.xavier_normal_ = noop
try:
yield
finally:
nn.init.kaiming_uniform_ = kaiming_uniform
nn.init.kaiming_normal_ = kaiming_normal
nn.init.xavier_uniform_ = xavier_uniform
nn.init.xavier_normal_ = xavier_normal
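# Usage sketch (illustrative; `SomeModelClass`, `hyper_parameters` and
# `saved_state_dict` are placeholders): skip the default weight initialization
# while rebuilding a model whose weights will be overwritten right afterwards,
# as `BaseModel.load` does below.
#
#   with _disable_torch_init():
#       model = SomeModelClass(**hyper_parameters)
#   model.load_state_dict(saved_state_dict)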
# %% ../../nbs/common.base_model.ipynb 5
class BaseModel(pl.LightningModule):
def __init__(
self,
random_seed,
loss,
valid_loss,
optimizer,
optimizer_kwargs,
futr_exog_list,
hist_exog_list,
stat_exog_list,
max_steps,
early_stop_patience_steps,
**trainer_kwargs,
):
super().__init__()
with warnings.catch_warnings(record=False):
warnings.filterwarnings("ignore")
# the following line issues a warning about the loss attribute being saved
# but we do want to save it
self.save_hyperparameters() # Allows instantiation from a checkpoint from class
self.random_seed = random_seed
pl.seed_everything(self.random_seed, workers=True)
# Loss
self.loss = loss
if valid_loss is None:
self.valid_loss = loss
else:
self.valid_loss = valid_loss
self.train_trajectories = []
self.valid_trajectories = []
# Optimization
if optimizer is not None and not issubclass(optimizer, torch.optim.Optimizer):
raise TypeError(
"optimizer is not a valid subclass of torch.optim.Optimizer"
)
self.optimizer = optimizer
self.optimizer_kwargs = optimizer_kwargs if optimizer_kwargs else {}
# Variables
self.futr_exog_list = list(futr_exog_list) if futr_exog_list is not None else []
self.hist_exog_list = list(hist_exog_list) if hist_exog_list is not None else []
self.stat_exog_list = list(stat_exog_list) if stat_exog_list is not None else []
## Trainer arguments ##
# Max steps, validation steps and check_val_every_n_epoch
trainer_kwargs = {**trainer_kwargs, "max_steps": max_steps}
if "max_epochs" in trainer_kwargs.keys():
raise Exception("max_epochs is deprecated, use max_steps instead.")
# Callbacks
if early_stop_patience_steps > 0:
if "callbacks" not in trainer_kwargs:
trainer_kwargs["callbacks"] = []
trainer_kwargs["callbacks"].append(
EarlyStopping(
monitor="ptl/val_loss", patience=early_stop_patience_steps
)
)
# Add GPU accelerator if available
if trainer_kwargs.get("accelerator", None) is None:
if torch.cuda.is_available():
trainer_kwargs["accelerator"] = "gpu"
if trainer_kwargs.get("devices", None) is None:
if torch.cuda.is_available():
trainer_kwargs["devices"] = -1
# Avoid saturating local memory: disable fit model checkpoints by default
if trainer_kwargs.get("enable_checkpointing", None) is None:
trainer_kwargs["enable_checkpointing"] = False
self.trainer_kwargs = trainer_kwargs
def __repr__(self):
return type(self).__name__ if self.alias is None else self.alias
def _check_exog(self, dataset):
temporal_cols = set(dataset.temporal_cols.tolist())
static_cols = set(
dataset.static_cols.tolist() if dataset.static_cols is not None else []
)
missing_hist = set(self.hist_exog_list) - temporal_cols
missing_futr = set(self.futr_exog_list) - temporal_cols
missing_stat = set(self.stat_exog_list) - static_cols
if missing_hist:
raise Exception(
f"{missing_hist} historical exogenous variables not found in input dataset"
)
if missing_futr:
raise Exception(
f"{missing_futr} future exogenous variables not found in input dataset"
)
if missing_stat:
raise Exception(
f"{missing_stat} static exogenous variables not found in input dataset"
)
def _restart_seed(self, random_seed):
if random_seed is None:
random_seed = self.random_seed
torch.manual_seed(random_seed)
def _get_temporal_exogenous_cols(self, temporal_cols):
return list(
set(temporal_cols.tolist()) & set(self.hist_exog_list + self.futr_exog_list)
)
def _fit(
self,
dataset,
batch_size,
valid_batch_size=1024,
val_size=0,
test_size=0,
random_seed=None,
shuffle_train=True,
distributed_config=None,
):
self._check_exog(dataset)
self._restart_seed(random_seed)
self.val_size = val_size
self.test_size = test_size
is_local = isinstance(dataset, TimeSeriesDataset)
if is_local:
datamodule_constructor = TimeSeriesDataModule
else:
datamodule_constructor = _DistributedTimeSeriesDataModule
datamodule = datamodule_constructor(
dataset=dataset,
batch_size=batch_size,
valid_batch_size=valid_batch_size,
num_workers=self.num_workers_loader,
drop_last=self.drop_last_loader,
shuffle_train=shuffle_train,
)
if self.val_check_steps > self.max_steps:
warnings.warn(
"val_check_steps is greater than max_steps, "
"setting val_check_steps to max_steps."
)
val_check_interval = min(self.val_check_steps, self.max_steps)
self.trainer_kwargs["val_check_interval"] = int(val_check_interval)
self.trainer_kwargs["check_val_every_n_epoch"] = None
if is_local:
model = self
trainer = pl.Trainer(**model.trainer_kwargs)
trainer.fit(model, datamodule=datamodule)
model.metrics = trainer.callback_metrics
model.__dict__.pop("_trainer", None)
else:
assert distributed_config is not None
from pyspark.ml.torch.distributor import TorchDistributor
def train_fn(
model_cls,
model_params,
datamodule,
trainer_kwargs,
num_tasks,
num_proc_per_task,
val_size,
test_size,
):
import pytorch_lightning as pl
# we instantiate here to avoid pickling large tensors (weights)
model = model_cls(**model_params)
model.val_size = val_size
model.test_size = test_size
for arg in ("devices", "num_nodes"):
trainer_kwargs.pop(arg, None)
trainer = pl.Trainer(
strategy="ddp",
use_distributed_sampler=False, # to ensure our dataloaders are used as-is
num_nodes=num_tasks,
devices=num_proc_per_task,
**trainer_kwargs,
)
trainer.fit(model=model, datamodule=datamodule)
model.metrics = trainer.callback_metrics
model.__dict__.pop("_trainer", None)
return model
def is_gpu_accelerator(accelerator):
from pytorch_lightning.accelerators.cuda import CUDAAccelerator
return (
accelerator == "gpu"
or isinstance(accelerator, CUDAAccelerator)
or (accelerator == "auto" and CUDAAccelerator.is_available())
)
local_mode = distributed_config.num_nodes == 1
if local_mode:
num_tasks = 1
num_proc_per_task = distributed_config.devices
else:
num_tasks = distributed_config.num_nodes * distributed_config.devices
num_proc_per_task = 1 # number of GPUs per task
num_proc = num_tasks * num_proc_per_task
use_gpu = is_gpu_accelerator(self.trainer_kwargs["accelerator"])
model = TorchDistributor(
num_processes=num_proc,
local_mode=local_mode,
use_gpu=use_gpu,
).run(
train_fn,
model_cls=type(self),
model_params=self.hparams,
datamodule=datamodule,
trainer_kwargs=self.trainer_kwargs,
num_tasks=num_tasks,
num_proc_per_task=num_proc_per_task,
val_size=val_size,
test_size=test_size,
)
return model
def on_fit_start(self):
torch.manual_seed(self.random_seed)
np.random.seed(self.random_seed)
random.seed(self.random_seed)
def configure_optimizers(self):
if self.optimizer:
optimizer_signature = inspect.signature(self.optimizer)
optimizer_kwargs = deepcopy(self.optimizer_kwargs)
if "lr" in optimizer_signature.parameters:
if "lr" in optimizer_kwargs:
warnings.warn(
"ignoring learning rate passed in optimizer_kwargs, using the model's learning rate"
)
optimizer_kwargs["lr"] = self.learning_rate
optimizer = self.optimizer(params=self.parameters(), **optimizer_kwargs)
else:
optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
scheduler = {
"scheduler": torch.optim.lr_scheduler.StepLR(
optimizer=optimizer, step_size=self.lr_decay_steps, gamma=0.5
),
"frequency": 1,
"interval": "step",
}
return {"optimizer": optimizer, "lr_scheduler": scheduler}
def get_test_size(self):
return self.test_size
def set_test_size(self, test_size):
self.test_size = test_size
def on_validation_epoch_end(self):
if self.val_size == 0:
return
losses = torch.stack(self.validation_step_outputs)
avg_loss = losses.mean().item()
self.log(
"ptl/val_loss",
avg_loss,
batch_size=losses.size(0),
sync_dist=True,
)
self.valid_trajectories.append((self.global_step, avg_loss))
self.validation_step_outputs.clear() # free memory (compute `avg_loss` per epoch)
def save(self, path):
with fsspec.open(path, "wb") as f:
torch.save(
{"hyper_parameters": self.hparams, "state_dict": self.state_dict()},
f,
)
@classmethod
def load(cls, path, **kwargs):
with fsspec.open(path, "rb") as f:
content = torch.load(f, **kwargs)
with _disable_torch_init():
model = cls(**content["hyper_parameters"])
model.load_state_dict(content["state_dict"], strict=True, assign=True)
return model
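# Round-trip sketch (illustrative path): `save` writes the hyperparameters and
# state_dict through fsspec, so local paths and remote URLs both work, and
# `load` rebuilds the model with weight initialization disabled; extra kwargs
# are forwarded to `torch.load`.
#
#   model.save("checkpoints/run1.pt")
#   restored = type(model).load("checkpoints/run1.pt", map_location="cpu")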
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_multivariate.ipynb.
# %% auto 0
__all__ = ['BaseMultivariate']
# %% ../../nbs/common.base_multivariate.ipynb 5
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl
import neuralforecast.losses.pytorch as losses
from ._base_model import BaseModel
from ._scalers import TemporalNorm
from ..tsdataset import TimeSeriesDataModule
from ..utils import get_indexer_raise_missing
# %% ../../nbs/common.base_multivariate.ipynb 6
class BaseMultivariate(BaseModel):
"""Base Multivariate
Base class for all multivariate models. The forecasts for all time-series are produced simultaneously
within each window, which are randomly sampled during training.
This class implements the basic functionality for all windows-based models, including:
- PyTorch Lightning's methods training_step, validation_step, predict_step.<br>
- fit and predict methods used by NeuralForecast.core class.<br>
- sampling and wrangling methods to generate multivariate windows.
"""
def __init__(
self,
h,
input_size,
loss,
valid_loss,
learning_rate,
max_steps,
val_check_steps,
n_series,
batch_size,
step_size=1,
num_lr_decays=0,
early_stop_patience_steps=-1,
scaler_type="robust",
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
num_workers_loader=0,
drop_last_loader=False,
random_seed=1,
alias=None,
optimizer=None,
optimizer_kwargs=None,
**trainer_kwargs,
):
super().__init__(
random_seed=random_seed,
loss=loss,
valid_loss=valid_loss,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
max_steps=max_steps,
early_stop_patience_steps=early_stop_patience_steps,
**trainer_kwargs,
)
# Padder to complete train windows,
# example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
self.h = h
self.input_size = input_size
self.n_series = n_series
self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)
# Multivariate models do not support these loss functions yet.
unsupported_losses = (
losses.sCRPS,
losses.MQLoss,
losses.DistributionLoss,
losses.PMM,
losses.GMM,
losses.HuberMQLoss,
losses.MASE,
losses.relMSE,
losses.NBMM,
)
if isinstance(self.loss, unsupported_losses):
raise Exception(f"{self.loss} is not supported in a Multivariate model.")
if isinstance(self.valid_loss, unsupported_losses):
raise Exception(
f"{self.valid_loss} is not supported in a Multivariate model."
)
self.batch_size = batch_size
# Optimization
self.learning_rate = learning_rate
self.max_steps = max_steps
self.num_lr_decays = num_lr_decays
self.lr_decay_steps = (
max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
)
self.early_stop_patience_steps = early_stop_patience_steps
self.val_check_steps = val_check_steps
self.step_size = step_size
# Scaler
self.scaler = TemporalNorm(
scaler_type=scaler_type, dim=2
) # Time dimension is in the second axis
# Fit arguments
self.val_size = 0
self.test_size = 0
# Model state
self.decompose_forecast = False
# DataModule arguments
self.num_workers_loader = num_workers_loader
self.drop_last_loader = drop_last_loader
# used by on_validation_epoch_end hook
self.validation_step_outputs = []
self.alias = alias
def _create_windows(self, batch, step):
# Parse common data
window_size = self.input_size + self.h
temporal_cols = batch["temporal_cols"]
temporal = batch["temporal"]
if step == "train":
if self.val_size + self.test_size > 0:
cutoff = -self.val_size - self.test_size
temporal = temporal[:, :, :cutoff]
temporal = self.padder(temporal)
windows = temporal.unfold(
dimension=-1, size=window_size, step=self.step_size
)
# [n_series, C, Ws, L+H] 0, 1, 2, 3
# Sample and Available conditions
available_idx = temporal_cols.get_loc("available_mask")
sample_condition = windows[:, available_idx, :, -self.h :]
sample_condition = torch.sum(sample_condition, axis=2) # Sum over time
sample_condition = torch.sum(
sample_condition, axis=0
) # Sum over time-series
available_condition = windows[:, available_idx, :, : -self.h]
available_condition = torch.sum(
available_condition, axis=2
) # Sum over time
available_condition = torch.sum(
available_condition, axis=0
) # Sum over time-series
final_condition = (sample_condition > 0) & (
available_condition > 0
) # Of shape [Ws]
windows = windows[:, :, final_condition, :]
# Get Static data
static = batch.get("static", None)
static_cols = batch.get("static_cols", None)
# Protection of empty windows
if final_condition.sum() == 0:
raise Exception("No windows available for training")
# Sample windows
n_windows = windows.shape[2]
if self.batch_size is not None:
w_idxs = np.random.choice(
n_windows,
size=self.batch_size,
replace=(n_windows < self.batch_size),
)
windows = windows[:, :, w_idxs, :]
windows = windows.permute(2, 1, 3, 0) # [Ws, C, L+H, n_series]
windows_batch = dict(
temporal=windows,
temporal_cols=temporal_cols,
static=static,
static_cols=static_cols,
)
return windows_batch
elif step in ["predict", "val"]:
if step == "predict":
predict_step_size = self.predict_step_size
cutoff = -self.input_size - self.test_size
temporal = batch["temporal"][:, :, cutoff:]
elif step == "val":
predict_step_size = self.step_size
cutoff = -self.input_size - self.val_size - self.test_size
if self.test_size > 0:
temporal = batch["temporal"][:, :, cutoff : -self.test_size]
else:
temporal = batch["temporal"][:, :, cutoff:]
if (
(step == "predict")
and (self.test_size == 0)
and (len(self.futr_exog_list) == 0)
):
temporal = self.padder(temporal)
windows = temporal.unfold(
dimension=-1, size=window_size, step=predict_step_size
)
# [n_series, C, Ws, L+H] -> [Ws, C, L+H, n_series]
windows = windows.permute(2, 1, 3, 0)
# Get Static data
static = batch.get("static", None)
static_cols = batch.get("static_cols", None)
windows_batch = dict(
temporal=windows,
temporal_cols=temporal_cols,
static=static,
static_cols=static_cols,
)
return windows_batch
else:
raise ValueError(f"Unknown step {step}")
def _normalization(self, windows, y_idx):
# windows are already filtered by train/validation/test
# in the `_create_windows` method, so there is no leakage risk
temporal = windows["temporal"] # [Ws, C, L+H, n_series]
temporal_cols = windows["temporal_cols"].copy() # [Ws, C, L+H, n_series]
# To avoid leakage uses only the lags
temporal_data_cols = self._get_temporal_exogenous_cols(
temporal_cols=temporal_cols
)
temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
temporal_idxs = np.append(y_idx, temporal_idxs)
temporal_data = temporal[:, temporal_idxs, :, :]
temporal_mask = temporal[
:, temporal_cols.get_loc("available_mask"), :, :
].clone()
temporal_mask[:, -self.h :, :] = 0.0
# Normalize. self.scaler stores the shift and scale for inverse transform
temporal_mask = temporal_mask.unsqueeze(
1
) # Add channel dimension for scaler.transform.
temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
# Replace values in windows dict
temporal[:, temporal_idxs, :, :] = temporal_data
windows["temporal"] = temporal
return windows
def _inv_normalization(self, y_hat, temporal_cols, y_idx):
# Receives window predictions [Ws, H, n_series]
# Broadcasts outputs and inverts normalization
# Add C dimension
# if y_hat.ndim == 2:
# remove_dimension = True
# y_hat = y_hat.unsqueeze(-1)
# else:
# remove_dimension = False
y_scale = self.scaler.x_scale[:, [y_idx], :].squeeze(1)
y_loc = self.scaler.x_shift[:, [y_idx], :].squeeze(1)
# y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1)
# y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1)
y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
# if remove_dimension:
# y_hat = y_hat.squeeze(-1)
# y_loc = y_loc.squeeze(-1)
# y_scale = y_scale.squeeze(-1)
return y_hat, y_loc, y_scale
def _parse_windows(self, batch, windows):
# Temporal: [Ws, C, L+H, n_series]
# Filter insample lags from outsample horizon
mask_idx = batch["temporal_cols"].get_loc("available_mask")
y_idx = batch["y_idx"]
insample_y = windows["temporal"][:, y_idx, : -self.h, :]
insample_mask = windows["temporal"][:, mask_idx, : -self.h, :]
outsample_y = windows["temporal"][:, y_idx, -self.h :, :]
outsample_mask = windows["temporal"][:, mask_idx, -self.h :, :]
# Filter historic exogenous variables
if len(self.hist_exog_list):
hist_exog_idx = get_indexer_raise_missing(
windows["temporal_cols"], self.hist_exog_list
)
hist_exog = windows["temporal"][:, hist_exog_idx, : -self.h, :]
else:
hist_exog = None
# Filter future exogenous variables
if len(self.futr_exog_list):
futr_exog_idx = get_indexer_raise_missing(
windows["temporal_cols"], self.futr_exog_list
)
futr_exog = windows["temporal"][:, futr_exog_idx, :, :]
else:
futr_exog = None
# Filter static variables
if len(self.stat_exog_list):
static_idx = get_indexer_raise_missing(
windows["static_cols"], self.stat_exog_list
)
stat_exog = windows["static"][:, static_idx]
else:
stat_exog = None
return (
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
)
def training_step(self, batch, batch_idx):
# Create and normalize windows [batch_size, n_series, C, L+H]
windows = self._create_windows(batch, step="train")
y_idx = batch["y_idx"]
windows = self._normalization(windows=windows, y_idx=y_idx)
# Parse windows
(
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
) = self._parse_windows(batch, windows)
windows_batch = dict(
insample_y=insample_y, # [batch_size, L, n_series]
insample_mask=insample_mask, # [batch_size, L, n_series]
futr_exog=futr_exog, # [batch_size, n_feats, L+H, n_series]
hist_exog=hist_exog, # [batch_size, n_feats, L, n_series]
stat_exog=stat_exog,
) # [n_series, n_feats]
# Model Predictions
output = self(windows_batch)
if self.loss.is_distribution_output:
outsample_y, y_loc, y_scale = self._inv_normalization(
y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
else:
loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)
if torch.isnan(loss):
print("Model Parameters", self.hparams)
print("insample_y", torch.isnan(insample_y).sum())
print("outsample_y", torch.isnan(outsample_y).sum())
print("output", torch.isnan(output).sum())
raise Exception("Loss is NaN, training stopped.")
self.log(
"train_loss",
loss.item(),
batch_size=outsample_y.size(0),
prog_bar=True,
on_epoch=True,
)
self.train_trajectories.append((self.global_step, loss.item()))
return loss
def validation_step(self, batch, batch_idx):
if self.val_size == 0:
return np.nan
# Create and normalize windows [Ws, L+H, C]
windows = self._create_windows(batch, step="val")
y_idx = batch["y_idx"]
windows = self._normalization(windows=windows, y_idx=y_idx)
# Parse windows
(
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
) = self._parse_windows(batch, windows)
windows_batch = dict(
insample_y=insample_y, # [Ws, L]
insample_mask=insample_mask, # [Ws, L]
futr_exog=futr_exog, # [Ws, L+H]
hist_exog=hist_exog, # [Ws, L]
stat_exog=stat_exog,
) # [Ws, 1]
# Model Predictions
output = self(windows_batch)
if self.loss.is_distribution_output:
outsample_y, y_loc, y_scale = self._inv_normalization(
y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
if str(type(self.valid_loss)) in [
"<class 'neuralforecast.losses.pytorch.sCRPS'>",
"<class 'neuralforecast.losses.pytorch.MQLoss'>",
]:
_, output = self.loss.sample(distr_args=distr_args)
# Validation Loss evaluation
if self.valid_loss.is_distribution_output:
valid_loss = self.valid_loss(
y=outsample_y, distr_args=distr_args, mask=outsample_mask
)
else:
valid_loss = self.valid_loss(
y=outsample_y, y_hat=output, mask=outsample_mask
)
if torch.isnan(valid_loss):
raise Exception("Loss is NaN, training stopped.")
self.log(
"valid_loss",
valid_loss.item(),
batch_size=outsample_y.size(0),
prog_bar=True,
on_epoch=True,
)
self.validation_step_outputs.append(valid_loss)
return valid_loss
def predict_step(self, batch, batch_idx):
# Create and normalize windows [Ws, L+H, C]
windows = self._create_windows(batch, step="predict")
y_idx = batch["y_idx"]
windows = self._normalization(windows=windows, y_idx=y_idx)
# Parse windows
insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
self._parse_windows(batch, windows)
)
windows_batch = dict(
insample_y=insample_y, # [Ws, L]
insample_mask=insample_mask, # [Ws, L]
futr_exog=futr_exog, # [Ws, L+H]
hist_exog=hist_exog, # [Ws, L]
stat_exog=stat_exog,
) # [Ws, 1]
# Model Predictions
output = self(windows_batch)
if self.loss.is_distribution_output:
_, y_loc, y_scale = self._inv_normalization(
y_hat=output[0], temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
_, y_hat = self.loss.sample(distr_args=distr_args)
if self.loss.return_params:
distr_args = torch.stack(distr_args, dim=-1)
distr_args = torch.reshape(
distr_args, (len(windows["temporal"]), self.h, -1)
)
y_hat = torch.concat((y_hat, distr_args), axis=2)
else:
y_hat, _, _ = self._inv_normalization(
y_hat=output, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
return y_hat
def fit(
self,
dataset,
val_size=0,
test_size=0,
random_seed=None,
distributed_config=None,
):
"""Fit.
The `fit` method optimizes the neural network's weights using the
initialization parameters (`learning_rate`, `windows_batch_size`, ...)
and the `loss` function as defined during the initialization.
Within `fit` we use a PyTorch Lightning `Trainer` that
inherits the initialization's `self.trainer_kwargs`, to customize
its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
The method is designed to be compatible with SKLearn-like classes
and in particular to be compatible with the StatsForecast library.
By default the `model` does not save training checkpoints, to protect
disk space; to keep them, set `enable_checkpointing=True` in `__init__`.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`val_size`: int, validation size for temporal cross-validation.<br>
`test_size`: int, test size for temporal cross-validation.<br>
"""
if distributed_config is not None:
raise ValueError(
"multivariate models cannot be trained using distributed data parallel."
)
return self._fit(
dataset=dataset,
batch_size=self.n_series,
valid_batch_size=self.n_series,
val_size=val_size,
test_size=test_size,
random_seed=random_seed,
shuffle_train=False,
distributed_config=None,
)
def predict(
self,
dataset,
test_size=None,
step_size=1,
random_seed=None,
**data_module_kwargs,
):
"""Predict.
Neural network prediction with PL's `Trainer` execution of `predict_step`.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`test_size`: int=None, test size for temporal cross-validation.<br>
`step_size`: int=1, Step size between each window.<br>
`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
"""
self._check_exog(dataset)
self._restart_seed(random_seed)
self.predict_step_size = step_size
self.decompose_forecast = False
datamodule = TimeSeriesDataModule(
dataset=dataset,
valid_batch_size=self.n_series,
batch_size=self.n_series,
**data_module_kwargs,
)
# Protect against the multi-GPU case: PL does not support returning predictions with multiple GPUs.
pred_trainer_kwargs = self.trainer_kwargs.copy()
if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
torch.cuda.device_count() > 1
):
pred_trainer_kwargs["devices"] = [0]
trainer = pl.Trainer(**pred_trainer_kwargs)
fcsts = trainer.predict(self, datamodule=datamodule)
fcsts = torch.vstack(fcsts).numpy()
fcsts = np.transpose(fcsts, (2, 0, 1))
fcsts = fcsts.flatten()
fcsts = fcsts.reshape(-1, len(self.loss.output_names))
return fcsts
def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):
raise NotImplementedError("decompose")
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_recurrent.ipynb.
# %% auto 0
__all__ = ['BaseRecurrent']
# %% ../../nbs/common.base_recurrent.ipynb 6
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl
from ._base_model import BaseModel
from ._scalers import TemporalNorm
from ..tsdataset import TimeSeriesDataModule
from ..utils import get_indexer_raise_missing
# %% ../../nbs/common.base_recurrent.ipynb 7
class BaseRecurrent(BaseModel):
"""Base Recurrent
Base class for all recurrent-based models. The forecasts are produced sequentially between
windows.
This class implements the basic functionality for all windows-based models, including:
- PyTorch Lightning's methods training_step, validation_step, predict_step. <br>
- fit and predict methods used by NeuralForecast.core class. <br>
- sampling and wrangling methods to generate sequential windows. <br>
"""
def __init__(
self,
h,
input_size,
inference_input_size,
loss,
valid_loss,
learning_rate,
max_steps,
val_check_steps,
batch_size,
valid_batch_size,
scaler_type="robust",
num_lr_decays=0,
early_stop_patience_steps=-1,
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
num_workers_loader=0,
drop_last_loader=False,
random_seed=1,
alias=None,
optimizer=None,
optimizer_kwargs=None,
**trainer_kwargs,
):
super().__init__(
random_seed=random_seed,
loss=loss,
valid_loss=valid_loss,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
max_steps=max_steps,
early_stop_patience_steps=early_stop_patience_steps,
**trainer_kwargs,
)
# Padder to complete train windows,
# example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
self.h = h
self.input_size = input_size
self.inference_input_size = inference_input_size
self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0)
if (
str(type(self.loss))
== "<class 'neuralforecast.losses.pytorch.DistributionLoss'>"
and self.loss.distribution == "Bernoulli"
):
raise Exception(
"Temporal Classification not yet available for Recurrent-based models"
)
# Valid batch_size
self.batch_size = batch_size
if valid_batch_size is None:
self.valid_batch_size = batch_size
else:
self.valid_batch_size = valid_batch_size
# Optimization
self.learning_rate = learning_rate
self.max_steps = max_steps
self.num_lr_decays = num_lr_decays
self.lr_decay_steps = (
max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
)
self.early_stop_patience_steps = early_stop_patience_steps
self.val_check_steps = val_check_steps
# Scaler
self.scaler = TemporalNorm(
scaler_type=scaler_type,
dim=-1, # Time dimension is -1.
num_features=1 + len(self.hist_exog_list) + len(self.futr_exog_list),
)
# Fit arguments
self.val_size = 0
self.test_size = 0
# DataModule arguments
self.num_workers_loader = num_workers_loader
self.drop_last_loader = drop_last_loader
# used by on_validation_epoch_end hook
self.validation_step_outputs = []
self.alias = alias
def _normalization(self, batch, val_size=0, test_size=0):
temporal = batch["temporal"] # B, C, T
temporal_cols = batch["temporal_cols"].copy()
y_idx = batch["y_idx"]
# Separate data and mask
temporal_data_cols = self._get_temporal_exogenous_cols(
temporal_cols=temporal_cols
)
temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
temporal_idxs = np.append(y_idx, temporal_idxs)
temporal_data = temporal[:, temporal_idxs, :]
temporal_mask = temporal[:, temporal_cols.get_loc("available_mask"), :].clone()
# Remove validation and test set to prevent leakage
if val_size + test_size > 0:
cutoff = val_size + test_size
temporal_mask[:, -cutoff:] = 0
# Normalize. self.scaler stores the shift and scale for inverse transform
temporal_mask = temporal_mask.unsqueeze(
1
) # Add channel dimension for scaler.transform.
temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
# Replace values in windows dict
temporal[:, temporal_idxs, :] = temporal_data
batch["temporal"] = temporal
return batch
def _inv_normalization(self, y_hat, temporal_cols, y_idx):
# Receives window predictions [B, seq_len, H, output]
# Broadcasts outputs and inverts normalization
# Get 'y' scale and shift, and add W dimension
y_loc = self.scaler.x_shift[:, [y_idx], 0].flatten() # [B,C,T] -> [B]
y_scale = self.scaler.x_scale[:, [y_idx], 0].flatten() # [B,C,T] -> [B]
# Expand scale and shift to y_hat dimensions
y_loc = y_loc.view(*y_loc.shape, *(1,) * (y_hat.ndim - 1)) # .expand(y_hat)
y_scale = y_scale.view(
*y_scale.shape, *(1,) * (y_hat.ndim - 1)
) # .expand(y_hat)
y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
return y_hat, y_loc, y_scale
def _create_windows(self, batch, step):
temporal = batch["temporal"]
temporal_cols = batch["temporal_cols"]
if step == "train":
if self.val_size + self.test_size > 0:
cutoff = -self.val_size - self.test_size
temporal = temporal[:, :, :cutoff]
temporal = self.padder(temporal)
# Truncate batch to shorter time-series
av_condition = torch.nonzero(
torch.min(
temporal[:, temporal_cols.get_loc("available_mask")], axis=0
).values
)
min_time_stamp = int(av_condition.min())
available_ts = temporal.shape[-1] - min_time_stamp
if available_ts < 1 + self.h:
raise Exception(
"Time series too short for given input and output size. \n"
f"Available timestamps: {available_ts}"
)
temporal = temporal[:, :, min_time_stamp:]
if step == "val":
if self.test_size > 0:
temporal = temporal[:, :, : -self.test_size]
temporal = self.padder(temporal)
if step == "predict":
if (self.test_size == 0) and (len(self.futr_exog_list) == 0):
temporal = self.padder(temporal)
# Test size covers all data, pad left one timestep with zeros
if temporal.shape[-1] == self.test_size:
padder_left = nn.ConstantPad1d(padding=(1, 0), value=0)
temporal = padder_left(temporal)
# Parse batch
window_size = 1 + self.h # 1 for current t and h for future
windows = temporal.unfold(dimension=-1, size=window_size, step=1)
# Truncated backpropagation/inference (shorten sequence where RNNs unroll)
n_windows = windows.shape[2]
input_size = -1
if (step == "train") and (self.input_size > 0):
input_size = self.input_size
if (input_size > 0) and (n_windows > input_size):
max_sampleable_time = n_windows - self.input_size + 1
start = np.random.choice(max_sampleable_time)
windows = windows[:, :, start : (start + input_size), :]
if (step == "val") and (self.inference_input_size > 0):
cutoff = self.inference_input_size + self.val_size
windows = windows[:, :, -cutoff:, :]
if (step == "predict") and (self.inference_input_size > 0):
cutoff = self.inference_input_size + self.test_size
windows = windows[:, :, -cutoff:, :]
# [B, C, input_size, 1+H]
windows_batch = dict(
temporal=windows,
temporal_cols=temporal_cols,
static=batch.get("static", None),
static_cols=batch.get("static_cols", None),
)
return windows_batch
def _parse_windows(self, batch, windows):
# [B, C, seq_len, 1+H]
# Filter insample lags from outsample horizon
mask_idx = batch["temporal_cols"].get_loc("available_mask")
y_idx = batch["y_idx"]
insample_y = windows["temporal"][:, y_idx, :, : -self.h]
insample_mask = windows["temporal"][:, mask_idx, :, : -self.h]
outsample_y = windows["temporal"][:, y_idx, :, -self.h :].contiguous()
outsample_mask = windows["temporal"][:, mask_idx, :, -self.h :].contiguous()
# Filter historic exogenous variables
if len(self.hist_exog_list):
hist_exog_idx = get_indexer_raise_missing(
windows["temporal_cols"], self.hist_exog_list
)
hist_exog = windows["temporal"][:, hist_exog_idx, :, : -self.h]
else:
hist_exog = None
# Filter future exogenous variables
if len(self.futr_exog_list):
futr_exog_idx = get_indexer_raise_missing(
windows["temporal_cols"], self.futr_exog_list
)
futr_exog = windows["temporal"][:, futr_exog_idx, :, :]
else:
futr_exog = None
# Filter static variables
if len(self.stat_exog_list):
static_idx = get_indexer_raise_missing(
windows["static_cols"], self.stat_exog_list
)
stat_exog = windows["static"][:, static_idx]
else:
stat_exog = None
return (
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
)
def training_step(self, batch, batch_idx):
# Create and normalize windows [Ws, L+H, C]
batch = self._normalization(
batch, val_size=self.val_size, test_size=self.test_size
)
windows = self._create_windows(batch, step="train")
# Parse windows
(
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
) = self._parse_windows(batch, windows)
windows_batch = dict(
insample_y=insample_y, # [B, seq_len, 1]
insample_mask=insample_mask, # [B, seq_len, 1]
futr_exog=futr_exog, # [B, F, seq_len, 1+H]
hist_exog=hist_exog, # [B, C, seq_len]
stat_exog=stat_exog,
) # [B, S]
# Model predictions
output = self(windows_batch) # tuple([B, seq_len, H, output])
if self.loss.is_distribution_output:
outsample_y, y_loc, y_scale = self._inv_normalization(
y_hat=outsample_y,
temporal_cols=batch["temporal_cols"],
y_idx=batch["y_idx"],
)
B = output[0].size()[0]
T = output[0].size()[1]
H = output[0].size()[2]
output = [arg.view(-1, *(arg.size()[2:])) for arg in output]
outsample_y = outsample_y.view(B * T, H)
outsample_mask = outsample_mask.view(B * T, H)
y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)
y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
else:
loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)
if torch.isnan(loss):
print("Model Parameters", self.hparams)
print("insample_y", torch.isnan(insample_y).sum())
print("outsample_y", torch.isnan(outsample_y).sum())
print("output", torch.isnan(output).sum())
raise Exception("Loss is NaN, training stopped.")
self.log(
"train_loss",
loss.item(),
batch_size=outsample_y.size(0),
prog_bar=True,
on_epoch=True,
)
self.train_trajectories.append((self.global_step, loss.item()))
return loss
def validation_step(self, batch, batch_idx):
if self.val_size == 0:
return np.nan
# Create and normalize windows [Ws, L+H, C]
batch = self._normalization(
batch, val_size=self.val_size, test_size=self.test_size
)
windows = self._create_windows(batch, step="val")
y_idx = batch["y_idx"]
# Parse windows
(
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
) = self._parse_windows(batch, windows)
windows_batch = dict(
insample_y=insample_y, # [B, seq_len, 1]
insample_mask=insample_mask, # [B, seq_len, 1]
futr_exog=futr_exog, # [B, F, seq_len, 1+H]
hist_exog=hist_exog, # [B, C, seq_len]
stat_exog=stat_exog,
) # [B, S]
# Remove train y_hat (+1 and -1 for padded last window with zeros)
# tuple([B, seq_len, H, output]) -> tuple([B, validation_size, H, output])
val_windows = (self.val_size) + 1
outsample_y = outsample_y[:, -val_windows:-1, :]
outsample_mask = outsample_mask[:, -val_windows:-1, :]
# Model predictions
output = self(windows_batch) # tuple([B, seq_len, H, output])
if self.loss.is_distribution_output:
output = [arg[:, -val_windows:-1] for arg in output]
outsample_y, y_loc, y_scale = self._inv_normalization(
y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
B = output[0].size()[0]
T = output[0].size()[1]
H = output[0].size()[2]
output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]
outsample_y = outsample_y.reshape(B * T, H)
outsample_mask = outsample_mask.reshape(B * T, H)
y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)
y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
_, sample_mean, quants = self.loss.sample(distr_args=distr_args)
if str(type(self.valid_loss)) in [
"<class 'neuralforecast.losses.pytorch.sCRPS'>",
"<class 'neuralforecast.losses.pytorch.MQLoss'>",
]:
output = quants
elif str(type(self.valid_loss)) in [
"<class 'neuralforecast.losses.pytorch.relMSE'>"
]:
output = torch.unsqueeze(sample_mean, dim=-1)  # [N,H] -> [N,H,1]
else:
output = output[:, -val_windows:-1, :]
# Validation Loss evaluation
if self.valid_loss.is_distribution_output:
valid_loss = self.valid_loss(
y=outsample_y, distr_args=distr_args, mask=outsample_mask
)
else:
outsample_y, _, _ = self._inv_normalization(
y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
output, _, _ = self._inv_normalization(
y_hat=output, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
valid_loss = self.valid_loss(
y=outsample_y, y_hat=output, mask=outsample_mask
)
if torch.isnan(valid_loss):
raise Exception("Loss is NaN, training stopped.")
self.log(
"valid_loss",
valid_loss.item(),
batch_size=outsample_y.size(0),
prog_bar=True,
on_epoch=True,
)
self.validation_step_outputs.append(valid_loss)
return valid_loss
def predict_step(self, batch, batch_idx):
# Create and normalize windows [Ws, L+H, C]
batch = self._normalization(batch, val_size=0, test_size=self.test_size)
windows = self._create_windows(batch, step="predict")
y_idx = batch["y_idx"]
# Parse windows
insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
self._parse_windows(batch, windows)
)
windows_batch = dict(
insample_y=insample_y, # [B, seq_len, 1]
insample_mask=insample_mask, # [B, seq_len, 1]
futr_exog=futr_exog, # [B, F, seq_len, 1+H]
hist_exog=hist_exog, # [B, C, seq_len]
stat_exog=stat_exog,
) # [B, S]
# Model Predictions
output = self(windows_batch) # tuple([B, seq_len, H], ...)
if self.loss.is_distribution_output:
_, y_loc, y_scale = self._inv_normalization(
y_hat=output[0], temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
B = output[0].size()[0]
T = output[0].size()[1]
H = output[0].size()[2]
output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]
y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)
y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
_, sample_mean, quants = self.loss.sample(distr_args=distr_args)
y_hat = torch.concat((sample_mean, quants), axis=2)
y_hat = y_hat.view(B, T, H, -1)
if self.loss.return_params:
distr_args = torch.stack(distr_args, dim=-1)
distr_args = torch.reshape(distr_args, (B, T, H, -1))
y_hat = torch.concat((y_hat, distr_args), axis=3)
else:
y_hat, _, _ = self._inv_normalization(
y_hat=output, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
return y_hat
def fit(
self,
dataset,
val_size=0,
test_size=0,
random_seed=None,
distributed_config=None,
):
"""Fit.
The `fit` method optimizes the neural network's weights using the
initialization parameters (`learning_rate`, `batch_size`, ...)
and the `loss` function as defined during the initialization.
Within `fit` we use a PyTorch Lightning `Trainer` that
inherits the initialization's `self.trainer_kwargs`, to customize
its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
The method is designed to be compatible with SKLearn-like classes
and in particular to be compatible with the StatsForecast library.
By default the `model` does not save training checkpoints, to save disk space;
to store them, set `enable_checkpointing=True` in `__init__`.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`val_size`: int, validation size for temporal cross-validation.<br>
`test_size`: int, test size for temporal cross-validation.<br>
`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>
"""
return self._fit(
dataset=dataset,
batch_size=self.batch_size,
valid_batch_size=self.valid_batch_size,
val_size=val_size,
test_size=test_size,
random_seed=random_seed,
distributed_config=distributed_config,
)
def predict(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):
"""Predict.
Neural network prediction with PL's `Trainer` execution of `predict_step`.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`step_size`: int=1, Step size between each window.<br>
`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>
`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
"""
self._check_exog(dataset)
self._restart_seed(random_seed)
if step_size > 1:
raise Exception("Recurrent models do not support step_size > 1")
# fcsts (window, batch, h)
# Protect against the multi-GPU case. PL does not support returning predictions with multiple GPUs.
pred_trainer_kwargs = self.trainer_kwargs.copy()
if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
torch.cuda.device_count() > 1
):
pred_trainer_kwargs["devices"] = [0]
trainer = pl.Trainer(**pred_trainer_kwargs)
datamodule = TimeSeriesDataModule(
dataset=dataset,
valid_batch_size=self.valid_batch_size,
num_workers=self.num_workers_loader,
**data_module_kwargs,
)
fcsts = trainer.predict(self, datamodule=datamodule)
if self.test_size > 0:
# Remove warmup windows (from train and validation)
# [N,T,H,output], avoid indexing last dim for univariate output compatibility
fcsts = torch.vstack(
[fcst[:, -(1 + self.test_size - self.h) :, :] for fcst in fcsts]
)
fcsts = fcsts.numpy().flatten()
fcsts = fcsts.reshape(-1, len(self.loss.output_names))
else:
fcsts = torch.vstack([fcst[:, -1:, :] for fcst in fcsts]).numpy().flatten()
fcsts = fcsts.reshape(-1, len(self.loss.output_names))
return fcsts
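# Illustrative sketch (synthetic tensors, not part of the library): the reshape above
# turns stacked per-series forecasts of shape [N, T, H, n_outputs] into a flat array
# with one row per (series, horizon step) and one column per `loss.output_names` entry.
# Sizes below are assumptions chosen only for the example.
def _flatten_forecasts_demo():
    import torch
    n_outputs = 3                                                  # e.g. mean and two quantiles
    fcsts = [torch.randn(4, 1, 12, n_outputs), torch.randn(2, 1, 12, n_outputs)]
    stacked = torch.vstack([fcst[:, -1:, :] for fcst in fcsts])    # [6, 1, 12, 3]
    flat = stacked.numpy().flatten().reshape(-1, n_outputs)
    return flat.shape                                              # (72, 3): 6 series x 12 steps, 3 outputs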
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_windows.ipynb.
# %% auto 0
__all__ = ['BaseWindows']
# %% ../../nbs/common.base_windows.ipynb 5
import numpy as np
import torch
import torch.nn as nn
import pytorch_lightning as pl
from ._base_model import BaseModel
from ._scalers import TemporalNorm
from ..tsdataset import TimeSeriesDataModule
from ..utils import get_indexer_raise_missing
# %% ../../nbs/common.base_windows.ipynb 6
class BaseWindows(BaseModel):
"""Base Windows
Base class for all windows-based models. Forecasts are produced separately
for each window; windows are randomly sampled during training.
This class implements the basic functionality for all windows-based models, including:
- PyTorch Lightning's methods training_step, validation_step, predict_step.<br>
- fit and predict methods used by NeuralForecast.core class.<br>
- sampling and wrangling methods to generate windows.
"""
def __init__(
self,
h,
input_size,
loss,
valid_loss,
learning_rate,
max_steps,
val_check_steps,
batch_size,
valid_batch_size,
windows_batch_size,
inference_windows_batch_size,
start_padding_enabled,
step_size=1,
num_lr_decays=0,
early_stop_patience_steps=-1,
scaler_type="identity",
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
exclude_insample_y=False,
num_workers_loader=0,
drop_last_loader=False,
random_seed=1,
alias=None,
optimizer=None,
optimizer_kwargs=None,
**trainer_kwargs,
):
super().__init__(
random_seed=random_seed,
loss=loss,
valid_loss=valid_loss,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
max_steps=max_steps,
early_stop_patience_steps=early_stop_patience_steps,
**trainer_kwargs,
)
# Padder to complete train windows,
# example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
self.h = h
self.input_size = input_size
self.windows_batch_size = windows_batch_size
self.start_padding_enabled = start_padding_enabled
if start_padding_enabled:
self.padder_train = nn.ConstantPad1d(
padding=(self.input_size - 1, self.h), value=0
)
else:
self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0)
# Batch sizes
self.batch_size = batch_size
if valid_batch_size is None:
self.valid_batch_size = batch_size
else:
self.valid_batch_size = valid_batch_size
if inference_windows_batch_size is None:
self.inference_windows_batch_size = windows_batch_size
else:
self.inference_windows_batch_size = inference_windows_batch_size
# Optimization
self.learning_rate = learning_rate
self.max_steps = max_steps
self.num_lr_decays = num_lr_decays
self.lr_decay_steps = (
max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
)
self.early_stop_patience_steps = early_stop_patience_steps
self.val_check_steps = val_check_steps
self.windows_batch_size = windows_batch_size
self.step_size = step_size
self.exclude_insample_y = exclude_insample_y
# Scaler
self.scaler = TemporalNorm(
scaler_type=scaler_type,
dim=1, # Time dimension is 1.
num_features=1 + len(self.hist_exog_list) + len(self.futr_exog_list),
)
# Fit arguments
self.val_size = 0
self.test_size = 0
# Model state
self.decompose_forecast = False
# DataModule arguments
self.num_workers_loader = num_workers_loader
self.drop_last_loader = drop_last_loader
# used by on_validation_epoch_end hook
self.validation_step_outputs = []
self.alias = alias
def _create_windows(self, batch, step, w_idxs=None):
# Parse common data
window_size = self.input_size + self.h
temporal_cols = batch["temporal_cols"]
temporal = batch["temporal"]
if step == "train":
if self.val_size + self.test_size > 0:
cutoff = -self.val_size - self.test_size
temporal = temporal[:, :, :cutoff]
temporal = self.padder_train(temporal)
if temporal.shape[-1] < window_size:
raise Exception(
"Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True"
)
windows = temporal.unfold(
dimension=-1, size=window_size, step=self.step_size
)
# [B, C, Ws, L+H] 0, 1, 2, 3
# -> [B * Ws, L+H, C] 0, 2, 3, 1
windows_per_serie = windows.shape[2]
windows = windows.permute(0, 2, 3, 1).contiguous()
windows = windows.reshape(-1, window_size, len(temporal_cols))
# Sample and Available conditions
available_idx = temporal_cols.get_loc("available_mask")
available_condition = windows[:, : self.input_size, available_idx]
available_condition = torch.sum(available_condition, axis=1)
final_condition = available_condition > 0
if self.h > 0:
sample_condition = windows[:, self.input_size :, available_idx]
sample_condition = torch.sum(sample_condition, axis=1)
final_condition = (sample_condition > 0) & (available_condition > 0)
windows = windows[final_condition]
# Parse Static data to match windows
# [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]
static = batch.get("static", None)
static_cols = batch.get("static_cols", None)
if static is not None:
static = torch.repeat_interleave(
static, repeats=windows_per_serie, dim=0
)
static = static[final_condition]
# Protection of empty windows
if final_condition.sum() == 0:
raise Exception("No windows available for training")
# Sample windows
n_windows = len(windows)
if self.windows_batch_size is not None:
w_idxs = np.random.choice(
n_windows,
size=self.windows_batch_size,
replace=(n_windows < self.windows_batch_size),
)
windows = windows[w_idxs]
if static is not None:
static = static[w_idxs]
# think about interaction available * sample mask
# [B, C, Ws, L+H]
windows_batch = dict(
temporal=windows,
temporal_cols=temporal_cols,
static=static,
static_cols=static_cols,
)
return windows_batch
elif step in ["predict", "val"]:
if step == "predict":
initial_input = temporal.shape[-1] - self.test_size
if (
initial_input <= self.input_size
): # There is not enough data to predict first timestamp
padder_left = nn.ConstantPad1d(
padding=(self.input_size - initial_input, 0), value=0
)
temporal = padder_left(temporal)
predict_step_size = self.predict_step_size
cutoff = -self.input_size - self.test_size
temporal = temporal[:, :, cutoff:]
elif step == "val":
predict_step_size = self.step_size
cutoff = -self.input_size - self.val_size - self.test_size
if self.test_size > 0:
temporal = batch["temporal"][:, :, cutoff : -self.test_size]
else:
temporal = batch["temporal"][:, :, cutoff:]
if temporal.shape[-1] < window_size:
initial_input = temporal.shape[-1] - self.val_size
padder_left = nn.ConstantPad1d(
padding=(self.input_size - initial_input, 0), value=0
)
temporal = padder_left(temporal)
if (
(step == "predict")
and (self.test_size == 0)
and (len(self.futr_exog_list) == 0)
):
padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0)
temporal = padder_right(temporal)
windows = temporal.unfold(
dimension=-1, size=window_size, step=predict_step_size
)
# [batch, channels, windows, window_size] 0, 1, 2, 3
# -> [batch * windows, window_size, channels] 0, 2, 3, 1
windows_per_serie = windows.shape[2]
windows = windows.permute(0, 2, 3, 1).contiguous()
windows = windows.reshape(-1, window_size, len(temporal_cols))
static = batch.get("static", None)
static_cols = batch.get("static_cols", None)
if static is not None:
static = torch.repeat_interleave(
static, repeats=windows_per_serie, dim=0
)
# Sample windows for batched prediction
if w_idxs is not None:
windows = windows[w_idxs]
if static is not None:
static = static[w_idxs]
windows_batch = dict(
temporal=windows,
temporal_cols=temporal_cols,
static=static,
static_cols=static_cols,
)
return windows_batch
else:
raise ValueError(f"Unknown step {step}")
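# Illustrative sketch (synthetic tensors, not library code): the window creation above
# relies on `torch.Tensor.unfold` followed by a permute/reshape. With B=2 series,
# C=3 channels, T=10 time steps, window size 5 and step 1, each series yields 6 windows:
#     temporal = torch.arange(2 * 3 * 10, dtype=torch.float32).reshape(2, 3, 10)
#     windows = temporal.unfold(dimension=-1, size=5, step=1)   # [B, C, Ws, L+H] = [2, 3, 6, 5]
#     windows = windows.permute(0, 2, 3, 1).contiguous()        # [B, Ws, L+H, C] = [2, 6, 5, 3]
#     windows = windows.reshape(-1, 5, 3)                       # [B*Ws, L+H, C] = [12, 5, 3]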
def _normalization(self, windows, y_idx):
# windows are already filtered into train/validation/test
# by the `_create_windows` method, so there is no leakage risk
temporal = windows["temporal"] # B, L+H, C
temporal_cols = windows["temporal_cols"].copy() # B, L+H, C
# To avoid leakage uses only the lags
# temporal_data_cols = temporal_cols.drop('available_mask').tolist()
temporal_data_cols = self._get_temporal_exogenous_cols(
temporal_cols=temporal_cols
)
temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
temporal_idxs = np.append(y_idx, temporal_idxs)
temporal_data = temporal[:, :, temporal_idxs]
temporal_mask = temporal[:, :, temporal_cols.get_loc("available_mask")].clone()
if self.h > 0:
temporal_mask[:, -self.h :] = 0.0
# Normalize. self.scaler stores the shift and scale for inverse transform
temporal_mask = temporal_mask.unsqueeze(
-1
) # Add channel dimension for scaler.transform.
temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
# Replace values in windows dict
temporal[:, :, temporal_idxs] = temporal_data
windows["temporal"] = temporal
return windows
def _inv_normalization(self, y_hat, temporal_cols, y_idx):
# Receives window predictions [B, H, output]
# Broadcasts outputs and inverts normalization
# Add C dimension
if y_hat.ndim == 2:
remove_dimension = True
y_hat = y_hat.unsqueeze(-1)
else:
remove_dimension = False
y_scale = self.scaler.x_scale[:, :, [y_idx]]
y_loc = self.scaler.x_shift[:, :, [y_idx]]
y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1).to(
y_hat.device
)
y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1).to(
y_hat.device
)
y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
y_loc = y_loc.to(y_hat.device)
y_scale = y_scale.to(y_hat.device)
if remove_dimension:
y_hat = y_hat.squeeze(-1)
y_loc = y_loc.squeeze(-1)
y_scale = y_scale.squeeze(-1)
return y_hat, y_loc, y_scale
def _parse_windows(self, batch, windows):
# Filter insample lags from outsample horizon
y_idx = batch["y_idx"]
mask_idx = batch["temporal_cols"].get_loc("available_mask")
insample_y = windows["temporal"][:, : self.input_size, y_idx]
insample_mask = windows["temporal"][:, : self.input_size, mask_idx]
# Declare additional information
outsample_y = None
outsample_mask = None
hist_exog = None
futr_exog = None
stat_exog = None
if self.h > 0:
outsample_y = windows["temporal"][:, self.input_size :, y_idx]
outsample_mask = windows["temporal"][:, self.input_size :, mask_idx]
if len(self.hist_exog_list):
hist_exog_idx = get_indexer_raise_missing(
windows["temporal_cols"], self.hist_exog_list
)
hist_exog = windows["temporal"][:, : self.input_size, hist_exog_idx]
if len(self.futr_exog_list):
futr_exog_idx = get_indexer_raise_missing(
windows["temporal_cols"], self.futr_exog_list
)
futr_exog = windows["temporal"][:, :, futr_exog_idx]
if len(self.stat_exog_list):
static_idx = get_indexer_raise_missing(
windows["static_cols"], self.stat_exog_list
)
stat_exog = windows["static"][:, static_idx]
# TODO: think a better way of removing insample_y features
if self.exclude_insample_y:
insample_y = insample_y * 0
return (
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
)
def training_step(self, batch, batch_idx):
# Create and normalize windows [Ws, L+H, C]
windows = self._create_windows(batch, step="train")
y_idx = batch["y_idx"]
original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx])
windows = self._normalization(windows=windows, y_idx=y_idx)
# Parse windows
(
insample_y,
insample_mask,
outsample_y,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
) = self._parse_windows(batch, windows)
windows_batch = dict(
insample_y=insample_y, # [Ws, L]
insample_mask=insample_mask, # [Ws, L]
futr_exog=futr_exog, # [Ws, L+H]
hist_exog=hist_exog, # [Ws, L]
stat_exog=stat_exog,
) # [Ws, 1]
# Model Predictions
output = self(windows_batch)
if self.loss.is_distribution_output:
_, y_loc, y_scale = self._inv_normalization(
y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
)
outsample_y = original_outsample_y
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
else:
loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)
if torch.isnan(loss):
print("Model Parameters", self.hparams)
print("insample_y", torch.isnan(insample_y).sum())
print("outsample_y", torch.isnan(outsample_y).sum())
print("output", torch.isnan(output).sum())
raise Exception("Loss is NaN, training stopped.")
self.log(
"train_loss",
loss.item(),
batch_size=outsample_y.size(0),
prog_bar=True,
on_epoch=True,
)
self.train_trajectories.append((self.global_step, loss.item()))
return loss
def _compute_valid_loss(
self, outsample_y, output, outsample_mask, temporal_cols, y_idx
):
if self.loss.is_distribution_output:
_, y_loc, y_scale = self._inv_normalization(
y_hat=outsample_y, temporal_cols=temporal_cols, y_idx=y_idx
)
distr_args = self.loss.scale_decouple(
output=output, loc=y_loc, scale=y_scale
)
_, sample_mean, quants = self.loss.sample(distr_args=distr_args)
if str(type(self.valid_loss)) in [
"<class 'neuralforecast.losses.pytorch.sCRPS'>",
"<class 'neuralforecast.losses.pytorch.MQLoss'>",
]:
output = quants
elif str(type(self.valid_loss)) in [
"<class 'neuralforecast.losses.pytorch.relMSE'>"
]:
output = torch.unsqueeze(sample_mean, dim=-1)  # [N,H] -> [N,H,1]
# Validation Loss evaluation
if self.valid_loss.is_distribution_output:
valid_loss = self.valid_loss(
y=outsample_y, distr_args=distr_args, mask=outsample_mask
)
else:
output, _, _ = self._inv_normalization(
y_hat=output, temporal_cols=temporal_cols, y_idx=y_idx
)
valid_loss = self.valid_loss(
y=outsample_y, y_hat=output, mask=outsample_mask
)
return valid_loss
def validation_step(self, batch, batch_idx):
if self.val_size == 0:
return np.nan
# TODO: Hack to compute number of windows
windows = self._create_windows(batch, step="val")
n_windows = len(windows["temporal"])
y_idx = batch["y_idx"]
# Number of windows in batch
windows_batch_size = self.inference_windows_batch_size
if windows_batch_size < 0:
windows_batch_size = n_windows
n_batches = int(np.ceil(n_windows / windows_batch_size))
valid_losses = []
batch_sizes = []
for i in range(n_batches):
# Create and normalize windows [Ws, L+H, C]
w_idxs = np.arange(
i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
)
windows = self._create_windows(batch, step="val", w_idxs=w_idxs)
original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx])
windows = self._normalization(windows=windows, y_idx=y_idx)
# Parse windows
(
insample_y,
insample_mask,
_,
outsample_mask,
hist_exog,
futr_exog,
stat_exog,
) = self._parse_windows(batch, windows)
windows_batch = dict(
insample_y=insample_y, # [Ws, L]
insample_mask=insample_mask, # [Ws, L]
futr_exog=futr_exog, # [Ws, L+H]
hist_exog=hist_exog, # [Ws, L]
stat_exog=stat_exog,
) # [Ws, 1]
# Model Predictions
output_batch = self(windows_batch)
valid_loss_batch = self._compute_valid_loss(
outsample_y=original_outsample_y,
output=output_batch,
outsample_mask=outsample_mask,
temporal_cols=batch["temporal_cols"],
y_idx=batch["y_idx"],
)
valid_losses.append(valid_loss_batch)
batch_sizes.append(len(output_batch))
valid_loss = torch.stack(valid_losses)
batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)
batch_size = torch.sum(batch_sizes)
valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size
if torch.isnan(valid_loss):
raise Exception("Loss is NaN, training stopped.")
self.log(
"valid_loss",
valid_loss.item(),
batch_size=batch_size,
prog_bar=True,
on_epoch=True,
)
self.validation_step_outputs.append(valid_loss)
return valid_loss
def predict_step(self, batch, batch_idx):
# TODO: Hack to compute number of windows
windows = self._create_windows(batch, step="predict")
n_windows = len(windows["temporal"])
y_idx = batch["y_idx"]
# Number of windows in batch
windows_batch_size = self.inference_windows_batch_size
if windows_batch_size < 0:
windows_batch_size = n_windows
n_batches = int(np.ceil(n_windows / windows_batch_size))
y_hats = []
for i in range(n_batches):
# Create and normalize windows [Ws, L+H, C]
w_idxs = np.arange(
i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
)
windows = self._create_windows(batch, step="predict", w_idxs=w_idxs)
windows = self._normalization(windows=windows, y_idx=y_idx)
# Parse windows
insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
self._parse_windows(batch, windows)
)
windows_batch = dict(
insample_y=insample_y, # [Ws, L]
insample_mask=insample_mask, # [Ws, L]
futr_exog=futr_exog, # [Ws, L+H]
hist_exog=hist_exog, # [Ws, L]
stat_exog=stat_exog,
) # [Ws, 1]
# Model Predictions
output_batch = self(windows_batch)
# Inverse normalization and sampling
if self.loss.is_distribution_output:
_, y_loc, y_scale = self._inv_normalization(
y_hat=output_batch[0],
temporal_cols=batch["temporal_cols"],
y_idx=y_idx,
)
distr_args = self.loss.scale_decouple(
output=output_batch, loc=y_loc, scale=y_scale
)
_, sample_mean, quants = self.loss.sample(distr_args=distr_args)
y_hat = torch.concat((sample_mean, quants), axis=2)
if self.loss.return_params:
distr_args = torch.stack(distr_args, dim=-1)
distr_args = torch.reshape(
distr_args, (len(windows["temporal"]), self.h, -1)
)
y_hat = torch.concat((y_hat, distr_args), axis=2)
else:
y_hat, _, _ = self._inv_normalization(
y_hat=output_batch,
temporal_cols=batch["temporal_cols"],
y_idx=y_idx,
)
y_hats.append(y_hat)
y_hat = torch.cat(y_hats, dim=0)
return y_hat
def fit(
self,
dataset,
val_size=0,
test_size=0,
random_seed=None,
distributed_config=None,
):
"""Fit.
The `fit` method optimizes the neural network's weights using the
initialization parameters (`learning_rate`, `windows_batch_size`, ...)
and the `loss` function as defined during the initialization.
Within `fit` we use a PyTorch Lightning `Trainer` that
inherits the initialization's `self.trainer_kwargs`, to customize
its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
The method is designed to be compatible with SKLearn-like classes
and in particular to be compatible with the StatsForecast library.
By default the `model` does not save training checkpoints, to save disk space;
to store them, set `enable_checkpointing=True` in `__init__`.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`val_size`: int, validation size for temporal cross-validation.<br>
`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>
`test_size`: int, test size for temporal cross-validation.<br>
"""
return self._fit(
dataset=dataset,
batch_size=self.batch_size,
valid_batch_size=self.valid_batch_size,
val_size=val_size,
test_size=test_size,
random_seed=random_seed,
distributed_config=distributed_config,
)
def predict(
self,
dataset,
test_size=None,
step_size=1,
random_seed=None,
**data_module_kwargs,
):
"""Predict.
Neural network prediction with PL's `Trainer` execution of `predict_step`.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`test_size`: int=None, test size for temporal cross-validation.<br>
`step_size`: int=1, Step size between each window.<br>
`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>
`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
"""
self._check_exog(dataset)
self._restart_seed(random_seed)
self.predict_step_size = step_size
self.decompose_forecast = False
datamodule = TimeSeriesDataModule(
dataset=dataset,
valid_batch_size=self.valid_batch_size,
**data_module_kwargs,
)
# Protect against the multi-GPU case. PL does not support returning predictions with multiple GPUs.
pred_trainer_kwargs = self.trainer_kwargs.copy()
if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
torch.cuda.device_count() > 1
):
pred_trainer_kwargs["devices"] = [0]
trainer = pl.Trainer(**pred_trainer_kwargs)
fcsts = trainer.predict(self, datamodule=datamodule)
fcsts = torch.vstack(fcsts).numpy().flatten()
fcsts = fcsts.reshape(-1, len(self.loss.output_names))
return fcsts
def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):
"""Decompose Predictions.
Decompose the predictions through the network's layers.
Available for the `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx` models.
**Parameters:**<br>
`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>
`step_size`: int=1, step size between each window of temporal data.<br>
`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>
`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
"""
# Restart random seed
if random_seed is None:
random_seed = self.random_seed
torch.manual_seed(random_seed)
self.predict_step_size = step_size
self.decompose_forecast = True
datamodule = TimeSeriesDataModule(
dataset=dataset,
valid_batch_size=self.valid_batch_size,
**data_module_kwargs,
)
trainer = pl.Trainer(**self.trainer_kwargs)
fcsts = trainer.predict(self, datamodule=datamodule)
self.decompose_forecast = False # Default decomposition back to false
return torch.vstack(fcsts).numpy()
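# Hedged usage sketch (assumed public API, not verified against a pinned version in this file):
# concrete subclasses of BaseWindows (e.g. NHITS) are normally driven through the
# NeuralForecast core class, which calls the fit/predict methods defined above.
# Horizon and hyperparameter values below are illustrative only.
def _base_windows_usage_sketch():
    from neuralforecast import NeuralForecast
    from neuralforecast.models import NHITS
    from neuralforecast.utils import generate_series

    Y_df = generate_series(n_series=2)                      # synthetic panel: unique_id, ds, y
    nf = NeuralForecast(models=[NHITS(h=12, input_size=24, max_steps=10)], freq="D")
    nf.fit(df=Y_df)                                         # wraps BaseWindows.fit
    return nf.predict()                                     # wraps BaseWindows.predict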
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.modules.ipynb.
# %% auto 0
__all__ = ['ACTIVATIONS', 'MLP', 'Chomp1d', 'CausalConv1d', 'TemporalConvolutionEncoder', 'TransEncoderLayer', 'TransEncoder',
'TransDecoderLayer', 'TransDecoder', 'AttentionLayer', 'PositionalEmbedding', 'TokenEmbedding',
'TimeFeatureEmbedding', 'DataEmbedding']
# %% ../../nbs/common.modules.ipynb 3
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
# %% ../../nbs/common.modules.ipynb 5
ACTIVATIONS = ["ReLU", "Softplus", "Tanh", "SELU", "LeakyReLU", "PReLU", "Sigmoid"]
# %% ../../nbs/common.modules.ipynb 7
class MLP(nn.Module):
"""Multi-Layer Perceptron Class
**Parameters:**<br>
`in_features`: int, dimension of input.<br>
`out_features`: int, dimension of output.<br>
`activation`: str, activation function to use.<br>
`hidden_size`: int, dimension of hidden layers.<br>
`num_layers`: int, number of hidden layers.<br>
`dropout`: float, dropout rate.<br>
"""
def __init__(
self, in_features, out_features, activation, hidden_size, num_layers, dropout
):
super().__init__()
assert activation in ACTIVATIONS, f"{activation} is not in {ACTIVATIONS}"
self.activation = getattr(nn, activation)()
# MultiLayer Perceptron
# Input layer
layers = [
nn.Linear(in_features=in_features, out_features=hidden_size),
self.activation,
nn.Dropout(dropout),
]
# Hidden layers
for i in range(num_layers - 2):
layers += [
nn.Linear(in_features=hidden_size, out_features=hidden_size),
self.activation,
nn.Dropout(dropout),
]
# Output layer
layers += [nn.Linear(in_features=hidden_size, out_features=out_features)]
# Store in layers as ModuleList
self.layers = nn.Sequential(*layers)
def forward(self, x):
return self.layers(x)
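# Illustrative sketch (synthetic data, sizes are assumptions): an MLP mapping a flattened
# window of 24 lags to a 12-step forecast. With num_layers=2 the loop above adds no extra
# hidden blocks, leaving only the input and output layers.
def _mlp_demo():
    mlp = MLP(in_features=24, out_features=12, activation="ReLU",
              hidden_size=64, num_layers=2, dropout=0.1)
    x = torch.randn(32, 24)              # [batch, in_features]
    return mlp(x).shape                  # torch.Size([32, 12])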
# %% ../../nbs/common.modules.ipynb 9
class Chomp1d(nn.Module):
"""Chomp1d
Receives `x` input of dim [N,C,T], and trims it so that only
'time available' information is used.
Used by one dimensional causal convolutions `CausalConv1d`.
**Parameters:**<br>
`horizon`: int, length of outsample values to skip.
"""
def __init__(self, horizon):
super(Chomp1d, self).__init__()
self.horizon = horizon
def forward(self, x):
return x[:, :, : -self.horizon].contiguous()
class CausalConv1d(nn.Module):
"""Causal Convolution 1d
Receives `x` input of dim [N,C_in,T] and computes a causal convolution
in the time dimension, skipping the H steps of the forecast horizon through
its dilation.
Consider a batch of one element, the dilated convolution operation on the
$t$ time step is defined:
$\mathrm{Conv1D}(\mathbf{x},\mathbf{w})(t) = (\mathbf{x}_{[*d]} \mathbf{w})(t) = \sum^{K}_{k=1} w_{k} \mathbf{x}_{t-dk}$
where $d$ is the dilation factor, $K$ is the kernel size, $t-dk$ is the index of
the considered past observation. The dilation effectively applies a filter with skip
connections. If $d=1$ one recovers a normal convolution.
**Parameters:**<br>
`in_channels`: int, dimension of `x` input's initial channels.<br>
`out_channels`: int, dimension of `x` output's channels.<br>
`activation`: str, identifying activations from PyTorch activations.
select from 'ReLU','Softplus','Tanh','SELU', 'LeakyReLU','PReLU','Sigmoid'.<br>
`padding`: int, number of zero padding used to the left.<br>
`kernel_size`: int, convolution's kernel size.<br>
`dilation`: int, dilation skip connections.<br>
**Returns:**<br>
`x`: tensor, torch tensor of dim [N,C_out,T] activation(conv1d(inputs, kernel) + bias). <br>
"""
def __init__(
self,
in_channels,
out_channels,
kernel_size,
padding,
dilation,
activation,
stride: int = 1,
):
super(CausalConv1d, self).__init__()
assert activation in ACTIVATIONS, f"{activation} is not in {ACTIVATIONS}"
self.conv = nn.Conv1d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
)
self.chomp = Chomp1d(padding)
self.activation = getattr(nn, activation)()
self.causalconv = nn.Sequential(self.conv, self.chomp, self.activation)
def forward(self, x):
return self.causalconv(x)
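# Illustrative sketch (synthetic input): with left padding of (kernel_size - 1) * dilation
# and Chomp1d trimming the same amount from the right, the temporal length is preserved
# and each output step only sees current and past inputs.
def _causal_conv_demo():
    conv = CausalConv1d(in_channels=8, out_channels=16, kernel_size=3,
                        padding=2, dilation=1, activation="ReLU")
    x = torch.randn(4, 8, 50)            # [N, C_in, T]
    return conv(x).shape                 # torch.Size([4, 16, 50])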
# %% ../../nbs/common.modules.ipynb 11
class TemporalConvolutionEncoder(nn.Module):
"""Temporal Convolution Encoder
Receives `x` input of dim [N,T,C_in], permutes it to [N,C_in,T] and
applies a deep stack of exponentially dilated causal convolutions.
The exponentially increasing dilations let the receptive field grow
exponentially, creating weighted averages over a long history (long-term memory).
**Parameters:**<br>
`in_channels`: int, dimension of `x` input's initial channels.<br>
`out_channels`: int, dimension of `x` output's channels.<br>
`kernel_size`: int, size of the convolving kernel.<br>
`dilations`: int list, controls the temporal spacing between the kernel points.<br>
`activation`: str, identifying activations from PyTorch activations.
select from 'ReLU','Softplus','Tanh','SELU', 'LeakyReLU','PReLU','Sigmoid'.<br>
**Returns:**<br>
`x`: tensor, torch tensor of dim [N,T,C_out].<br>
"""
# TODO: Add dilations parameter and change layers declaration to for loop
def __init__(
self,
in_channels,
out_channels,
kernel_size,
dilations,
activation: str = "ReLU",
):
super(TemporalConvolutionEncoder, self).__init__()
layers = []
for dilation in dilations:
layers.append(
CausalConv1d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
padding=(kernel_size - 1) * dilation,
activation=activation,
dilation=dilation,
)
)
in_channels = out_channels
self.tcn = nn.Sequential(*layers)
def forward(self, x):
# [N,T,C_in] -> [N,C_in,T] -> [N,T,C_out]
x = x.permute(0, 2, 1).contiguous()
x = self.tcn(x)
x = x.permute(0, 2, 1).contiguous()
return x
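# Illustrative sketch (synthetic input): stacking causal convolutions with dilations
# [1, 2, 4] grows the receptive field exponentially while keeping the sequence length.
def _tcn_encoder_demo():
    tcn = TemporalConvolutionEncoder(in_channels=8, out_channels=16, kernel_size=2,
                                     dilations=[1, 2, 4], activation="ReLU")
    x = torch.randn(4, 50, 8)            # [N, T, C_in]
    return tcn(x).shape                  # torch.Size([4, 50, 16])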
# %% ../../nbs/common.modules.ipynb 15
class TransEncoderLayer(nn.Module):
def __init__(
self,
attention,
hidden_size,
conv_hidden_size=None,
dropout=0.1,
activation="relu",
):
super(TransEncoderLayer, self).__init__()
conv_hidden_size = conv_hidden_size or 4 * hidden_size
self.attention = attention
self.conv1 = nn.Conv1d(
in_channels=hidden_size, out_channels=conv_hidden_size, kernel_size=1
)
self.conv2 = nn.Conv1d(
in_channels=conv_hidden_size, out_channels=hidden_size, kernel_size=1
)
self.norm1 = nn.LayerNorm(hidden_size)
self.norm2 = nn.LayerNorm(hidden_size)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, attn_mask=None):
new_x, attn = self.attention(x, x, x, attn_mask=attn_mask)
x = x + self.dropout(new_x)
y = x = self.norm1(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
y = self.dropout(self.conv2(y).transpose(-1, 1))
return self.norm2(x + y), attn
class TransEncoder(nn.Module):
def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
super(TransEncoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
self.conv_layers = (
nn.ModuleList(conv_layers) if conv_layers is not None else None
)
self.norm = norm_layer
def forward(self, x, attn_mask=None):
# x [B, L, D]
attns = []
if self.conv_layers is not None:
for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
x, attn = attn_layer(x, attn_mask=attn_mask)
x = conv_layer(x)
attns.append(attn)
x, attn = self.attn_layers[-1](x)
attns.append(attn)
else:
for attn_layer in self.attn_layers:
x, attn = attn_layer(x, attn_mask=attn_mask)
attns.append(attn)
if self.norm is not None:
x = self.norm(x)
return x, attns
# %% ../../nbs/common.modules.ipynb 16
class TransDecoderLayer(nn.Module):
def __init__(
self,
self_attention,
cross_attention,
hidden_size,
conv_hidden_size=None,
dropout=0.1,
activation="relu",
):
super(TransDecoderLayer, self).__init__()
conv_hidden_size = conv_hidden_size or 4 * hidden_size
self.self_attention = self_attention
self.cross_attention = cross_attention
self.conv1 = nn.Conv1d(
in_channels=hidden_size, out_channels=conv_hidden_size, kernel_size=1
)
self.conv2 = nn.Conv1d(
in_channels=conv_hidden_size, out_channels=hidden_size, kernel_size=1
)
self.norm1 = nn.LayerNorm(hidden_size)
self.norm2 = nn.LayerNorm(hidden_size)
self.norm3 = nn.LayerNorm(hidden_size)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, cross, x_mask=None, cross_mask=None):
x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
x = self.norm1(x)
x = x + self.dropout(
self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
)
y = x = self.norm2(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
y = self.dropout(self.conv2(y).transpose(-1, 1))
return self.norm3(x + y)
class TransDecoder(nn.Module):
def __init__(self, layers, norm_layer=None, projection=None):
super(TransDecoder, self).__init__()
self.layers = nn.ModuleList(layers)
self.norm = norm_layer
self.projection = projection
def forward(self, x, cross, x_mask=None, cross_mask=None):
for layer in self.layers:
x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
if self.norm is not None:
x = self.norm(x)
if self.projection is not None:
x = self.projection(x)
return x
# %% ../../nbs/common.modules.ipynb 17
class AttentionLayer(nn.Module):
def __init__(self, attention, hidden_size, n_head, d_keys=None, d_values=None):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (hidden_size // n_head)
d_values = d_values or (hidden_size // n_head)
self.inner_attention = attention
self.query_projection = nn.Linear(hidden_size, d_keys * n_head)
self.key_projection = nn.Linear(hidden_size, d_keys * n_head)
self.value_projection = nn.Linear(hidden_size, d_values * n_head)
self.out_projection = nn.Linear(d_values * n_head, hidden_size)
self.n_head = n_head
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_head
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
out, attn = self.inner_attention(queries, keys, values, attn_mask)
out = out.view(B, L, -1)
return self.out_projection(out), attn
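# Illustrative sketch (not one of the library's attention implementations): the layer
# above expects an `attention` module that receives projected queries/keys/values of
# shape [B, L, H, E] and returns (values, attention weights). A minimal scaled
# dot-product attention that satisfies that contract, plus a usage example:
class _FullAttentionSketch(nn.Module):
    def forward(self, queries, keys, values, attn_mask):
        # queries/keys: [B, L/S, H, E], values: [B, S, H, D]; attn_mask is ignored here.
        scale = queries.shape[-1] ** -0.5
        scores = torch.einsum("blhe,bshe->bhls", queries, keys) * scale
        attn = torch.softmax(scores, dim=-1)
        out = torch.einsum("bhls,bshd->blhd", attn, values)
        return out.contiguous(), attn

def _attention_layer_demo():
    layer = AttentionLayer(attention=_FullAttentionSketch(), hidden_size=32, n_head=4)
    x = torch.randn(2, 10, 32)           # [B, L, hidden_size]
    out, _ = layer(x, x, x, attn_mask=None)
    return out.shape                     # torch.Size([2, 10, 32])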
# %% ../../nbs/common.modules.ipynb 18
class PositionalEmbedding(nn.Module):
def __init__(self, hidden_size, max_len=5000):
super(PositionalEmbedding, self).__init__()
# Compute the positional encodings once in log space.
pe = torch.zeros(max_len, hidden_size).float()
pe.requires_grad = False
position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (
torch.arange(0, hidden_size, 2).float() * -(math.log(10000.0) / hidden_size)
).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0)
self.register_buffer("pe", pe)
def forward(self, x):
return self.pe[:, : x.size(1)]
class TokenEmbedding(nn.Module):
def __init__(self, c_in, hidden_size):
super(TokenEmbedding, self).__init__()
padding = 1 if torch.__version__ >= "1.5.0" else 2
self.tokenConv = nn.Conv1d(
in_channels=c_in,
out_channels=hidden_size,
kernel_size=3,
padding=padding,
padding_mode="circular",
bias=False,
)
for m in self.modules():
if isinstance(m, nn.Conv1d):
nn.init.kaiming_normal_(
m.weight, mode="fan_in", nonlinearity="leaky_relu"
)
def forward(self, x):
x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
return x
class TimeFeatureEmbedding(nn.Module):
def __init__(self, input_size, hidden_size):
super(TimeFeatureEmbedding, self).__init__()
self.embed = nn.Linear(input_size, hidden_size, bias=False)
def forward(self, x):
return self.embed(x)
class DataEmbedding(nn.Module):
def __init__(
self, c_in, exog_input_size, hidden_size, pos_embedding=True, dropout=0.1
):
super(DataEmbedding, self).__init__()
self.value_embedding = TokenEmbedding(c_in=c_in, hidden_size=hidden_size)
if pos_embedding:
self.position_embedding = PositionalEmbedding(hidden_size=hidden_size)
else:
self.position_embedding = None
if exog_input_size > 0:
self.temporal_embedding = TimeFeatureEmbedding(
input_size=exog_input_size, hidden_size=hidden_size
)
else:
self.temporal_embedding = None
self.dropout = nn.Dropout(p=dropout)
def forward(self, x, x_mark=None):
# Convolution
x = self.value_embedding(x)
# Add positional (relative within the window) embedding with sines and cosines
if self.position_embedding is not None:
x = x + self.position_embedding(x)
# Add temporal (absolute in time series) embedding with linear layer
if self.temporal_embedding is not None:
x = x + self.temporal_embedding(x_mark)
return self.dropout(x)
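# Illustrative sketch (synthetic inputs, sizes are assumptions): value, positional and
# time-feature embeddings are summed into a single hidden representation per time step.
def _data_embedding_demo():
    emb = DataEmbedding(c_in=1, exog_input_size=2, hidden_size=32,
                        pos_embedding=True, dropout=0.1)
    x = torch.randn(4, 50, 1)            # [B, T, c_in] target values
    x_mark = torch.randn(4, 50, 2)       # [B, T, exog_input_size] calendar features
    return emb(x, x_mark).shape          # torch.Size([4, 50, 32])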
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.scalers.ipynb.
# %% auto 0
__all__ = ['masked_median', 'masked_mean', 'minmax_statistics', 'minmax1_statistics', 'std_statistics', 'robust_statistics',
'invariant_statistics', 'identity_statistics', 'TemporalNorm']
# %% ../../nbs/common.scalers.ipynb 6
import torch
import torch.nn as nn
# %% ../../nbs/common.scalers.ipynb 10
def masked_median(x, mask, dim=-1, keepdim=True):
"""Masked Median
Compute the median of tensor `x` along dim, ignoring values where
`mask` is False. `x` and `mask` need to be broadcastable.
**Parameters:**<br>
`x`: torch.Tensor to compute median of along `dim` dimension.<br>
`mask`: torch Tensor bool with same shape as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`dim` (int, optional): Dimension to take median of. Defaults to -1.<br>
`keepdim` (bool, optional): Keep dimension of `x` or not. Defaults to True.<br>
**Returns:**<br>
`x_median`: torch.Tensor with the masked medians along `dim`.
"""
x_nan = x.float().masked_fill(mask < 1, float("nan"))
x_median, _ = x_nan.nanmedian(dim=dim, keepdim=keepdim)
x_median = torch.nan_to_num(x_median, nan=0.0)
return x_median
def masked_mean(x, mask, dim=-1, keepdim=True):
"""Masked Mean
Compute the mean of tensor `x` along dimension, ignoring values where
`mask` is False. `x` and `mask` need to be broadcastable.
**Parameters:**<br>
`x`: torch.Tensor to compute mean of along `dim` dimension.<br>
`mask`: torch Tensor bool with same shape as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`dim` (int, optional): Dimension to take mean of. Defaults to -1.<br>
`keepdim` (bool, optional): Keep dimension of `x` or not. Defaults to True.<br>
**Returns:**<br>
`x_mean`: torch.Tensor with the masked means along `dim`.
"""
x_nan = x.float().masked_fill(mask < 1, float("nan"))
x_mean = x_nan.nanmean(dim=dim, keepdim=keepdim)
x_mean = torch.nan_to_num(x_mean, nan=0.0)
return x_mean
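# Illustrative sketch: masked statistics ignore entries flagged as unavailable (mask == 0),
# e.g. padded values at the start of short series.
def _masked_stats_demo():
    x = torch.tensor([[1.0, 2.0, 3.0, 100.0]])
    mask = torch.tensor([[1.0, 1.0, 1.0, 0.0]])      # last value is padding, not data
    return masked_mean(x, mask, dim=-1), masked_median(x, mask, dim=-1)
    # -> (tensor([[2.]]), tensor([[2.]])); the 100.0 never enters either statistic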
# %% ../../nbs/common.scalers.ipynb 14
def minmax_statistics(x, mask, eps=1e-6, dim=-1):
"""MinMax Scaler
Standardizes temporal features by ensuring their range dwells within the
[0,1] range. This transformation is often used as an alternative
to the standard scaler. The scaled features are obtained as:
$$
\mathbf{z} = (\mathbf{x}_{[B,T,C]}-\mathrm{min}({\mathbf{x}})_{[B,1,C]})/
(\mathrm{max}({\mathbf{x}})_{[B,1,C]}- \mathrm{min}({\mathbf{x}})_{[B,1,C]})
$$
**Parameters:**<br>
`x`: torch.Tensor input tensor.<br>
`mask`: torch Tensor bool, same dimension as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`dim` (int, optional): Dimension over which to compute min and max. Defaults to -1.<br>
**Returns:**<br>
`x_min`, `x_range`: torch.Tensors with the shift and scale statistics computed along `dim`.
"""
mask = mask.clone()
mask[mask == 0] = torch.inf
mask[mask == 1] = 0
x_max = torch.max(
torch.nan_to_num(x - mask, nan=-torch.inf), dim=dim, keepdim=True
)[0]
x_min = torch.min(torch.nan_to_num(x + mask, nan=torch.inf), dim=dim, keepdim=True)[
0
]
x_max = x_max.type(x.dtype)
x_min = x_min.type(x.dtype)
# x_range and prevent division by zero
x_range = x_max - x_min
x_range[x_range == 0] = 1.0
x_range = x_range + eps
return x_min, x_range
# %% ../../nbs/common.scalers.ipynb 15
def minmax_scaler(x, x_min, x_range):
return (x - x_min) / x_range
def inv_minmax_scaler(z, x_min, x_range):
return z * x_range + x_min
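# Illustrative sketch: the statistics/scaler/inverse trio forms a round trip; the masked
# entry is excluded from the min/max computation but is still transformed.
def _minmax_round_trip_demo():
    x = torch.tensor([[[1.0], [2.0], [3.0], [0.0]]])      # [B, T, C] = [1, 4, 1]
    mask = torch.tensor([[[1.0], [1.0], [1.0], [0.0]]])   # last step is padding
    x_min, x_range = minmax_statistics(x, mask, dim=1)    # min=1, range~2
    z = minmax_scaler(x, x_min, x_range)
    return torch.allclose(inv_minmax_scaler(z, x_min, x_range), x)   # True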
# %% ../../nbs/common.scalers.ipynb 17
def minmax1_statistics(x, mask, eps=1e-6, dim=-1):
"""MinMax1 Scaler
Standardizes temporal features by ensuring their range dwells within the
[-1,1] range. This transformation is often used as an alternative
to the standard scaler or classic Min Max Scaler.
The scaled features are obtained as:
$$\mathbf{z} = 2 (\mathbf{x}_{[B,T,C]}-\mathrm{min}({\mathbf{x}})_{[B,1,C]})/ (\mathrm{max}({\mathbf{x}})_{[B,1,C]}- \mathrm{min}({\mathbf{x}})_{[B,1,C]})-1$$
**Parameters:**<br>
`x`: torch.Tensor input tensor.<br>
`mask`: torch Tensor bool, same dimension as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`dim` (int, optional): Dimension over which to compute min and max. Defaults to -1.<br>
**Returns:**<br>
`x_min`, `x_range`: torch.Tensors with the shift and scale statistics computed along `dim`.
"""
# Mask values (set masked to -inf or +inf)
mask = mask.clone()
mask[mask == 0] = torch.inf
mask[mask == 1] = 0
x_max = torch.max(
torch.nan_to_num(x - mask, nan=-torch.inf), dim=dim, keepdim=True
)[0]
x_min = torch.min(torch.nan_to_num(x + mask, nan=torch.inf), dim=dim, keepdim=True)[
0
]
x_max = x_max.type(x.dtype)
x_min = x_min.type(x.dtype)
# x_range and prevent division by zero
x_range = x_max - x_min
x_range[x_range == 0] = 1.0
x_range = x_range + eps
return x_min, x_range
# %% ../../nbs/common.scalers.ipynb 18
def minmax1_scaler(x, x_min, x_range):
x = (x - x_min) / x_range
z = x * (2) - 1
return z
def inv_minmax1_scaler(z, x_min, x_range):
z = (z + 1) / 2
return z * x_range + x_min
# %% ../../nbs/common.scalers.ipynb 20
def std_statistics(x, mask, dim=-1, eps=1e-6):
"""Standard Scaler
Standardizes features by removing the mean and scaling
to unit variance along the `dim` dimension.
For example, for `base_windows` models, the scaled features are obtained as (with dim=1):
$$\mathbf{z} = (\mathbf{x}_{[B,T,C]}-\\bar{\mathbf{x}}_{[B,1,C]})/\hat{\sigma}_{[B,1,C]}$$
**Parameters:**<br>
`x`: torch.Tensor.<br>
`mask`: torch Tensor bool, same dimension as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`dim` (int, optional): Dimension over which to compute mean and std. Defaults to -1.<br>
**Returns:**<br>
`x_means`, `x_stds`: torch.Tensors with the masked mean and standard deviation computed along `dim`.
"""
x_means = masked_mean(x=x, mask=mask, dim=dim)
x_stds = torch.sqrt(masked_mean(x=(x - x_means) ** 2, mask=mask, dim=dim))
# Protect against division by zero
x_stds[x_stds == 0] = 1.0
x_stds = x_stds + eps
return x_means, x_stds
# %% ../../nbs/common.scalers.ipynb 21
def std_scaler(x, x_means, x_stds):
return (x - x_means) / x_stds
def inv_std_scaler(z, x_mean, x_std):
return (z * x_std) + x_mean
# %% ../../nbs/common.scalers.ipynb 23
def robust_statistics(x, mask, dim=-1, eps=1e-6):
"""Robust Median Scaler
Standardizes features by removing the median and scaling
with the median absolute deviation (mad), a robust estimator of variance.
This scaler is particularly useful with noisy data where outliers can
heavily influence the sample mean / variance in a negative way.
In these scenarios the median and mad give better results.
For example, for `base_windows` models, the scaled features are obtained as (with dim=1):
$$\mathbf{z} = (\mathbf{x}_{[B,T,C]}-\\textrm{median}(\mathbf{x})_{[B,1,C]})/\\textrm{mad}(\mathbf{x})_{[B,1,C]}$$
$$\\textrm{mad}(\mathbf{x}) = \\textrm{median}(|\mathbf{x} - \\textrm{median}(\mathbf{x})|)$$
**Parameters:**<br>
`x`: torch.Tensor input tensor.<br>
`mask`: torch Tensor bool, same dimension as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`dim` (int, optional): Dimension over which to compute median and mad. Defaults to -1.<br>
**Returns:**<br>
`x_median`, `x_mad`: torch.Tensors with the masked median and median absolute deviation computed along `dim`.
"""
x_median = masked_median(x=x, mask=mask, dim=dim)
x_mad = masked_median(x=torch.abs(x - x_median), mask=mask, dim=dim)
# Protect x_mad=0 values
# Assuming normality and relationship between mad and std
x_means = masked_mean(x=x, mask=mask, dim=dim)
x_stds = torch.sqrt(masked_mean(x=(x - x_means) ** 2, mask=mask, dim=dim))
x_mad_aux = x_stds * 0.6744897501960817
x_mad = x_mad * (x_mad > 0) + x_mad_aux * (x_mad == 0)
# Protect against division by zero
x_mad[x_mad == 0] = 1.0
x_mad = x_mad + eps
return x_median, x_mad
# %% ../../nbs/common.scalers.ipynb 24
def robust_scaler(x, x_median, x_mad):
return (x - x_median) / x_mad
def inv_robust_scaler(z, x_median, x_mad):
return z * x_mad + x_median
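# Illustrative sketch: the robust statistics use the median and the median absolute
# deviation, so a single outlier barely moves the shift/scale; when the mad collapses to
# zero it is replaced above by std * 0.6745, the mad of a normal distribution with that std.
def _robust_round_trip_demo():
    x = torch.tensor([[[1.0], [2.0], [3.0], [4.0], [100.0]]])   # one outlier
    mask = torch.ones_like(x)
    x_median, x_mad = robust_statistics(x, mask, dim=1)         # median=3, mad~1
    z = robust_scaler(x, x_median, x_mad)
    return torch.allclose(inv_robust_scaler(z, x_median, x_mad), x)   # True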
# %% ../../nbs/common.scalers.ipynb 26
def invariant_statistics(x, mask, dim=-1, eps=1e-6):
"""Invariant Median Scaler
Standardizes features by removing the median and scaling
with the median absolute deviation (mad), a robust estimator of variance.
Additionally, it complements the transformation with the arcsinh transformation.
For example, for `base_windows` models, the scaled features are obtained as (with dim=1):
$$\mathbf{z} = (\mathbf{x}_{[B,T,C]}-\\textrm{median}(\mathbf{x})_{[B,1,C]})/\\textrm{mad}(\mathbf{x})_{[B,1,C]}$$
$$\mathbf{z} = \\textrm{arcsinh}(\mathbf{z})$$
**Parameters:**<br>
`x`: torch.Tensor input tensor.<br>
`mask`: torch Tensor bool, same dimension as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`dim` (int, optional): Dimension over which to compute median and mad. Defaults to -1.<br>
**Returns:**<br>
`x_median`, `x_mad`: torch.Tensors with the masked median and median absolute deviation computed along `dim`.
"""
x_median = masked_median(x=x, mask=mask, dim=dim)
x_mad = masked_median(x=torch.abs(x - x_median), mask=mask, dim=dim)
# Protect x_mad=0 values
# Assuming normality and relationship between mad and std
x_means = masked_mean(x=x, mask=mask, dim=dim)
x_stds = torch.sqrt(masked_mean(x=(x - x_means) ** 2, mask=mask, dim=dim))
x_mad_aux = x_stds * 0.6744897501960817
x_mad = x_mad * (x_mad > 0) + x_mad_aux * (x_mad == 0)
# Protect against division by zero
x_mad[x_mad == 0] = 1.0
x_mad = x_mad + eps
return x_median, x_mad
# %% ../../nbs/common.scalers.ipynb 27
def invariant_scaler(x, x_median, x_mad):
return torch.arcsinh((x - x_median) / x_mad)
def inv_invariant_scaler(z, x_median, x_mad):
return torch.sinh(z) * x_mad + x_median
# %% ../../nbs/common.scalers.ipynb 29
def identity_statistics(x, mask, dim=-1, eps=1e-6):
"""Identity Scaler
A placeholder identity scaler, that is argument insensitive.
**Parameters:**<br>
`x`: torch.Tensor input tensor.<br>
`mask`: torch Tensor bool, same dimension as `x`, True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`dim` (int, optional): Dimension that is collapsed for the (identity) statistics. Defaults to -1.<br>
**Returns:**<br>
`x_shift`, `x_scale`: zero-shift and unit-scale tensors, so `x` is left unchanged.
"""
# Collapse dim dimension
shape = list(x.shape)
shape[dim] = 1
x_shift = torch.zeros(shape)
x_scale = torch.ones(shape)
return x_shift, x_scale
# %% ../../nbs/common.scalers.ipynb 30
def identity_scaler(x, x_shift, x_scale):
return x
def inv_identity_scaler(z, x_shift, x_scale):
return z
# %% ../../nbs/common.scalers.ipynb 33
class TemporalNorm(nn.Module):
"""Temporal Normalization
Standardization of the features is a common requirement for many
machine learning estimators, and it is commonly achieved by removing
the level and scaling its variance. The `TemporalNorm` module applies
temporal normalization over the batch of inputs as defined by the type of scaler.
$$\mathbf{z}_{[B,T,C]} = \\textrm{Scaler}(\mathbf{x}_{[B,T,C]})$$
If `scaler_type` is `revin`, learnable normalization parameters are added on top of
the usual normalization technique; the parameters are learned through scale-decouple
global skip connections. The technique is available for point and probabilistic outputs.
$$\mathbf{\hat{z}}_{[B,T,C]} = \\boldsymbol{\hat{\\gamma}}_{[1,1,C]} \mathbf{z}_{[B,T,C]} +\\boldsymbol{\hat{\\beta}}_{[1,1,C]}$$
**Parameters:**<br>
`scaler_type`: str, defines the type of scaler used by TemporalNorm. Available [`identity`, `standard`, `robust`, `minmax`, `minmax1`, `invariant`, `revin`].<br>
`dim` (int, optional): Dimension over which to compute scale and shift. Defaults to -1.<br>
`eps` (float, optional): Small value to avoid division by zero. Defaults to 1e-6.<br>
`num_features`: int=None, for RevIN-like learnable affine parameters initialization.<br>
**References**<br>
- [Kin G. Olivares, David Luo, Cristian Challu, Stefania La Vattiata, Max Mergenthaler, Artur Dubrawski (2023). "HINT: Hierarchical Mixture Networks For Coherent Probabilistic Forecasting". Neural Information Processing Systems, submitted. Working Paper version available at arxiv.](https://arxiv.org/abs/2305.07089)<br>
"""
def __init__(self, scaler_type="robust", dim=-1, eps=1e-6, num_features=None):
super().__init__()
compute_statistics = {
None: identity_statistics,
"identity": identity_statistics,
"standard": std_statistics,
"revin": std_statistics,
"robust": robust_statistics,
"minmax": minmax_statistics,
"minmax1": minmax1_statistics,
"invariant": invariant_statistics,
}
scalers = {
None: identity_scaler,
"identity": identity_scaler,
"standard": std_scaler,
"revin": std_scaler,
"robust": robust_scaler,
"minmax": minmax_scaler,
"minmax1": minmax1_scaler,
"invariant": invariant_scaler,
}
inverse_scalers = {
None: inv_identity_scaler,
"identity": inv_identity_scaler,
"standard": inv_std_scaler,
"revin": inv_std_scaler,
"robust": inv_robust_scaler,
"minmax": inv_minmax_scaler,
"minmax1": inv_minmax1_scaler,
"invariant": inv_invariant_scaler,
}
assert scaler_type in scalers.keys(), f"{scaler_type} not defined"
if (scaler_type == "revin") and (num_features is None):
raise Exception("You must pass num_features for ReVIN scaler.")
self.compute_statistics = compute_statistics[scaler_type]
self.scaler = scalers[scaler_type]
self.inverse_scaler = inverse_scalers[scaler_type]
self.scaler_type = scaler_type
self.dim = dim
self.eps = eps
if scaler_type == "revin":
self._init_params(num_features=num_features)
def _init_params(self, num_features):
# Initialize RevIN scaler params to broadcast:
if self.dim == 1: # [B,T,C] [1,1,C]
self.revin_bias = nn.Parameter(torch.zeros(1, 1, num_features))
self.revin_weight = nn.Parameter(torch.ones(1, 1, num_features))
elif self.dim == -1: # [B,C,T] [1,C,1]
self.revin_bias = nn.Parameter(torch.zeros(1, num_features, 1))
self.revin_weight = nn.Parameter(torch.ones(1, num_features, 1))
# @torch.no_grad()
def transform(self, x, mask):
"""Center and scale the data.
**Parameters:**<br>
`x`: torch.Tensor shape [batch, time, channels].<br>
`mask`: torch Tensor bool, shape [batch, time], True where `x` is valid and False
where `x` should be masked. Mask should not be all False in any column of
dimension dim to avoid NaNs from zero division.<br>
**Returns:**<br>
`z`: torch.Tensor same shape as `x`, except scaled.
"""
x_shift, x_scale = self.compute_statistics(
x=x, mask=mask, dim=self.dim, eps=self.eps
)
self.x_shift = x_shift
self.x_scale = x_scale
# Original Revin performs this operation
# z = self.revin_weight * z
# z = z + self.revin_bias
# However, this is only valid for point forecasts, not for the
# distribution's scale-decouple technique.
if self.scaler_type == "revin":
self.x_shift = self.x_shift + self.revin_bias
self.x_scale = self.x_scale * (torch.relu(self.revin_weight) + self.eps)
z = self.scaler(x, x_shift, x_scale)
return z
# @torch.no_grad()
def inverse_transform(self, z, x_shift=None, x_scale=None):
"""Scale back the data to the original representation.
**Parameters:**<br>
`z`: torch.Tensor shape [batch, time, channels], scaled.<br>
**Returns:**<br>
`x`: torch.Tensor original data.
"""
if x_shift is None:
x_shift = self.x_shift
if x_scale is None:
x_scale = self.x_scale
# Original Revin performs this operation
# z = z - self.revin_bias
# z = (z / (self.revin_weight + self.eps))
# However, this is only valid for point forecasts, not for the
# distribution's scale-decouple technique.
x = self.inverse_scaler(z, x_shift, x_scale)
return x
def forward(self, x):
# The gradients are obtained from BaseWindows/BaseRecurrent forwards.
pass
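# Illustrative usage sketch (synthetic tensors): `transform` stores the shift/scale so the
# base model can later undo the normalization on its forecasts with `inverse_transform`,
# mirroring how BaseWindows calls this module.
def _temporal_norm_demo():
    scaler = TemporalNorm(scaler_type="standard", dim=1)   # normalize over the time axis
    x = torch.randn(8, 24, 3)                              # [B, T, C]
    mask = torch.ones(8, 24, 1)                            # every observation available
    z = scaler.transform(x=x, mask=mask)
    return torch.allclose(scaler.inverse_transform(z=z), x, atol=1e-4)   # True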
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/compat.ipynb.
# %% auto 0
__all__ = []
# %% ../nbs/compat.ipynb 1
try:
from pyspark.sql import DataFrame as SparkDataFrame
except ImportError:
class SparkDataFrame: ...