Commit 9c0053b7 authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #953 canceled with stages
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
*/.DS_Store
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
\ No newline at end of file
MIT License
Copyright (c) 2022 THUML @ Tsinghua University
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# iTransformer
传统Transformer模型在一个时间步上查看所有特征,不能高效利用长程时序特征,iTransformer可以跨多个时间步查看一个特征,能同时预测多个指标。
## 论文
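[iTransformer: Inverted Transformers Are Effective for Time Series Forecasting](https://arxiv.org/abs/2310.06625)(ICLR 2024 Spotlight)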
## 模型结构
采用标准的transformer encoder结构,对于backbone,无需修改transformer标准结构的代码即可实现本算法。
<div align=center>
<img src="./doc/transformer.png"/>
</div>
## 算法原理
iTransformer通过简单地转置输入的形状来实现跨多个时间步查看一个特征,模型不是对输入的子序列进行令牌化,而是对整个输入序列进行令牌化,通过这种方式,注意力层可以专注于学习多元相关性,而前馈网络则负责对整个输入序列进行编码。
<div align=center>
<img src="./doc/iTransformer.png"/>
</div>
## 环境配置
```
mv itransformer_pytorch iTransformer # 去框架名后缀
```
### Docker(方法一)
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk23.10-py38
# <your IMAGE ID>为以上拉取的docker的镜像ID替换,本镜像为:ffa1f63239fc
docker run -it --shm-size=32G -v $PWD/iTransformer:/home/iTransformer -v /opt/hyhal:/opt/hyhal --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name itransformer <your IMAGE ID> bash
cd /home/iTransformer
pip install -r requirements.txt # requirements.txt
```
### Dockerfile(方法二)
```
cd iTransformer/docker
docker build --no-cache -t itransformer:latest .
docker run --shm-size=32G --name itransformer -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video -v $PWD/../../iTransformer:/home/iTransformer -it itransformer bash
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:pip install -r requirements.txt。
```
### Anaconda(方法三)
1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装:
- https://developer.hpccube.com/tool/
```
DTK驱动:dtk23.10
python:python3.8
torch:2.1.0
torchvision:0.16.0
```
`Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应。`
2、其它非特殊库参照requirements.txt安装
```
pip install -r requirements.txt # requirements.txt
```
## 数据集
本步骤说明采用ETT-small中的`ETTm2`
- https://cloud.tsinghua.edu.cn/f/2ea5ca3d621e4e5ba36a/?dl=1
数据目录结构如下:
```
dataset/ETT-small
├── ETTh1.csv
├── ETTh2.csv
├── ETTm1.csv
└── ETTm2.csv
```
更多资料可参考源项目的[`README_origin`](./README_origin.md)
## 训练
### 单机单卡
```
export HIP_VISIBLE_DEVICES=0
cd iTransformer
sh ./scripts/multivariate_forecasting/ETT/iTransformer_ETTm2_train.sh
```
更多资料可参考源项目的[`README_origin`](./README_origin.md)
## 推理
修改[`run.py`](./run.py)末尾几行如下:
```
# exp.test(setting, test=1)
exp.predict(setting, load=True)
```
```
export HIP_VISIBLE_DEVICES=0
sh ./scripts/multivariate_forecasting/ETT/iTransformer_ETTm2_infer.sh
# 默认按天预测, 故pred_len为96。
```
## result
### 输入
```
2018-06-26 08:45:00,38.198001861572266,12.314000129699707,50.18000030517578,13.37600040435791,-11.53600025177002,-2.5910000801086426,42.03099822998047
2018-06-26 09:00:00,38.36600112915039,11.47599983215332,50.26100158691406,12.62600040435791,-11.53600025177002,-2.5910000801086426,42.69049835205078
...
2018-06-26 19:30:00,40.459999084472656,11.392000198364258,51.84199905395508,11.928999900817873,-11.53600025177002,-1.4179999828338623,45.54650115966797
2018-06-26 19:45:00,43.2239990234375,12.145999908447266,54.73699951171875,12.678999900817873,-11.53600025177002,-1.4179999828338623,45.32699966430664
```
### 输出
```
# shape: (1, 96, 7)
[[[ 0.34952432 0.52950954 0.60350233 0.88908595 -1.1544497 0.14222175 1.547624 ]
[ 0.33467558 0.5304026 0.5766822 0.8634169 -1.1414794 0.15061441 1.5383883 ]
...
[ 0.38313037 0.55777836 0.58653885 0.8580381 -1.0596789 0.18568955 1.5027612 ]
[ 0.3644999 0.55291736 0.57515836 0.8770145 -1.0512501 0.18641812 1.5099163 ]]]
```
### 精度
测试数据:[`ETTm2`](./dataset/ETT-small/ETTm2.csv)中划出一部分作验证集,推理框架:pytorch。
| device | train_loss | mse | mae |
|:---------:|:----------:|:----------:|:----------:|
| DCU Z100L | 0.2107 | 0.1852 | 0.2718 |
| GPU V100S | 0.2107 | 0.1852 | 0.2718 |
## 应用场景
### 算法类别
`时序预测`
### 热点应用行业
`金融,运维,电商,制造,能源,医疗`
## 源码仓库及问题反馈
- http://developer.hpccube.com/codes/modelzoo/itransformer_pytorch.git
## 参考资料
- https://github.com/thuml/iTransformer.git
# iTransformer
The repo is the official implementation for the paper: [iTransformer: Inverted Transformers Are Effective for Time Series Forecasting](https://arxiv.org/abs/2310.06625). It currently includes code implementations for the following tasks:
> **[Multivariate Forecasting](./scripts/multivariate_forecasting/README.md)**: We provide all scripts as well as datasets for the reproduction of forecasting results in this repo.
> **[Boosting Forecasting of Transformers](./scripts/boost_performance/README.md)**: iTransformer framework can consistently promote Transformer variants, and take advantage of the booming efficient attention mechanisms.
> **[Generalization on Unseen Variates](scripts/variate_generalization/README.md)**: iTransformer is demonstrated to generalize well on unseen time series, making it a nice alternative as the fundamental backbone of the large time series model.
> **[Better Utilization of Lookback Windows](scripts/increasing_lookback/README.md)**: While Transformer does not necessarily benefit from the larger lookback window, inverted Transformers exhibit better utilization of the enlarged lookback window.
> **[Adopt Efficient Attention and Training Strategy](scripts/model_efficiency/README.md)**: By inverting, efficient attention mechanisms and strategy can be leveraged to reduce the complexity of high-dimensional time series.
# Updates
:triangular_flag_on_post: **News** (2024.03) Introduction of our work in [Chinese](https://mp.weixin.qq.com/s/-pvBnA1_NSloNxa6TYXTSg) is available.
:triangular_flag_on_post: **News** (2024.02) iTransformer has been accepted as **ICLR 2024 Spotlight**.
:triangular_flag_on_post: **News** (2023.12) iTransformer is available in [GluonTS](https://github.com/awslabs/gluonts/pull/3017) with a probabilistic emission head and support for static covariates.
:triangular_flag_on_post: **News** (2023.12) We received lots of valuable suggestions. A [revised version](https://arxiv.org/pdf/2310.06625v2.pdf) (**24 Pages**) is now available, which includes extensive experiments, intuitive cases, in-depth analysis and further improvement of our work.
:triangular_flag_on_post: **News** (2023.10) iTransformer has been included in [[Time-Series-Library]](https://github.com/thuml/Time-Series-Library) and achieves consistent state-of-the-art results in long-term time series forecasting.
:triangular_flag_on_post: **News** (2023.10) All the scripts for the above tasks in our [paper](https://arxiv.org/pdf/2310.06625.pdf) are available in this repo.
## Introduction
🌟 Considering the characteristics of multivariate time series, iTransformer breaks the conventional model structure without the burden of modifying any Transformer modules. **Inverted Transformer is all you need in MTSF**.
<p align="center">
<img src="./figures/motivation.png" alt="" align=center />
</p>
🏆 iTransformer achieves the comprehensive state-of-the-art in challenging multivariate forecasting tasks and solves several pain points of Transformer on extensive time series data.
<p align="center">
<img src="./figures/radar.png" height = "360" alt="" align=center />
</p>
😊 **iTransformer** is repurposed on the vanilla Transformer. We think the "passionate modification" of Transformer has got too much attention in the research area of time series. Hopefully, the mainstream work in the following can focus more on the dataset infrastructure and consider the scale-up ability of Transformer.
## Overall Architecture
iTransformer regards **independent time series as variate tokens** to **capture multivariate correlations by attention** and **utilize layernorm and feed-forward networks to learn series representations**.
<p align="center">
<img src="./figures/architecture.png" alt="" align=center />
</p>
The pseudo-code of iTransformer is as simple as the following:
<p align="center">
<img src="./figures/algorithm.png" alt="" align=center />
</p>
## Usage
1. Install Pytorch and necessary dependencies.
```
pip install -r requirements.txt
```
2. The datasets can be obtained from [Google Drive](https://drive.google.com/file/d/1l51QsKvQPcqILT3DwfjCgx8Dsg2rpjot/view?usp=drive_link) or [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/2ea5ca3d621e4e5ba36a/).
3. Train and evaluate the model. We provide all the above tasks under the folder ./scripts/. You can reproduce the results as the following examples:
```
# Multivariate forecasting with iTransformer
bash ./scripts/multivariate_forecasting/Traffic/iTransformer.sh
# Compare the performance of Transformer and iTransformer
bash ./scripts/boost_performance/Weather/iTransformer.sh
# Train the model with partial variates, and generalize on the unseen variates
bash ./scripts/variate_generalization/Electricity/iTransformer.sh
# Test the performance on the enlarged lookback window
bash ./scripts/increasing_lookback/Traffic/iTransformer.sh
# Utilize FlashAttention for acceleration
bash ./scripts/efficient_attentions/iFlashTransformer.sh
```
## Main Result of Multivariate Forecasting
We evaluate the iTransformer on extensive challenging multivariate forecasting benchmarks as well as the server load prediction of Alipay online transactions (**generally hundreds of variates**, denoted as *Dim*). **Comprehensive good performance** (MSE/MAE) is achieved by iTransformer. iTransformer is particularly good at forecasting high-dimensional time series.
<p align="center">
<img src="./figures/datasets.png" alt="" align=center />
</p>
### Challenging Multivariate Time Series Forecasting Benchmarks (Avg Results)
<p align="center">
<img src="./figures/main_results.png" alt="" align=center />
</p>
### Online Transaction Load Prediction of Alipay Trading Platform (Avg Results)
<p align="center">
<img src="./figures/main_results_alipay.png" alt="" align=center />
</p>
## General Performance Boosting on Transformers
By introducing the proposed framework, Transformer and its variants achieve **significant performance improvement**, demonstrating the **generality of the iTransformer approach** and **benefiting from efficient attention mechanisms**.
<p align="center">
<img src="./figures/boosting.png" alt="" align=center />
</p>
## Generalization on Unseen Variates
**Technically, iTransformer can forecast with arbitrary numbers of variates** during inference. We partition the variates of each dataset into five folds, train models with 20% of the variates, and use the partially trained model to forecast all variates. iTransformers can be trained efficiently and forecast unseen variates with good generalizability.
<p align="center">
<img src="./figures/generability.png" alt="" align=center />
</p>
## Better Utilization of Lookback Windows
While previous Transformers do not necessarily benefit from an increase in historical observations, iTransformers show a surprising **improvement in forecasting performance with the increasing length of the lookback window**.
<p align="center">
<img src="./figures/increase_lookback.png" alt="" align=center />
</p>
## Model Analysis
Benefiting from inverted Transformer modules:
- (Left) Inverted Transformers learn **better time series representations** (more similar [CKA](https://github.com/jayroxis/CKA-similarity)) favored by time series forecasting.
- (Right) The inverted self-attention module learns **interpretable multivariate correlations**.
<p align="center">
<img src="./figures/analysis.png" alt="" align=center />
</p>
- Visualization of the variates from Market and the learned multivariate correlations. Each variate represents the monitored interface values of an application, and the applications can be further grouped into refined categories.
<p align="center">
<img src="./figures/groups.png" alt="" align=center />
</p>
## Model Ablations
iTransformer, which utilizes attention on the variate dimension and feed-forward on the temporal dimension, generally achieves the best performance. However, the vanilla Transformer (the third row) performs the worst among these designs, **indicating the disaccord of responsibility when the conventional architecture is adopted**.
<p align="center">
<img src="./figures/ablations.png" alt="" align=center />
</p>
## Model Efficiency
We propose a training strategy for multivariate series by taking advantage of its variate generalization ability. While the performance (Left) remains stable on partially trained variates of each batch with the sampled ratios, the memory footprint (Right) of the training process can be reduced significantly.
<p align="center">
<img src="./figures/efficient.png" alt="" align=center />
</p>
## Citation
If you find this repo helpful, please cite our paper.
```
@article{liu2023itransformer,
title={iTransformer: Inverted Transformers Are Effective for Time Series Forecasting},
author={Liu, Yong and Hu, Tengge and Zhang, Haoran and Wu, Haixu and Wang, Shiyu and Ma, Lintao and Long, Mingsheng},
journal={arXiv preprint arXiv:2310.06625},
year={2023}
}
```
## Acknowledgement
We appreciate the following GitHub repos a lot for their valuable code and efforts.
- Reformer (https://github.com/lucidrains/reformer-pytorch)
- Informer (https://github.com/zhouhaoyi/Informer2020)
- FlashAttention (https://github.com/shreyansh26/FlashAttention-PyTorch)
- Autoformer (https://github.com/thuml/Autoformer)
- Stationary (https://github.com/thuml/Nonstationary_Transformers)
- Time-Series-Library (https://github.com/thuml/Time-Series-Library)
## Contact
If you have any questions or want to use the code, feel free to contact:
* Yong Liu (liuyong21@mails.tsinghua.edu.cn)
* Haoran Zhang (z-hr20@mails.tsinghua.edu.cn)
from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_Solar, Dataset_PEMS, \
Dataset_Pred
from torch.utils.data import DataLoader
# Registry mapping the dataset name given in ``args.data`` to the Dataset class
# that ``data_provider`` instantiates for it.
data_dict = {
'ETTh1': Dataset_ETT_hour,
'ETTh2': Dataset_ETT_hour,
'ETTm1': Dataset_ETT_minute,
'ETTm2': Dataset_ETT_minute,
'Solar': Dataset_Solar,
'PEMS': Dataset_PEMS,
'custom': Dataset_Custom,
}
def data_provider(args, flag):
    """Build the dataset and its ``DataLoader`` for the requested split.

    Args:
        args: Configuration object; this function reads ``data``, ``embed``,
            ``freq``, ``batch_size``, ``root_path``, ``data_path``,
            ``seq_len``, ``label_len``, ``pred_len``, ``features``,
            ``target`` and ``num_workers`` from it.
        flag: Split selector. ``'test'`` and ``'pred'`` use an unshuffled
            loader with batch size 1; ``'pred'`` additionally forces
            ``Dataset_Pred``. Any other value is treated as a train/valid
            split (shuffled, ``args.batch_size``, incomplete batch dropped).

    Returns:
        A ``(data_set, data_loader)`` tuple.

    Raises:
        KeyError: If ``args.data`` is not a key of ``data_dict``.
    """
    # The registry lookup happens for every flag, including 'pred'.
    dataset_cls = data_dict[args.data]
    timeenc = 1 if args.embed == 'timeF' else 0

    if flag in ('test', 'pred'):
        shuffle_flag = False
        batch_size = 1  # one window at a time for evaluation / prediction
        drop_last = flag == 'test'
        if flag == 'pred':
            dataset_cls = Dataset_Pred
    else:
        shuffle_flag = True
        drop_last = True
        batch_size = args.batch_size  # shared by train and valid
    freq = args.freq

    window = [args.seq_len, args.label_len, args.pred_len]
    data_set = dataset_cls(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=window,
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq,
    )
    print(flag, len(data_set))

    loader_options = {
        'batch_size': batch_size,
        'shuffle': shuffle_flag,
        'num_workers': args.num_workers,
        'drop_last': drop_last,
    }
    data_loader = DataLoader(data_set, **loader_options)
    return data_set, data_loader
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
import warnings
warnings.filterwarnings('ignore')
class Dataset_ETT_hour(Dataset):
    """Sliding-window dataset over an ETT CSV file with fixed row-offset splits.

    The CSV is read with pandas; it must contain a ``date`` column, which is
    expected to be the first column because ``features='M'``/``'MS'`` selects
    ``columns[1:]`` as the data. Each item is a tuple
    ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` where ``seq_x`` holds
    ``seq_len`` rows and ``seq_y`` holds ``label_len + pred_len`` rows that
    overlap the last ``label_len`` rows of ``seq_x``.

    Args:
        root_path: Directory containing the CSV file.
        flag: One of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'``/``'MS'`` use every non-date column, ``'S'`` uses
            only ``target``.
        data_path: CSV file name relative to ``root_path``.
        target: Name of the target column.
        scale: Standardise the data with statistics fitted on the train rows.
        timeenc: ``0`` for integer calendar features, ``1`` for
            ``time_features`` encoding.
        freq: Frequency string forwarded to ``time_features``.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Fixed row offsets for the three splits (presumably 12/4/4 thirty-day
        # months of hourly rows). The val and test windows start seq_len rows
        # early so their first sample has a complete lookback window.
        train_end = 12 * 30 * 24
        val_end = train_end + 4 * 30 * 24
        test_end = train_end + 8 * 30 * 24
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Integer calendar features, one column each, in this order.
            data_stamp = pd.DataFrame({
                'month': dates.dt.month,
                'day': dates.dt.day,
                'weekday': dates.dt.weekday,
                'hour': dates.dt.hour,
            }).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window re-uses the last label_len rows of the input window.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete windows; 0 when the split is too short."""
        return max(len(self.data_x) - self.seq_len - self.pred_len + 1, 0)

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_ETT_minute(Dataset):
    """Sliding-window dataset over an ETT CSV file sampled four times per hour.

    The CSV is read with pandas; it must contain a ``date`` column, which is
    expected to be the first column because ``features='M'``/``'MS'`` selects
    ``columns[1:]`` as the data. Each item is a tuple
    ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` where ``seq_x`` holds
    ``seq_len`` rows and ``seq_y`` holds ``label_len + pred_len`` rows that
    overlap the last ``label_len`` rows of ``seq_x``.

    Args:
        root_path: Directory containing the CSV file.
        flag: One of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'``/``'MS'`` use every non-date column, ``'S'`` uses
            only ``target``.
        data_path: CSV file name relative to ``root_path``.
        target: Name of the target column.
        scale: Standardise the data with statistics fitted on the train rows.
        timeenc: ``0`` for integer calendar features, ``1`` for
            ``time_features`` encoding.
        freq: Frequency string forwarded to ``time_features``.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, timeenc=0, freq='t'):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Fixed row offsets for the three splits (presumably 12/4/4 thirty-day
        # months at four rows per hour). The val and test windows start
        # seq_len rows early so their first sample has a complete lookback.
        train_end = 12 * 30 * 24 * 4
        val_end = train_end + 4 * 30 * 24 * 4
        test_end = train_end + 8 * 30 * 24 * 4
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Integer calendar features; the minute is bucketed into
            # quarter-hour indices 0..3.
            data_stamp = pd.DataFrame({
                'month': dates.dt.month,
                'day': dates.dt.day,
                'weekday': dates.dt.weekday,
                'hour': dates.dt.hour,
                'minute': dates.dt.minute // 15,
            }).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window re-uses the last label_len rows of the input window.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete windows; 0 when the split is too short."""
        return max(len(self.data_x) - self.seq_len - self.pred_len + 1, 0)

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_Custom(Dataset):
    """Sliding-window dataset over an arbitrary CSV with a 70/10/20 split.

    The CSV must contain a ``date`` column and the ``target`` column. The
    columns are reordered to ``['date', ...other columns, target]`` before
    use. Each item is a tuple ``(seq_x, seq_y, seq_x_mark, seq_y_mark)``
    where ``seq_x`` holds ``seq_len`` rows and ``seq_y`` holds
    ``label_len + pred_len`` rows that overlap the last ``label_len`` rows
    of ``seq_x``.

    Args:
        root_path: Directory containing the CSV file.
        flag: One of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'``/``'MS'`` use every non-date column, ``'S'`` uses
            only ``target``.
        data_path: CSV file name relative to ``root_path``.
        target: Name of the target column.
        scale: Standardise the data with statistics fitted on the train rows.
        timeenc: ``0`` for integer calendar features, ``1`` for
            ``time_features`` encoding.
        freq: Frequency string forwarded to ``time_features``.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV and populate ``data_x``, ``data_y`` and ``data_stamp``."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))

        # Reorder to ['date', ...(other features), target feature].
        cols = list(df_raw.columns)
        cols.remove(self.target)
        cols.remove('date')
        df_raw = df_raw[['date'] + cols + [self.target]]

        total = len(df_raw)
        num_train = int(total * 0.7)
        num_test = int(total * 0.2)
        num_vali = total - num_train - num_test
        # The val and test windows start seq_len rows early so their first
        # sample has a complete lookback window.
        border1s = [0, num_train - self.seq_len, total - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, total]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features in ('M', 'MS'):
            df_data = df_raw[df_raw.columns[1:]]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(
                f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Integer calendar features, one column each, in this order.
            data_stamp = pd.DataFrame({
                'month': dates.dt.month,
                'day': dates.dt.day,
                'weekday': dates.dt.weekday,
                'hour': dates.dt.hour,
            }).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window re-uses the last label_len rows of the input window.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete windows; 0 when the split is too short."""
        return max(len(self.data_x) - self.seq_len - self.pred_len + 1, 0)

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_PEMS(Dataset):
    """Sliding-window dataset over a NumPy archive with a 60/20/20 split.

    The file is loaded with ``np.load`` and its ``'data'`` entry is indexed
    as ``[:, :, 0]``, so that entry must be at least three-dimensional.
    Each item is ``(seq_x, seq_y, seq_x_mark, seq_y_mark)``; both mark
    tensors are all-zero placeholders because no timestamps are read.

    Args:
        root_path: Directory containing the data file.
        flag: One of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``, matching the CSV datasets in
            this module.
        features, target, timeenc, freq: Stored on the instance but not used
            when reading the data.
        data_path: File name relative to ``root_path``.
        scale: Standardise the split with statistics fitted on the train rows.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the archive, split it, and populate ``data_x`` / ``data_y``."""
        self.scaler = StandardScaler()
        data_file = os.path.join(self.root_path, self.data_path)
        # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
        # objects; only load files from a trusted source.
        data = np.load(data_file, allow_pickle=True)
        data = data['data'][:, :, 0]

        train_ratio = 0.6
        valid_ratio = 0.2
        train_end = int(train_ratio * len(data))
        valid_end = int((train_ratio + valid_ratio) * len(data))
        train_data = data[:train_end]
        valid_data = data[train_end:valid_end]
        test_data = data[valid_end:]
        total_data = [train_data, valid_data, test_data]
        data = total_data[self.set_type]

        if self.scale:
            # Fit on the training rows only, then transform the chosen split.
            self.scaler.fit(train_data)
            data = self.scaler.transform(data)

        # Fill gaps forward first, then backward for leading NaNs.
        df = pd.DataFrame(data).ffill().bfill().values

        self.data_x = df
        self.data_y = df

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window re-uses the last label_len rows of the input window.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = torch.zeros((seq_x.shape[0], 1))
        # NOTE(review): the y placeholder is sized from seq_x, not seq_y;
        # kept as-is - confirm whether any consumer relies on this shape.
        seq_y_mark = torch.zeros((seq_x.shape[0], 1))
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete windows; 0 when the split is too short."""
        return max(len(self.data_x) - self.seq_len - self.pred_len + 1, 0)

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_Solar(Dataset):
    """Sliding-window dataset over a header-less, comma-separated text file.

    Every non-blank line is parsed as a row of floats. Rows are split by
    position: the first 70% for training, the next 10% for validation, and
    the final 20% for testing. Each item is
    ``(seq_x, seq_y, seq_x_mark, seq_y_mark)``; both mark tensors are
    all-zero placeholders because the file carries no timestamps.

    Args:
        root_path: Directory containing the data file.
        flag: One of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``, matching the CSV datasets in
            this module.
        features, target, timeenc, freq: Stored on the instance but not used
            when reading the data.
        data_path: File name relative to ``root_path``.
        scale: Standardise the data with statistics fitted on the train rows.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h'):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Parse the text file and populate ``data_x`` / ``data_y``."""
        self.scaler = StandardScaler()

        rows = []
        with open(os.path.join(self.root_path, self.data_path), "r", encoding='utf-8') as f:
            for line in f:
                line = line.strip('\n')
                if not line.strip():
                    # A blank (e.g. trailing) line would make float('') raise.
                    continue
                rows.append(np.array([float(value) for value in line.split(',')]))
        df_raw = pd.DataFrame(np.stack(rows, 0))

        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_valid = int(len(df_raw) * 0.1)
        # The val and test windows start seq_len rows early so their first
        # sample has a complete lookback window.
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_valid, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        df_data = df_raw.values
        if self.scale:
            # Fit on the training rows only, then transform the whole table.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data)
            data = self.scaler.transform(df_data)
        else:
            data = df_data

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        # The target window re-uses the last label_len rows of the input window.
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = torch.zeros((seq_x.shape[0], 1))
        # NOTE(review): the y placeholder is sized from seq_x, not seq_y;
        # kept as-is - confirm whether any consumer relies on this shape.
        seq_y_mark = torch.zeros((seq_x.shape[0], 1))
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete windows; 0 when the split is too short."""
        return max(len(self.data_x) - self.seq_len - self.pred_len + 1, 0)

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)
class Dataset_Pred(Dataset):
def __init__(self, root_path, flag='pred', size=None,
features='S', data_path='ETTh1.csv',
target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None):
# size [seq_len, label_len, pred_len]
# info
if size == None:
self.seq_len = 24 * 4 * 4
self.label_len = 24 * 4
self.pred_len = 24 * 4
else:
self.seq_len = size[0]
self.label_len = size[1]
self.pred_len = size[2]
# init
assert flag in ['pred']
self.features = features
self.target = target
self.scale = scale
self.inverse = inverse
self.timeenc = timeenc
self.freq = freq
self.cols = cols
self.root_path = root_path
self.data_path = data_path
self.__read_data__()
def __read_data__(self):
self.scaler = StandardScaler()
df_raw = pd.read_csv(os.path.join(self.root_path,
self.data_path))
'''
df_raw.columns: ['date', ...(other features), target feature]
'''
if self.cols:
cols = self.cols.copy()
cols.remove(self.target)
else:
cols = list(df_raw.columns)
cols.remove(self.target)
cols.remove('date')
df_raw = df_raw[['date'] + cols + [self.target]]
border1 = len(df_raw) - self.seq_len
border2 = len(df_raw)
if self.features == 'M' or self.features == 'MS':
cols_data = df_raw.columns[1:]
df_data = df_raw[cols_data]
elif self.features == 'S':
df_data = df_raw[[self.target]]
if self.scale:
self.scaler.fit(df_data.values)
data = self.scaler.transform(df_data.values)
else:
data = df_data.values
tmp_stamp = df_raw[['date']][border1:border2]
tmp_stamp['date'] = pd.to_datetime(tmp_stamp.date)
pred_dates = pd.date_range(tmp_stamp.date.values[-1], periods=self.pred_len + 1, freq=self.freq)
# print("date: ", pred_dates[1:])
df_stamp = pd.DataFrame(columns=['date'])
df_stamp.date = list(tmp_stamp.date.values) + list(pred_dates[1:])
if self.timeenc == 0:
df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)
df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)
df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)
df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)
df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1)
df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15)
data_stamp = df_stamp.drop(['date'], 1).values
elif self.timeenc == 1:
data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
data_stamp = data_stamp.transpose(1, 0)
self.data_x = data[border1:border2]
if self.inverse:
self.data_y = df_data.values[border1:border2]
else:
self.data_y = data[border1:border2]
self.data_stamp = data_stamp
def __getitem__(self, index):
    """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

    ``seq_x`` spans ``seq_len`` rows starting at ``index``. ``seq_y`` holds
    only the ``label_len`` rows that end the encoder window, while
    ``seq_y_mark`` additionally covers the following ``pred_len`` steps.
    """
    enc_start = index
    enc_stop = enc_start + self.seq_len
    dec_start = enc_stop - self.label_len
    dec_stop = dec_start + self.label_len + self.pred_len

    # With inverse=True the decoder values are read from data_x,
    # otherwise from data_y.
    value_source = self.data_x if self.inverse else self.data_y
    seq_y = value_source[dec_start:dec_start + self.label_len]

    return (
        self.data_x[enc_start:enc_stop],
        seq_y,
        self.data_stamp[enc_start:enc_stop],
        self.data_stamp[dec_start:dec_stop],
    )
def __len__(self):
    """Return how many ``seq_len``-long windows fit into ``self.data_x``."""
    spare_rows = len(self.data_x) - self.seq_len
    return spare_rows + 1
def inverse_transform(self, data):
    """Map ``data`` back through ``self.scaler.inverse_transform``."""
    restored = self.scaler.inverse_transform(data)
    return restored
This diff is collapsed.
# Image used to run the project's Python code. Per its tag, the base image
# bundles PyTorch 2.1.0, CentOS 7.6, DTK 23.10 and Python 3.8.
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk23.10-py38
# NOTE(review): DEBIAN_FRONTEND is read by Debian/Ubuntu apt tooling; the base
# tag says CentOS, so this is presumably a no-op - confirm before removing.
ENV DEBIAN_FRONTEND=noninteractive
# RUN yum update && yum install -y git cmake wget build-essential
# NOTE(review): every RUN executes in its own shell, so anything exported by
# env.sh here is not visible to later instructions - confirm whether the pip
# step below needs this environment.
RUN source /opt/dtk-23.10/env.sh
# Install the pip dependencies
COPY requirements.txt requirements.txt
# Packages come from the Aliyun mirror over plain HTTP; --trusted-host tells
# pip to accept that host without HTTPS.
RUN pip3 install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -r requirements.txt
# Pinned Python dependencies, installed with `pip install -r requirements.txt`.
pandas==1.5.3
scikit-learn==1.2.2
numpy==1.23.5
matplotlib==3.7.0
# torch is intentionally not pinned here.
# NOTE(review): presumably it is supplied by the runtime image - confirm.
# torch==2.0.0
reformer-pytorch==1.4.4
# Start an interactive container named "iTransformer":
#   - mounts $PWD/iTransformer at /home/iTransformer and /opt/hyhal read-only,
#   - passes through /dev/kfd and /dev/dri and adds the "video" group,
#   - uses 32G of shared memory.
# NOTE(review): ffa1f63239fc is a local image ID - replace it with the ID or
# tag of the image built on your machine.
docker run -it --shm-size=32G -v $PWD/iTransformer:/home/iTransformer -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name iTransformer ffa1f63239fc bash
# Optional sanity check of the PyTorch environment inside the container:
# python -m torch.utils.collect_env
import os
import torch
from model import Transformer, Informer, Reformer, Flowformer, Flashformer, \
iTransformer, iInformer, iReformer, iFlowformer, iFlashformer
class Exp_Basic(object):
    """Common scaffolding for experiments.

    Stores ``args``, registers the available model modules, selects the
    torch device and moves the model returned by ``_build_model`` onto it.
    Subclasses must override ``_build_model``; the remaining hooks default
    to no-ops.
    """

    def __init__(self, args):
        self.args = args
        # Registry from the model name to the imported model module.
        self.model_dict = {
            'Transformer': Transformer,
            'Informer': Informer,
            'Reformer': Reformer,
            'Flowformer': Flowformer,
            'Flashformer': Flashformer,
            'iTransformer': iTransformer,
            'iInformer': iInformer,
            'iReformer': iReformer,
            'iFlowformer': iFlowformer,
            'iFlashformer': iFlashformer,
        }
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _build_model(self):
        """Return the model to train; must be overridden.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def _acquire_device(self):
        """Return the torch device selected by ``args``.

        With ``args.use_gpu`` set, CUDA_VISIBLE_DEVICES is exported
        (``args.devices`` for multi-GPU runs, otherwise ``args.gpu``) and
        ``cuda:<args.gpu>`` is returned; otherwise the CPU device.
        """
        if self.args.use_gpu:
            if self.args.use_multi_gpu:
                visible = self.args.devices
            else:
                visible = str(self.args.gpu)
            os.environ["CUDA_VISIBLE_DEVICES"] = visible
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    def _get_data(self):
        """Hook for loading data; no-op here."""
        pass

    def vali(self):
        """Hook for validation; no-op here."""
        pass

    def train(self):
        """Hook for training; no-op here."""
        pass

    def test(self):
        """Hook for testing; no-op here."""
        pass
from data_provider.data_factory import data_provider
from experiments.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np
warnings.filterwarnings('ignore')
class Exp_Long_Term_Forecast(Exp_Basic):
    """Training, validation, test and prediction loops for forecasting.

    The model module is looked up in ``self.model_dict`` by ``args.model``
    and data comes from ``data_provider``. Per-batch preparation, the
    forward pass and the inverse scaling are shared through private helpers
    so that all four loops stay consistent.
    """

    def __init__(self, args):
        super(Exp_Long_Term_Forecast, self).__init__(args)

    def _build_model(self):
        """Instantiate the selected model, wrapped in DataParallel if asked."""
        model = self.model_dict[self.args.model].Model(self.args).float()
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` for the split named ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Return an Adam optimizer over all model parameters."""
        return optim.Adam(self.model.parameters(), lr=self.args.learning_rate)

    def _select_criterion(self):
        """Return the training criterion (mean squared error)."""
        return nn.MSELoss()

    def _autocast(self):
        """Return an autocast context that is active only with ``use_amp``."""
        return torch.cuda.amp.autocast(enabled=self.args.use_amp)

    def _prepare_batch(self, batch_x, batch_y, batch_x_mark, batch_y_mark,
                       drop_marks_by_dataset=True):
        """Cast a batch to float and move it to ``self.device``.

        When ``drop_marks_by_dataset`` is true and ``args.data`` contains
        'PEMS' or 'Solar', both time-mark tensors are replaced by ``None``.
        """
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)
        if drop_marks_by_dataset and ('PEMS' in self.args.data or 'Solar' in self.args.data):
            batch_x_mark = None
            batch_y_mark = None
        else:
            batch_x_mark = batch_x_mark.float().to(self.device)
            batch_y_mark = batch_y_mark.float().to(self.device)
        return batch_x, batch_y, batch_x_mark, batch_y_mark

    def _decoder_input(self, batch_y):
        """Build the decoder input: ``label_len`` known steps, then zeros."""
        placeholder = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], placeholder], dim=1)
        return dec_inp.float().to(self.device)

    def _forward(self, batch_x, batch_x_mark, dec_inp, batch_y_mark):
        """Run the model; with ``output_attention`` keep only element 0."""
        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        if self.args.output_attention:
            outputs = outputs[0]
        return outputs

    def _crop_horizon(self, outputs, batch_y):
        """Keep the last ``pred_len`` steps (last variate only for 'MS')."""
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
        return outputs, batch_y

    @staticmethod
    def _inverse_scale(dataset, array):
        """Apply ``dataset.inverse_transform`` to an array of any batch size.

        The array is flattened to 2-D (rows x last axis) first; the previous
        ``squeeze(0)`` only produced a 2-D array when the batch size was 1.
        """
        shape = array.shape
        return dataset.inverse_transform(array.reshape(-1, shape[-1])).reshape(shape)

    def _load_checkpoint(self, setting):
        """Load ``<args.checkpoints>/<setting>/checkpoint.pth`` into the model."""
        checkpoint = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
        # map_location lets a checkpoint saved on another device be restored.
        self.model.load_state_dict(torch.load(checkpoint, map_location=self.device))

    def vali(self, vali_data, vali_loader, criterion):
        """Return the mean ``criterion`` value over ``vali_loader``.

        The model is put in eval mode for the pass and back in train mode
        before returning.
        """
        losses = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in vali_loader:
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark)
                dec_inp = self._decoder_input(batch_y)
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs, batch_y = self._crop_horizon(outputs, batch_y)
                loss = criterion(outputs.detach().cpu(), batch_y.detach().cpu())
                # Store plain floats rather than tensors.
                losses.append(loss.item())
        total_loss = np.average(losses)
        self.model.train()
        return total_loss

    def train(self, setting):
        """Train with early stopping and return the best model.

        Checkpoints are written under ``<args.checkpoints>/<setting>`` by
        ``EarlyStopping``; the saved checkpoint is reloaded at the end.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        os.makedirs(path, exist_ok=True)

        time_now = time.time()
        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
        model_optim = self._select_optimizer()
        criterion = self._select_criterion()
        scaler = torch.cuda.amp.GradScaler() if self.args.use_amp else None

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark)
                dec_inp = self._decoder_input(batch_y)

                # The loss stays inside the autocast region, as before.
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                    outputs, batch_y = self._crop_horizon(outputs, batch_y)
                    loss = criterion(outputs, batch_y)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if scaler is not None:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)
            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)

        self._load_checkpoint(setting)
        return self.model

    def test(self, setting, test=0):
        """Evaluate on the test split, save plots, metrics and arrays.

        Args:
            setting: run identifier used for checkpoint and output folders.
            test: when truthy, load the checkpoint for ``setting`` first
                (from ``args.checkpoints``, matching ``train``).
        """
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model')
            self._load_checkpoint(setting)

        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)
        use_inverse = test_data.scale and self.args.inverse

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark)
                dec_inp = self._decoder_input(batch_y)
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs, batch_y = self._crop_horizon(outputs, batch_y)

                pred = outputs.detach().cpu().numpy()
                true = batch_y.detach().cpu().numpy()
                if use_inverse:
                    pred = self._inverse_scale(test_data, pred)
                    true = self._inverse_scale(test_data, true)
                preds.append(pred)
                trues.append(true)

                # Plot the last variate of the first sample every 20 batches.
                if i % 20 == 0:
                    history = batch_x.detach().cpu().numpy()
                    if use_inverse:
                        history = self._inverse_scale(test_data, history)
                    gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                    forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

        # Concatenating along the batch axis also copes with a shorter final
        # batch, which np.array(...) + reshape could not.
        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse:{}, mae:{}'.format(mse, mae))
        with open("result_long_term_forecast.txt", 'a') as f:
            f.write(setting + " \n")
            f.write('mse:{}, mae:{}'.format(mse, mae))
            f.write('\n')
            f.write('\n')

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)
        return

    def predict(self, setting, load=False):
        """Run the model on the 'pred' split and save the raw outputs.

        Outputs are written to ``./results/<setting>/real_prediction.npy``.
        Time marks are always passed to the model here, as before.
        """
        pred_data, pred_loader = self._get_data(flag='pred')
        if load:
            self._load_checkpoint(setting)

        preds = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark,
                    drop_marks_by_dataset=False)
                dec_inp = self._decoder_input(batch_y)
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs = outputs.detach().cpu().numpy()
                if pred_data.scale and self.args.inverse:
                    outputs = self._inverse_scale(pred_data, outputs)
                preds.append(outputs)

        preds = np.concatenate(preds, axis=0)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)
        np.save(folder_path + 'real_prediction.npy', preds)
        return
\ No newline at end of file
from data_provider.data_factory import data_provider
from experiments.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import pdb
import numpy as np
import random
warnings.filterwarnings('ignore')
# train on partial variate data and test on the full variates, used for two types of experiments:
# (1) Generalize on unseen variate (Figure 5 of our paper)
# (2) Efficient training strategy (Figure 8 of our paper)
class Exp_Long_Term_Forecast_Partial(Exp_Basic):
    """Forecasting experiment that trains on a subset of the variates.

    Training (and validation with ``partial_train=True``) slices the
    variates to ``[partial_start_index, partial_start_index + enc_in)``;
    ``test`` runs the trained model on all variates. With
    ``args.efficient_training`` a random selection of variates is drawn
    for every training batch.
    """

    def __init__(self, args):
        super(Exp_Long_Term_Forecast_Partial, self).__init__(args)

    def _build_model(self):
        """Instantiate the selected model, wrapped in DataParallel if asked."""
        model = self.model_dict[self.args.model].Model(self.args).float()
        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        """Return ``(dataset, dataloader)`` for the split named ``flag``."""
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        """Return an Adam optimizer over all model parameters."""
        return optim.Adam(self.model.parameters(), lr=self.args.learning_rate)

    def _select_criterion(self):
        """Return the training criterion (mean squared error)."""
        return nn.MSELoss()

    def _autocast(self):
        """Return an autocast context that is active only with ``use_amp``."""
        return torch.cuda.amp.autocast(enabled=self.args.use_amp)

    def _prepare_batch(self, batch_x, batch_y, batch_x_mark, batch_y_mark,
                       drop_marks_by_dataset=True):
        """Cast a batch to float and move it to ``self.device``.

        When ``drop_marks_by_dataset`` is true and ``args.data`` contains
        'PEMS' or 'Solar', both time-mark tensors are replaced by ``None``.
        """
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)
        if drop_marks_by_dataset and ('PEMS' in self.args.data or 'Solar' in self.args.data):
            batch_x_mark = None
            batch_y_mark = None
        else:
            batch_x_mark = batch_x_mark.float().to(self.device)
            batch_y_mark = batch_y_mark.float().to(self.device)
        return batch_x, batch_y, batch_x_mark, batch_y_mark

    def _slice_partial(self, batch_x, batch_y):
        """Keep only the variates in the configured partial range."""
        start = self.args.partial_start_index
        stop = min(self.args.enc_in + start, batch_x.shape[-1])
        return batch_x[:, :, start:stop], batch_y[:, :, start:stop]

    def _decoder_input(self, batch_y):
        """Build the decoder input: ``label_len`` known steps, then zeros."""
        placeholder = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], placeholder], dim=1)
        return dec_inp.float().to(self.device)

    def _forward(self, batch_x, batch_x_mark, dec_inp, batch_y_mark):
        """Run the model, honouring ``output_attention`` and
        ``channel_independence``.

        In channel-independent mode every variate is fed as its own
        single-variate series (batch ``B * N``) and the result is folded
        back to ``(B, T, N)``. This path is now taken under AMP and in
        ``predict`` as well; previously the flag was ignored there.
        """
        if self.args.output_attention:
            return self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
        if self.args.channel_independence:
            B, Tx, N = batch_x.shape
            _, Ty, _ = dec_inp.shape
            flat_x = batch_x.permute(0, 2, 1).reshape(B * N, Tx, 1)
            flat_dec = dec_inp.permute(0, 2, 1).reshape(B * N, Ty, 1)
            if batch_x_mark is None:
                outputs = self.model(flat_x, batch_x_mark, flat_dec, batch_y_mark)
            else:
                # Row b * N + n of flat_x belongs to sample b, so each mark
                # must be repeated N times in place. Tiling with
                # repeat(N, 1, 1) paired rows with other samples' marks.
                outputs = self.model(flat_x,
                                     batch_x_mark.repeat_interleave(N, dim=0),
                                     flat_dec,
                                     batch_y_mark.repeat_interleave(N, dim=0))
            return outputs.reshape(B, N, -1).permute(0, 2, 1)
        return self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

    def _crop_horizon(self, outputs, batch_y):
        """Keep the last ``pred_len`` steps (last variate only for 'MS')."""
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
        return outputs, batch_y

    @staticmethod
    def _inverse_scale(dataset, array):
        """Apply ``dataset.inverse_transform`` to an array of any batch size.

        The array is flattened to 2-D (rows x last axis) first; the previous
        ``squeeze(0)`` only produced a 2-D array when the batch size was 1.
        """
        shape = array.shape
        return dataset.inverse_transform(array.reshape(-1, shape[-1])).reshape(shape)

    def _load_checkpoint(self, setting):
        """Load ``<args.checkpoints>/<setting>/checkpoint.pth`` into the model."""
        checkpoint = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
        # map_location lets a checkpoint saved on another device be restored.
        self.model.load_state_dict(torch.load(checkpoint, map_location=self.device))

    def vali(self, vali_data, vali_loader, criterion, partial_train=False):
        """Return the mean ``criterion`` value over ``vali_loader``.

        Args:
            partial_train: when true, evaluate only on the variate range
                used for training; otherwise on all variates.
        """
        losses = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in vali_loader:
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark)
                if partial_train:
                    batch_x, batch_y = self._slice_partial(batch_x, batch_y)
                dec_inp = self._decoder_input(batch_y)
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs, batch_y = self._crop_horizon(outputs, batch_y)
                loss = criterion(outputs.detach().cpu(), batch_y.detach().cpu())
                # Store plain floats rather than tensors.
                losses.append(loss.item())
        total_loss = np.average(losses)
        self.model.train()
        return total_loss

    def train(self, setting):
        """Train on partial variates with early stopping; return the model.

        Validation loss is measured on the partial variates, the reported
        test loss on all variates. The checkpoint saved by
        ``EarlyStopping`` is reloaded at the end.
        """
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        os.makedirs(path, exist_ok=True)

        time_now = time.time()
        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
        model_optim = self._select_optimizer()
        criterion = self._select_criterion()
        scaler = torch.cuda.amp.GradScaler() if self.args.use_amp else None

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []
            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark)

                # Variate Generalization training:
                # We train with partial variates (args.enc_in < number of dataset variates)
                # and test the obtained model directly on all variates.
                batch_x, batch_y = self._slice_partial(batch_x, batch_y)

                # Efficient training strategy: randomly choose part of the variates
                # and only train the model with selected variates in each batch
                if self.args.efficient_training:
                    _, _, N = batch_x.shape
                    index = np.stack(random.sample(range(N), N))[-self.args.enc_in:]
                    batch_x = batch_x[:, :, index]
                    batch_y = batch_y[:, :, index]

                dec_inp = self._decoder_input(batch_y)

                # The loss stays inside the autocast region, as before.
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                    outputs, batch_y = self._crop_horizon(outputs, batch_y)
                    loss = criterion(outputs, batch_y)
                train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if scaler is not None:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion, partial_train=True)
            test_loss = self.vali(test_data, test_loader, criterion, partial_train=False)
            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1, self.args)

        self._load_checkpoint(setting)
        return self.model

    def test(self, setting, test=0):
        """Evaluate on all variates of the test split and save the results.

        Args:
            setting: run identifier used for checkpoint and output folders.
            test: when truthy, load the checkpoint for ``setting`` first
                (from ``args.checkpoints``, matching ``train``).
        """
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model')
            self._load_checkpoint(setting)

        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)
        use_inverse = test_data.scale and self.args.inverse

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                # During model inference, test the obtained model directly on all variates.
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark)
                dec_inp = self._decoder_input(batch_y)
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs, batch_y = self._crop_horizon(outputs, batch_y)

                pred = outputs.detach().cpu().numpy()
                true = batch_y.detach().cpu().numpy()
                if use_inverse:
                    pred = self._inverse_scale(test_data, pred)
                    true = self._inverse_scale(test_data, true)
                preds.append(pred)
                trues.append(true)

                # Plot the last variate of the first sample every 20 batches.
                if i % 20 == 0:
                    history = batch_x.detach().cpu().numpy()
                    if use_inverse:
                        history = self._inverse_scale(test_data, history)
                    gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                    forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

        # Concatenating along the batch axis also copes with a shorter final
        # batch, which np.array(...) + reshape could not.
        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)
        print('test shape:', preds.shape, trues.shape)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse:{}, mae:{}'.format(mse, mae))
        with open("result_long_term_forecast.txt", 'a') as f:
            f.write(setting + " \n")
            f.write('mse:{}, mae:{}'.format(mse, mae))
            f.write('\n')
            f.write('\n')

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)
        return

    def predict(self, setting, load=False):
        """Run the model on the 'pred' split and save the raw outputs.

        Outputs are written to ``./results/<setting>/real_prediction.npy``.
        Time marks are always passed to the model here, as before.
        """
        pred_data, pred_loader = self._get_data(flag='pred')
        if load:
            self._load_checkpoint(setting)

        preds = []
        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
                batch_x, batch_y, batch_x_mark, batch_y_mark = self._prepare_batch(
                    batch_x, batch_y, batch_x_mark, batch_y_mark,
                    drop_marks_by_dataset=False)
                dec_inp = self._decoder_input(batch_y)
                with self._autocast():
                    outputs = self._forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                outputs = outputs.detach().cpu().numpy()
                if pred_data.scale and self.args.inverse:
                    outputs = self._inverse_scale(pred_data, outputs)
                preds.append(outputs)

        preds = np.concatenate(preds, axis=0)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)
        np.save(folder_path + 'real_prediction.npy', preds)
        return
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment