Commit 2c6c0f28 authored by flyingdown's avatar flyingdown
Browse files

add README_HIP

fix test for torch 1.10.0
parent 2d8b3600
Pipeline #257 failed with stages
in 0 seconds
...@@ -148,3 +148,4 @@ cython_debug/ ...@@ -148,3 +148,4 @@ cython_debug/
*.hip *.hip
*_hip.* *_hip.*
*hip* *hip*
!README_HIP.md
# APEX
## 安装
### System Requirements
- Linux.
- Python 3.7, 3.8, 3.9
- (**推荐**) Upgrade pip
```
python3 -m pip install --upgrade pip #--user
```
### 使用pip安装(以dtk-23.04版本为例)
可以在[光合开发者社区](https://developer.hpccube.com/tool/#sdk) AI 生态包中获取最新的 apex Release 版本(需对应 DCU Toolkit 版本与 python 版本)
```bash
python3 -m pip install apex-0.1+git2d8b360.abi0.dtk2304-cp37-cp37m-linux_x86_64.whl
```
### 使用源码安装
#### 编译环境准备(以dtk-23.04版本为例)
- 拉取 apex 代码
```
git clone -b dtk-23.04 http://developer.hpccube.com/codes/aicomponent/apex.git
```
- 在[开发者社区](https://developer.hpccube.com/tool/#sdk) DCU Toolkit 中下载 DTK-23.04,解压至 /opt/ 路径下,并建立软链接
```
cd /opt && ln -s dtk-23.04 dtk
```
- 在[光合开发者社区](https://developer.hpccube.com/tool/#sdk) AI 生态包中获取对应的 pytorch Release 版本(需对应 DCU Toolkit 版本与 python 版本)
```bash
python3 -m pip install torch-1.13.1a0+git4c8a1fe.abi0.dtk2304-cp37-cp37m-linux_x86_64.whl
```
- 导入环境变量以及安装必要依赖库
```bash
source /opt/dtk/env.sh
export PYTORCH_ROCM_ARCH="gfx906;gfx926"
MAX_JOBS=16
sha=`git rev-parse HEAD`
sed -i "/version=/{s/\(.*=\)['\"]\(.*\)['\"]/\1'\2\+git${sha:0:7}\.abi0.dtk23.04'/}" setup.py
pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
pip3 install wheel -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
```
#### 编译安装
- 执行编译命令
```shell
cd apex
CXX=hipcc CC=hipcc python3 setup.py --cpp_ext --cuda_ext bdist_wheel
pip install dist/apex*
```
...@@ -228,6 +228,11 @@ class TestCheckpointing(unittest.TestCase): ...@@ -228,6 +228,11 @@ class TestCheckpointing(unittest.TestCase):
continue continue
model = MyModel().to('cuda') model = MyModel().to('cuda')
torch_ver = torch.__version__.split('a0')[0]
optimizer = None
if torch_ver == '1.10.0':
optimizer = optim.Adam(model.parameters(), lr=1e-3)
else:
optimizer = optim.Adam(model.parameters(), lr=1e-3, capturable=True) optimizer = optim.Adam(model.parameters(), lr=1e-3, capturable=True)
model, optimizer = amp.initialize( model, optimizer = amp.initialize(
model, optimizer, opt_level=opt_level, verbosity=0) model, optimizer, opt_level=opt_level, verbosity=0)
......
...@@ -92,6 +92,11 @@ class TestFusedAdam(TestFusedOptimizer): ...@@ -92,6 +92,11 @@ class TestFusedAdam(TestFusedOptimizer):
def setUp(self): def setUp(self):
super().setUp() super().setUp()
torch_ver = torch.__version__.split('a0')[0]
if torch_ver == '1.10.0':
self.options = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
'weight_decay': 0, 'amsgrad': False}
else:
self.options = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08, self.options = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
'weight_decay': 0, 'amsgrad': False, "capturable": True} 'weight_decay': 0, 'amsgrad': False, "capturable": True}
self.tst_options = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08, self.tst_options = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
...@@ -180,6 +185,12 @@ class TestFusedAdam(TestFusedOptimizer): ...@@ -180,6 +185,12 @@ class TestFusedAdam(TestFusedOptimizer):
def test_adam_option(self): def test_adam_option(self):
nelem = 1 nelem = 1
torch_ver = torch.__version__.split('a0')[0]
adam_option = None
if torch_ver == '1.10.0':
adam_option = {'lr':0.01, 'betas':(0.6, 0.9), 'eps':3e-06,
'weight_decay':0, 'amsgrad':False}
else:
adam_option = {'lr':0.01, 'betas':(0.6, 0.9), 'eps':3e-06, adam_option = {'lr':0.01, 'betas':(0.6, 0.9), 'eps':3e-06,
'weight_decay':0, 'amsgrad':False, 'capturable':True} 'weight_decay':0, 'amsgrad':False, 'capturable':True}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment