"packaging/pre_build_script.sh" did not exist on "e17f5ea2d322f5eb3cb7cb14aab9849fba013c7c"
Commit 8c112561 authored by renzhc's avatar renzhc
Browse files

update README.md

parent 8b36aa0f
# MMPretrain-MMCV
## 环境配置
### Docker(方法一)
推荐使用docker方式运行,拉取提供的docker镜像
```shell
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
```
基于拉取的镜像创建容器
```shell
# 将下面命令中的 <your IMAGE ID> 替换为上面拉取的docker镜像的ID或名称
docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /opt/hyhal:/opt/hyhal:ro -v $PWD/mobilenetv2_mmcv:/home/mobilenetv2_mmcv <your IMAGE ID> bash
```
克隆git仓库,并安装相关依赖
```shell
git clone --recursive http://developer.hpccube.com/codes/modelzoo/mobilenetv2_mmcv.git
cd mobilenetv2_mmcv/mmpretrain-mmcv
pip install -r requirements.txt
```
### Dockerfile(方法二)
```shell
cd mobilenetv2_mmcv/docker
docker build --no-cache -t mobilenetv2_mmcv:latest .
docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /opt/hyhal:/opt/hyhal:ro -v $PWD/mobilenetv2_mmcv:/home/mobilenetv2_mmcv <your IMAGE ID> bash
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:pip install -r requirements.txt
```
### Anaconda(方法三)
1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装: https://developer.hpccube.com/tool/
```shell
DTK驱动: DTK-24.04.1
python==3.10
torch==2.1.0
torchvision==0.16.0+das1.1.git7d45932.abi1.dtk2404.torch2.1
mmcv==2.0.1+das1.1.gite58da25.abi1.dtk2404.torch2.1.0
```
Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应
2、其它非特殊库参照requirements.txt安装
```shell
pip install -r requirements.txt
```
## 示例
本仓库中提供了几个在tiny imagenet下进行测试的脚本。
例如,使用8卡从零开始训练resnet50的运行方式如下:
```shell
bash tools/dist_train.sh resnet50-test.py 8
```
更多的配置在configs目录下,均可通过以下方式运行
```shell
bash tools/dist_train.sh <配置文件脚本> <训练用卡数>
```
# Model settings: an image classifier assembled from a MobileNetV2 backbone,
# a global-average-pooling neck and a linear classification head.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='MobileNetV2',
        widen_factor=1.0,
    ),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        # 200 output classes -- presumably Tiny ImageNet; confirm against the
        # dataset config this model is paired with.
        num_classes=200,
        # NOTE(review): expected to equal the backbone's output channel count.
        in_channels=1280,
        loss=dict(
            type='CrossEntropyLoss',
            loss_weight=1.0,
        ),
        # Presumably the k values used for top-k accuracy reporting.
        topk=(1, 5),
    ),
)
......@@ -5,11 +5,11 @@ _base_ = [
'configs/_base_/default_runtime.py',
]
import os
import torch
#import os
#import torch
torch.backends.cuda.matmul.allow_tf32=True
torch.backends.cudnn.allow_tf32=True
#torch.backends.cuda.matmul.allow_tf32=True
#torch.backends.cudnn.allow_tf32=True
train_pipeline = [
dict(type='LoadImageFromFile'),
......@@ -36,11 +36,11 @@ optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
# 自定义hooks,添加ProfilerHook, 只在rank0启用
custom_hooks = [
dict(type='ProfilerHook', by_epoch=False,
profile_times=5,
on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total"),
json_trace_path=f"trace_inceptionv3_tf32.json",
activity_with_cuda=True,
schedule=dict(wait=3, warmup=1, active=1, repeat=1)) # 这样的设置是10次
] if os.environ['LOCAL_RANK'] == '0' else []
#custom_hooks = [
# dict(type='ProfilerHook', by_epoch=False,
# profile_times=5,
# on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total"),
# json_trace_path=f"trace_inceptionv3_tf32.json",
# activity_with_cuda=True,
# schedule=dict(wait=3, warmup=1, active=1, repeat=1)) # 这样的设置是10次
#] if os.environ['LOCAL_RANK'] == '0' else []
# Top-level config for the tiny MobileNetV2 experiment. It defines nothing of
# its own; it only lists the base config files (model, dataset, schedule and
# default runtime) to inherit from.
# NOTE(review): presumably `_base_` is resolved by the MMEngine config loader,
# which is not visible in this file -- confirm the paths are relative to the
# directory the training script is launched from.
_base_ = [
'configs/_base_/models/tiny_mobilenet_v2_1x.py',
'configs/_base_/datasets/tiny_imagenet_bs32_pil_resize.py',
'configs/_base_/schedules/imagenet_bs256_epochstep.py',
'configs/_base_/default_runtime.py'
]
......@@ -3,23 +3,20 @@ _base_ = [
'configs/_base_/schedules/imagenet_bs256.py', 'configs/_base_/default_runtime.py'
]
import torch
torch.backends.cuda.matmul.allow_tf32=True
torch.backends.cudnn.allow_tf32=True
#import os
# optimizer
optim_wrapper = dict(
#type='AmpOptimWrapper',
#dtype='float16',
type='AmpOptimWrapper',
dtype='bfloat16',
optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))
custom_hooks = [
dict(type='ProfilerHook', by_epoch=False,
profile_times=12,
with_stack=True,
with_flops=True,
on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total"),
activity_with_cuda=True,
schedule=dict(wait=1, warmup=1, active=10, repeat=1))
]
# 自定义hooks,添加ProfilerHook, 只在rank0启用
#custom_hooks = [
# dict(type='ProfilerHook', by_epoch=False,
# profile_times=12,
# on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total"),
# json_trace_path=f"trace_resnet50_8xb32_bf16.json",
# activity_with_cuda=True,
# schedule=dict(wait=1, warmup=1, active=10, repeat=1)) # 这样的设置是10次
#] if os.environ['LOCAL_RANK'] == '0' else []
# ResNet-50 experiment config: inherits the base model, dataset, schedule and
# default-runtime configs listed below, then overrides the optimizer wrapper
# (bfloat16 AMP) and attaches a profiler hook.
_base_ = [
    'configs/_base_/models/resnet50.py',
    'configs/_base_/datasets/tiny_imagenet_bs32.py',
    'configs/_base_/schedules/imagenet_bs256.py',
    'configs/_base_/default_runtime.py',
]
import os

# Optimizer: SGD wrapped in an AMP optimizer wrapper using bfloat16.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    dtype='bfloat16',
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001))

# Custom hooks: add a ProfilerHook, enabled only on local rank 0.
# LOCAL_RANK is read with a default of '0' so that loading this config outside
# a distributed launcher (where the variable is not set) no longer raises
# KeyError; such a single-process run is treated as rank 0.
custom_hooks = [
    dict(
        type='ProfilerHook',
        by_epoch=False,
        profile_times=12,
        on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total"),
        json_trace_path="trace_resnet50_8xb32_bf16.json",
        activity_with_cuda=True,
        # Original note: with this schedule, 10 iterations are profiled.
        schedule=dict(wait=1, warmup=1, active=10, repeat=1))
] if os.environ.get('LOCAL_RANK', '0') == '0' else []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment