Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
MMPretrain-MMCV
Commits
8c112561
"packaging/pre_build_script.sh" did not exist on "e17f5ea2d322f5eb3cb7cb14aab9849fba013c7c"
Commit
8c112561
authored
Sep 03, 2024
by
renzhc
Browse files
update README.md
parent
8b36aa0f
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
108 additions
and
49 deletions
+108
-49
README.md
README.md
+66
-0
configs/_base_/models/tiny_mobilenet_v2_1x.py
configs/_base_/models/tiny_mobilenet_v2_1x.py
+12
-0
inception-v3-test.py
inception-v3-test.py
+12
-12
mobilenet-v2-test.py
mobilenet-v2-test.py
+6
-0
resnet50-test.py
resnet50-test.py
+12
-15
resnet50_imagenet200_8b32.py
resnet50_imagenet200_8b32.py
+0
-22
vgg16-test.py
vgg16-test.py
+0
-0
No files found.
README.md
View file @
8c112561
# MMPretrain-MMCV
## 环境配置
### Docker(方法一)
推荐使用docker方式运行,拉取提供的docker镜像
```shell
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
```
基于拉取的镜像创建容器
```shell
# <your IMAGE ID or NAME>用以上拉取的docker的镜像ID或名称替换
docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /opt/hyhal:/opt/hyhal:ro -v $PWD/mobilenetv2_mmcv:/home/mobilenetv2_mmcv <your IMAGE ID> bash
```
克隆git仓库,并安装相关依赖
```shell
git clone --recursive http://developer.hpccube.com/codes/modelzoo/mobilenetv2_mmcv.git
cd mobilenetv2_mmcv/mmpretrain-mmcv
pip install -r requirements.txt
```
### Dockerfile(方法二)
cd mobilenetv2_mmcv/docker
docker build --no-cache -t mobilenetv2_mmcv:latest .
docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /opt/hyhal:/opt/hyhal:ro -v $PWD/mobilenetv2_mmcv:/home/mobilenetv2_mmcv <your IMAGE ID> bash
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:pip install -r requirements.txt
### Anaconda(方法三)
1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装: https://developer.hpccube.com/tool/
```shell
DTK驱动: DTK-24.04.1
python==3.10
torch==2.1.0
torchvision==0.16.0+das1.1.git7d45932.abi1.dtk2404.torch2.1
mmcv==2.0.1+das1.1.gite58da25.abi1.dtk2404.torch2.1.0
```
Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应
2、其它非特殊库参照requirements.txt安装
pip install -r requirements.txt
## 示例
本仓库中提供了几个在tiny imagenet下进行测试的脚本
如用8卡从零开始训练resnet50的运行方式如下,
```shell
bash tools/dist_train.sh resnet50-test.py 8
```
更多的配置在configs目录下,均可通过以下方式运行
```shell
bash tools/dist_train.sh <配置文件脚本> <训练用卡数>
```
configs/_base_/models/tiny_mobilenet_v2_1x.py
0 → 100644
View file @
8c112561
# Model settings: a MobileNetV2 image classifier with a 200-way linear head.
# Every nested dict is a component spec whose ``type`` names the class to
# build; the remaining keys are passed to that component.
model = dict(
    type='ImageClassifier',
    # widen_factor=1.0 selects the unscaled (1x) channel widths.
    backbone=dict(type='MobileNetV2', widen_factor=1.0),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        # 200 classes -- presumably Tiny ImageNet, going by the file name.
        num_classes=200,
        # NOTE(review): must equal the backbone's output feature width;
        # confirm 1280 is right for widen_factor=1.0.
        in_channels=1280,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
        # Report top-1 and top-5 accuracy.
        topk=(1, 5),
    ),
)
inception-v3
_8xb32_in1k
.py
→
inception-v3
-test
.py
View file @
8c112561
...
...
@@ -5,11 +5,11 @@ _base_ = [
'configs/_base_/default_runtime.py'
,
]
import
os
import
torch
#
import os
#
import torch
torch
.
backends
.
cuda
.
matmul
.
allow_tf32
=
True
torch
.
backends
.
cudnn
.
allow_tf32
=
True
#
torch.backends.cuda.matmul.allow_tf32=True
#
torch.backends.cudnn.allow_tf32=True
train_pipeline
=
[
dict
(
type
=
'LoadImageFromFile'
),
...
...
@@ -36,11 +36,11 @@ optim_wrapper = dict(
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.1
,
momentum
=
0.9
,
weight_decay
=
0.0001
))
# 自定义hooks,添加ProfilerHook, 只在rank0启用
custom_hooks
=
[
dict
(
type
=
'ProfilerHook'
,
by_epoch
=
False
,
profile_times
=
5
,
on_trace_ready
=
dict
(
type
=
"log_trace"
,
sort_by
=
"self_cuda_time_total"
),
json_trace_path
=
f
"trace_inceptionv3_tf32.json"
,
activity_with_cuda
=
True
,
schedule
=
dict
(
wait
=
3
,
warmup
=
1
,
active
=
1
,
repeat
=
1
))
# 这样的设置是10次
]
if
os
.
environ
[
'LOCAL_RANK'
]
==
'0'
else
[]
#
custom_hooks = [
#
dict(type='ProfilerHook', by_epoch=False,
#
profile_times=5,
#
on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total"),
#
json_trace_path=f"trace_inceptionv3_tf32.json",
#
activity_with_cuda=True,
#
schedule=dict(wait=3, warmup=1, active=1, repeat=1)) # 这样的设置是10次
#
] if os.environ['LOCAL_RANK'] == '0' else []
mobilenet-v2-test.py
0 → 100644
View file @
8c112561
# MobileNetV2 test config. It defines nothing of its own: model, dataset,
# schedule and runtime settings all come from the base configs listed here.
_base_ = [
    'configs/_base_/models/tiny_mobilenet_v2_1x.py',
    'configs/_base_/datasets/tiny_imagenet_bs32_pil_resize.py',
    'configs/_base_/schedules/imagenet_bs256_epochstep.py',
    'configs/_base_/default_runtime.py',
]
resnet50-test.py
View file @
8c112561
...
...
@@ -3,23 +3,20 @@ _base_ = [
'configs/_base_/schedules/imagenet_bs256.py'
,
'configs/_base_/default_runtime.py'
]
import
torch
torch
.
backends
.
cuda
.
matmul
.
allow_tf32
=
True
torch
.
backends
.
cudnn
.
allow_tf32
=
True
#import os
# optimizer
optim_wrapper
=
dict
(
#
type='AmpOptimWrapper',
#
dtype='float16',
type
=
'AmpOptimWrapper'
,
dtype
=
'
b
float16'
,
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.1
,
momentum
=
0.9
,
weight_decay
=
0.0001
))
custom_hooks
=
[
dict
(
type
=
'ProfilerHook'
,
by_epoch
=
False
,
profile_times
=
12
,
with_stack
=
True
,
with_flops
=
True
,
on_trace_
ready
=
dict
(
type
=
"log_trace"
,
sort_by
=
"self_cuda_time_total"
)
,
activity_with_cuda
=
True
,
schedule
=
dict
(
wait
=
1
,
warmup
=
1
,
active
=
10
,
repeat
=
1
))
]
# 自定义hooks,添加ProfilerHook, 只在rank0启用
#custom_hooks = [
# dict(type='ProfilerHook', by_epoch=False
,
#
profile_times=12
,
#
on_trace_ready=dict(type="log_trace", sort_by="self_cuda_time_total")
,
#
js
on_trace_
path=f"trace_resnet50_8xb32_bf16.json"
,
#
activity_with_cuda=True,
#
schedule=dict(wait=1, warmup=1, active=10, repeat=1))
# 这样的设置是10次
#] if os.environ['LOCAL_RANK'] == '0' else [
]
resnet50_imagenet200_8b32.py
deleted
100644 → 0
View file @
8b36aa0f
# ResNet-50 config: model, dataset, schedule and runtime settings are
# inherited from the base configs; this file overrides the optimizer wrapper
# and adds a profiler hook.
_base_ = [
    'configs/_base_/models/resnet50.py',
    'configs/_base_/datasets/tiny_imagenet_bs32.py',
    'configs/_base_/schedules/imagenet_bs256.py',
    'configs/_base_/default_runtime.py',
]

import os

# Optimizer: SGD wrapped in AmpOptimWrapper with dtype 'bfloat16'.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    dtype='bfloat16',
    optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001),
)

# Custom hooks: add a ProfilerHook, enabled only on local rank 0.
# LOCAL_RANK is read with a default of '0' so that loading this config does
# not raise KeyError when the variable is not exported.
# NOTE(review): assumes an unset LOCAL_RANK means a single-process launch --
# confirm for launchers that set the rank only after the config is parsed.
_profile_on_this_rank = os.environ.get('LOCAL_RANK', '0') == '0'

custom_hooks = [
    dict(
        type='ProfilerHook',
        by_epoch=False,
        # 12 = wait (1) + warmup (1) + active (10) from the schedule below.
        profile_times=12,
        on_trace_ready=dict(type='log_trace', sort_by='self_cuda_time_total'),
        json_trace_path='trace_resnet50_8xb32_bf16.json',
        activity_with_cuda=True,
        # With these settings 10 steps are recorded (active=10).
        schedule=dict(wait=1, warmup=1, active=10, repeat=1),
    )
] if _profile_on_this_rank else []
vgg16
_8xb32_in1k
.py
→
vgg16
-test
.py
View file @
8c112561
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment