Commit e1023aa7 authored by renzhc's avatar renzhc
Browse files

update readme

parent 8c112561
...@@ -22,15 +22,18 @@ docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --dev ...@@ -22,15 +22,18 @@ docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --dev
```shell ```shell
git clone --recursive http://developer.hpccube.com/codes/modelzoo/mobilenetv2_mmcv.git git clone --recursive http://developer.hpccube.com/codes/modelzoo/mobilenetv2_mmcv.git
cd mobilenetv2_mmcv/mmpretrain-mmcv cd mobilenetv2_mmcv/mmpretrain-mmcv
pip install -e .
pip install -r requirements.txt pip install -r requirements.txt
``` ```
### Dockerfile(方法二) ### Dockerfile(方法二)
cd mobilenetv2_mmcv/docker cd mmpretrain/docker
docker build --no-cache -t mobilenetv2_mmcv:latest . docker build --no-cache -t mmpretrain:latest .
docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /opt/hyhal:/opt/hyhal:ro -v $PWD/mobilenetv2_mmcv:/home/mobilenetv2_mmcv <your IMAGE ID> bash docker run -it --name=mobilenetv2 --network=host --ipc=host --shm-size=16g --device=/dev/kfd --device=/dev/dri --device=/dev/mkfd --group-add video --privileged --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /opt/hyhal:/opt/hyhal:ro -v $PWD/mmpretrain:/home/mmpretrain <your IMAGE ID> bash
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:pip install -r requirements.txt pip install -e .
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:
pip install -r requirements.txt
### Anaconda(方法三) ### Anaconda(方法三)
...@@ -45,8 +48,22 @@ torchvision==0.16.0+das1.1.git7d45932.abi1.dtk2404.torch2.1 mmcv==2.0.1+das1.1.g ...@@ -45,8 +48,22 @@ torchvision==0.16.0+das1.1.git7d45932.abi1.dtk2404.torch2.1 mmcv==2.0.1+das1.1.g
Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应 Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应
2、其它非特殊库参照requirements.txt安装 2、安装mmpretrain仓库源码
pip install -r requirements.txt
```shell
cd mmpretrain
pip install -e .
```
3、其它非特殊库参照requirements.txt安装
```shell
pip install -r requirements.txt
```
## 示例 ## 示例
...@@ -63,5 +80,3 @@ bash tools/dist_train.sh resnet50-test.py 8 ...@@ -63,5 +80,3 @@ bash tools/dist_train.sh resnet50-test.py 8
```shell ```shell
bash tools/dist_train.sh <配置文件脚本> <训练用卡数> bash tools/dist_train.sh <配置文件脚本> <训练用卡数>
``` ```
ARG PYTORCH="1.12.1" FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-ubuntu20.04-dtk24.04.1-py3.10
ARG CUDA="11.3" ENV DEBIAN_FRONTEND=noninteractive
ARG CUDNN="8" # 安装pip相关依赖
COPY requirements.txt requirements.txt
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel RUN pip3 install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -r requirements.txt
# fetch the key refer to https://forums.developer.nvidia.com/t/18-04-cuda-docker-image-is-broken/212892/9
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 32
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
ENV CMAKE_PREFIX_PATH="(dirname(which conda))/../"
RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install MIM
RUN pip install openmim
# Install MMPretrain
RUN conda clean --all
RUN git clone https://github.com/open-mmlab/mmpretrain.git
WORKDIR ./mmpretrain
RUN mim install --no-cache-dir -e .
albumentations>=0.3.2 --no-binary qudida,albumentations
colorama
requests
rich
scipy
matplotlib>=3.1.0
numpy
packaging
codecov
flake8
interrogate
isort==4.3.21
pytest
xdoctest >= 0.10.0
yapf
# TorchServe image for serving MMPretrain models.
# Build args:
#   MMPRE                - mmpretrain version installed through mim.
#   PYTORCH, CUDA, CUDNN - still accepted, but no instruction in this file reads them.
ARG PYTORCH="2.0.1"
ARG CUDA="11.7"
ARG CUDNN="8"
# NOTE(review): `latest-gpu` is a floating tag, so rebuilds are not reproducible;
# pin a tested torchserve release here once one is chosen.
FROM pytorch/torchserve:latest-gpu
ARG MMPRE="1.2.0"
ENV PYTHONUNBUFFERED=TRUE
ENV HOME="/home/model-server"
ENV PATH="/opt/conda/bin:$HOME/.local/bin:$PATH"
# Previously `RUN export FORCE_CUDA=1`, which only lived for that one RUN shell
# and never reached the install steps below; ENV makes the setting effective.
ENV FORCE_CUDA="1"
# TORCHSERVE
RUN pip install --no-cache-dir torchserve torch-model-archiver
RUN pip install --no-cache-dir nvgpu
# OPEN-MMLAB
ARG PYTORCH
ARG CUDA
RUN pip install --no-cache-dir openmim
RUN mim install --no-cache-dir "mmpretrain==${MMPRE}"
# Scratch directory; exported as TEMP further down.
RUN mkdir -p $HOME/tmp
COPY --chown=model-server entrypoint.sh $HOME/.local/bin/entrypoint.sh
RUN chmod +x $HOME/.local/bin/entrypoint.sh
COPY --chown=model-server config.properties $HOME/config.properties
# Same ports as the inference/management/metrics addresses in config.properties.
EXPOSE 8080 8081 8082
# Drop to the unprivileged account only after all install steps are done.
USER model-server
WORKDIR $HOME
ENV TEMP=$HOME/tmp
ENTRYPOINT ["/home/model-server/.local/bin/entrypoint.sh"]
CMD ["serve"]
# TorchServe settings. entrypoint.sh hands this file to `torchserve --ts-config`
# from /home/model-server/config.properties.
# The three listener ports (8080-8082) are the ones the serving Dockerfile EXPOSEs.
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
model_store=/home/model-server/model-store
load_models=all
#!/bin/bash
# Container entrypoint.
#   serve ...   -> start TorchServe with the bundled config.properties
#   <anything>  -> evaluate the arguments as a shell command
# In both cases the script then blocks so the container stays up.
set -e

case "$1" in
    serve)
        shift
        torchserve --start --ts-config /home/model-server/config.properties
        ;;
    *)
        eval "$@"
        ;;
esac

# prevent docker exit
tail -f /dev/null
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment