Commit 16f2edf3 authored by liangjing's avatar liangjing
Browse files

update

parent 61a5b743
...@@ -12,55 +12,6 @@ ...@@ -12,55 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Base image for the build. Override at build time with
#   docker build --build-arg FROM_IMAGE_NAME=<image> .
# NOTE(review): the original line also carried a second, never-referenced
# assignment
#   FROM=image.sourcefind.cn:5000/dcu/admin/base/custom:mlperf-minigo-latest
# (it looks like two diff columns fused together, and a multi-name ARG is
# rejected by the legacy builder). It is kept here only as a candidate override
# value -- confirm which base image is intended.
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:22.09-tf1-py3
FROM ${FROM_IMAGE_NAME}
# OS-level packages installed through apt. The package index is refreshed and
# removed again inside the same layer so it never ends up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        cmake \
        infiniband-diags \
        pciutils \
        python2.7 \
    && rm -rf /var/lib/apt/lists/*
# Copy MiniGo code: the whole build context is copied into /opt/reinforcement.
WORKDIR /opt/reinforcement
COPY . .
# Build configuration (persisted into the image environment):
#   BOARD_SIZE             - forwarded to bazel as --define=board_size
#   MINIGO_BAZEL_CACHE_DIR - used as bazel's --output_user_root
# Written in key=value form; the space-separated `ENV key value` form is legacy.
ENV BOARD_SIZE=19 \
    MINIGO_BAZEL_CACHE_DIR=/opt/reinforcement/minigo-bazel-cache
# Copy TF dependency: stage the TensorFlow shared libraries and headers under
# minigo/cc/tensorflow.
# The tensorflow_core directory is presumably provided by the base image
# (Python 3.8 dist-packages) -- confirm when changing the base image.
# `mkdir -p` keeps this step from failing when the lib directory already exists
# in the copied build context.
RUN TF_CORE=/usr/local/lib/python3.8/dist-packages/tensorflow_core \
    && mkdir -p minigo/cc/tensorflow/lib \
    && cp "${TF_CORE}/libtensorflow_framework.so.1" minigo/cc/tensorflow/lib \
    && cp "${TF_CORE}/libtensorflow_cc.so.1" minigo/cc/tensorflow/lib \
    && cp -r "${TF_CORE}/include" minigo/cc/tensorflow/include
# Python dependencies: the MLCommons logging package straight from GitHub,
# followed by everything listed in minigo/requirements.txt.
WORKDIR /opt/reinforcement/minigo
RUN pip3 install --no-cache-dir \
        git+https://github.com/mlcommons/logging.git \
    && pip3 install --no-cache-dir \
        --requirement requirements.txt
# Install pybind11 to enable C++-python interface.
# pytest is installed first, then pybind11 v2.4.3 is cloned (shallow) into
# /usr/local/src/pybind11.
RUN pip3 install --no-cache-dir pytest \
    && git clone --branch v2.4.3 --depth 1 https://github.com/pybind/pybind11 /usr/local/src/pybind11
# Build inside the checkout via WORKDIR rather than `RUN cd ...`.
WORKDIR /usr/local/src/pybind11
# Install the CMake-built artifacts, then the Python package from the same tree.
# "$(nproc)" is quoted so the job count is passed as a single word.
RUN cmake . \
    && make install -j"$(nproc)" \
    && pip3 install --no-cache-dir .
# Return to the MiniGo source directory that the following build steps expect.
WORKDIR /opt/reinforcement/minigo
# Build Minigo: compile the cc:minigo_python.so target with bazel, keeping
# bazel's output tree under MINIGO_BAZEL_CACHE_DIR and passing BOARD_SIZE
# through as the board_size define.
RUN mkdir -p "${MINIGO_BAZEL_CACHE_DIR}" \
    && bazel --output_user_root="${MINIGO_BAZEL_CACHE_DIR}" \
        build \
        --compilation_mode=opt \
        --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 \
        --copt=-O3 \
        --define=board_size="${BOARD_SIZE}" \
        --define=tf=1 \
        cc:minigo_python.so
# Append the bazel output directory to PYTHONPATH (key=value form instead of
# the legacy space-separated ENV syntax).
ENV PYTHONPATH="${PYTHONPATH}:/opt/reinforcement/minigo/bazel-bin/cc"
# Register the tensorflow_core directory with the dynamic linker and refresh
# the linker cache.
RUN echo '/usr/local/lib/python3.8/dist-packages/tensorflow_core' > /etc/ld.so.conf.d/tensorflow.conf \
    && ldconfig
# back to where run* files are
WORKDIR /opt/reinforcement
...@@ -19,6 +19,8 @@ Minogo是一个基于深度强化学习的围棋程序,模型灵感来源于Go ...@@ -19,6 +19,8 @@ Minogo是一个基于深度强化学习的围棋程序,模型灵感来源于Go
## 环境配置 ## 环境配置
**Docker (方法一)**
提供[光源](https://www.sourcefind.cn/#/service-details)拉取的训练的docker镜像: 提供[光源](https://www.sourcefind.cn/#/service-details)拉取的训练的docker镜像:
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:mlperf-minigo-latest docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:mlperf-minigo-latest
...@@ -27,6 +29,14 @@ Minogo是一个基于深度强化学习的围棋程序,模型灵感来源于Go ...@@ -27,6 +29,14 @@ Minogo是一个基于深度强化学习的围棋程序,模型灵感来源于Go
# <Container Path>容器映射路径 # <Container Path>容器映射路径
docker run -it --name mlperf_minigo --shm-size=32G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v <Host Path>:<Container Path> <Image ID> /bin/bash docker run -it --name mlperf_minigo --shm-size=32G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v <Host Path>:<Container Path> <Image ID> /bin/bash
**Dockerfile (方法二)**
docker build --no-cache -t mlperf_minigo:latest .
docker run -it --name mlperf_minigo --shm-size=32G --device=/dev/kfd --device=/dev/dri/ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --ulimit memlock=-1:-1 --ipc=host --network host --group-add video -v <Host Path>:<Container Path> <Image ID> /bin/bash
# <Image ID>用上面构建的docker镜像的ID替换(也可直接使用 mlperf_minigo:latest)
# <Host Path>主机端路径
# <Container Path>容器映射路径
镜像版本依赖: 镜像版本依赖:
* DTK驱动:dtk22.04.2 * DTK驱动:dtk22.04.2
...@@ -85,7 +95,7 @@ Minogo是一个基于深度强化学习的围棋程序,模型灵感来源于Go ...@@ -85,7 +95,7 @@ Minogo是一个基于深度强化学习的围棋程序,模型灵感来源于Go
## 源码仓库及问题反馈 ## 源码仓库及问题反馈
* https://developer.hpccube.com/codes/modelzoo/mlperf_minigo_tensorflow * https://developer.hpccube.com/codes/modelzoo/mlperf_minigo_tensorflow
## 参考 ## 参考资料
* https://mlcommons.org/en/ * https://mlcommons.org/en/
* https://github.com/mlcommons * https://github.com/mlcommons
* https://github.com/mlcommons/training_results_v2.1/tree/main/NVIDIA/benchmarks/minigo/implementations/tensorflow-22.09 * https://github.com/mlcommons/training_results_v2.1/tree/main/NVIDIA/benchmarks/minigo/implementations/tensorflow-22.09
result.png

105 KB | W: | H:

result.png

1.52 MB | W: | H:

result.png
result.png
result.png
result.png
  • 2-up
  • Swipe
  • Onion skin
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment