Commit 9eaaf126 authored by chenpangpang
Browse files

feat: 2.16.1的bug修复

parent ed01888f
......@@ -14,6 +14,7 @@ ARG TORCHAUDIO_VERSION
# ----- tensorflow args -----
ARG TENSORFLOW_VERSION
ARG IMAGE_TAG
#ARG CONDA_URL="https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py310_24.7.1-0-Linux-x86_64.sh"
# ----- paddlepaddle args -----
......@@ -85,8 +86,14 @@ RUN if [ -n "$TORCH_VERSION" ];then \
# Install TensorFlow (with the and-cuda extra), tensorflow-text and tensorflow-hub,
# but only when the TENSORFLOW_VERSION build argument is non-empty.
RUN if [ -n "$TENSORFLOW_VERSION" ]; then \
# Keep only the first two dot-separated fields of the version (e.g. 2.16.1 -> 2.16).
tf_version_minor=$(echo $TENSORFLOW_VERSION | cut -d'.' -f1-2 ); \
# tensorflow-text is pinned to <major.minor>.0rc0 for 2.13 and 2.18, and to
# <major.minor>.* for every other version. The chain groups as
# ((A || B) && assign_rc) || assign_wildcard.
[ "$tf_version_minor" == "2.13" ] || [ "$tf_version_minor" == "2.18" ] && tensorflow_text_version=$tf_version_minor.0rc0 || tensorflow_text_version=$tf_version_minor.*; \
# NOTE(review): the two pip lines below differ only in how they end ("; fi" vs "; \");
# presumably they are the removed and added sides of the diff - confirm which one applies.
pip install --no-cache-dir tensorflow[and-cuda]==$TENSORFLOW_VERSION tensorflow-text==$tensorflow_text_version tensorflow-hub; fi
pip install --no-cache-dir tensorflow[and-cuda]==$TENSORFLOW_VERSION tensorflow-text==$tensorflow_text_version tensorflow-hub; \
# TensorFlow 2.16.1 requires these environment variables to be added manually.
if [ $TENSORFLOW_VERSION == 2.16.1 ]; then \
# Split IMAGE_TAG on '-' and ':' and, starting at the third field, take the first
# field of the form py<digits>.<digits>, with the "py" prefix removed.
# NOTE(review): if no field matches, python_version is empty and the path below
# becomes /opt/conda/lib/python/site-packages/nvidia/cudnn.
python_version=$(echo $IMAGE_TAG | awk -F'[-:]' '{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}') && \
CUDNN_PATH=/opt/conda/lib/python$python_version/site-packages/nvidia/cudnn && \
# Append the exports to /etc/bash.bashrc.
echo "export CUDNN_PATH=$CUDNN_PATH" >> /etc/bash.bashrc && \
# NOTE(review): $LD_LIBRARY_PATH sits inside double quotes, so it is expanded while
# this RUN step executes; the build-time value is what gets written to the file.
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDNN_PATH/lib:/usr/local/cuda/lib64" >> /etc/bash.bashrc; \
fi; fi
# ----- paddlepaddle install -----
RUN if [ -n "$PADDLEPADDLE_VERSION" ] && [ -n "$PADDLE_URL" ]; then \
......@@ -140,7 +147,6 @@ RUN jupytersite="$(python3 -m pip show jupyterlab | grep -i '^location' | awk '{
&& ssh-keygen -A \
&& sed -i "s/#UseDNS .*/UseDNS no/" /etc/ssh/sshd_config
EXPOSE 8888
......
......@@ -9,9 +9,10 @@ build_args=" --build-arg BASE_IMAGE=$base_image"
# Add BASE_IMAGE_IS_TORCH=1 when the base image name, with everything from the
# first ':' onward removed, is exactly pytorch/pytorch.
if [ ${base_image%%:*} = "pytorch/pytorch" ]; then
build_args="$build_args --build-arg BASE_IMAGE_IS_TORCH=1 "
fi
# Pass the image tag into the build as the IMAGE_TAG build argument.
build_args="$build_args --build-arg IMAGE_TAG=$image_tag"
# Append every positional parameter from the 4th onward as its own --build-arg.
# ${*:4} is unquoted, so the parameters are split on whitespace.
for arg in ${*:4}
do
# NOTE(review): the two assignments below differ only by a trailing space;
# presumably they are the removed and added sides of the diff - confirm which one applies.
build_args="$build_args --build-arg $arg "
build_args="$build_args --build-arg $arg"
done
# Dockerfile name with a $RANDOM suffix. NOTE(review): how it is used is not visible here.
tmp_dockerfile="Dockerfile.${RANDOM}"
......
......@@ -24,7 +24,23 @@ if [[ "$1" == *"pytorch"* ]]; then
print(\"torchaudio version: \", torchaudio.__version__);
"
elif [[ "$1" == *"tensorflow"* ]]; then
docker run --rm --platform=linux/amd64 --gpus all $1 python -c \
tensorflow_version=$(echo "$1" | cut -d: -f2 | cut -d- -f1)
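# With TensorFlow 2.16.1, CUDA cannot be found unless these environment variables are set, so the check has to be run this way. When the container is started interactively as usual, /etc/bash.bashrc is sourced by default and CUDA is found normally.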
if [[ "$tensorflow_version" == "2.16.1" ]]; then
python_version=$(echo $1 | awk -F'[-:]' '{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}')
docker run --rm --platform=linux/amd64 --gpus all \
-e CUDNN_PATH="/opt/conda/lib/python$python_version/site-packages/nvidia/cudnn" \
-e LD_LIBRARY_PATH="/opt/conda/lib/python$python_version/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64" \
$1 python -c "import os; \
os.system(\"cat /etc/issue\"); \
import sys; \
print(\"python version: \", sys.version); \
import tensorflow as tf; \
print(\"tensorflow version: \", tf.__version__); \
print(\"tensorflow cuda available: \", tf.test.is_gpu_available()); \
os.system('nvcc -V | tail -n 2')
";
else docker run --rm --platform=linux/amd64 --gpus all $1 python -c \
"import os; \
os.system(\"cat /etc/issue\"); \
import sys; \
......@@ -33,7 +49,7 @@ elif [[ "$1" == *"tensorflow"* ]]; then
print(\"tensorflow version: \", tf.__version__); \
print(\"tensorflow cuda available: \", tf.test.is_gpu_available()); \
os.system('nvcc -V | tail -n 2')
"
"; fi
elif [[ "$1" == *"paddle"* ]]; then
TARGET_DIR=gpu-base-image-test/paddletest
docker run --rm --platform=linux/amd64 --gpus all -v ./$TARGET_DIR:/workspace --workdir /workspace $1 python base_test.py
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment