Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
gpu-base-image-build
Commits
b073e39e
Commit
b073e39e
authored
Oct 24, 2024
by
chenpangpang
Browse files
feat: 解决tensorflow2.7.0、2.4.0的问题
parent
fcea8d38
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing 4 changed files with 43 additions and 10 deletions
+43
-10
build_space/Dockerfile.jupyterlab_ubuntu
build_space/Dockerfile.jupyterlab_ubuntu
+5
-2
script/1_base_test.sh
script/1_base_test.sh
+15
-5
script/2_text_test.sh
script/2_text_test.sh
+12
-2
script/3_image_test.sh
script/3_image_test.sh
+11
-1
No files found.
build_space/Dockerfile.jupyterlab_ubuntu
View file @
b073e39e
...
@@ -88,11 +88,15 @@ RUN if [ -n "$TENSORFLOW_VERSION" ]; then \
...
@@ -88,11 +88,15 @@ RUN if [ -n "$TENSORFLOW_VERSION" ]; then \
[ "$tf_version_minor" == "2.13" ] || [ "$tf_version_minor" == "2.18" ] && tensorflow_text_version=$tf_version_minor.0rc0 || tensorflow_text_version=$tf_version_minor.*; \
[ "$tf_version_minor" == "2.13" ] || [ "$tf_version_minor" == "2.18" ] && tensorflow_text_version=$tf_version_minor.0rc0 || tensorflow_text_version=$tf_version_minor.*; \
pip install --no-cache-dir tensorflow[and-cuda]==$TENSORFLOW_VERSION tensorflow-text==$tensorflow_text_version tensorflow-hub; \
pip install --no-cache-dir tensorflow[and-cuda]==$TENSORFLOW_VERSION tensorflow-text==$tensorflow_text_version tensorflow-hub; \
# 2.16.1必须手动添加环境变量
# 2.16.1必须手动添加环境变量
if [ $TENSORFLOW_VERSION == 2.16.1 ]; then \
if [ $TENSORFLOW_VERSION == "2.16.1" ]; then \
python_version=$(echo $IMAGE_TAG | awk -F'[-:]' '{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}') && \
python_version=$(echo $IMAGE_TAG | awk -F'[-:]' '{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}') && \
CUDNN_PATH=/opt/conda/lib/python$python_version/site-packages/nvidia/cudnn && \
CUDNN_PATH=/opt/conda/lib/python$python_version/site-packages/nvidia/cudnn && \
echo "export CUDNN_PATH=$CUDNN_PATH" >> /etc/bash.bashrc && \
echo "export CUDNN_PATH=$CUDNN_PATH" >> /etc/bash.bashrc && \
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDNN_PATH/lib:/usr/local/cuda/lib64" >> /etc/bash.bashrc; \
echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDNN_PATH/lib:/usr/local/cuda/lib64" >> /etc/bash.bashrc; \
elif [ $TENSORFLOW_VERSION == "2.7.0" ]; then \
pip install --no-cache-dir protobuf==3.20.*; \
elif [ $TENSORFLOW_VERSION == "2.4.0" ]; then \
pip install --no-cache-dir numpy==1.19.2 matplotlib==3.6.*; \
fi; fi
fi; fi
# ----- paddlepaddle install -----
# ----- paddlepaddle install -----
...
@@ -111,7 +115,6 @@ RUN if [ -n "$PADDLENLP_VERSION" ] ; then \
...
@@ -111,7 +115,6 @@ RUN if [ -n "$PADDLENLP_VERSION" ] ; then \
pip install --upgrade ppdiffusers --no-deps && rm -r /root/.cache/pip; \
pip install --upgrade ppdiffusers --no-deps && rm -r /root/.cache/pip; \
fi
fi
COPY ./python-requirements.txt /tmp/
COPY ./python-requirements.txt /tmp/
RUN pip install --no-cache-dir -r /tmp/python-requirements.txt
RUN pip install --no-cache-dir -r /tmp/python-requirements.txt
...
...
script/1_base_test.sh
View file @
b073e39e
...
@@ -25,13 +25,22 @@ if [[ "$1" == *"pytorch"* ]]; then
...
@@ -25,13 +25,22 @@ if [[ "$1" == *"pytorch"* ]]; then
"
"
elif
[[
"
$1
"
==
*
"tensorflow"
*
]]
;
then
elif
[[
"
$1
"
==
*
"tensorflow"
*
]]
;
then
tensorflow_version
=
$(
echo
"
$1
"
|
cut
-d
:
-f2
|
cut
-d-
-f1
)
tensorflow_version
=
$(
echo
"
$1
"
|
cut
-d
:
-f2
|
cut
-d-
-f1
)
docker_run_arg
=
""
# 当tensorflow版本为2.16.1时,不添加环境变量找不到cuda,所以需要这样执行验证。在正常交互式启动容器时,会默认激活/etc/bash.bashrc,可以正常找到cuda
# 当tensorflow版本为2.16.1时,不添加环境变量找不到cuda,所以需要这样执行验证。在正常交互式启动容器时,会默认激活/etc/bash.bashrc,可以正常找到cuda
if
[[
"
$tensorflow_version
"
==
"2.16.1"
]]
;
then
if
[[
"
$tensorflow_version
"
==
"2.16.1"
]]
;
then
python_version
=
$(
echo
$1
|
awk
-F
'[-:]'
'{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}'
)
python_version
=
$(
echo
$1
|
awk
-F
'[-:]'
'{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}'
)
docker_run_arg
=
"-e CUDNN_PATH=
\"
/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn
\"
\
docker run
--rm
--platform
=
linux/amd64
--gpus
all
\
-e LD_LIBRARY_PATH=
\"
/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64
\"
"
;
fi
-e
CUDNN_PATH
=
"/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn"
\
docker run
--rm
--platform
=
linux/amd64
--gpus
all
$docker_run_arg
$1
python
-c
\
-e
LD_LIBRARY_PATH
=
"/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64"
\
$1
python
-c
"import os;
\
os.system(
\"
cat /etc/issue
\"
);
\
import sys;
\
print(
\"
python version:
\"
, sys.version);
\
import tensorflow as tf;
\
print(
\"
tensorflow version:
\"
, tf.__version__);
\
print(
\"
tensorflow cuda available:
\"
, tf.test.is_gpu_available());
\
os.system('nvcc -V | tail -n 2')
"
;
else
docker run
--rm
--platform
=
linux/amd64
--gpus
all
$1
python
-c
\
"import os;
\
"import os;
\
os.system(
\"
cat /etc/issue
\"
);
\
os.system(
\"
cat /etc/issue
\"
);
\
import sys;
\
import sys;
\
...
@@ -40,7 +49,7 @@ elif [[ "$1" == *"tensorflow"* ]]; then
...
@@ -40,7 +49,7 @@ elif [[ "$1" == *"tensorflow"* ]]; then
print(
\"
tensorflow version:
\"
, tf.__version__);
\
print(
\"
tensorflow version:
\"
, tf.__version__);
\
print(
\"
tensorflow cuda available:
\"
, tf.test.is_gpu_available());
\
print(
\"
tensorflow cuda available:
\"
, tf.test.is_gpu_available());
\
os.system('nvcc -V | tail -n 2')
os.system('nvcc -V | tail -n 2')
"
"
;
fi
elif
[[
"
$1
"
==
*
"paddle"
*
]]
;
then
elif
[[
"
$1
"
==
*
"paddle"
*
]]
;
then
TARGET_DIR
=
gpu-base-image-test/paddletest
TARGET_DIR
=
gpu-base-image-test/paddletest
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace
$1
python base_test.py
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace
$1
python base_test.py
...
@@ -50,3 +59,4 @@ else
...
@@ -50,3 +59,4 @@ else
exit
1
exit
1
fi
fi
script/2_text_test.sh
View file @
b073e39e
...
@@ -9,9 +9,19 @@ fi
...
@@ -9,9 +9,19 @@ fi
if
[[
"
$1
"
==
*
"pytorch"
*
]]
;
then
if
[[
"
$1
"
==
*
"pytorch"
*
]]
;
then
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/pytorch/gpt2
$1
python infer.py
;
fi
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/pytorch/gpt2
$1
python infer.py
;
fi
if
[[
"
$1
"
==
*
"tensorflow"
*
]]
;
then
if
[[
"
$1
"
==
*
"tensorflow"
*
]]
;
then
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/tensorflow/bert
$1
python infer.py
;
fi
tensorflow_version=$(echo "$1" | cut -d: -f2 | cut -d- -f1)
# 当tensorflow版本为2.16.1时,不添加环境变量找不到cuda,所以需要这样执行验证。在正常交互式启动容器时,会默认激活/etc/bash.bashrc,可以正常找到cuda
if
[[
"
$tensorflow_version
"
==
"2.16.1"
]]
;
then
python_version=$(echo $1 | awk -F'[-:]' '{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}')
docker run
--rm
--platform
=
linux/amd64
--gpus
all
\
-e
CUDNN_PATH
=
"/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn"
\
-e
LD_LIBRARY_PATH
=
"/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64"
\
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/tensorflow/bert
$1
python infer.py
else
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/tensorflow/bert
$1
python infer.py
;
fi
;
fi
if
[[
"
$1
"
==
*
"paddle"
*
]]
;
then
if
[[
"
$1
"
==
*
"paddle"
*
]]
;
then
TARGET_DIR
=
gpu-base-image-test/paddletest
TARGET_DIR
=
gpu-base-image-test/paddletest
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace
$1
python text.py
;
fi
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace
$1
python text.py
;
fi
script/3_image_test.sh
View file @
b073e39e
...
@@ -10,9 +10,19 @@ if [[ "$1" == *"pytorch"* ]]; then
...
@@ -10,9 +10,19 @@ if [[ "$1" == *"pytorch"* ]]; then
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/pytorch/stable-diffusion-v1-4
$1
python infer.py
;
fi
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/pytorch/stable-diffusion-v1-4
$1
python infer.py
;
fi
if
[[
"
$1
"
==
*
"tensorflow"
*
]]
;
then
if
[[
"
$1
"
==
*
"tensorflow"
*
]]
;
then
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/tensorflow/mnist
$1
python train.py
;
fi
tensorflow_version=$(echo "$1" | cut -d: -f2 | cut -d- -f1)
# 当tensorflow版本为2.16.1时,不添加环境变量找不到cuda,所以需要这样执行验证。在正常交互式启动容器时,会默认激活/etc/bash.bashrc,可以正常找到cuda
if
[[
"
$tensorflow_version
"
==
"2.16.1"
]]
;
then
python_version=$(echo $1 | awk -F'[-:]' '{for(i=3;i<=NF;i++) if($i ~ /^py[0-9]+\.[0-9]+$/) {gsub(/^py/,"",$i); print $i; exit}}')
docker run
--rm
--platform
=
linux/amd64
--gpus
all
\
-e
CUDNN_PATH
=
"/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn"
\
-e
LD_LIBRARY_PATH
=
"/opt/conda/lib/python
$python_version
/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64"
\
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/tensorflow/mnist
$1
python train.py
else
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace/tensorflow/mnist
$1
python train.py
;
fi
;
fi
if
[[
"
$1
"
==
*
"paddle"
*
]]
;
then
if
[[
"
$1
"
==
*
"paddle"
*
]]
;
then
TARGET_DIR
=
gpu-base-image-test/paddletest
TARGET_DIR
=
gpu-base-image-test/paddletest
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace
$1
python image.py
;
fi
docker run
--rm
--platform
=
linux/amd64
--gpus
all
-v
./
$TARGET_DIR
:/workspace
--workdir
/workspace
$1
python image.py
;
fi
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment