Commit 21d47d0e authored by yuguo's avatar yuguo
Browse files

Oneflow 0.8 for DCU

parents
sphinx==3.5.4
jinja2<3.1
recommonmark==0.6.0
furo==2021.4.11b34
sphinx-copybutton==0.5.0
# dependencies above must be identical to docs/requirements.txt
pycocotools
opencv-python==4.2.0.34
scipy
pillow
tensorflow-addons==0.9.1
https://oneflow-static.oss-cn-beijing.aliyuncs.com/pipindex/pipindex-0.1.3-py2.py3-none-any.whl
# Stage that prebuilds OneFlow's third-party dependencies only.
# `from` is the builder base image (a manylinux2014 image built elsewhere).
ARG from
FROM ${from}
WORKDIR /workspace/build
COPY cmake /workspace/cmake
COPY CMakeLists.txt /workspace/CMakeLists.txt
# BUILD DEPENDENCY
COPY build/third_party /workspace/build/third_party
# Put Intel MKL on the loader path, then configure with ONEFLOW=OFF so that
# only the third-party targets are generated, and build them.
RUN export LD_LIBRARY_PATH=/opt/intel/lib/intel64_lin:/opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH; \
cmake -DTHIRD_PARTY=ON -DONEFLOW=OFF -DCMAKE_BUILD_TYPE=Release -DRELEASE_VERSION=ON .. && make -j prepare_oneflow_third_party
# CentOS-Base.repo
#
# From https://mirror.tuna.tsinghua.edu.cn/help/centos/
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#
[base]
name=CentOS-$releasever - Base
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/os/$basearch/
http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
#released updates
[updates]
name=CentOS-$releasever - Updates
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/updates/$basearch/
http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/extras/$basearch/
http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/centosplus/$basearch/
http://mirrors.aliyun.com/centos/$releasever/centosplus/$basearch/
http://mirrors.aliyuncs.com/centos/$releasever/centosplus/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-7
# CentOS-Base.repo
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#
[base]
name=CentOS-$releasever - Base - 163.com
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
baseurl=http://mirrors.163.com/centos/$releasever/os/$basearch/
gpgcheck=1
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
#released updates
[updates]
name=CentOS-$releasever - Updates - 163.com
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
baseurl=http://mirrors.163.com/centos/$releasever/updates/$basearch/
gpgcheck=1
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras - 163.com
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
baseurl=http://mirrors.163.com/centos/$releasever/extras/$basearch/
gpgcheck=1
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/centosplus/$basearch/
gpgcheck=1
enabled=0
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
# Builder image: manylinux2014 (CentOS 7) toolchain plus OneFlow build deps.
# `from` is expected to be an nvidia/cuda *-devel-centos7 image.
ARG from
FROM ${from}
ARG use_tuna_yum=0
ARG pip_args=""
ARG bazel_url="https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64"
LABEL maintainer="OneFlow Maintainers"
# manylinux2014
ENV AUDITWHEEL_ARCH x86_64
ENV AUDITWHEEL_PLAT manylinux2014_$AUDITWHEEL_ARCH
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ENV PATH $PATH:/usr/local/bin
ENV LD_LIBRARY_PATH /usr/local/lib64:/usr/local/lib
ENV PKG_CONFIG_PATH /usr/local/lib/pkgconfig
# use tuna mirror
COPY docker/package/manylinux/CentOS7-Base-163.repo /tmp/CentOS-Base.repo
RUN if [ "${use_tuna_yum}" = "1" ]; then mv /tmp/CentOS-Base.repo /etc/yum.repos.d/ && yum makecache ; fi
# to speed up docker img building disable cuda repo
# in 10.1, cuda yum repo will update cublas to 10.2 and breaks build
RUN yum-config-manager --disable cuda nvidia-ml
ARG MANYLINUX_SHA=b634044
# Bootstrap the manylinux toolchain from the pinned Oneflow-Inc/manylinux
# build scripts; the zip and scripts are removed in the same layer.
RUN yum -y install unzip && curl -L -o manylinux.zip https://github.com/Oneflow-Inc/manylinux/archive/${MANYLINUX_SHA}.zip && unzip manylinux.zip -d tmp && cp -r tmp/*/docker/build_scripts /build_scripts && bash build_scripts/build.sh && rm -r build_scripts tmp manylinux.zip
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
# manylinux2014 end
# Intel oneAPI MKL, SCL devtoolset-7 and assorted build/debug tools.
RUN yum-config-manager --add-repo https://yum.repos.intel.com/oneapi && \
rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \
yum update -y && yum install -y epel-release && \
yum -y install centos-release-scl && \
yum install -y intel-oneapi-mkl-devel-2021.2.0 nasm rdma-core-devel devtoolset-7-gcc* rsync gdb
# Pinned cmake installed via pip and symlinked onto PATH.
RUN /opt/python/cp35-cp35m/bin/pip install $pip_args -U cmake==3.18.4.post1 && ln -s /opt/_internal/cpython-3.5.9/bin/cmake /usr/bin/cmake
# Build the pinned Oneflow-Inc patchelf fork from source (used by auditwheel).
RUN mkdir -p /tmp && cd /tmp && \
curl -L -o patchelf-src.zip \
https://github.com/Oneflow-Inc/patchelf/archive/64bf5388ef7d45d3697c4aadbd3f5d7d68a22aa3.zip && \
unzip patchelf-src.zip && cd patchelf-* && ./bootstrap.sh && ./configure && make -j`nproc` && \
make install && cd .. && rm -rf patchelf-*
# Install bazel; the trailing bare `bazel` run extracts its install base
# at image-build time so the first real invocation is fast.
RUN curl -L $bazel_url -o /usr/local/bin/bazel \
&& chmod +x /usr/local/bin/bazel \
&& bazel
COPY dev-requirements.txt /tmp/dev-requirements.txt
# Pre-install the python dev requirements for each supported CPython.
RUN /opt/python/cp36-cp36m/bin/pip install $pip_args -r /tmp/dev-requirements.txt --user \
&& /opt/python/cp37-cp37m/bin/pip install $pip_args -r /tmp/dev-requirements.txt --user \
&& /opt/python/cp38-cp38/bin/pip install $pip_args -r /tmp/dev-requirements.txt --user \
&& rm /tmp/dev-requirements.txt
# 使用 docker 生成 OneFlow wheel 包
### 创建 docker 容器
在 OneFlow 源码根目录下运行:
```
docker build -f docker/package/manylinux/Dockerfile --build-arg from=nvidia/cuda:10.2-cudnn7-devel-centos7 -t oneflow:manylinux2014-cuda10.2 .
```
### 打包 manylinux python wheel
这里有 manylinux2014(centos7) + cuda10.2 的 Dockerfile,里面安装了编译 oneflow 所需的库,假设你已经用 Dockerfile build 了一个 docker 镜像,叫做 oneflow:manylinux2014-cuda10.2,那么只要在 oneflow 源码目录运行
```bash
docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src oneflow:manylinux2014-cuda10.2
```
If you prefer to operate inside the docker container:
```bash
docker run --rm -it -v `pwd`:/oneflow-src -w /oneflow-src oneflow:manylinux2014-cuda10.2 bash
```
```bash
/oneflow-src/docker/package/manylinux/build_wheel.sh --python3.6 --wheel-dir /oneflow-src/wheel-test
```
就会在 docker 镜像里执行 build_wheel.sh 来编译生成指定 python 版本(不传版本参数时为 python 3.5 到 python 3.8)的 oneflow manylinux2014 wheel。生成的包在 `--wheel-dir` 指定的目录内(不指定时默认为 oneflow 源码目录下的 wheelhouse/ 文件夹)
#### 注意事项
1. 运行 `docker run` 时可能需要添加 `-e http_proxy=$http_proxy -e https_proxy=$https_proxy` 参数,以在容器内使用宿主机的代理,避免编译第三方库时因为网络问题而出错
2. 只要运行了 `cmake -DTHIRD_PARTY=ON ..`,oneflow 本体都会从头编译,所以如果第三方库已经由 docker 容器编译过,这次只想增量编译 oneflow 本体,可以用命令
```bash
docker run --rm -it -v `pwd`:/oneflow-src oneflow:manylinux2014-cuda10.2 /oneflow-src/docker/package/manylinux/build_wheel.sh --skip-third-party
```
这会给 build_wheel.sh 传一个 `--skip-third-party` 参数,跳过第三方库的编译
3. 只想生成某些 python 版本的包,例如 python3.5,可以用命令
```bash
docker run --rm -it -v `pwd`:/oneflow-src oneflow:manylinux2014-cuda10.2 /oneflow-src/docker/package/manylinux/build_wheel.sh --python3.5
```
支持的参数是 `--python3.5`、`--python3.6`、`--python3.7`、`--python3.8`,需要生成多个版本可以同时传入多个参数。不传入版本参数则会生成所有的 python 版本的包
4. 如果想自定义 oneflow 编译时的 cmake 参数,可以直接把 cmake 参数写出来,如:
```bash
docker run --rm -it -v `pwd`:/oneflow-src oneflow:manylinux2014-cuda10.2 /oneflow-src/docker/package/manylinux/build_wheel.sh -DWITH_XLA=ON
```
import os
import subprocess
import tempfile
from pathlib import Path
import getpass
import uuid
def get_arg_env(env_var_name: str, mode="run"):
    """Render a docker CLI flag that forwards a host environment variable.

    Args:
        env_var_name: name of an environment variable; it must be set and
            non-empty (asserted).
        mode: "run" emits an `--env NAME=VAL` flag for `docker run`;
            "build" emits a `--build-arg NAME=VAL` flag for `docker build`.

    Returns:
        The formatted flag string.

    Raises:
        ValueError: if `mode` is neither "run" nor "build".
    """
    val = os.getenv(env_var_name)
    assert val, f"system environment variable {env_var_name} found empty"
    if mode == "run":
        return f"--env {env_var_name}={val}"
    elif mode == "build":
        return f"--build-arg {env_var_name}={val}"
    else:
        # BUG FIX: the original did `raise f"..."`, which raises
        # TypeError("exceptions must derive from BaseException") instead of
        # a meaningful error. Raise a proper exception type.
        raise ValueError(f"{mode} not supported")
def get_proxy_build_args():
    """Collect `--build-arg` flags forwarding the host's proxy settings so
    that `docker build` reaches the network through the same proxy.

    Upper-case variables are handled first, then lower-case; each pair is
    only forwarded when its HTTP_* probe variable is set.
    """
    flags = []
    for probe, pair in (
        ("HTTP_PROXY", ("HTTP_PROXY", "HTTPS_PROXY")),
        ("http_proxy", ("http_proxy", "https_proxy")),
    ):
        if os.getenv(probe):
            flags.extend(get_arg_env(name, mode="build") for name in pair)
    return " ".join(flags)
def get_proxy_env_args():
    """Collect `--env` flags forwarding the host's proxy settings into
    `docker run` containers.

    Mirrors `get_proxy_build_args` but emits run-mode flags.
    """
    flags = []
    for probe, pair in (
        ("HTTP_PROXY", ("HTTP_PROXY", "HTTPS_PROXY")),
        ("http_proxy", ("http_proxy", "https_proxy")),
    ):
        if os.getenv(probe):
            flags.extend(get_arg_env(name) for name in pair)
    return " ".join(flags)
def build_img(
    cuda_version,
    oneflow_src_dir,
    use_aliyun_mirror,
    use_tuna,
    use_system_proxy,
    img_tag,
    dry,
):
    """Build the manylinux builder image `img_tag` from an nvidia/cuda base.

    Prints the assembled `docker build` command and, unless `dry` is true,
    executes it with `oneflow_src_dir` as the build context.
    """
    # CUDA 11.x bases ship cuDNN 8; older ones use cuDNN 7.
    cudnn_version = 8 if str(cuda_version).startswith("11") else 7
    # Map short CUDA versions to the exact patch tags published on docker hub.
    patch_tags = {"11.2": "11.2.2", "11.1": "11.1.1", "11.0": "11.0.3"}
    cuda_version_img = patch_tags.get(cuda_version, cuda_version)
    from_img = f"nvidia/cuda:{cuda_version_img}-cudnn{cudnn_version}-devel-centos7"
    tuna_build_arg = ""
    if use_tuna:
        tuna_build_arg = '--build-arg use_tuna_yum=1 --build-arg pip_args="-i https://mirrors.aliyun.com/pypi/simple"'
    if use_aliyun_mirror:
        # Fetch bazel from the aliyun OSS mirror instead of github.
        tuna_build_arg += ' --build-arg bazel_url="https://oneflow-static.oss-cn-beijing.aliyuncs.com/deps/bazel-3.4.1-linux-x86_64"'
    proxy_build_arg = get_proxy_build_args() if use_system_proxy else ""
    cmd = f"docker build -f docker/package/manylinux/Dockerfile {proxy_build_arg} {tuna_build_arg} --build-arg from={from_img} -t {img_tag} ."
    print(cmd)
    if not dry:
        subprocess.check_call(cmd, cwd=oneflow_src_dir, shell=True)
def common_cmake_args(cache_dir=None, extra_oneflow_cmake_args=None):
    """Compose the cmake flags shared by the third-party and oneflow builds.

    Adds a Release build type and RDMA support unless the caller already set
    them via `extra_oneflow_cmake_args`, and always points THIRD_PARTY_DIR
    into the per-configuration cache directory.
    """
    assert cache_dir
    extra = extra_oneflow_cmake_args or ""
    parts = []
    if "-DCMAKE_BUILD_TYPE" not in extra:
        parts.append(" -DCMAKE_BUILD_TYPE=Release")
    if "-DBUILD_RDMA" not in extra:
        parts.append(" -DBUILD_RDMA=ON")
    install_dir = os.path.join(cache_dir, "build-third-party-install")
    parts.append(f" -DTHIRD_PARTY_DIR={install_dir}")
    return "".join(parts)
def get_build_dir_arg(cache_dir, oneflow_src_dir):
    """Return extra `docker run` mount flags for the build directory.

    Mounting the cached build dir over the in-tree `build/` is currently
    disabled, so this always returns an empty string.
    """
    # NOTE: the original kept the mount logic as unreachable statements after
    # an unconditional `return ""`; that dead code is dropped here. To
    # re-enable, return
    #   f"-v {os.path.join(cache_dir, 'build')}:{os.path.join(oneflow_src_dir, 'build')}"
    return ""
def force_rm_dir(dir_to_clean):
    """Recursively delete the contents of `dir_to_clean`.

    Runs `rm -rf` inside a throwaway busybox container with the directory
    bind-mounted, so files created as root by earlier containerized builds
    can be removed even when this script runs unprivileged.
    """
    print("cleaning:", dir_to_clean)
    # Guard: an empty path would make the glob below expand dangerously.
    assert dir_to_clean
    clean_cmd = f"docker run --network=host --rm -v {dir_to_clean}:{dir_to_clean} -w {dir_to_clean} busybox rm -rf {dir_to_clean}/*"
    subprocess.check_call(clean_cmd, shell=True)
def create_tmp_bash_and_run(docker_cmd, img, bash_cmd, bash_args, bash_wrap, dry):
    """Write `bash_cmd` to a temp script, wrap it with `bash_wrap` (e.g. an
    scl toolchain activation), and execute the wrapper inside container
    image `img` using the prepared `docker_cmd` prefix.

    Both scripts are created on the host under /tmp and reached from inside
    the container via the `-v /tmp:/host/tmp` mount added below. Prints the
    scripts and the final command; skips execution when `dry` is true.
    """
    with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as wrapper_f:
        with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as f:
            # Container-side paths of the host temp files.
            w_name = "/host" + wrapper_f.name
            f_name = "/host" + f.name
            # Make python3.7 the default python inside the container.
            bash_cmd = "PATH=/opt/python/cp37-cp37m/bin:$PATH\n" + bash_cmd
            f.write(bash_cmd)
            f.flush()
            wrapped = f"""
{bash_wrap}
bash {bash_args} {f_name}
"""
            wrapper_f.write(wrapped)
            wrapper_f.flush()
            print("=" * 5 + f"bash_cmd: {f_name}" + "=" * 5)
            print(bash_cmd)
            print("=" * 5 + f"bash_cmd: {f_name}" + "=" * 5)
            print("=" * 5 + f"wrapped: {w_name}" + "=" * 5)
            print(wrapped)
            print("=" * 5 + f"wrapped: {w_name}" + "=" * 5)
            # Expose host /tmp (where the scripts live) inside the container.
            docker_cmd = f"{docker_cmd} -v /tmp:/host/tmp {img}"
            cmd = f"{docker_cmd} bash {bash_args} {w_name}"
            print(cmd)
            if dry:
                print("dry run, skipping")
            else:
                subprocess.check_call(cmd, shell=True)
def get_common_docker_args(
    oneflow_src_dir=None,
    cache_dir=None,
    current_dir=None,
    house_dir=None,
    use_system_proxy=True,
    inplace=False,
):
    """Assemble the `docker run` arguments shared by every build step:
    cache/ccache mounts, source mount (read-only unless `inplace`), proxy
    env forwarding, working directory and shm size.
    """
    # The working directory must live inside the cache dir so all build
    # output lands in mounted (persistent) storage.
    assert Path(cache_dir) in Path(current_dir).parents
    cwd = os.getcwd()
    per_user_caches = [
        f"-v {os.path.join(cache_dir, 'ccache')}:/root/.ccache",
        f"-v {os.path.join(cache_dir, 'local')}:/root/.local",
        f"-v {os.path.join(cache_dir, 'cache')}:/root/.cache",
    ]
    cache_dir_args = " ".join(per_user_caches)
    pwd_arg = f"-v {cwd}:{cwd}"
    cache_dir_arg = f"-v {cache_dir}:{cache_dir}"
    house_dir_arg = f"-v {house_dir}:{house_dir}" if house_dir else ""
    build_dir_arg = get_build_dir_arg(cache_dir, oneflow_src_dir)
    proxy_env_arg = get_proxy_env_args() if use_system_proxy else ""
    # Mount the source tree read-only unless building in place.
    inplace_attr = ":ro" if inplace == False else ""
    return f"{cache_dir_args} -v {oneflow_src_dir}:{oneflow_src_dir}{inplace_attr} {proxy_env_arg} {pwd_arg} {house_dir_arg} {cache_dir_arg} {build_dir_arg} -w {current_dir} --shm-size=8g"
def get_python_dir(inplace=True, oneflow_src_dir=None, cache_dir=None):
    """Path of the python package directory: the in-tree `python/` when
    building inplace, otherwise a working copy under the cache dir.
    """
    base = oneflow_src_dir if inplace else cache_dir
    # The relevant root must be provided for the chosen mode.
    assert base
    return os.path.join(base, "python")
def build_third_party(
    img_tag,
    oneflow_src_dir,
    cache_dir,
    extra_oneflow_cmake_args,
    extra_docker_args,
    bash_args,
    bash_wrap,
    dry,
    use_system_proxy,
    inplace,
):
    """Configure and build OneFlow's third-party dependencies inside the
    builder container `img_tag`.

    Generates a bash script (python/ staging + cmake configure + build of
    the `oneflow_deps` target) and runs it via `create_tmp_bash_and_run`,
    working in the `build-third-party` subdir of `cache_dir`.
    """
    third_party_build_dir = os.path.join(cache_dir, "build-third-party")
    oneflow_python_dir = get_python_dir(
        inplace=inplace, oneflow_src_dir=oneflow_src_dir, cache_dir=cache_dir
    )
    if inplace:
        inplace_arg = ""
        oneflow_python_dir_cmd = ""
    else:
        # Out-of-tree build: work on a copy of python/ under the cache dir;
        # `git clean -X` strips ignored (generated) files from the copy.
        inplace_arg = f"-DONEFLOW_PYTHON_DIR={oneflow_python_dir}"
        oneflow_python_dir_cmd = f"""
rm -rf {oneflow_python_dir}
cp -r {oneflow_src_dir}/python {oneflow_python_dir}
cd {oneflow_python_dir}
git init
git clean -nXd
git clean -fXd
cd -
"""
    cmake_cmd = " ".join(
        [
            "cmake",
            common_cmake_args(
                cache_dir=cache_dir, extra_oneflow_cmake_args=extra_oneflow_cmake_args
            ),
            # Third-party only; oneflow itself is built later by build_oneflow.
            "-DTHIRD_PARTY=ON -DONEFLOW=OFF",
            extra_oneflow_cmake_args,
            oneflow_src_dir,
            inplace_arg,
        ]
    )
    bash_cmd = f"""set -ex
export ONEFLOW_PYTHON_DIR={oneflow_python_dir}
{oneflow_python_dir_cmd}
export PATH="$PATH:$(dirname {get_python_bin('3.6')})"
export PYTHON_BIN_PATH={get_python_bin('3.6')}
$PYTHON_BIN_PATH -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r {os.path.join(oneflow_src_dir, "ci/fixed-dev-requirements.txt")}
$PYTHON_BIN_PATH -c "from __future__ import print_function;import numpy; print(numpy.get_include());"
{cmake_cmd}
cmake --build . -j `nproc` --target oneflow_deps
"""
    common_docker_args = get_common_docker_args(
        oneflow_src_dir=oneflow_src_dir,
        cache_dir=cache_dir,
        current_dir=third_party_build_dir,
        use_system_proxy=use_system_proxy,
        inplace=inplace,
    )
    docker_cmd = (
        f"docker run --network=host {extra_docker_args} --rm {common_docker_args}"
    )
    create_tmp_bash_and_run(docker_cmd, img_tag, bash_cmd, bash_args, bash_wrap, dry)
def get_python_bin(version):
    """Map a python version string ("3.5".."3.9") to the interpreter path
    inside a manylinux2014 image, e.g. "3.6" -> /opt/python/cp36-cp36m/bin/python.

    CPython builds up to 3.7 carry the trailing "m" (pymalloc) ABI flag.
    """
    assert version in ["3.5", "3.6", "3.7", "3.8", "3.9"]
    digits = version.replace(".", "")
    abi_flag = "m" if version in ("3.5", "3.6", "3.7") else ""
    py_abi = f"cp{digits}-cp{digits}{abi_flag}"
    return f"/opt/python/{py_abi}/bin/python"
def build_oneflow(
    img_tag,
    oneflow_src_dir,
    cache_dir,
    extra_oneflow_cmake_args,
    extra_docker_args,
    python_version,
    skip_wheel,
    package_name,
    house_dir,
    bash_args,
    bash_wrap,
    dry,
    use_system_proxy,
    enter_bash,
    skip_audit,
    inplace,
):
    """Compile oneflow for one python version inside the builder container
    and (unless skipped) package the wheel into `house_dir`, auditing it
    with auditwheel unless `skip_audit`.

    With `enter_bash`, drops into an interactive shell after the setup
    steps instead of building.
    """
    oneflow_build_dir = os.path.join(cache_dir, "build-oneflow")
    python_bin = get_python_bin(python_version)
    oneflow_python_dir = get_python_dir(
        inplace=inplace, oneflow_src_dir=oneflow_src_dir, cache_dir=cache_dir
    )
    if inplace:
        inplace_arg = ""
    else:
        inplace_arg = f"-DONEFLOW_PYTHON_DIR={oneflow_python_dir}"
    cmake_cmd = " ".join(
        [
            "cmake",
            common_cmake_args(
                cache_dir=cache_dir, extra_oneflow_cmake_args=extra_oneflow_cmake_args
            ),
            # Third-party deps were prebuilt by build_third_party.
            "-DTHIRD_PARTY=OFF -DONEFLOW=ON",
            extra_oneflow_cmake_args,
            "-DCMAKE_EXPORT_COMPILE_COMMANDS=1",
            f"-DPython3_EXECUTABLE={python_bin}",
            f"-DCODEGEN_PYTHON_EXECUTABLE={get_python_bin('3.6')}",
            oneflow_src_dir,
            inplace_arg,
        ]
    )
    common_docker_args = get_common_docker_args(
        oneflow_src_dir=oneflow_src_dir,
        cache_dir=cache_dir,
        current_dir=oneflow_build_dir,
        house_dir=house_dir,
        use_system_proxy=use_system_proxy,
        inplace=inplace,
    )
    docker_cmd = (
        f"docker run --network=host --rm {common_docker_args} {extra_docker_args}"
    )
    if enter_bash:
        docker_cmd += " -it"
    # Base script: put MKL on the loader path, export the cmake command for
    # reference, and install the pinned dev requirements.
    bash_cmd = f"""set -ex
export LD_LIBRARY_PATH=/opt/intel/lib/intel64_lin:/opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/intel/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH
export ONEFLOW_SRC_DIR={oneflow_src_dir}
export ONEFLOW_CMAKE_CMD="{cmake_cmd}"
{python_bin} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r {os.path.join(oneflow_src_dir, "ci/fixed-dev-requirements.txt")}
"""
    if enter_bash:
        bash_cmd += "\nbash"
    else:
        # Clean generated files (but keep oneflow/include) then configure
        # and build.
        bash_cmd += f"""
cd {oneflow_python_dir}
git clean -nXd -e \!oneflow/include -e \!oneflow/include/**
git clean -fXd -e \!oneflow/include -e \!oneflow/include/**
cd -
{cmake_cmd}
cmake --build . -j `nproc`
"""
    if skip_wheel or enter_bash:
        pass
    else:
        bash_cmd += f"""
cd {oneflow_python_dir}
{python_bin} setup.py bdist_wheel -d /tmp/tmp_wheel --package_name {package_name}
cd -
"""
    if skip_wheel == False:
        if skip_audit:
            # Ship the raw wheel without the manylinux audit/repair pass.
            bash_cmd += f"""
cp /tmp/tmp_wheel/*.whl {house_dir}
"""
        else:
            bash_cmd += f"""
auditwheel repair /tmp/tmp_wheel/*.whl --wheel-dir {house_dir}
"""
    return create_tmp_bash_and_run(
        docker_cmd, img_tag, bash_cmd, bash_args, bash_wrap, dry
    )
def is_img_existing(tag):
    """Return True if the docker image `tag` exists locally.

    Probes with `docker image inspect`, discarding its output; a zero exit
    status means the image is present. Any failure (missing image, missing
    docker, unreachable daemon) reads as "not existing".
    """
    result = subprocess.run(
        f"docker image inspect {tag}",
        shell=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Simplified from `if returncode == 0: return True else: return False`.
    return result.returncode == 0
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--custom_img_tag", type=str, required=False, default=None,
    )
    parser.add_argument(
        "--container_name", type=str, required=False, default=None,
    )
    parser.add_argument(
        "--cache_dir", type=str, required=False, default=None,
    )
    default_wheel_house_dir = os.path.join(os.getcwd(), "wheelhouse")
    parser.add_argument(
        "--wheel_house_dir", type=str, required=False, default=default_wheel_house_dir,
    )
    parser.add_argument("--python_version", type=str, required=True)
    parser.add_argument(
        "--cuda_version", type=str, required=False, default="10.2",
    )
    parser.add_argument(
        "--package_name", type=str, required=False, default="oneflow",
    )
    parser.add_argument(
        "--extra_oneflow_cmake_args", action="append", nargs="+", default=[]
    )
    parser.add_argument(
        "--extra_docker_args", type=str, required=False, default="",
    )
    parser.add_argument(
        "--oneflow_src_dir", type=str, required=False, default=os.getcwd(),
    )
    parser.add_argument(
        "--skip_third_party", default=False, action="store_true", required=False
    )
    parser.add_argument(
        "--skip_wheel", default=False, action="store_true", required=False
    )
    parser.add_argument(
        "--skip_img", default=False, action="store_true", required=False
    )
    parser.add_argument(
        "--skip_audit", default=False, action="store_true", required=False
    )
    parser.add_argument(
        "--build_img", default=False, action="store_true", required=False
    )
    parser.add_argument(
        "--use_tuna", default=False, action="store_true", required=False
    )
    parser.add_argument("--dry", default=False, action="store_true", required=False)
    parser.add_argument(
        "--use_system_proxy", default=False, action="store_true", required=False
    )
    parser.add_argument("--mlir", default=False, action="store_true", required=False)
    parser.add_argument("--gcc4", default=False, action="store_true", required=False)
    parser.add_argument("--gcc7", default=False, action="store_true", required=False)
    parser.add_argument("--gcc9", default=False, action="store_true", required=False)
    parser.add_argument(
        "--use_aliyun_mirror", default=False, action="store_true", required=False
    )
    parser.add_argument("--cpu", default=False, action="store_true", required=False)
    parser.add_argument("--bash", default=False, action="store_true", required=False)
    parser.add_argument("--inplace", default=False, action="store_true", required=False)
    parser.add_argument(
        "--shared_lib", default=False, action="store_true", required=False
    )
    parser.add_argument("--retry", default=0, type=int)
    args = parser.parse_args()
    if args.skip_img:
        # NOTE(review): this bare string is a no-op statement, not a warning;
        # it was presumably meant to be printed.
        "Arg skip_img is deprecated. Setting it has no effect. If you want to build image, use --build_img"
    if args.skip_wheel:
        args.skip_audit = True
    print("args.extra_oneflow_cmake_args", args.extra_oneflow_cmake_args)
    assert args.package_name
    # --extra_oneflow_cmake_args may be given multiple times with multiple
    # values each; flatten the list-of-lists into one space-separated string.
    extra_oneflow_cmake_args = " ".join(
        [" ".join(l) for l in args.extra_oneflow_cmake_args]
    )
    # Default to gcc7 when no toolchain flag was given.
    if (not args.gcc4) and (not args.gcc7) and (not args.gcc9):
        args.gcc7 = True
    cuda_versions = []
    if args.use_aliyun_mirror:
        extra_oneflow_cmake_args += " -DTHIRD_PARTY_MIRROR=aliyun"
    if args.shared_lib:
        extra_oneflow_cmake_args += " -DBUILD_SHARED_LIBS=ON"
    if args.cpu:
        # CPU-only builds still use the (single) cuda10.2 builder image.
        extra_oneflow_cmake_args += " -DBUILD_CUDA=OFF"
        cuda_versions = ["10.2"]
    else:
        extra_oneflow_cmake_args += " -DBUILD_CUDA=ON"
        cuda_versions = args.cuda_version.split(",")
    cuda_versions = [v.strip() for v in cuda_versions]
    if args.mlir:
        extra_oneflow_cmake_args += " -DWITH_MLIR=ON"
    else:
        extra_oneflow_cmake_args += " -DWITH_MLIR=Off"
    for cuda_version in cuda_versions:
        cache_dir = None
        def build():
            """Run one full build for the current `cuda_version`: pick or
            build the builder image, compile third-party deps, then build
            oneflow wheels for every requested python version. Reads the
            parsed CLI `args` and loop variables from the enclosing scope.
            """
            img_tag = None
            img_prefix = f"oneflow-manylinux2014-cuda{cuda_version}"
            user = getpass.getuser()
            # Known-good versioned builder image tags per CUDA line.
            versioned_img_tag = f"{img_prefix}:0.1"
            if cuda_version in ["11.0", "11.1"]:
                versioned_img_tag = f"{img_prefix}:0.2"
            enforced_oneflow_cmake_args = ""
            enforced_oneflow_cmake_args += " -DBUILD_TESTING=ON"
            # Static cuDNN linking is not supported with CUDA >= 11.
            if float(cuda_version) >= 11:
                assert (
                    "CUDNN_STATIC" not in extra_oneflow_cmake_args
                ), "CUDNN_STATIC will be set to OFF if cuda_version > 11"
                enforced_oneflow_cmake_args += " -DCUDNN_STATIC=OFF"
            extra_docker_args = args.extra_docker_args
            if not args.container_name:
                args.container_name = f"manylinux-build-run-by-{getpass.getuser()}"
            assert args.container_name
            # Remove any leftover container with the same name from a
            # previous (possibly aborted) run.
            subprocess.call(
                f"docker rm -f {args.container_name}", shell=True,
            )
            extra_docker_args += f" --name {args.container_name}"
            user_img_tag = f"{img_prefix}:{user}"
            inc_img_tag = f"oneflowinc/{versioned_img_tag}"
            img_tag = inc_img_tag
            # Image selection priority: freshly-built per-user image >
            # explicit custom tag > local versioned tag > official image.
            if args.build_img:
                img_tag = user_img_tag
            elif args.custom_img_tag:
                img_tag = args.custom_img_tag
            else:
                if is_img_existing(versioned_img_tag):
                    img_tag = versioned_img_tag
                elif is_img_existing(inc_img_tag):
                    img_tag = inc_img_tag
                else:
                    raise ValueError(
                        f"img not found, please run 'docker pull {inc_img_tag}'"
                    )
            assert img_tag is not None
            print("using", img_tag)
            if args.build_img:
                build_img(
                    cuda_version,
                    args.oneflow_src_dir,
                    args.use_aliyun_mirror,
                    args.use_tuna,
                    args.use_system_proxy,
                    img_tag,
                    args.dry,
                )
            bash_args = ""
            bash_wrap = ""
            # Select the gcc toolchain (SCL devtoolsets for gcc7/gcc9).
            if args.gcc4:
                bash_wrap = "gcc --version"
            elif args.gcc7:
                bash_wrap = """
source scl_source enable devtoolset-7
gcc --version
"""
            elif args.gcc9:
                bash_wrap = """
source scl_source enable devtoolset-9
gcc --version
"""
            else:
                raise ValueError("either one in gcc4, gcc7, gcc9 must be enabled")
            # cache_dir is module-level so the retry handler below can
            # clean it after a failed build.
            global cache_dir
            if args.cache_dir:
                cache_dir = args.cache_dir
            else:
                cache_dir = os.path.join(os.getcwd(), "manylinux2014-build-cache")
            # One cache subdir per (cuda, mlir, gcc, cpu, shared) config so
            # different configurations never share stale build state.
            sub_dir = cuda_version
            if args.mlir:
                sub_dir += "-mlir"
            if args.gcc4:
                sub_dir += "-gcc4"
            if args.gcc7:
                sub_dir += "-gcc7"
            if args.gcc9:
                sub_dir += "-gcc9"
            if args.cpu:
                assert len(cuda_versions) == 1
                sub_dir += "-cpu"
            if args.shared_lib:
                sub_dir += "-shared"
            cache_dir = os.path.join(cache_dir, sub_dir)
            if args.build_img:
                return
            if args.skip_third_party == False:
                build_third_party(
                    img_tag,
                    args.oneflow_src_dir,
                    cache_dir,
                    extra_oneflow_cmake_args + enforced_oneflow_cmake_args,
                    extra_docker_args,
                    bash_args,
                    bash_wrap,
                    args.dry,
                    args.use_system_proxy,
                    args.inplace,
                )
            print(cuda_version.split("."))
            cuda_version_literal = "".join(cuda_version.split(".")[:2])
            assert len(cuda_version_literal) == 3
            python_versions = args.python_version.split(",")
            python_versions = [pv.strip() for pv in python_versions]
            for python_version in python_versions:
                print("building for python version:", python_version)
                build_oneflow(
                    img_tag,
                    args.oneflow_src_dir,
                    cache_dir,
                    extra_oneflow_cmake_args + enforced_oneflow_cmake_args,
                    extra_docker_args,
                    python_version,
                    args.skip_wheel,
                    args.package_name,
                    args.wheel_house_dir,
                    bash_args,
                    bash_wrap,
                    args.dry,
                    args.use_system_proxy,
                    args.bash,
                    args.skip_audit,
                    args.inplace,
                )
        try:
            build()
        except subprocess.CalledProcessError as e:
            print("failed: ", e.cmd, e.args)
            # With --retry > 0, wipe the (possibly corrupted) cache dir and
            # try once more; otherwise fail the whole script.
            if cache_dir and args.retry > 0:
                print("clean: ", cache_dir, flush=True)
                print("start retrying...", flush=True)
                if args.dry:
                    pass
                else:
                    force_rm_dir(cache_dir)
                build()
            else:
                exit(1)
# Launch an interactive shell in the release manylinux CUDA 11.0 builder
# image, with the current directory bind-mounted and used as the workdir.
set -ex
docker run --rm -it \
    -v `pwd`:`pwd` \
    -w `pwd` oneflow:rel-manylinux2014-cuda-11.0 bash
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Chinese docs: CN_DOCS=1 switches conf.py to the translated docstrings;
# output goes to build-cn.
html_cn: Makefile
	@CN_DOCS=1 $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)-cn" $(SPHINXOPTS) $(O)
html: Makefile
	@$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# Remove both English and Chinese build output.
clean: Makefile
	@rm -rf build build-cn
sphinx==3.5.4
jinja2<3.1
recommonmark==0.6.0
furo==2021.4.11b34
sphinx-copybutton==0.5.0
# above are dev dependencies
--pre
--find-links https://staging.oneflow.info/branch/master/cpu
oneflow
oneflow.autograd
================================================
Functions and classes for autograd.
---------------------------------------------------
.. currentmodule:: oneflow.autograd
.. autoclass:: oneflow.autograd.Function
:members: apply,
:special-members: __call__,
.. automodule:: oneflow.autograd
:members: grad,
backward,
from .math_ops import *
from .activation import *
import oneflow
from oneflow.framework.docstr.utils import reset_docstr
reset_docstr(
oneflow.nn.ReLU,
r"""ReLU(inplace=False)
ReLU 激活函数,对张量中的每一个元素做 element-wise 运算,公式如下:
:math:`\text{ReLU}(x) = (x)^+ = \max(0, x)`
参数:
inplace: 是否做 in-place 操作。 默认为 ``False``
形状:
- Input: :math:`(N, *)` 其中 `*` 的意思是,可以指定任意维度
- Output: :math:`(N, *)` 输入形状与输出形状一致
示例:
.. code-block:: python
>>> import oneflow as flow
>>> import numpy as np
>>> relu = flow.nn.ReLU()
>>> ndarr = np.asarray([1, -2, 3])
>>> x = flow.Tensor(ndarr)
>>> relu(x)
tensor([1., 0., 3.], dtype=oneflow.float32)
""",
)
import oneflow
from oneflow.framework.docstr.utils import reset_docstr
reset_docstr(
oneflow.add,
r"""add(input, other)
计算 `input` 和 `other` 的和。支持 element-wise、标量和广播形式的加法。
公式为:
.. math::
out = input + other
示例:
.. code-block:: python
>>> import numpy as np
>>> import oneflow as flow
# element-wise 加法
>>> x = flow.tensor(np.random.randn(2,3), dtype=flow.float32)
>>> y = flow.tensor(np.random.randn(2,3), dtype=flow.float32)
>>> out = flow.add(x, y).numpy()
>>> out.shape
(2, 3)
# 标量加法
>>> x = 5
>>> y = flow.tensor(np.random.randn(2,3), dtype=flow.float32)
>>> out = flow.add(x, y).numpy()
>>> out.shape
(2, 3)
# 广播加法
>>> x = flow.tensor(np.random.randn(1,1), dtype=flow.float32)
>>> y = flow.tensor(np.random.randn(2,3), dtype=flow.float32)
>>> out = flow.add(x, y).numpy()
>>> out.shape
(2, 3)
""",
)
oneflow.comm
===================================
oneflow communication function
----------------------------------
.. currentmodule:: oneflow.comm
.. automodule:: oneflow.comm
:members: all_reduce,
all_gather,
broadcast,
scatter,
all_to_all,
reduce,
gather,
reduce_scatter,
send,
recv,
barrier,
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
import oneflow
sys.path.insert(0, os.path.abspath("."))
CN_DOCS = os.getenv("CN_DOCS")
if CN_DOCS:
import cn
# -- Project information -----------------------------------------------------
project = u"OneFlow"
copyright = u"2020, OneFlow"
author = u"OneFlow"
# The short X.Y version
version = u""
# The full version, including alpha/beta/rc tags
release = u""
# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Sphinx extensions loaded for this build: autodoc pulls in docstrings,
# napoleon parses Google/NumPy-style docstring sections, recommonmark
# enables Markdown sources, and sphinx_copybutton adds a copy button to
# code samples.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "recommonmark",
    "sphinx_copybutton",
]

# Paths that contain templates, relative to this directory.
templates_path = ["_templates"]

# Mapping of source-file suffixes to their markup parser; both .txt and
# .md are treated as Markdown (parsed via recommonmark).
source_suffix = {
    ".rst": "restructuredtext",
    ".txt": "markdown",
    ".md": "markdown",
}
# The master toctree document — the root page of the documentation tree.
master_doc = "index"

# The language for content autogenerated by Sphinx. Refer to the Sphinx
# documentation for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = u"en"

# List of patterns, relative to the source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

# Pygments (syntax highlighting) style; None selects the theme's default.
pygments_style = None
# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the Sphinx
# documentation for a list of builtin themes.
html_theme = "furo"

# Theme options are theme-specific and customize the look and feel of a
# theme further. For a list of options available for each theme, see the
# theme's documentation.
#
# html_theme_options = {}

# Paths that contain custom static files (such as style sheets), relative
# to this directory. They are copied after the builtin static files, so a
# file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by the theme itself. Builtin themes use these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for the HTML help builder.
htmlhelp_basename = "OneFlowdoc"
# -- Options for LaTeX output ------------------------------------------------

# All entries commented out: the LaTeX builder runs with Sphinx defaults.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # LaTeX figure (float) alignment.
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples:
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (
        master_doc,
        "OneFlow.tex",
        u"OneFlow API Reference",
        u"Oneflow Contributors",
        "manual",
    ),
]
# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples:
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "oneflow", u"OneFlow API Reference", [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples:
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [
    (
        master_doc,
        "OneFlow",
        u"OneFlow API Reference",
        author,
        "OneFlow",
        "OneFlow API Reference",
        "Miscellaneous",
    ),
]
# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be an ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ["search.html"]

# -- Extension configuration -------------------------------------------------

# autodoc defaults: include members that have no docstring, but never
# document the listed members (forward/extra_repr/reset_parameters are
# framework plumbing rather than public API).
autodoc_default_options = {
    "undoc-members": True,
    "exclude-members": "forward, extra_repr, reset_parameters",
}
def should_skip_member(app, what, name, obj, skip, options):
    """``autodoc-skip-member`` callback: hide deprecated and dunder members.

    Args:
        app: the Sphinx application object (unused).
        what: the type of the parent object (e.g. "class", "module").
        name: the name of the member being considered.
        obj: the member object itself.
        skip: autodoc's own decision so far; honored when already True.
        options: the autodoc directive options (unused).

    Returns:
        True if the member should be omitted from the generated docs.
    """
    # Members explicitly marked deprecated by oneflow are dropped; print
    # them so doc builds show what was filtered out.
    is_deprecated = oneflow.is_deprecated(obj)
    if is_deprecated:
        print("skipping deprecated", what, name, obj)
    # Dunder attributes that carry no user-facing documentation value.
    magical = name in ("__weakref__", "__doc__", "__module__", "__dict__")
    return skip or is_deprecated or magical
def setup(app):
    """Sphinx extension hook: register the member-skipping filter so it
    runs for every member autodoc considers."""
    app.connect("autodoc-skip-member", should_skip_member)
oneflow.cuda
===================================
OneFlow CUDA
----------------------------------
.. currentmodule:: oneflow.cuda
.. automodule:: oneflow.cuda
:members: is_available,
device_count,
current_device,
set_device,
synchronize,
manual_seed_all,
manual_seed,
empty_cache,
HalfTensor,
FloatTensor,
DoubleTensor,
BoolTensor,
ByteTensor,
CharTensor,
IntTensor,
LongTensor,
\ No newline at end of file
oneflow.distributed
=========================================================
.. currentmodule:: oneflow.distributed
Run the command below to see more about its usage.
::
python3 -m oneflow.distributed.launch -h
.. code-block::
usage: launch.py [-h] [--nnodes NNODES] [--node_rank NODE_RANK]
[--nproc_per_node NPROC_PER_NODE] [--master_addr MASTER_ADDR]
[--master_port MASTER_PORT] [-m] [--no_python]
[--redirect_stdout_and_stderr] [--logdir LOGDIR]
training_script ...
OneFlow distributed training launch helper utility that will spawn up multiple
distributed processes
positional arguments:
training_script The full path to the single GPU training program/script to be
launched in parallel, followed by all the arguments for the
training script
training_script_args
optional arguments:
-h, --help show this help message and exit
--nnodes NNODES The number of nodes to use for distributed training
--node_rank NODE_RANK
The rank of the node for multi-node distributed training
--nproc_per_node NPROC_PER_NODE
The number of processes to launch on each node, for GPU
training, this is recommended to be set to the number of GPUs in
your system so that each process can be bound to a single GPU.
--master_addr MASTER_ADDR
Master node (rank 0)'s address, should be either the IP address
or the hostname of node 0, for single node multi-proc training,
the --master_addr can simply be 127.0.0.1
--master_port MASTER_PORT
Master node (rank 0)'s free port that needs to be used for
communication during distributed training
-m, --module Changes each process to interpret the launch script as a python
module, executing with the same behavior as 'python -m'.
--no_python Do not prepend the training script with "python" - just exec it
directly. Useful when the script is not a Python script.
--redirect_stdout_and_stderr
write the stdout and stderr to files 'stdout' and 'stderr'. Only
available when logdir is set
--logdir LOGDIR Relative path to write subprocess logs to. Passing in a relative
path will create a directory if needed. Note that successive
runs with the same path to write logs to will overwrite existing
logs, so be sure to save logs as needed.
\ No newline at end of file
oneflow.env
===================================
Environment
----------------------------------
.. currentmodule:: oneflow
.. autofunction:: oneflow.env.get_world_size
.. autofunction:: oneflow.env.get_rank
.. autofunction:: oneflow.env.get_local_rank
.. autofunction:: oneflow.env.get_node_size
.. autofunction:: oneflow.env.init_rdma
.. autofunction:: oneflow.env.rdma_is_initialized
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment