Commit 9a16b7ea authored by aiss's avatar aiss
Browse files

modify version and dcu_version

parent af82b300
# DeepSpeed
## 安装
DeepSpeed 支持
+ Python 3.7.
+ Python 3.8.
+ Python 3.9.
### 使用pip安装
DeepSpeed whl包下载目录:[https://cancon.hpccube.com:65024/4/main/deepspeed/dtk23.04](https://cancon.hpccube.com:65024/4/main/deepspeed/dtk23.04)
根据对应的pytorch版本和python版本,下载对应deepspeed的whl包
```shell
pip install deepspeed*.whl  # 下载的deepspeed的whl包
```
### 使用源码安装
编译之前,需要先安装对应版本python,安装相应的三方包依赖项,并配置DTK环境变量(以Centos7.x为例)。
pytorch whl包下载目录:[https://cancon.hpccube.com:65024/4/main/pytorch/dtk23.04](https://cancon.hpccube.com:65024/4/main/pytorch/dtk23.04)
根据python版本,下载对应pytorch的whl包。如果是基于pytorch1.13,需要注释掉op_builder/builder.py中大概L659:
```bash
#sources[i] = str(src.relative_to(curr_file))
```
安装依赖项:
```bash
# 安装三方包的源
yum install epel-release -y
# 安装相关依赖项
yum install libffi-devel -y
yum -y install openssl openssl-devel
yum install -y libaio-devel
yum install -y libaio
ls -l
# 若python内未包含相关项,需基于上面安装的三方包重新源码编译python,再配置python环境
python3 -m pip install --upgrade pip setuptools
pip3 install wheel -i https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install ninja -i https://pypi.tuna.tsinghua.edu.cn/simple
```
下载DTK并配置环境变量:
```bash
# DTK tar包下载目录:光合社区/资源工具/DCU Toolkit/DTK23.04(https://cancon.hpccube.com:65024/1/main/DTK-23.04),根据系统选择对应DTK的tar包,并解压至/opt目录。
# 如果使用的是dtk23.04之前的版本,可以参考下方图片的方式修改torch中的hipify文件
export ROCM_PATH=/opt/dtk-23.04
source /opt/dtk-23.04/env.sh
```
![logo](hipify_20230511113250.png)
编译deepspeed
```bash
# 下载源码
git clone -b ds-v0.9.2-rocm http://developer.hpccube.com/codes/aicomponent/deepspeed.git
cd deepspeed
sh requirements/run_pip.sh
DS_BUILD_STRING=.dtk22.10.1.torch1.10 DS_BUILD_RANDOM_LTD=0 DS_BUILD_QUANTIZER=0 DS_BUILD_TRANSFORMER_INFERENCE=0 DS_BUILD_OPS=1 verbose=1 CXX=hipcc CC=hipcc python3 setup.py install bdist_wheel
```
安装deepspeed
```bash
# deepspeed的whl包会在dist文件夹生成
pip3 install ./dist/deepspeed*
```
## Note
+ 若使用 pip install 下载安装过慢,可添加源:-i https://pypi.tuna.tsinghua.edu.cn/simple/
+ `DS_BUILD_STRING`用于设置编译的版本号后缀,例如版本号为.dtk22.10.1.torch1.10,其中dtkxxx为编译所基于的dtk版本号,torchxxx为编译所依赖的torch版本号等。
+ deepspeed共设置两种版本号查询方式:`__version__`和`__dcu_version__`,分别标识主版本号(与官网版本一致)和基于dcu适配的内部版本号。
......@@ -36,7 +36,7 @@ from .runtime import DeepSpeedOptimizer, ZeROOptimizer
from .pipe import PipelineModule
from .git_version_info import version, git_hash, git_branch
from .git_version_info import version, dcu_version, git_hash, git_branch
def _parse_version(version_str):
......@@ -47,6 +47,8 @@ def _parse_version(version_str):
# Export version information
__version__ = version
#aiss
__dcu_version__ = dcu_version
__version_major__, __version_minor__, __version_patch__ = _parse_version(__version__)
__git_hash__ = git_hash
__git_branch__ = git_branch
......
......@@ -15,6 +15,7 @@ except ModuleNotFoundError:
version = "0.0.0"
git_hash = '[none]'
git_branch = '[none]'
dcu_version = '[none]'
from .ops.op_builder.all_ops import ALL_OPS
installed_ops = dict.fromkeys(ALL_OPS.keys(), False)
......
......@@ -179,7 +179,8 @@ print(f'Install Ops={install_ops}')
# Write out version/git info.
git_hash_cmd = "git rev-parse --short HEAD"
git_branch_cmd = "git rev-parse --abbrev-ref HEAD"
if command_exists('git') and 'DS_BUILD_STRING' not in os.environ:
#if command_exists('git') and 'DS_BUILD_STRING' not in os.environ:
if command_exists('git'):
try:
result = subprocess.check_output(git_hash_cmd, shell=True)
git_hash = result.decode('utf-8').strip()
......@@ -216,7 +217,7 @@ if sys.platform == "win32":
# Parse the DeepSpeed version string from version.txt.
version_str = open('version.txt', 'r').read().strip()
dcu_version_str = version_str
# Build specifiers like .devX can be added at install time. Otherwise, add the git hash.
# Example: DS_BUILD_STRING=".dev20201022" python setup.py sdist bdist_wheel.
......@@ -225,14 +226,16 @@ if 'DS_BUILD_STRING' in os.environ:
# Build string env specified, probably building for distribution.
with open('build.txt', 'w') as fd:
fd.write(os.environ.get('DS_BUILD_STRING'))
version_str += os.environ.get('DS_BUILD_STRING')
dcu_version_str += f'+{git_hash}'+ os.environ.get('DS_BUILD_STRING')
elif os.path.isfile('build.txt'):
# build.txt exists, probably installing from distribution.
with open('build.txt', 'r') as fd:
version_str += fd.read().strip()
dcu_version_str += fd.read().strip()
#aiss
dcu_version_str += f'+{git_hash}'
else:
# None of the above, probably installing from source.
version_str += f'+{git_hash}'
dcu_version_str += f'+{git_hash}'
torch_version = ".".join([TORCH_MAJOR, TORCH_MINOR])
bf16_support = False
......@@ -261,9 +264,10 @@ torch_info = {
"hip_version": hip_version
}
print(f"version={version_str}, git_hash={git_hash}, git_branch={git_branch}")
print(f"version={version_str}, dcu_version={dcu_version_str}, git_hash={git_hash}, git_branch={git_branch}")
with open('deepspeed/git_version_info_installed.py', 'w') as fd:
fd.write(f"version='{version_str}'\n")
fd.write(f"dcu_version='{dcu_version_str}'\n")
fd.write(f"git_hash='{git_hash}'\n")
fd.write(f"git_branch='{git_branch}'\n")
fd.write(f"installed_ops={install_ops}\n")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment