Commit 321b42b8 authored by limm

support no transcoding

parent c0ccf15e
# <div align="center"><strong>MMCV</strong></div>
## Introduction
MMCV is a foundational library for computer vision research that provides: image processing, visualization of images and annotation results, image transformations, a variety of CNN network architectures, and high-quality implementations of common CUDA operators. The MMCV release in the DAS software stack not only guarantees that the component's core functionality works on DCU accelerator cards, but is also deeply customized and optimized for the DCU hardware architecture, so developers can migrate applications to DCU accelerators and gain performance at very low cost.
## Installation
Supported Python versions:
+ Python 3.7, 3.8, 3.9
Supported component combinations:
| PyTorch version | fastpt version | MMCV version | DTK version | Python version | Recommended build mode |
| --------------- | -------------- | ------------ | ----------- | ---------------- | ---------------------- |
| 2.5.1 | 2.1.0 | 2.0.1 | >= 25.04 | 3.8, 3.10, 3.11 | fastpt, no transcoding |
| 2.4.1 | 2.0.1 | 2.0.1 | >= 25.04 | 3.8, 3.10, 3.11 | fastpt, no transcoding |
| Other | Other | Other | Other | 3.8, 3.10, 3.11 | HIP transcoding |
+ With PyTorch 2.4.1 or newer and DTK 25.04 or newer, the fastpt no-transcoding build is recommended.
### 1. Install via pip
Download the mmcv whl package from [光合开发者社区](https://download.sourcefind.cn:65024/4/main/mmcv), choosing the mmcv whl that matches your PyTorch and Python versions.
```shell
pip install torch*   # the downloaded torch whl package
pip install fastpt* --no-deps   # the downloaded fastpt whl package
source /usr/local/bin/fastpt -E
pip install mmcv*    # the downloaded mmcv-fastpt whl package
```
### 2. Install by building from source
#### Build environment setup
Two ways to prepare the environment are provided; both target the fastpt no-transcoding build.
1. Based on the 光源 PyTorch base image: download the image from [光合开发者社区](https://sourcefind.cn/#/image/dcu/pytorch), choosing the version that matches your PyTorch, Python, DTK, and operating system.
2. Based on an existing Python environment: install PyTorch first. Download the PyTorch and fastpt whl packages from [光合开发者社区](https://sourcefind.cn/#/image/dcu/pytorch), choosing the whl that matches your Python and DTK versions, then install:
```shell
pip install torch*   # the downloaded torch whl package
pip install fastpt* --no-deps   # the downloaded fastpt whl package; install torch first, then fastpt
pip install setuptools==59.5.0 wheel
```
#### Build and install from source
- Download the source code
```shell
git clone http://developer.sourcefind.cn/codes/OpenDAS/mmcv.git  # switch branches as needed before building
```
- Two source build modes are provided (run the following inside the mmcv directory); a combined sketch follows the block:
```
1. Set the no-transcoding build environment variables
export FORCE_CUDA=1
source /usr/local/bin/fastpt -C
2. Build the whl package, then install it
MMCV_WITH_OPS=1 python3 setup.py -v bdist_wheel
pip install dist/mmcv*
3. Or build and install directly from source
MMCV_WITH_OPS=1 python3 setup.py install
```
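Putting the pieces together, a possible end-to-end fastpt no-transcoding build (a sketch assembled from the commands documented above; adjust paths and branches to your setup):
```shell
# Sketch: clone, enable the fastpt no-transcoding environment, build, install.
git clone http://developer.sourcefind.cn/codes/OpenDAS/mmcv.git
cd mmcv
export FORCE_CUDA=1
source /usr/local/bin/fastpt -C
MMCV_WITH_OPS=1 python3 setup.py -v bdist_wheel
pip install dist/mmcv*
```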
#### Notes
+ If downloads via pip install are slow, add the Tsinghua PyPI mirror: -i https://pypi.tuna.tsinghua.edu.cn/simple/
+ ROCM_PATH is the DTK installation path, /opt/dtk by default
+ Building against PyTorch 2.5.1 requires C++17 support: open setup.py and change -std=c++14 to -std=c++17 (see the sketch below)
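A possible one-liner for the setup.py edit above, assuming GNU sed is available (a convenience sketch, not part of the original instructions):
```shell
# Replace every -std=c++14 flag in setup.py with -std=c++17.
sed -i 's/-std=c++14/-std=c++17/g' setup.py
```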
## Verification
- Run `python -c "import mmcv; print(mmcv.__version__)"`; the version number is kept in sync with the upstream release, e.g. 2.0.1
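A quick sanity check (the version string shown is illustrative):
```shell
python -c "import mmcv; print(mmcv.__version__)"
# 2.0.1
```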
## Known Issues
-
......
import os
import re
import subprocess
from pathlib import Path

import torch

ROOT_DIR = Path(__file__).parent.resolve()


def _run_cmd(cmd, shell=False):
    try:
        return subprocess.check_output(
            cmd, cwd=ROOT_DIR, stderr=subprocess.DEVNULL,
            shell=shell).decode("ascii").strip()
    except Exception:
        return None


def _get_version():
    # Fall back to a default if mmcv/version.py is missing or unparsable.
    version = '2.2.0'
    if os.path.exists(ROOT_DIR / "mmcv/version.py"):
        with open(ROOT_DIR / "mmcv/version.py", "r") as f:
            content = f.read().strip()
        version_match = re.search(
            r"__version__\s*=\s*['\"]([^'\"]+)['\"]", content)
        if version_match:
            version = version_match.group(1)
    # BUILD_VERSION, if set, overrides the parsed version.
    if os.getenv("BUILD_VERSION"):
        version = os.getenv("BUILD_VERSION")
    return version
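# Hedged usage sketch (not part of the original file); assumes torch is
# importable and mmcv/version.py declares __version__ = '2.0.1':
#   $ python -c "from get_version import _get_version; print(_get_version())"
#   2.0.1
#   $ BUILD_VERSION=9.9.9 python -c "from get_version import _get_version; print(_get_version())"
#   9.9.9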
def _update_hcu_version(version, das_version, sha, abi, dtk, torch_version,
                        branch):
    """Rewrite the value of __hcu_version__ in mmcv/version.py without
    touching anything else, e.g. setting it to '2.2.0+das.opt1.dtk25041'."""
    sha = "Unknown" if sha is None else sha
    hcu_version = f"{das_version}"
    file_path = ROOT_DIR / "mmcv" / "version.py"
    # Read the whole file.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    # Match the __hcu_version__ line and swap in the new quoted value.
    pattern = r"(__hcu_version__\s*=\s*)['\"]([^'\"]*)['\"]"
    replacement = rf"\g<1>'{hcu_version}'"
    updated_content = re.sub(pattern, replacement, content)
    # Write the result back.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(updated_content)
    return hcu_version
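# Minimal demonstration of the substitution above (illustrative, not part of
# the original file): \g<1> keeps the "__hcu_version__ = " prefix and only
# the quoted value changes.
#   >>> re.sub(r"(__hcu_version__\s*=\s*)['\"]([^'\"]*)['\"]",
#   ...        r"\g<1>'2.0.1+das.opt1.dtk25041'",
#   ...        "__hcu_version__ = '2.0.1'")
#   "__hcu_version__ = '2.0.1+das.opt1.dtk25041'"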
def get_origin_version():
    version_file = ROOT_DIR / 'mmcv/version.py'
    with open(version_file, encoding='utf-8') as f:
        exec(compile(f.read(), version_file, 'exec'))
    return locals()['__version__']


def _make_version_file(version, das_version, sha, abi, dtk, torch_version,
                       branch):
    sha = "Unknown" if sha is None else sha
    # torch_version = '.'.join(torch_version.split('.')[:2])
    # hcu_version = f"{das_version}.git{sha}.abi{abi}.dtk{dtk}.torch{torch_version}"
    hcu_version = f"{das_version}"
    version_path = ROOT_DIR / "mmcv" / "version.py"
    with open(version_path, "a", encoding="utf-8") as f:
        # f.write(f"version = '{version}'\n")
        # f.write(f"git_hash = '{sha}'\n")
        # f.write(f"git_branch = '{branch}'\n")
        # f.write(f"abi = 'abi{abi}'\n")
        # f.write(f"dtk = '{dtk}'\n")
        # f.write(f"torch_version = '{torch_version}'\n")
        f.write(f"__hcu_version__ = '{hcu_version}'\n")
    return hcu_version


def _get_pytorch_version():
    if "PYTORCH_VERSION" in os.environ:
        return f"{os.environ['PYTORCH_VERSION']}"
    return torch.__version__


def get_version():
    ROCM_HOME = os.getenv("ROCM_PATH")
    print(f"ROCM_HOME = {ROCM_HOME}")
    if not ROCM_HOME:
        return get_origin_version()
    sha = _run_cmd(["git", "rev-parse", "HEAD"])
    if sha:  # _run_cmd returns None outside a git checkout
        sha = sha[:7]
    branch = _run_cmd(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    tag = _run_cmd(["git", "describe", "--tags", "--exact-match", "@"])
    das_tag = _run_cmd(["git", "describe", "--abbrev=0"])
    print("-- Git branch:", branch)
    print("-- Git SHA:", sha)
    print("-- Git tag:", tag)
    torch_version = _get_pytorch_version()
    print("-- PyTorch:", torch_version)
    version = _get_version()
    print("-- Building version", version)
    # das_version = tag
    das_version = version + "+das.opt1"
    print("-- Building das_version", das_version)
    abi = _run_cmd(
        "echo '#include <string>' | gcc -x c++ -E -dM - | "
        "fgrep _GLIBCXX_USE_CXX11_ABI | awk '{print $3}'",
        shell=True)
    print("-- _GLIBCXX_USE_CXX11_ABI:", abi)
    dtk = _run_cmd(["cat", os.path.join(ROCM_HOME, '.info/rocm_version')])
    dtk = ''.join(dtk.split('.'))
    print("-- DTK:", dtk)
    das_version += ".dtk" + dtk
    # return _make_version_file(version, das_version, sha, abi, dtk,
    #                           torch_version, branch)
    return _update_hcu_version(version, das_version, sha, abi, dtk,
                               torch_version, branch)
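# Hedged usage sketch (illustrative; assumes ROCM_PATH points at a DTK
# install such as /opt/dtk and that this runs inside a git checkout):
#   $ ROCM_PATH=/opt/dtk python -c "from get_version import get_version; print(get_version())"
#   ROCM_HOME = /opt/dtk
#   ...
#   2.0.1+das.opt1.dtk25041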
......@@ -88,7 +88,7 @@ __global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2,
}
}
#if __CUDACC__ >= 530
__device__ __forceinline__ __half __half_area(const __half x1, const __half y1,
const __half x2, const __half y2,
const __half offset) {
......@@ -142,6 +142,6 @@ __device__ void bbox_overlaps_cuda_kernel_half(
ious[index] = __hdiv(interS, baseS);
}
}
#endif // __CUDACC__ >= 530
#endif // BBOX_OVERLAPS_CUDA_KERNEL_CUH
......@@ -15,13 +15,13 @@
#ifdef PARROTS_USE_HALF
#include <cuda_fp16.h>
#endif
#ifdef __CUDACC__
#define CUDA_INTRINSIC_FUNC(Expr) Expr
#else
#define CUDA_INTRINSIC_FUNC(Expr)
#endif
#if !defined(__CUDACC__) || __CUDACC__ >= 300
#ifdef PARROTS_USE_HALF
......@@ -104,6 +104,6 @@ __device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask,
#endif // CUDA_VERSION < 9000
#endif // !defined(__CUDACC__) || __CUDACC__ >= 300
#endif // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
......@@ -42,9 +42,9 @@ __device__ __forceinline__ static void reduceAdd(double *address, double val) {
atomicAdd(address, val);
}
#else
#ifdef __CUDACC__
__device__ __forceinline__ static void reduceAdd(float *address, float val) {
#if (__CUDACC__ < 200)
#ifdef _MSC_VER
#pragma message( \
"compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32")
......@@ -65,7 +65,7 @@ __device__ __forceinline__ static void reduceAdd(float *address, float val) {
}
__device__ __forceinline__ static void reduceAdd(double *address, double val) {
#if (__CUDACC__ < 600)
#ifdef _MSC_VER
#pragma message( \
"compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64")
......@@ -85,7 +85,7 @@ __device__ __forceinline__ static void reduceAdd(double *address, double val) {
atomicAdd(address, val);
#endif
}
#endif // __CUDACC__
#endif // MMCV_WITH_HIP
template <typename T>
......
......@@ -60,7 +60,7 @@ using phalf = float16;
}()
/** atomicAdd **/
#if defined(__CUDACC__) && __CUDACC__ < 600
static __inline__ __device__ double atomicAdd(double* address, double val) {
unsigned long long int* address_as_ull = (unsigned long long int*)address;
......
......@@ -561,7 +561,7 @@ struct TensorView {
}
TV_HOST_DEVICE_INLINE scalar_t &operator()() {
#if defined TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mPtr != nullptr,
"you want get value but the view is empty.%s", "\n");
TV_DEVICE_REQUIRE(mShape.ndim() == 0,
......@@ -577,7 +577,7 @@ struct TensorView {
}
TV_HOST_DEVICE_INLINE const scalar_t &operator()() const {
#if defined TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mPtr != nullptr,
"you want get value but the view is empty.%s", "\n");
TV_DEVICE_REQUIRE(mShape.ndim() == 0,
......@@ -595,7 +595,7 @@ struct TensorView {
template <class T1>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1) {
#if defined TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -612,7 +612,7 @@ struct TensorView {
template <class T1, class T2>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2) {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -635,7 +635,7 @@ struct TensorView {
template <class T1, class T2, class T3>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2, T3 i3) {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -663,7 +663,7 @@ struct TensorView {
template <class T1, class T2, class T3, class T4>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2, T3 i3, T4 i4) {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -697,7 +697,7 @@ struct TensorView {
template <class T1>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1) const {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -715,7 +715,7 @@ struct TensorView {
template <class T1, class T2>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2) const {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -739,7 +739,7 @@ struct TensorView {
template <class T1, class T2, class T3>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2, T3 i3) const {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -768,7 +768,7 @@ struct TensorView {
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2, T3 i3,
T4 i4) const {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
......@@ -801,7 +801,7 @@ struct TensorView {
TV_HOST_DEVICE_INLINE scalar_t &operator[](int idx) {
#ifdef TV_DEBUG
#if defined(__CUDACC__)
TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),
"index(%d) out-of-range: [0, %ld)\n", int(idx), size());
#else
......
......@@ -4,7 +4,7 @@
// Disable fp16 on ROCm device
#ifndef MMCV_WITH_HIP
#if __CUDACC__ >= 530
template <>
__global__ void bbox_overlaps_cuda_kernel<at::Half>(
const at::Half* bbox1, const at::Half* bbox2, at::Half* ious,
......@@ -16,7 +16,7 @@ __global__ void bbox_overlaps_cuda_kernel<at::Half>(
num_bbox2, mode, aligned, offset);
}
#endif // __CUDACC__ >= 530
#endif // MMCV_WITH_HIP
void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
......
# Copyright (c) OpenMMLab. All rights reserved.
__version__ = '2.0.1'
__hcu_version__ = '2.0.1'
def parse_version_info(version_str: str, length: int = 4) -> tuple:
......@@ -32,4 +33,4 @@ def parse_version_info(version_str: str, length: int = 4) -> tuple:
version_info = tuple(int(x) for x in __version__.split('.')[:3])
__all__ = ['__version__', '__hcu_version__', 'version_info', 'parse_version_info']
......@@ -4,10 +4,7 @@ import platform
import re
from pkg_resources import DistributionNotFound, get_distribution, parse_version
from setuptools import find_packages, setup
from get_version import get_version
EXT_TYPE = ''
try:
......@@ -27,7 +24,6 @@ except ModuleNotFoundError:
cmd_class = {}
print('Skip building ext ops due to the absence of torch.')
def choose_requirement(primary, secondary):
"""If some version of primary requirement installed, return primary, else
return secondary."""
......@@ -39,65 +35,11 @@ def choose_requirement(primary, secondary):
return str(primary)
def parse_requirements(fname='requirements/runtime.txt', with_version=True):
"""Parse the package dependencies listed in a requirements file but strips
......@@ -189,7 +131,6 @@ except ImportError:
for main, secondary in CHOOSE_INSTALL_REQUIRES:
install_requires.append(choose_requirement(main, secondary))
def get_extensions():
    extensions = []
......@@ -319,7 +260,7 @@ def get_extensions():
        define_macros += [('MMCV_WITH_CUDA', None)]
        cuda_args = os.getenv('MMCV_CUDA_ARGS')
        extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
        if platform.system() != 'Windows':
            extra_compile_args['nvcc'] += \
                ['--gpu-max-threads-per-block=1024']
        op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
......