Commit 321b42b8 authored by limm

support no transcoding

parent c0ccf15e
# <div align="center"><strong>MMCV</strong></div>
## Introduction
MMCV is a foundational library for computer vision research. It mainly provides: image processing, visualization of images and annotation results, image transformations, a variety of CNN architectures, and high-quality implementations of common CUDA operators. The MMCV build in the DAS software stack not only keeps the component's core functionality available on DCU accelerators, but is also deeply customized and optimized for the DCU hardware architecture, allowing developers to migrate applications to DCU accelerators and gain performance at very low cost.
## Installation
Supported component combinations

| PyTorch version | fastpt version | MMCV version | DTK version | Python version | Recommended build method |
| --------------- | -------------- | ------------ | ----------- | ----------------- | -------------------------- |
| 2.5.1 | 2.1.0 | 2.0.1 | >= 25.04 | 3.8, 3.10, 3.11 | fastpt without transcoding |
| 2.4.1 | 2.0.1 | 2.0.1 | >= 25.04 | 3.8, 3.10, 3.11 | fastpt without transcoding |
| Other | Other | Other | Other | 3.8, 3.10, 3.11 | HIP transcoding |

+ With PyTorch >= 2.4.1 and DTK >= 25.04, building with fastpt without transcoding is recommended.
### 1. Install via pip
mmcv whl package download directory: [光合开发者社区](https://download.sourcefind.cn:65024/4/main/mmcv). Choose the mmcv whl package that matches your PyTorch and Python versions.
```shell
pip install torch*              # the downloaded torch whl package
pip install fastpt* --no-deps   # the downloaded fastpt whl package
source /usr/local/bin/fastpt -E
pip install mmcv*               # the downloaded mmcv-fastpt whl package
```
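After the packages above are installed, a quick sanity check can confirm that the patched torch sees the DCU device and that mmcv imports cleanly. This is a minimal sketch, assuming the fastpt-patched torch exposes DCU devices through the standard `torch.cuda` API:
```python
# Minimal post-install sanity check (sketch; assumes the fastpt-patched torch
# reports DCU devices via the standard torch.cuda API).
import torch
import mmcv

print("torch :", torch.__version__)
print("mmcv  :", mmcv.__version__)
print("device visible:", torch.cuda.is_available())
```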
### 2. Install by building from source
#### Build environment setup
The build is based on fastpt without transcoding. Two ways to prepare the environment are provided:
1. Based on the 光源 PyTorch base image. Image download address: [光合开发者社区](https://sourcefind.cn/#/image/dcu/pytorch); download the image that matches your PyTorch, Python, DTK, and OS versions.
2. Based on an existing Python environment: install PyTorch and fastpt. fastpt whl package download directory: [光合开发者社区](https://sourcefind.cn/#/image/dcu/pytorch); download the pytorch whl package that matches your Python and DTK versions. Install as follows:
```shell
pip install torch*              # the downloaded torch whl package
pip install fastpt* --no-deps   # the downloaded fastpt whl package; install torch first, then fastpt
pip install setuptools==59.5.0 wheel
```
#### Source build and installation
- Download the code
```shell
git clone http://developer.sourcefind.cn/codes/OpenDAS/mmcv.git  # switch branches as needed for your build
```
- Two source build methods are provided (run the following inside the mmcv directory):
```
1. Set the environment variables for the no-transcoding build
   export FORCE_CUDA=1
   source /usr/local/bin/fastpt -C
2. Build the whl package and install it
   MMCV_WITH_OPS=1 python3 setup.py -v bdist_wheel
   pip install dist/mmcv*
3. Build and install directly from source
   MMCV_WITH_OPS=1 python3 setup.py install
```
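Either build method should produce the compiled ops extension. A quick way to confirm it was actually built and installed is to import it directly; this is a sketch, relying on the extension name `mmcv._ext` used by setup.py:
```python
# Check that the CUDA/HIP ops were compiled into the installed package
# (sketch; MMCV_WITH_OPS=1 builds the compiled kernels as mmcv._ext).
import mmcv._ext

print("compiled ops loaded from:", mmcv._ext.__file__)
```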
#### Notes
+ If pip install downloads are slow, add the Tsinghua PyPI mirror: -i https://pypi.tuna.tsinghua.edu.cn/simple/
+ ROCM_PATH is the DTK installation path, /opt/dtk by default
+ Building under PyTorch 2.5.1 requires C++17 support: open setup.py and change -std=c++14 to -std=c++17 (a small patch sketch follows these notes)
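A minimal sketch of that edit done from Python rather than by hand, assuming setup.py sits in the current working directory:
```python
# One-off patch for PyTorch 2.5.1 builds: switch the C++ standard flag in setup.py.
from pathlib import Path

setup_py = Path("setup.py")
setup_py.write_text(setup_py.read_text().replace("-std=c++14", "-std=c++17"))
```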
## Verification
- python -c "import mmcv; mmcv.\_\_version__" prints the package version, which tracks the upstream release, e.g. 2.0.1; a fuller smoke test of the compiled ops follows below
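Beyond the version check, a short script can exercise one of the compiled operators end to end. This is a minimal sketch, assuming a DCU device is visible as `cuda:0` and the ops were built with MMCV_WITH_OPS=1:
```python
# Smoke test for the compiled ops: run NMS on the device.
import torch
from mmcv.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 9., 9.]], device="cuda")
scores = torch.tensor([0.9, 0.8], device="cuda")
dets, keep = nms(boxes, scores, iou_threshold=0.5)
print("kept boxes:", keep.tolist())
```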
## Known Issue
-
......
import os
import re
import subprocess
from pathlib import Path

import torch

ROOT_DIR = Path(__file__).parent.resolve()

def _run_cmd(cmd, shell=False):
    try:
        return subprocess.check_output(
            cmd, cwd=ROOT_DIR, stderr=subprocess.DEVNULL,
            shell=shell).decode("ascii").strip()
    except Exception:
        return None
def _get_version():
    if os.path.exists(ROOT_DIR / "mmcv/version.py"):
        with open(ROOT_DIR / "mmcv/version.py", "r") as f:
            content = f.read().strip()
        version_match = re.search(r"__version__\s*=\s*['\"]([^'\"]+)['\"]", content)
        if version_match:
            print(f"version_match.group(1) = {version_match.group(1)}")
            print(f"version_match.group(0) = {version_match.group(0)}")
            version = version_match.group(1)
        else:
            version = '2.2.0'
    if os.getenv("BUILD_VERSION"):
        version = os.getenv("BUILD_VERSION")
    return version
def _update_hcu_version(version, das_version, sha, abi, dtk, torch_version, branch):
    """Rewrite the value of __hcu_version__ in mmcv/version.py without touching anything else.

    :param das_version: the new version string, e.g. '2.2.0+das.opt1.dtk25041'
    """
    sha = "Unknown" if sha is None else sha
    hcu_version = f"{das_version}"
    file_path = ROOT_DIR / "mmcv" / "version.py"
    # Read the whole file.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    # Match the __hcu_version__ line and replace the quoted value.
    pattern = r"(__hcu_version__\s*=\s*)['\"]([^'\"]*)['\"]"
    replacement = rf"\g<1>'{hcu_version}'"
    updated_content = re.sub(pattern, replacement, content)
    # Write the file back.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(updated_content)
    return hcu_version
def get_origin_version():
    version_file = ROOT_DIR / 'mmcv/version.py'
    with open(version_file, encoding='utf-8') as f:
        exec(compile(f.read(), version_file, 'exec'))
    return locals()['__version__']
def _make_version_file(version, das_version, sha, abi, dtk, torch_version, branch):
    sha = "Unknown" if sha is None else sha
    # torch_version = '.'.join(torch_version.split('.')[:2])
    # hcu_version = f"{das_version}.git{sha}.abi{abi}.dtk{dtk}.torch{torch_version}"
    hcu_version = f"{das_version}"
    version_path = ROOT_DIR / "mmcv" / "version.py"
    with open(version_path, "a", encoding="utf-8") as f:
        # f.write(f"version = '{version}'\n")
        # f.write(f"git_hash = '{sha}'\n")
        # f.write(f"git_branch = '{branch}'\n")
        # f.write(f"abi = 'abi{abi}'\n")
        # f.write(f"dtk = '{dtk}'\n")
        # f.write(f"torch_version = '{torch_version}'\n")
        f.write(f"__hcu_version__ = '{hcu_version}'\n")
    return hcu_version
def _get_pytorch_version():
    if "PYTORCH_VERSION" in os.environ:
        return f"{os.environ['PYTORCH_VERSION']}"
    return torch.__version__
def get_version():
    ROCM_HOME = os.getenv("ROCM_PATH")
    print(f"ROCM_HOME = {ROCM_HOME}")
    if not ROCM_HOME:
        return get_origin_version()
    sha = _run_cmd(["git", "rev-parse", "HEAD"])
    sha = sha[:7] if sha else None
    branch = _run_cmd(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    tag = _run_cmd(["git", "describe", "--tags", "--exact-match", "@"])
    das_tag = _run_cmd(["git", "describe", "--abbrev=0"])
    print("-- Git branch:", branch)
    print("-- Git SHA:", sha)
    print("-- Git tag:", tag)
    torch_version = _get_pytorch_version()
    print("-- PyTorch:", torch_version)
    version = _get_version()
    print("-- Building version", version)
    # das_version = tag
    das_version = version + "+das.opt1"
    print("-- Building das_version", das_version)
    abi = _run_cmd(
        "echo '#include <string>' | gcc -x c++ -E -dM - | fgrep _GLIBCXX_USE_CXX11_ABI | awk '{print $3}'",
        shell=True)
    print("-- _GLIBCXX_USE_CXX11_ABI:", abi)
    dtk = _run_cmd(["cat", os.path.join(ROCM_HOME, '.info/rocm_version')])
    dtk = ''.join(dtk.split('.'))
    print("-- DTK:", dtk)
    das_version += ".dtk" + dtk
    # return _make_version_file(version, das_version, sha, abi, dtk, torch_version, branch)
    return _update_hcu_version(version, das_version, sha, abi, dtk, torch_version, branch)
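# Usage sketch: setup.py imports this helper via `from get_version import
# get_version` and passes the result to setuptools. On a DCU machine with
# ROCM_PATH set, the returned string is expected to look like
# '2.0.1+das.opt1.dtk25041' (illustrative value based on the docstring above),
# and __hcu_version__ in mmcv/version.py is rewritten to the same string.
if __name__ == "__main__":
    print(get_version())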
@@ -88,7 +88,7 @@ __global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2,
   }
 }
 
-#if __CUDA_ARCH__ >= 530
+#if __CUDACC__ >= 530
 __device__ __forceinline__ __half __half_area(const __half x1, const __half y1,
                                               const __half x2, const __half y2,
                                               const __half offset) {
@@ -142,6 +142,6 @@ __device__ void bbox_overlaps_cuda_kernel_half(
     ious[index] = __hdiv(interS, baseS);
   }
 }
-#endif  // __CUDA_ARCH__ >= 530
+#endif  // __CUDACC__ >= 530
 
 #endif  // BBOX_OVERLAPS_CUDA_KERNEL_CUH
@@ -15,13 +15,13 @@
 #ifdef PARROTS_USE_HALF
 #include <cuda_fp16.h>
 #endif
 
-#ifdef __CUDA_ARCH__
+#ifdef __CUDACC__
 #define CUDA_INTRINSIC_FUNC(Expr) Expr
 #else
 #define CUDA_INTRINSIC_FUNC(Expr)
 #endif
 
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+#if !defined(__CUDACC__) || __CUDACC__ >= 300
 
 #ifdef PARROTS_USE_HALF
@@ -104,6 +104,6 @@ __device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask,
 #endif  // CUDA_VERSION < 9000
 
-#endif  // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+#endif  // !defined(__CUDACC__) || __CUDACC__ >= 300
 
 #endif  // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
@@ -42,9 +42,9 @@ __device__ __forceinline__ static void reduceAdd(double *address, double val) {
   atomicAdd(address, val);
 }
 #else
-#ifdef __CUDA_ARCH__
+#ifdef __CUDACC__
 __device__ __forceinline__ static void reduceAdd(float *address, float val) {
-#if (__CUDA_ARCH__ < 200)
+#if (__CUDACC__ < 200)
 #ifdef _MSC_VER
 #pragma message( \
     "compute capability lower than 2.x. fall back to use CAS version of atomicAdd for float32")
@@ -65,7 +65,7 @@ __device__ __forceinline__ static void reduceAdd(float *address, float val) {
 }
 
 __device__ __forceinline__ static void reduceAdd(double *address, double val) {
-#if (__CUDA_ARCH__ < 600)
+#if (__CUDACC__ < 600)
 #ifdef _MSC_VER
 #pragma message( \
     "compute capability lower than 6.x. fall back to use CAS version of atomicAdd for float64")
@@ -85,7 +85,7 @@ __device__ __forceinline__ static void reduceAdd(double *address, double val) {
   atomicAdd(address, val);
 #endif
 }
-#endif  // __CUDA_ARCH__
+#endif  // __CUDACC__
 #endif  // MMCV_WITH_HIP
 
 template <typename T>
......
@@ -60,7 +60,7 @@ using phalf = float16;
 }()
 
 /** atomicAdd **/
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+#if defined(__CUDACC__) && __CUDACC__ < 600
 static __inline__ __device__ double atomicAdd(double* address, double val) {
   unsigned long long int* address_as_ull = (unsigned long long int*)address;
......
@@ -561,7 +561,7 @@ struct TensorView {
   }
   TV_HOST_DEVICE_INLINE scalar_t &operator()() {
 #if defined TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mPtr != nullptr,
                       "you want get value but the view is empty.%s", "\n");
     TV_DEVICE_REQUIRE(mShape.ndim() == 0,
@@ -577,7 +577,7 @@ struct TensorView {
   }
   TV_HOST_DEVICE_INLINE const scalar_t &operator()() const {
 #if defined TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mPtr != nullptr,
                       "you want get value but the view is empty.%s", "\n");
     TV_DEVICE_REQUIRE(mShape.ndim() == 0,
@@ -595,7 +595,7 @@ struct TensorView {
   template <class T1>
   TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1) {
 #if defined TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 1,
                       "you provide 1 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -612,7 +612,7 @@ struct TensorView {
   template <class T1, class T2>
   TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2) {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 2,
                       "you provide 2 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -635,7 +635,7 @@ struct TensorView {
   template <class T1, class T2, class T3>
   TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2, T3 i3) {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 3,
                       "you provide 3 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -663,7 +663,7 @@ struct TensorView {
   template <class T1, class T2, class T3, class T4>
   TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2, T3 i3, T4 i4) {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 4,
                       "you provide 4 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -697,7 +697,7 @@ struct TensorView {
   template <class T1>
   TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1) const {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 1,
                       "you provide 1 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -715,7 +715,7 @@ struct TensorView {
   template <class T1, class T2>
   TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2) const {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 2,
                       "you provide 2 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -739,7 +739,7 @@ struct TensorView {
   template <class T1, class T2, class T3>
   TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2, T3 i3) const {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 3,
                       "you provide 3 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -768,7 +768,7 @@ struct TensorView {
   TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2, T3 i3,
                                                    T4 i4) const {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 4,
                       "you provide 4 indexes, but dim is %ld\n", mShape.ndim());
     TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
@@ -801,7 +801,7 @@ struct TensorView {
   TV_HOST_DEVICE_INLINE scalar_t &operator[](int idx) {
 #ifdef TV_DEBUG
-#if defined(__CUDA_ARCH__)
+#if defined(__CUDACC__)
     TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),
                       "index(%d) out-of-range: [0, %ld)\n", int(idx), size());
 #else
......
@@ -4,7 +4,7 @@
 // Disable fp16 on ROCm device
 #ifndef MMCV_WITH_HIP
-#if __CUDA_ARCH__ >= 530
+#if __CUDACC__ >= 530
 template <>
 __global__ void bbox_overlaps_cuda_kernel<at::Half>(
     const at::Half* bbox1, const at::Half* bbox2, at::Half* ious,
@@ -16,7 +16,7 @@ __global__ void bbox_overlaps_cuda_kernel<at::Half>(
                                         num_bbox2, mode, aligned, offset);
 }
-#endif  // __CUDA_ARCH__ >= 530
+#endif  // __CUDACC__ >= 530
 #endif  // MMCV_WITH_HIP
 
 void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
......
 # Copyright (c) OpenMMLab. All rights reserved.
 __version__ = '2.0.1'
+__hcu_version__ = '2.0.1'
 
 def parse_version_info(version_str: str, length: int = 4) -> tuple:
@@ -32,4 +33,4 @@ def parse_version_info(version_str: str, length: int = 4) -> tuple:
 
 version_info = tuple(int(x) for x in __version__.split('.')[:3])
 
-__all__ = ['__version__', '__dcu_version__', 'version_info', 'parse_version_info']
+__all__ = ['__version__', '__hcu_version__', 'version_info', 'parse_version_info']
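The hunk above renames the exported attribute from `__dcu_version__` to `__hcu_version__`. A minimal sketch of reading the DAS build tag in a way that still works on wheels without the new attribute (the fallback is an assumption, not part of this change):
```python
# Read the DAS build tag recorded in mmcv/version.py; fall back to the plain
# version string on builds that do not define __hcu_version__ (sketch).
from mmcv import version as mmcv_version

build_tag = getattr(mmcv_version, "__hcu_version__", mmcv_version.__version__)
print("mmcv build tag:", build_tag)
```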
@@ -4,10 +4,7 @@ import platform
 import re
 
 from pkg_resources import DistributionNotFound, get_distribution, parse_version
 from setuptools import find_packages, setup
-import subprocess
-from typing import Optional, Union
-from pathlib import Path
+from get_version import get_version
 
 EXT_TYPE = ''
 try:
@@ -27,7 +24,6 @@ except ModuleNotFoundError:
     cmd_class = {}
     print('Skip building ext ops due to the absence of torch.')
 
 def choose_requirement(primary, secondary):
     """If some version of primary requirement installed, return primary, else
     return secondary."""
@@ -39,65 +35,11 @@ def choose_requirement(primary, secondary):
     return str(primary)
 
-def get_sha(pytorch_root: Union[str, Path]) -> str:
-    try:
-        return subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=pytorch_root).decode('ascii').strip()
-    except Exception:
-        return 'Unknown'
-
-def get_abi():
-    try:
-        command = "echo '#include <string>' | gcc -x c++ -E -dM - | fgrep _GLIBCXX_USE_CXX11_ABI"
-        result = subprocess.run(command, shell=True, capture_output=True, text=True)
-        output = result.stdout.strip()
-        abi = "abi" + output.split(" ")[-1]
-        return abi
-    except Exception:
-        return 'abiUnknown'
-
-def get_version_add(sha: Optional[str] = None) -> str:
-    version=''
-    mmcv_root = os.path.dirname(os.path.abspath(__file__))
-    add_version_path = os.path.join(os.path.join(mmcv_root, "mmcv"), "version.py")
-    if sha != 'Unknown':
-        if sha is None:
-            sha = get_sha(mmcv_root)
-        version = 'git' + sha[:7]
-    # abi
-    version += "." + get_abi()
-    # dtk version
-    if os.getenv("ROCM_PATH"):
-        rocm_path = os.getenv('ROCM_PATH', "")
-        rocm_version_path = os.path.join(rocm_path, '.info', "rocm_version")
-        with open(rocm_version_path, 'r',encoding='utf-8') as file:
-            lines = file.readlines()
-            rocm_version=lines[0][:-2].replace(".", "")
-        version += ".dtk" + rocm_version
-    # torch version
-    version += ".torch" + torch.__version__[:4]
-    lines=[]
-    with open(add_version_path, 'r',encoding='utf-8') as file:
-        lines = file.readlines()
-    lines[2] = "__dcu_version__ = '2.0.1+{}'\n".format(version)
-    with open(add_version_path, encoding="utf-8",mode="w") as file:
-        file.writelines(lines)
-        file.close()
-
-def get_version():
-    get_version_add()
-    version_file = 'mmcv/version.py'
-    with open(version_file, encoding='utf-8') as f:
-        exec(compile(f.read(), version_file, 'exec'))
-    return locals()['__dcu_version__']
+#def get_version():
+#    version_file = 'mmcv/version.py'
+#    with open(version_file, encoding='utf-8') as f:
+#        exec(compile(f.read(), version_file, 'exec'))
+#    return locals()['__version__']
 
 def parse_requirements(fname='requirements/runtime.txt', with_version=True):
     """Parse the package dependencies listed in a requirements file but strips
@@ -189,7 +131,6 @@ except ImportError:
 for main, secondary in CHOOSE_INSTALL_REQUIRES:
     install_requires.append(choose_requirement(main, secondary))
 
 def get_extensions():
     extensions = []
@@ -319,7 +260,7 @@ def get_extensions():
             define_macros += [('MMCV_WITH_CUDA', None)]
             cuda_args = os.getenv('MMCV_CUDA_ARGS')
             extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
-            if is_rocm_pytorch and platform.system() != 'Windows':
+            if platform.system() != 'Windows':
                 extra_compile_args['nvcc'] += \
                     ['--gpu-max-threads-per-block=1024']
             op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
......