Commit cfb0ec1a authored by lishen01's avatar lishen01
Browse files

Merge branch 'develop-pytorch1.13.1' into 'develop'

Develop pytorch1.13.1

See merge request aicomponent/warpctc!1
parents 5e65c1c3 c9a60a98
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="On explicit save action" serverName="10.6.10.62" preserveTimestamps="false" deleteMissingItems="true" createEmptyFolders="true" filePermissions="420" folderPermissions="493" confirmBeforeUploading="false" confirmBeforeDeletion="false" autoUploadExternalChanges="true">
<option name="confirmBeforeDeletion" value="false" />
<option name="confirmBeforeUploading" value="false" />
<serverData>
<paths name="10.6.10.61">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="10.6.10.62">
<serverdata>
<mappings>
<mapping deploy="/public/home/lishen/warpctc/warpctc_dcu" local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="10.6.10.69">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="10.6.6.220">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lishen_89_docker (c96f91f6-1c12-4fb0-b3d8-cc68ccf3f77c)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lishen_90">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lishen_93">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="lishen_95">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ON_EXPLICIT_SAVE" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/warpctc_dcu.iml" filepath="$PROJECT_DIR$/.idea/warpctc_dcu.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="CPP_MODULE" version="4">
<component name="FacetManager">
<facet type="Python" name="Python facet">
<configuration sdkName="Python 3.7" />
</facet>
</component>
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Python 3.7 interpreter library" level="application" />
</component>
</module>
\ No newline at end of file
# PyTorch bindings for Warp-ctc
# DLIB
[![Build Status](https://travis-ci.org/SeanNaren/warp-ctc.svg?branch=pytorch_bindings)](https://travis-ci.org/SeanNaren/warp-ctc)
## 环境配置
This is an extension onto the original repo found [here](https://github.com/baidu-research/warp-ctc).
使用DCU编译之前,需要准备编译环境。参考
[environment prepare](environment_prepare.md)
## Installation
## 使用源码安装
Install [PyTorch](https://github.com/pytorch/pytorch#installation) v0.4.
### 编译环境准备(以dtk-23.04版本为例)
`WARP_CTC_PATH` should be set to the location of a built WarpCTC
(i.e. `libwarpctc.so`). This defaults to `../build`, so from within a
new warp-ctc clone you could build WarpCTC like this:
- 拉取代码
```bash
git clone https://github.com/SeanNaren/warp-ctc.git
cd warp-ctc
mkdir build; cd build
cmake ..
make
```
```
git clone -b develop http://developer.hpccube.com/codes/aicomponent/warpctc.git
```
- [开发者社区](https://developer.hpccube.com/tool/#sdk) DCU Toolkit 中下载 DTK-23.04 解压至 /opt/ 路径下,并建立软链接
```
cd /opt && ln -s dtk-23.04 dtk
```
- 导入环境变量以及安装必要依赖库
```shell
source /opt/dtk/env.sh
```
Now install the bindings:
```bash
### 编译安装
#### 编译 Python API
- 使用python安装
```shell
cd pytorch_binding
python setup.py install
```
If you try the above and get a dlopen error on OSX with anaconda3 (as recommended by pytorch):
```bash
cd ../pytorch_binding
python setup.py install
cd ../build
cp libwarpctc.dylib /Users/$WHOAMI/anaconda3/lib
- 使用python编译whl包
```shell
cd pytorch_binding
python setup.py bdist_wheel
```
This will resolve the library not loaded error. This can be easily modified to work with other python installs if needed.
Example to use the bindings below.
```python
import torch
from warpctc_pytorch import CTCLoss
ctc_loss = CTCLoss()
# expected shape of seqLength x batchSize x alphabet_size
probs = torch.FloatTensor([[[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]]]).transpose(0, 1).contiguous()
labels = torch.IntTensor([1, 2])
label_sizes = torch.IntTensor([2])
probs_sizes = torch.IntTensor([2])
probs.requires_grad_(True) # tells autograd to compute gradients for probs
cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
cost.backward()
### 测试
- 验证warpctc的loss正确性(CPU和GPU的一致性)
```shell
cd pytorch_binding/tests
python3 test_gpu.py
```
## Documentation
- 验证warpctc的loss的GPU加速效果
```shell
cd pytorch_binding/tests
python3 test_gpu_speed.py
```
CTCLoss(size_average=False, length_average=False)
# size_average (bool): normalize the loss by the batch size (default: False)
# length_average (bool): normalize the loss by the total number of frames in the batch. If True, supersedes size_average (default: False)
forward(acts, labels, act_lens, label_lens)
# acts: Tensor of (seqLength x batch x outputDim) containing output activations from network (before softmax)
# labels: 1 dimensional Tensor containing all the targets of the batch in one large sequence
# act_lens: Tensor of size (batch) containing size of each output sequence from the network
# label_lens: Tensor of (batch) containing label length of each example
```
\ No newline at end of file
# DLIB
## 环境配置
使用DCU编译之前,需要准备编译环境。参考
[environment prepare](environment_prepare.md)
## 使用源码安装
### 编译环境准备(以dtk-23.04版本为例)
- 拉取代码
```
git clone -b develop http://developer.hpccube.com/codes/aicomponent/warpctc.git
```
- [开发者社区](https://developer.hpccube.com/tool/#sdk) DCU Toolkit 中下载 DTK-23.04 解压至 /opt/ 路径下,并建立软链接
```
cd /opt && ln -s dtk-23.04 dtk
```
- 导入环境变量以及安装必要依赖库
```shell
source /opt/dtk/env.sh
```
### 编译安装
#### 编译 Python API
- 使用python安装
```shell
cd pytorch_binding
python setup.py install
```
- 使用python编译whl包
```shell
cd pytorch_binding
python setup.py bdist_wheel
```
### 测试
- 验证warpctc的loss正确性(CPU和GPU的一致性)
```shell
cd pytorch_binding/tests
python3 test_gpu.py
```
- 验证warpctc的loss的GPU加速效果
```shell
cd pytorch_binding/tests
python3 test_gpu_speed.py
```
# PyTorch bindings for Warp-ctc
[![Build Status](https://travis-ci.org/SeanNaren/warp-ctc.svg?branch=pytorch_bindings)](https://travis-ci.org/SeanNaren/warp-ctc)
This is an extension onto the original repo found [here](https://github.com/baidu-research/warp-ctc).
## Installation
Install [PyTorch](https://github.com/pytorch/pytorch#installation) v0.4.
`WARP_CTC_PATH` should be set to the location of a built WarpCTC
(i.e. `libwarpctc.so`). This defaults to `../build`, so from within a
new warp-ctc clone you could build WarpCTC like this:
```bash
git clone https://github.com/SeanNaren/warp-ctc.git
cd warp-ctc
mkdir build; cd build
cmake ..
make
```
Now install the bindings:
```bash
cd pytorch_binding
python setup.py install
```
If you try the above and get a dlopen error on OSX with anaconda3 (as recommended by pytorch):
```bash
cd ../pytorch_binding
python setup.py install
cd ../build
cp libwarpctc.dylib /Users/$WHOAMI/anaconda3/lib
```
This will resolve the library not loaded error. This can be easily modified to work with other python installs if needed.
Example to use the bindings below.
```python
import torch
from warpctc_pytorch import CTCLoss
ctc_loss = CTCLoss()
# expected shape of seqLength x batchSize x alphabet_size
probs = torch.FloatTensor([[[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]]]).transpose(0, 1).contiguous()
labels = torch.IntTensor([1, 2])
label_sizes = torch.IntTensor([2])
probs_sizes = torch.IntTensor([2])
probs.requires_grad_(True) # tells autograd to compute gradients for probs
cost = ctc_loss(probs, labels, probs_sizes, label_sizes)
cost.backward()
```
## Documentation
```
CTCLoss(size_average=False, length_average=False)
# size_average (bool): normalize the loss by the batch size (default: False)
# length_average (bool): normalize the loss by the total number of frames in the batch. If True, supersedes size_average (default: False)
forward(acts, labels, act_lens, label_lens)
# acts: Tensor of (seqLength x batch x outputDim) containing output activations from network (before softmax)
# labels: 1 dimensional Tensor containing all the targets of the batch in one large sequence
# act_lens: Tensor of size (batch) containing size of each output sequence from the network
# label_lens: Tensor of (batch) containing label length of each example
```
\ No newline at end of file
......@@ -130,15 +130,14 @@ void compute_alpha_kernel(const ProbT *probs, const int *label_sizes,
const int *label_global = &labels[blockIdx.x * S_memoffset];
ProbT *alpha = &alphas[blockIdx.x * (S_memoffset * T_memoffset)];
// Set the first row of alpha neg_inf - it is much more efficient to do it
// here than outside
#pragma unroll
// Set the first row of alpha neg_inf - it is much more efficient to do it here than outside
//#pragma unroll
for (int idx = tid; idx < min(S, NV); idx += blockDim.x) {
alpha[idx] = ctc_helper::neg_inf<ProbT>();
}
// Load labels into shared memory
#pragma unroll
//#pragma unroll
for (int i = tid; i < S; i += NT) {
label[i] = label_global[i];
}
......@@ -272,8 +271,8 @@ void compute_betas_and_grad_kernel(const ProbT *probs, const int *label_sizes,
int start = S > 1 ? (S - 2) : 0;
int end = (L + repeats < T) ? S : S - 1;
// Setup shared memory buffers
#pragma unroll
// // Setup shared memory buffers
//#pragma unroll
for (int idx = tid; idx < NV; idx += NT) {
label[idx] = (idx < S) ? label_global[idx] : INT_MAX;
}
......@@ -290,7 +289,7 @@ void compute_betas_and_grad_kernel(const ProbT *probs, const int *label_sizes,
int key[VT];
int gather_val[VT];
#pragma unroll
//#pragma unroll
for (int i = 0; i < VT; ++i) {
const int idx = tid * VT + i;
gather_val[i] = idx;
......
import os
import subprocess
from pathlib import Path
import torch
# Absolute path of the directory containing this file; used as the cwd for
# the git subprocess calls and as the anchor for version.txt / version.py.
ROOT_DIR = Path(__file__).parent.resolve()
def _run_cmd(cmd, shell=False):
    """Run a command in the repository root and return its stripped stdout.

    Args:
        cmd: argument list, or a command string when ``shell=True``.
        shell: forwarded to ``subprocess.check_output``.

    Returns:
        The command's stdout decoded as UTF-8 and stripped, or ``None`` if
        the command fails for any reason (missing binary, non-zero exit).
    """
    try:
        out = subprocess.check_output(
            cmd, cwd=ROOT_DIR, stderr=subprocess.DEVNULL, shell=shell)
        # Decode as UTF-8, not ASCII: git output (branch names, tags, paths)
        # may legally contain non-ASCII bytes, which would raise
        # UnicodeDecodeError under an ascii decode.
        return out.decode("utf-8").strip()
    except Exception:
        # Best-effort helper: every caller treats None as "unavailable".
        return None
def _get_version():
    """Return the package version string.

    Reads ``version.txt`` next to this file when present, otherwise falls
    back to ``'0.1'``. A non-empty ``BUILD_VERSION`` environment variable
    overrides either value.
    """
    version_file = ROOT_DIR / "version.txt"
    if os.path.exists(version_file):
        with open(version_file, "r") as f:
            version = f.read().strip()
    else:
        version = '0.1'
    override = os.getenv("BUILD_VERSION")
    if override:
        version = override
    return version
def _make_version_file(version, sha, abi, dtk, torch_version, branch):
    """Write ``warpctc_pytorch/version.py`` with build metadata.

    Args:
        version: base package version (e.g. ``'0.1'``).
        sha: short git commit hash, or ``None`` when unavailable.
        abi: value of ``_GLIBCXX_USE_CXX11_ABI`` reported by gcc.
        dtk: DTK/ROCm toolkit version (major+minor digits).
        torch_version: full PyTorch version string; truncated to major.minor.
        branch: current git branch name.

    Returns:
        The combined ``dcu_version`` string that was written to the file.
    """
    if sha is None:
        sha = "Unknown"
    torch_mm = '.'.join(torch_version.split('.')[:2])
    dcu_version = f"{version}+{sha}.abi{abi}.dtk{dtk}.torch{torch_mm}"
    entries = [
        f"version = '{version}'",
        f"git_hash = '{sha}'",
        f"git_branch = '{branch}'",
        f"abi = 'abi{abi}'",
        f"dtk = '{dtk}'",
        f"torch_version = '{torch_mm}'",
        f"dcu_version = '{dcu_version}'",
    ]
    version_path = ROOT_DIR / "warpctc_pytorch" / "version.py"
    with open(version_path, "w") as f:
        f.write("\n".join(entries) + "\n")
    return dcu_version
def _get_pytorch_version():
if "PYTORCH_VERSION" in os.environ:
return f"{os.environ['PYTORCH_VERSION']}"
return torch.__version__
def get_version(ROCM_HOME):
    """Assemble the full DCU version string and write the version file.

    Collects git metadata (short SHA, branch, tag), the PyTorch version,
    the C++11 ABI flag of the local gcc, and the DTK/ROCm version found
    under ``ROCM_HOME``, then delegates to ``_make_version_file``.

    Args:
        ROCM_HOME: path to the ROCm/DTK installation root.

    Returns:
        The combined ``dcu_version`` string produced by ``_make_version_file``.
    """
    sha = _run_cmd(["git", "rev-parse", "HEAD"])
    # _run_cmd returns None outside a git checkout — guard before slicing,
    # or sha[:7] raises TypeError. _make_version_file handles sha=None.
    sha = sha[:7] if sha else None
    branch = _run_cmd(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    tag = _run_cmd(["git", "describe", "--tags", "--exact-match", "@"])
    print("-- Git branch:", branch)
    print("-- Git SHA:", sha)
    print("-- Git tag:", tag)
    torch_version = _get_pytorch_version()
    print("-- PyTorch:", torch_version)
    version = _get_version()
    print("-- Building version", version)
    # With shell=True the command must be a single string; the original
    # passed a one-element list, which only works by POSIX accident.
    abi = _run_cmd(
        "echo '#include <string>' | gcc -x c++ -E -dM - "
        "| fgrep _GLIBCXX_USE_CXX11_ABI | awk '{print $3}'",
        shell=True)
    print("-- _GLIBCXX_USE_CXX11_ABI:", abi)
    dtk = _run_cmd(["cat", os.path.join(ROCM_HOME, '.info/rocm_version')])
    # dtk is None when the rocm_version file is absent (non-DCU host) —
    # guard before split, or dtk.split raises AttributeError.
    dtk = ''.join(dtk.split('.')[:2]) if dtk else "Unknown"
    print("-- DTK:", dtk)
    return _make_version_file(version, sha, abi, dtk, torch_version, branch)
......@@ -9,6 +9,7 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtensio
from setuptools import find_packages, setup
from setuptools.command.build_ext import build_ext
from pkg_resources import packaging # type: ignore[attr-defined]
from get_version import get_version
def _find_rocm_home() -> Optional[str]:
......@@ -291,10 +292,6 @@ class BuildReleaseExtension(BuildExtension):
build_ext.build_extensions(self)
def get_version():
return "0.1"
def get_extensions():
extensions = []
include_dirs = []
......@@ -331,7 +328,7 @@ def get_extensions():
def main():
setup(
name='warpctc_pytorch',
version="0.1",
version=get_version(_find_rocm_home()),
description='Torch fuseop Computer Vision Foundation',
keywords='computer vision',
packages=find_packages(),
......
......@@ -7,9 +7,7 @@
#include "ATen/cuda/CUDAContext.h"
#include <c10/cuda/CUDAGuard.h>
#include "ATen/cuda/CUDAEvent.h"
#include <THC/THCGeneral.h>
extern THCState* state;
#include <ATen/cuda/ThrustAllocator.h>
#endif
#include "ctc.h"
......@@ -91,7 +89,7 @@ int gpu_ctc(torch::Tensor probs,
probs_size, minibatch_size,
options, &gpu_size_bytes);
void* gpu_workspace = THCudaMalloc(state, gpu_size_bytes);
void* gpu_workspace = c10::cuda::CUDACachingAllocator::raw_alloc(gpu_size_bytes);
compute_ctc_loss(probs_ptr, grads_ptr,
labels_ptr, label_sizes_ptr,
......@@ -99,7 +97,8 @@ int gpu_ctc(torch::Tensor probs,
minibatch_size, costs_ptr,
gpu_workspace, options);
THCudaFree(state, (void *) gpu_workspace);
c10::cuda::CUDACachingAllocator::raw_delete((void *) gpu_workspace);
return 1;
}
#endif
......
#pragma once
/*
int gpu_ctc(THCudaTensor *probs,
THCudaTensor *grads,
THIntTensor *labels_ptr,
THIntTensor *label_sizes_ptr,
THIntTensor *sizes,
int minibatch_size,
THFloatTensor *costs,
int blank_label);
*/
int gpu_ctc(torch::Tensor probs,
torch::Tensor grads,
torch::Tensor labels,
......
0.1
\ No newline at end of file
......@@ -87,3 +87,9 @@ class CTCLoss(Module):
_assert_no_grad(label_lens)
return self.ctc(acts, labels, act_lens, label_lens, self.size_average,
self.length_average, self.blank)
try:
from .version import version, git_hash, git_branch, dtk, abi, torch_version, dcu_version # noqa: F401
__version__, __dcu_version__ = version, dcu_version
except ImportError:
pass
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment