Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
ff295599
Commit
ff295599
authored
Jun 07, 2024
by
sangwzh
Browse files
update macros for torch2.1 and import fastpt
parent
43ff1d4f
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
26 additions
and
8 deletions
+26
-8
README.md
README.md
+8
-1
colossalai/kernel/cuda_native/csrc/kernels/cublas_wrappers.cu
...ssalai/kernel/cuda_native/csrc/kernels/cublas_wrappers.cu
+1
-1
colossalai/kernel/cuda_native/csrc/kernels/include/cublas_wrappers.h
...kernel/cuda_native/csrc/kernels/include/cublas_wrappers.h
+1
-1
colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h
...ai/kernel/cuda_native/csrc/kernels/include/feed_forward.h
+2
-2
colossalai/kernel/cuda_native/csrc/kernels/include/strided_batch_gemm.h
...nel/cuda_native/csrc/kernels/include/strided_batch_gemm.h
+3
-3
custom_hipify_mappings.json
custom_hipify_mappings.json
+9
-0
setup.py
setup.py
+2
-0
No files found.
README.md
View file @
ff295599
...
@@ -23,6 +23,13 @@ pytorch whl包下载目录:[https://cancon.hpccube.com:65024/4/main/pytorch/dt
...
@@ -23,6 +23,13 @@ pytorch whl包下载目录:[https://cancon.hpccube.com:65024/4/main/pytorch/dt
pip
install
torch
*
(
下载的torch的whl包
)
pip
install
torch
*
(
下载的torch的whl包
)
```
```
#### 源码编译安装
torch2.1下,首先安装fastpt工具包,下载地址:http://10.6.10.68:8000/debug/fastpt/
执行
```
shell
pip
install
fastpt
*
.whl
```
```
shell
```
shell
pip
install
setuptools wheel
pip
install
setuptools wheel
```
```
...
@@ -40,4 +47,4 @@ pip install dist/colossalai*
...
@@ -40,4 +47,4 @@ pip install dist/colossalai*
## 参考
## 参考
-
[
README_ORIGIN
](
README_ORIGIN.md
)
-
[
README_ORIGIN
](
README_ORIGIN.md
)
-
[
README_zh-Hans
](
README_zh-Hans.md
)
-
[
README_zh-Hans
](
README_zh-Hans.md
)
\ No newline at end of file
colossalai/kernel/cuda_native/csrc/kernels/cublas_wrappers.cu
View file @
ff295599
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
*/
*/
#include "cublas_wrappers.h"
#include "cublas_wrappers.h"
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
) && !defined(HIPBLAS_H)
int
cublas_gemm_ex
(
cublasHandle_t
handle
,
cublasOperation_t
transa
,
int
cublas_gemm_ex
(
cublasHandle_t
handle
,
cublasOperation_t
transa
,
cublasOperation_t
transb
,
int
m
,
int
n
,
int
k
,
cublasOperation_t
transb
,
int
m
,
int
n
,
int
k
,
const
float
*
alpha
,
const
float
*
beta
,
const
float
*
A
,
const
float
*
alpha
,
const
float
*
beta
,
const
float
*
A
,
...
...
colossalai/kernel/cuda_native/csrc/kernels/include/cublas_wrappers.h
View file @
ff295599
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
#endif
#endif
#include <stdio.h>
#include <stdio.h>
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
) && !defined(HIPBLAS_H)
int
cublas_gemm_ex
(
cublasHandle_t
handle
,
cublasOperation_t
transa
,
int
cublas_gemm_ex
(
cublasHandle_t
handle
,
cublasOperation_t
transa
,
cublasOperation_t
transb
,
int
m
,
int
n
,
int
k
,
cublasOperation_t
transb
,
int
m
,
int
n
,
int
k
,
const
float
*
alpha
,
const
float
*
beta
,
const
float
*
A
,
const
float
*
alpha
,
const
float
*
beta
,
const
float
*
A
,
...
...
colossalai/kernel/cuda_native/csrc/kernels/include/feed_forward.h
View file @
ff295599
...
@@ -35,7 +35,7 @@ class FeedForward {
...
@@ -35,7 +35,7 @@ class FeedForward {
float
alpha
=
T
(
1.
);
float
alpha
=
T
(
1.
);
float
beta
=
T
(
0.
);
float
beta
=
T
(
0.
);
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
)&& !defined(HIPBLAS_H)
cublas_gemm_ex
(
_cublasHandle
,
CUBLAS_OP_T
,
CUBLAS_OP_N
,
config_
.
outputSize
,
cublas_gemm_ex
(
_cublasHandle
,
CUBLAS_OP_T
,
CUBLAS_OP_N
,
config_
.
outputSize
,
bsz
,
config_
.
inputSize
,
&
alpha
,
&
beta
,
weights
,
input_ptr
,
bsz
,
config_
.
inputSize
,
&
alpha
,
&
beta
,
weights
,
input_ptr
,
out
,
rocblas_gemm_algo
(
rocblas_gemm_algo_standard
));
out
,
rocblas_gemm_algo
(
rocblas_gemm_algo_standard
));
...
@@ -51,7 +51,7 @@ class FeedForward {
...
@@ -51,7 +51,7 @@ class FeedForward {
T
*
inp_grad_out
=
nullptr
,
T
*
out_grad_trans_out
=
nullptr
,
T
*
inp_grad_out
=
nullptr
,
T
*
out_grad_trans_out
=
nullptr
,
bool
compute_bias
=
true
)
{
bool
compute_bias
=
true
)
{
float
alpha
=
(
T
)
1.0
,
beta
=
(
T
)
0.0
;
float
alpha
=
(
T
)
1.0
,
beta
=
(
T
)
0.0
;
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
)&& !defined(HIPBLAS_H)
cublas_gemm_ex
(
_cublasHandle
,
CUBLAS_OP_N
,
CUBLAS_OP_T
,
config_
.
inputSize
,
cublas_gemm_ex
(
_cublasHandle
,
CUBLAS_OP_N
,
CUBLAS_OP_T
,
config_
.
inputSize
,
config_
.
outputSize
,
bsz
,
&
alpha
,
&
beta
,
input_ptr
,
out_grad
,
config_
.
outputSize
,
bsz
,
&
alpha
,
&
beta
,
input_ptr
,
out_grad
,
weights_grad
,
rocblas_gemm_algo
(
rocblas_gemm_algo_standard
));
weights_grad
,
rocblas_gemm_algo
(
rocblas_gemm_algo_standard
));
...
...
colossalai/kernel/cuda_native/csrc/kernels/include/strided_batch_gemm.h
View file @
ff295599
...
@@ -49,7 +49,7 @@ class StridedBatchGemm {
...
@@ -49,7 +49,7 @@ class StridedBatchGemm {
int
stride_b
=
_config
.
n
*
_config
.
k
;
int
stride_b
=
_config
.
n
*
_config
.
k
;
int
stride_c
=
_config
.
m
*
_config
.
n
;
int
stride_c
=
_config
.
m
*
_config
.
n
;
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
)&& !defined(HIPBLAS_H)
cublas_strided_batched_gemm
(
cublas_strided_batched_gemm
(
handle
,
_config
.
m
,
_config
.
n
,
_config
.
k
,
&
_config
.
alpha
,
&
_config
.
beta
,
handle
,
_config
.
m
,
_config
.
n
,
_config
.
k
,
&
_config
.
alpha
,
&
_config
.
beta
,
_buffer_a
,
_buffer_b
,
output
,
_config
.
op_A
,
_config
.
op_B
,
stride_a
,
_buffer_a
,
_buffer_b
,
output
,
_config
.
op_A
,
_config
.
op_B
,
stride_a
,
...
@@ -77,7 +77,7 @@ class StridedBatchGemm {
...
@@ -77,7 +77,7 @@ class StridedBatchGemm {
(
_config
.
op_B
==
CUBLAS_OP_T
?
CUBLAS_OP_N
:
CUBLAS_OP_T
);
(
_config
.
op_B
==
CUBLAS_OP_T
?
CUBLAS_OP_N
:
CUBLAS_OP_T
);
// Calculate d_A.
// Calculate d_A.
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
)&& !defined(HIPBLAS_H)
cublas_strided_batched_gemm
(
cublas_strided_batched_gemm
(
handle
,
mb
,
kb
,
_config
.
n
,
&
_config
.
alpha
,
&
_config
.
beta
,
handle
,
mb
,
kb
,
_config
.
n
,
&
_config
.
alpha
,
&
_config
.
beta
,
(
_config
.
op_A
==
CUBLAS_OP_T
?
_buffer_b
:
d_output
),
(
_config
.
op_A
==
CUBLAS_OP_T
?
_buffer_b
:
d_output
),
...
@@ -102,7 +102,7 @@ class StridedBatchGemm {
...
@@ -102,7 +102,7 @@ class StridedBatchGemm {
stride_c
=
_config
.
n
*
_config
.
k
;
stride_c
=
_config
.
n
*
_config
.
k
;
// Calculate d_B.
// Calculate d_B.
#ifdef
COLOSSAL_HIP
#if
def
ined(
COLOSSAL_HIP
)&& !defined(HIPBLAS_H)
cublas_strided_batched_gemm
(
cublas_strided_batched_gemm
(
handle
,
_config
.
k
,
_config
.
n
,
_config
.
m
,
&
_config
.
alpha
,
&
_config
.
beta
,
handle
,
_config
.
k
,
_config
.
n
,
_config
.
m
,
&
_config
.
alpha
,
&
_config
.
beta
,
_buffer_a
,
d_output
,
inpGradB
,
op_a
,
CUBLAS_OP_N
,
stride_a
,
stride_b
,
_buffer_a
,
d_output
,
inpGradB
,
op_a
,
CUBLAS_OP_N
,
stride_a
,
stride_b
,
...
...
custom_hipify_mappings.json
0 → 100644
View file @
ff295599
{
"custom_map"
:
{
"#if TORCH_VERSION_MINOR >= 13"
:
"#if TORCH_VERSION_MINOR >= 13 || TORCH_VERSION_MAJOR >= 2"
,
"cublasGemmAlgo_t"
:
"hipblasGemmAlgo_t"
,
"CUDA_R_32F"
:
"HIPBLAS_R_32F"
,
"CUDA_R_16F"
:
"HIPBLAS_R_16F"
}
}
setup.py
View file @
ff295599
...
@@ -189,6 +189,7 @@ if build_cuda_ext or build_hip_ext:
...
@@ -189,6 +189,7 @@ if build_cuda_ext or build_hip_ext:
try
:
try
:
import
torch
import
torch
from
torch.utils.cpp_extension
import
CUDA_HOME
,
BuildExtension
,
CUDAExtension
from
torch.utils.cpp_extension
import
CUDA_HOME
,
BuildExtension
,
CUDAExtension
from
fastpt
import
CUDAExtension
print
(
"
\n\n
torch.__version__ = {}
\n\n
"
.
format
(
torch
.
__version__
))
print
(
"
\n\n
torch.__version__ = {}
\n\n
"
.
format
(
torch
.
__version__
))
TORCH_MAJOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
0
])
TORCH_MAJOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
0
])
TORCH_MINOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
1
])
TORCH_MINOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
1
])
...
@@ -220,6 +221,7 @@ if build_hip_ext:
...
@@ -220,6 +221,7 @@ if build_hip_ext:
'nvcc'
:
[
'-O3'
]
+
version_dependent_macros
+
hip_macros
+
extra_cuda_flags
})
'nvcc'
:
[
'-O3'
]
+
version_dependent_macros
+
hip_macros
+
extra_cuda_flags
})
from
torch.utils.hipify
import
hipify_python
from
torch.utils.hipify
import
hipify_python
from
fastpt
import
hipify_python
hipify_python
.
hipify
(
hipify_python
.
hipify
(
project_directory
=
this_dir
,
project_directory
=
this_dir
,
output_directory
=
this_dir
,
output_directory
=
this_dir
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment