Commit ff295599 authored by sangwzh

update macros for torch2.1 and import fastpt

parent 43ff1d4f
@@ -23,6 +23,13 @@ pytorch whl package download directory: [https://cancon.hpccube.com:65024/4/main/pytorch/dt
 pip install torch*    (the downloaded torch whl package)
 ```
+#### Install from source
+Under torch2.1, first install the fastpt toolkit; download it from: http://10.6.10.68:8000/debug/fastpt/
+Run
+```shell
+pip install fastpt*.whl
+```
 ```shell
 pip install setuptools wheel
 ```
@@ -40,4 +47,4 @@ pip install dist/colossalai*
 ## Reference
 - [README_ORIGIN](README_ORIGIN.md)
 - [README_zh-Hans](README_zh-Hans.md)
\ No newline at end of file
@@ -4,7 +4,7 @@
 */
 #include "cublas_wrappers.h"
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
 int cublas_gemm_ex(cublasHandle_t handle, cublasOperation_t transa,
                    cublasOperation_t transb, int m, int n, int k,
                    const float *alpha, const float *beta, const float *A,
...
@@ -14,7 +14,7 @@
 #endif
 #include <stdio.h>
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
 int cublas_gemm_ex(cublasHandle_t handle, cublasOperation_t transa,
                    cublasOperation_t transb, int m, int n, int k,
                    const float *alpha, const float *beta, const float *A,
...
@@ -35,7 +35,7 @@ class FeedForward {
     float alpha = T(1.);
     float beta = T(0.);
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
     cublas_gemm_ex(_cublasHandle, CUBLAS_OP_T, CUBLAS_OP_N, config_.outputSize,
                    bsz, config_.inputSize, &alpha, &beta, weights, input_ptr,
                    out, rocblas_gemm_algo(rocblas_gemm_algo_standard));
@@ -51,7 +51,7 @@ class FeedForward {
             T *inp_grad_out = nullptr, T *out_grad_trans_out = nullptr,
             bool compute_bias = true) {
     float alpha = (T)1.0, beta = (T)0.0;
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
     cublas_gemm_ex(_cublasHandle, CUBLAS_OP_N, CUBLAS_OP_T, config_.inputSize,
                    config_.outputSize, bsz, &alpha, &beta, input_ptr, out_grad,
                    weights_grad, rocblas_gemm_algo(rocblas_gemm_algo_standard));
...
@@ -49,7 +49,7 @@ class StridedBatchGemm {
     int stride_b = _config.n * _config.k;
     int stride_c = _config.m * _config.n;
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
     cublas_strided_batched_gemm(
         handle, _config.m, _config.n, _config.k, &_config.alpha, &_config.beta,
         _buffer_a, _buffer_b, output, _config.op_A, _config.op_B, stride_a,
@@ -77,7 +77,7 @@ class StridedBatchGemm {
         (_config.op_B == CUBLAS_OP_T ? CUBLAS_OP_N : CUBLAS_OP_T);
     // Calculate d_A.
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
     cublas_strided_batched_gemm(
         handle, mb, kb, _config.n, &_config.alpha, &_config.beta,
         (_config.op_A == CUBLAS_OP_T ? _buffer_b : d_output),
@@ -102,7 +102,7 @@ class StridedBatchGemm {
     stride_c = _config.n * _config.k;
     // Calculate d_B.
-#ifdef COLOSSAL_HIP
+#if defined(COLOSSAL_HIP) && !defined(HIPBLAS_H)
     cublas_strided_batched_gemm(
         handle, _config.k, _config.n, _config.m, &_config.alpha, &_config.beta,
         _buffer_a, d_output, inpGradB, op_a, CUBLAS_OP_N, stride_a, stride_b,
...
{
"custom_map" : {
"#if TORCH_VERSION_MINOR >= 13":"#if TORCH_VERSION_MINOR >= 13 || TORCH_VERSION_MAJOR >= 2",
"cublasGemmAlgo_t":"hipblasGemmAlgo_t",
"CUDA_R_32F":"HIPBLAS_R_32F",
"CUDA_R_16F":"HIPBLAS_R_16F"
}
}
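This mapping feeds the hipify step: cuBLAS type and algorithm names are rewritten to their hipBLAS counterparts, and the `TORCH_VERSION_MINOR >= 13` guard is widened so it also holds on torch 2.x. The diff does not show how fastpt consumes this JSON, so the following is only a rough sketch that treats `custom_map` as literal string-for-string substitution over a source file; the helper name and file paths are placeholders, not part of the commit.

```python
# Illustrative sketch only: the real hook inside fastpt's hipify is not shown in
# this commit; here custom_map is assumed to be applied as plain string
# replacement on each hipified source file.
import json
from pathlib import Path

def apply_custom_map(source_file: str, map_file: str) -> str:
    """Return the source text with every custom_map key replaced by its value."""
    mapping = json.loads(Path(map_file).read_text())["custom_map"]
    text = Path(source_file).read_text()
    for cuda_name, hip_name in mapping.items():
        text = text.replace(cuda_name, hip_name)
    return text

# e.g. "CUDA_R_16F" -> "HIPBLAS_R_16F", "cublasGemmAlgo_t" -> "hipblasGemmAlgo_t"
```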
@@ -189,6 +189,7 @@ if build_cuda_ext or build_hip_ext:
     try:
         import torch
         from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CUDAExtension
+        from fastpt import CUDAExtension
         print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
         TORCH_MAJOR = int(torch.__version__.split('.')[0])
         TORCH_MINOR = int(torch.__version__.split('.')[1])
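The hunk above keeps the plain major.minor split of `torch.__version__`, which is also why the custom hipify map earlier widens the `TORCH_VERSION_MINOR >= 13` guard: on torch 2.1 the minor component is 1, so a minor-only check fails even though 2.1 is newer than 1.13. A minimal sketch of that arithmetic, assuming a torch 2.1 environment and reusing the variable names from the hunk:

```python
# Sketch only: mirrors the C-side TORCH_VERSION_MAJOR/MINOR check in Python to
# show why "MINOR >= 13" alone misclassifies torch 2.1.
torch_version = "2.1.0"  # assumed environment; setup.py reads torch.__version__

TORCH_MAJOR = int(torch_version.split('.')[0])   # 2
TORCH_MINOR = int(torch_version.split('.')[1])   # 1

old_guard = TORCH_MINOR >= 13                        # False: 2.1 looks "older" than 1.13
new_guard = TORCH_MINOR >= 13 or TORCH_MAJOR >= 2    # True: the widened guard from custom_map
print(old_guard, new_guard)  # False True
```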
@@ -220,6 +221,7 @@ if build_hip_ext:
                   'nvcc': ['-O3'] + version_dependent_macros + hip_macros + extra_cuda_flags})
     from torch.utils.hipify import hipify_python
+    from fastpt import hipify_python
     hipify_python.hipify(
         project_directory=this_dir,
         output_directory=this_dir,
...
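In both setup.py hunks the torch import stays in place and is immediately shadowed by the fastpt import, so every later use of `CUDAExtension` and `hipify_python` resolves to fastpt's implementations. A self-contained sketch of that shadowing pattern; the try/except fallback is a hypothetical addition for machines without fastpt, whereas the commit itself imports fastpt unconditionally:

```python
# Import-shadowing sketch: later bindings win, so the fastpt names override the
# torch-provided ones for everything that follows in setup.py.
import torch
from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CUDAExtension
from torch.utils.hipify import hipify_python

try:
    # Rebinds CUDAExtension and hipify_python to fastpt's versions when available.
    from fastpt import CUDAExtension, hipify_python
except ImportError:
    # Hypothetical fallback (not in the commit): keep the torch-provided names.
    pass

print(CUDAExtension.__module__, hipify_python.__name__)
```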