Unverified commit 35d64462, authored by lvhan028, committed by GitHub
Browse files

build turbomind (#35)

* build turbomind

* change namespace fastertransformer to turbomind

* change logger name
parent 53d2e42c
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
* limitations under the License. * limitations under the License.
*/ */
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/cuda_fp8_utils.h" #include "src/turbomind/utils/cuda_fp8_utils.h"
namespace fastertransformer { namespace turbomind {
/* **************************** debug tools ********************************* */ /* **************************** debug tools ********************************* */
...@@ -57,7 +57,7 @@ template<typename T> ...@@ -57,7 +57,7 @@ template<typename T>
void print_abs_mean(const T* buf, uint size, cudaStream_t stream, std::string name) void print_abs_mean(const T* buf, uint size, cudaStream_t stream, std::string name)
{ {
if (buf == nullptr) { if (buf == nullptr) {
FT_LOG_WARNING("It is an nullptr, skip!"); TM_LOG_WARNING("It is an nullptr, skip!");
return; return;
} }
cudaDeviceSynchronize(); cudaDeviceSynchronize();
...@@ -110,7 +110,7 @@ template<typename T> ...@@ -110,7 +110,7 @@ template<typename T>
void print_to_screen(const T* result, const int size) void print_to_screen(const T* result, const int size)
{ {
if (result == nullptr) { if (result == nullptr) {
FT_LOG_WARNING("It is an nullptr, skip! \n"); TM_LOG_WARNING("It is an nullptr, skip! \n");
return; return;
} }
T* tmp = reinterpret_cast<T*>(malloc(sizeof(T) * size)); T* tmp = reinterpret_cast<T*>(malloc(sizeof(T) * size));
...@@ -366,7 +366,7 @@ FtCudaDataType getModelFileType(std::string ini_file, std::string section_name) ...@@ -366,7 +366,7 @@ FtCudaDataType getModelFileType(std::string ini_file, std::string section_name)
FtCudaDataType model_file_type; FtCudaDataType model_file_type;
INIReader reader = INIReader(ini_file); INIReader reader = INIReader(ini_file);
if (reader.ParseError() < 0) { if (reader.ParseError() < 0) {
FT_LOG_WARNING("Can't load %s. Use FP32 as default", ini_file.c_str()); TM_LOG_WARNING("Can't load %s. Use FP32 as default", ini_file.c_str());
model_file_type = FtCudaDataType::FP32; model_file_type = FtCudaDataType::FP32;
} }
else { else {
...@@ -381,7 +381,7 @@ FtCudaDataType getModelFileType(std::string ini_file, std::string section_name) ...@@ -381,7 +381,7 @@ FtCudaDataType getModelFileType(std::string ini_file, std::string section_name)
model_file_type = FtCudaDataType::BF16; model_file_type = FtCudaDataType::BF16;
} }
else { else {
FT_LOG_WARNING("Invalid type %s. Use FP32 as default", weight_data_type_str.c_str()); TM_LOG_WARNING("Invalid type %s. Use FP32 as default", weight_data_type_str.c_str());
model_file_type = FtCudaDataType::FP32; model_file_type = FtCudaDataType::FP32;
} }
} }
...@@ -389,4 +389,4 @@ FtCudaDataType getModelFileType(std::string ini_file, std::string section_name) ...@@ -389,4 +389,4 @@ FtCudaDataType getModelFileType(std::string ini_file, std::string section_name)
} }
/* ************************** end of common utils ************************** */ /* ************************** end of common utils ************************** */
} // namespace fastertransformer } // namespace turbomind
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
#pragma once #pragma once
#include "3rdparty/INIReader.h" #include "3rdparty/INIReader.h"
#include "src/fastertransformer/utils/cuda_bf16_wrapper.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h"
#include "src/fastertransformer/utils/logger.h" #include "src/turbomind/utils/logger.h"
#include <cublasLt.h> #include <cublasLt.h>
#include <cublas_v2.h> #include <cublas_v2.h>
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <cusparseLt.h> #include <cusparseLt.h>
#endif #endif
namespace fastertransformer { namespace turbomind {
#define MAX_CONFIG_NUM 20 #define MAX_CONFIG_NUM 20
#define COL32_ 32 #define COL32_ 32
...@@ -137,7 +137,7 @@ inline void syncAndCheck(const char* const file, int const line) ...@@ -137,7 +137,7 @@ inline void syncAndCheck(const char* const file, int const line)
throw std::runtime_error(std::string("[FT][ERROR] CUDA runtime error: ") + (_cudaGetErrorEnum(result)) throw std::runtime_error(std::string("[FT][ERROR] CUDA runtime error: ") + (_cudaGetErrorEnum(result))
+ " " + file + ":" + std::to_string(line) + " \n"); + " " + file + ":" + std::to_string(line) + " \n");
} }
FT_LOG_DEBUG(fmtstr("run syncAndCheck at %s:%d", file, line)); TM_LOG_DEBUG(fmtstr("run syncAndCheck at %s:%d", file, line));
} }
} }
...@@ -212,7 +212,7 @@ inline void myAssert(bool result, const char* const file, int const line, std::s ...@@ -212,7 +212,7 @@ inline void myAssert(bool result, const char* const file, int const line, std::s
do { \ do { \
bool is_valid_val = (val); \ bool is_valid_val = (val); \
if (!is_valid_val) { \ if (!is_valid_val) { \
fastertransformer::myAssert(is_valid_val, __FILE__, __LINE__, (info)); \ turbomind::myAssert(is_valid_val, __FILE__, __LINE__, (info)); \
} \ } \
} while (0) } while (0)
...@@ -442,14 +442,14 @@ void compareTwoTensor( ...@@ -442,14 +442,14 @@ void compareTwoTensor(
} }
if (print_size > 0) { if (print_size > 0) {
FT_LOG_INFO(" id | pred | ref |abs diff | rel diff (%) |"); TM_LOG_INFO(" id | pred | ref |abs diff | rel diff (%) |");
} }
float mean_abs_diff = 0.0f; float mean_abs_diff = 0.0f;
float mean_rel_diff = 0.0f; float mean_rel_diff = 0.0f;
int count = 0; int count = 0;
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
if (i < print_size) { if (i < print_size) {
FT_LOG_INFO("%4d | % 6.4f | % 6.4f | % 6.4f | % 7.4f |", TM_LOG_INFO("%4d | % 6.4f | % 6.4f | % 6.4f | % 7.4f |",
i, i,
(float)h_pred[i], (float)h_pred[i],
(float)h_ref[i], (float)h_ref[i],
...@@ -474,7 +474,7 @@ void compareTwoTensor( ...@@ -474,7 +474,7 @@ void compareTwoTensor(
} }
mean_abs_diff = mean_abs_diff / (float)count; mean_abs_diff = mean_abs_diff / (float)count;
mean_rel_diff = mean_rel_diff / (float)count; mean_rel_diff = mean_rel_diff / (float)count;
FT_LOG_INFO("mean_abs_diff: % 6.4f, mean_rel_diff: % 6.4f (%%)", mean_abs_diff, mean_rel_diff); TM_LOG_INFO("mean_abs_diff: % 6.4f, mean_rel_diff: % 6.4f (%%)", mean_abs_diff, mean_rel_diff);
if (fd != nullptr) { if (fd != nullptr) {
fprintf(fd, "mean_abs_diff: % 6.4f, mean_rel_diff: % 6.4f (%%)", mean_abs_diff, mean_rel_diff); fprintf(fd, "mean_abs_diff: % 6.4f, mean_rel_diff: % 6.4f (%%)", mean_abs_diff, mean_rel_diff);
...@@ -485,4 +485,4 @@ void compareTwoTensor( ...@@ -485,4 +485,4 @@ void compareTwoTensor(
} }
/* ************************** end of common utils ************************** */ /* ************************** end of common utils ************************** */
} // namespace fastertransformer } // namespace turbomind
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include "custom_ar_comm.h" #include "custom_ar_comm.h"
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
CustomAllReduceComm<T>::CustomAllReduceComm(size_t rank_size, size_t rank): rank_size_(rank_size), rank_(rank) CustomAllReduceComm<T>::CustomAllReduceComm(size_t rank_size, size_t rank): rank_size_(rank_size), rank_(rank)
...@@ -137,7 +137,7 @@ void initCustomAllReduceComm(std::vector<std::shared_ptr<AbstractCustomComm>>* c ...@@ -137,7 +137,7 @@ void initCustomAllReduceComm(std::vector<std::shared_ptr<AbstractCustomComm>>* c
if (rank_size != RANKS_PER_NODE) { if (rank_size != RANKS_PER_NODE) {
#ifdef BUILD_MULTI_GPU #ifdef BUILD_MULTI_GPU
if (rank_size > 1) { if (rank_size > 1) {
FT_LOG_WARNING("Custom All Reduce only supports 8 Ranks currently. Using NCCL as Comm."); TM_LOG_WARNING("Custom All Reduce only supports 8 Ranks currently. Using NCCL as Comm.");
} }
#else #else
FT_CHECK_WITH_INFO(rank_size == 1, FT_CHECK_WITH_INFO(rank_size == 1,
...@@ -158,7 +158,7 @@ void initCustomAllReduceComm(std::vector<std::shared_ptr<AbstractCustomComm>>* c ...@@ -158,7 +158,7 @@ void initCustomAllReduceComm(std::vector<std::shared_ptr<AbstractCustomComm>>* c
} }
custom_all_reduce_comms->at(0)->allocateAndExchangePeerAccessPointer(custom_all_reduce_comms); custom_all_reduce_comms->at(0)->allocateAndExchangePeerAccessPointer(custom_all_reduce_comms);
#else #else
FT_LOG_WARNING("Custom All Reduce is not supported before CUDA 11.2. Using NCCL as Comm."); TM_LOG_WARNING("Custom All Reduce is not supported before CUDA 11.2. Using NCCL as Comm.");
for (size_t i = 0; i < rank_size; i++) { for (size_t i = 0; i < rank_size; i++) {
custom_all_reduce_comms->push_back(nullptr); custom_all_reduce_comms->push_back(nullptr);
} }
...@@ -186,4 +186,4 @@ initCustomAllReduceComm<uint32_t>(std::vector<std::shared_ptr<AbstractCustomComm ...@@ -186,4 +186,4 @@ initCustomAllReduceComm<uint32_t>(std::vector<std::shared_ptr<AbstractCustomComm
int enable_custom_all_reduce, int enable_custom_all_reduce,
size_t rank_size); size_t rank_size);
} // namespace fastertransformer } // namespace turbomind
...@@ -21,12 +21,12 @@ ...@@ -21,12 +21,12 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "src/fastertransformer/kernels/custom_ar_kernels.h" #include "src/turbomind/kernels/custom_ar_kernels.h"
#include "src/fastertransformer/utils/Tensor.h" #include "src/turbomind/utils/Tensor.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/logger.h" #include "src/turbomind/utils/logger.h"
namespace fastertransformer { namespace turbomind {
class AbstractCustomComm { class AbstractCustomComm {
public: public:
...@@ -84,4 +84,4 @@ struct CustomARCommTypeConverter<__nv_bfloat16> { ...@@ -84,4 +84,4 @@ struct CustomARCommTypeConverter<__nv_bfloat16> {
}; };
#endif #endif
} // namespace fastertransformer } // namespace turbomind
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include "src/fastertransformer/utils/gemm.h" #include "src/turbomind/utils/gemm.h"
namespace fastertransformer { namespace turbomind {
/* ***************************** GEMM Impl ******************************** */ /* ***************************** GEMM Impl ******************************** */
...@@ -222,7 +222,7 @@ void Gemm::gemm(const GemmOp transa, ...@@ -222,7 +222,7 @@ void Gemm::gemm(const GemmOp transa,
const float alpha, const float alpha,
const float beta) const float beta)
{ {
FT_LOG_TRACE("Gemm::gemm [m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", m, n, k, lda, ldb, ldc); TM_LOG_TRACE("Gemm::gemm [m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", m, n, k, lda, ldb, ldc);
// Implementation copied from cublasMMWrapper::Gemm // Implementation copied from cublasMMWrapper::Gemm
// Switch A and B since both cublas and cublasLt assume a column major layout, // Switch A and B since both cublas and cublasLt assume a column major layout,
...@@ -423,7 +423,7 @@ void Gemm::batchedGemm(const GemmOp transa, ...@@ -423,7 +423,7 @@ void Gemm::batchedGemm(const GemmOp transa,
const float alpha, const float alpha,
const float beta) const float beta)
{ {
FT_LOG_TRACE( TM_LOG_TRACE(
"Gemm::batchedGemm [b=%ld m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", batch_size, m, n, k, lda, ldb, ldc); "Gemm::batchedGemm [b=%ld m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", batch_size, m, n, k, lda, ldb, ldc);
// Switch A and B. // Switch A and B.
...@@ -624,7 +624,7 @@ void Gemm::stridedBatchedGemm(GemmOp transa, ...@@ -624,7 +624,7 @@ void Gemm::stridedBatchedGemm(GemmOp transa,
const float alpha, const float alpha,
const float beta) const float beta)
{ {
FT_LOG_TRACE("Gemm::stridedBatchedGemm [b=%ld, m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", TM_LOG_TRACE("Gemm::stridedBatchedGemm [b=%ld, m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]",
batch_size, batch_size,
m, m,
n, n,
...@@ -873,7 +873,7 @@ void SpGemm::gemm(const GemmOp transa, ...@@ -873,7 +873,7 @@ void SpGemm::gemm(const GemmOp transa,
const float alpha, const float alpha,
const float beta) const float beta)
{ {
FT_LOG_TRACE("SpGemm::gemm [m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", m, n, k, lda, ldb, ldc); TM_LOG_TRACE("SpGemm::gemm [m=%ld, n=%ld, k=%ld, lda=%ld, ldb=%ld, ldc=%ld]", m, n, k, lda, ldb, ldc);
checkDataTypeValidity(Atype); checkDataTypeValidity(Atype);
checkDataTypeValidity(Btype); checkDataTypeValidity(Btype);
checkDataTypeValidity(Ctype); checkDataTypeValidity(Ctype);
...@@ -994,7 +994,7 @@ void SpGemm::gemm(const GemmOp transa, ...@@ -994,7 +994,7 @@ void SpGemm::gemm(const GemmOp transa,
std::shared_ptr<Gemm> createGemm(IAllocator* allocator, cudaStream_t stream, bool sparse, bool quantized) std::shared_ptr<Gemm> createGemm(IAllocator* allocator, cudaStream_t stream, bool sparse, bool quantized)
{ {
FT_LOG_TRACE( TM_LOG_TRACE(
"Create Gemm instance [sparse=%s, quantized=%s]", sparse ? "true" : "false", quantized ? "true" : "false"); "Create Gemm instance [sparse=%s, quantized=%s]", sparse ? "true" : "false", quantized ? "true" : "false");
std::shared_ptr<Gemm> gemm; std::shared_ptr<Gemm> gemm;
if (!sparse) { if (!sparse) {
...@@ -1105,7 +1105,7 @@ cusparseComputeType getCusparseComputeType(DataType ctype) ...@@ -1105,7 +1105,7 @@ cusparseComputeType getCusparseComputeType(DataType ctype)
void pruneMatrixB(void* data, const cudaStream_t& stream, const size_t k, const size_t n, const GemmOp trans) void pruneMatrixB(void* data, const cudaStream_t& stream, const size_t k, const size_t n, const GemmOp trans)
{ {
FT_LOG_TRACE("Prune matrix B [k=%ld, n=%ld, op=%s]", k, n, getGemmOpString(trans).c_str()); TM_LOG_TRACE("Prune matrix B [k=%ld, n=%ld, op=%s]", k, n, getGemmOpString(trans).c_str());
// Due to A/B switching, the matrix B will be used as a matrix A. // Due to A/B switching, the matrix B will be used as a matrix A.
const cusparseOrder_t order = CUSPARSE_ORDER_COL; const cusparseOrder_t order = CUSPARSE_ORDER_COL;
...@@ -1141,7 +1141,7 @@ size_t compressMatrixB(void** output, ...@@ -1141,7 +1141,7 @@ size_t compressMatrixB(void** output,
const size_t n, const size_t n,
const GemmOp trans) const GemmOp trans)
{ {
FT_LOG_TRACE("compressMatrix [k=%ld, n=%ld, dtype=FP16]", k, n); TM_LOG_TRACE("compressMatrix [k=%ld, n=%ld, dtype=FP16]", k, n);
// swap A/B due to column/row major layout mismatch. // swap A/B due to column/row major layout mismatch.
cusparseOrder_t order = CUSPARSE_ORDER_COL; cusparseOrder_t order = CUSPARSE_ORDER_COL;
...@@ -1181,4 +1181,4 @@ size_t compressMatrixB(void** output, ...@@ -1181,4 +1181,4 @@ size_t compressMatrixB(void** output,
/* ************************* End of GEMM utils **************************** */ /* ************************* End of GEMM utils **************************** */
} // end of namespace fastertransformer } // end of namespace turbomind
...@@ -27,13 +27,13 @@ ...@@ -27,13 +27,13 @@
// TODO: Need to remove the dependency of the layer module. // TODO: Need to remove the dependency of the layer module.
// e.g. refactor Weight class to some base module. // e.g. refactor Weight class to some base module.
#include "src/fastertransformer/layers/DenseWeight.h" #include "src/turbomind/layers/DenseWeight.h"
#include "src/fastertransformer/utils/Tensor.h" #include "src/turbomind/utils/Tensor.h"
#include "src/fastertransformer/utils/allocator.h" #include "src/turbomind/utils/allocator.h"
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/logger.h" #include "src/turbomind/utils/logger.h"
#include "src/fastertransformer/utils/memory_utils.h" #include "src/turbomind/utils/memory_utils.h"
#ifndef CUDART_VERSION #ifndef CUDART_VERSION
#error CUDART_VERSION Undefined! #error CUDART_VERSION Undefined!
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
// cublas default workspace size: 32MB. Let me make this as a Gemm property. // cublas default workspace size: 32MB. Let me make this as a Gemm property.
#define WORKSPACE_SIZE 33554432 #define WORKSPACE_SIZE 33554432
namespace fastertransformer { namespace turbomind {
// A wrapper of cublas or cusparse matrix operator. // A wrapper of cublas or cusparse matrix operator.
// - GEMM_OP_N = CUBLAS_OP_N or CUSPARSE_OP_N // - GEMM_OP_N = CUBLAS_OP_N or CUSPARSE_OP_N
...@@ -677,4 +677,4 @@ size_t compressMatrixB(void** output, ...@@ -677,4 +677,4 @@ size_t compressMatrixB(void** output,
/* ************************* End of GEMM utils **************************** */ /* ************************* End of GEMM utils **************************** */
} // end of namespace fastertransformer } // end of namespace turbomind
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include "src/fastertransformer/utils/gemm_test/decoding_gemm_func.h" #include "src/turbomind/utils/gemm_test/decoding_gemm_func.h"
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_decoding_gemm_config(int batch_size, void generate_decoding_gemm_config(int batch_size,
...@@ -401,4 +401,4 @@ size_t calDecodingGemmTestBufSizeInByte(int batch_size, ...@@ -401,4 +401,4 @@ size_t calDecodingGemmTestBufSizeInByte(int batch_size,
return buf_size_in_byte; return buf_size_in_byte;
} }
} // namespace fastertransformer } // namespace turbomind
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#pragma once #pragma once
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_bf16_wrapper.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/gemm_test/gemm_func.h" #include "src/turbomind/utils/gemm_test/gemm_func.h"
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_decoding_gemm_config(int batch_size, void generate_decoding_gemm_config(int batch_size,
...@@ -55,4 +55,4 @@ size_t calDecodingGemmTestBufSizeInByte(int batch_size, ...@@ -55,4 +55,4 @@ size_t calDecodingGemmTestBufSizeInByte(int batch_size,
int vocab_size, int vocab_size,
CublasDataType data_type); CublasDataType data_type);
} // namespace fastertransformer } // namespace turbomind
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include "src/fastertransformer/utils/gemm_test/encoder_gemm_func.h" #include "src/turbomind/utils/gemm_test/encoder_gemm_func.h"
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_encoder_gemm_config( void generate_encoder_gemm_config(
...@@ -560,4 +560,4 @@ template void generate_encoder_gemm_config<__nv_bfloat16>( ...@@ -560,4 +560,4 @@ template void generate_encoder_gemm_config<__nv_bfloat16>(
int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend, int tensor_para_size); int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend, int tensor_para_size);
#endif #endif
} // namespace fastertransformer } // namespace turbomind
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#pragma once #pragma once
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_bf16_wrapper.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/gemm_test/gemm_func.h" #include "src/turbomind/utils/gemm_test/gemm_func.h"
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_encoder_gemm_config(int batch_size, void generate_encoder_gemm_config(int batch_size,
...@@ -42,4 +42,4 @@ void generate_encoder_gemm_config(int batch_size, ...@@ -42,4 +42,4 @@ void generate_encoder_gemm_config(int batch_size,
bool isAppend = true, bool isAppend = true,
int tensor_para_size = 1); int tensor_para_size = 1);
} // namespace fastertransformer } // namespace turbomind
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#error CUDART_VERSION Undefined! #error CUDART_VERSION Undefined!
#endif #endif
namespace fastertransformer { namespace turbomind {
int batch_size_; int batch_size_;
int seq_len_; int seq_len_;
...@@ -1329,4 +1329,4 @@ int generate_encoder_igemm_config( ...@@ -1329,4 +1329,4 @@ int generate_encoder_igemm_config(
return 0; return 0;
} }
} // namespace fastertransformer } // namespace turbomind
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
#pragma once #pragma once
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include <algorithm> #include <algorithm>
#include <cublasLt.h> #include <cublasLt.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
/* CAUTION : must match cublasLtMatmulTile_t */ /* CAUTION : must match cublasLtMatmulTile_t */
const char* const matmulTileName[] = {"UNDEF", "8x8", "8x16", "16x8", "8x32", "16x16", "32x8", const char* const matmulTileName[] = {"UNDEF", "8x8", "8x16", "16x8", "8x32", "16x16", "32x8",
...@@ -77,4 +77,4 @@ int LtBatchIgemmCustomFind(cublasLtHandle_t ltHandle, ...@@ -77,4 +77,4 @@ int LtBatchIgemmCustomFind(cublasLtHandle_t ltHandle,
void matInit(int rows, int cols, int8_t* p, int ld); void matInit(int rows, int cols, int8_t* p, int ld);
} // namespace fastertransformer } // namespace turbomind
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#error CUDART_VERSION Undefined! #error CUDART_VERSION Undefined!
#endif #endif
namespace fastertransformer { namespace turbomind {
// Utility function to print customMatmulPerf_t structure // Utility function to print customMatmulPerf_t structure
int printPerfStructure(int batch_size, int printPerfStructure(int batch_size,
...@@ -986,4 +986,4 @@ size_t calGemmTestBufSizeInByteXlnet( ...@@ -986,4 +986,4 @@ size_t calGemmTestBufSizeInByteXlnet(
return max_size * size_per_ele; return max_size * size_per_ele;
} }
} // namespace fastertransformer } // namespace turbomind
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
#pragma once #pragma once
#include "encoder_igemm_func.h" // TODO(bhsueh) Remove this include #include "encoder_igemm_func.h" // TODO(bhsueh) Remove this include
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <ctime> #include <ctime>
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
// Scale Type Converter // Scale Type Converter
// is_fp16_compute_type is only valid when T = half // is_fp16_compute_type is only valid when T = half
...@@ -98,4 +98,4 @@ int printPerfStructure(int batch_size, ...@@ -98,4 +98,4 @@ int printPerfStructure(int batch_size,
int hasPrint, int hasPrint,
int batch_count = 1); int batch_count = 1);
} // namespace fastertransformer } // namespace turbomind
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include "src/fastertransformer/utils/gemm_test/gpt_gemm_func.h" #include "src/turbomind/utils/gemm_test/gpt_gemm_func.h"
namespace fastertransformer { namespace turbomind {
bool isSparseGemmAvailable(size_t m, size_t n, size_t k) bool isSparseGemmAvailable(size_t m, size_t n, size_t k)
{ {
...@@ -804,4 +804,4 @@ size_t calGptGemmTestBufSizeInByte(int batch_size, ...@@ -804,4 +804,4 @@ size_t calGptGemmTestBufSizeInByte(int batch_size,
return buf_size_in_byte; return buf_size_in_byte;
} }
} // namespace fastertransformer } // namespace turbomind
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#pragma once #pragma once
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_bf16_wrapper.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/gemm_test/gemm_func.h" #include "src/turbomind/utils/gemm_test/gemm_func.h"
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_gpt_gemm_config(int batch_size, void generate_gpt_gemm_config(int batch_size,
...@@ -60,4 +60,4 @@ size_t calGptGemmTestBufSizeInByte(int batch_size, ...@@ -60,4 +60,4 @@ size_t calGptGemmTestBufSizeInByte(int batch_size,
int tensor_para_size, int tensor_para_size,
CublasDataType data_type); CublasDataType data_type);
} // namespace fastertransformer } // namespace turbomind
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
* limitations under the License. * limitations under the License.
*/ */
#include "src/fastertransformer/utils/gemm_test/swin_gemm_func.h" #include "src/turbomind/utils/gemm_test/swin_gemm_func.h"
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_swin_gemm_config( void generate_swin_gemm_config(
...@@ -398,4 +398,4 @@ template void generate_swin_gemm_config<__nv_bfloat16>( ...@@ -398,4 +398,4 @@ template void generate_swin_gemm_config<__nv_bfloat16>(
int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend); int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend);
#endif #endif
} // namespace fastertransformer } // namespace turbomind
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
#pragma once #pragma once
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_bf16_wrapper.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/gemm_test/gemm_func.h" #include "src/turbomind/utils/gemm_test/gemm_func.h"
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
...@@ -31,10 +31,10 @@ ...@@ -31,10 +31,10 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
template<typename T> template<typename T>
void generate_swin_gemm_config( void generate_swin_gemm_config(
int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend = true); int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend = true);
} // namespace fastertransformer } // namespace turbomind
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include "swin_igemm_func.h" #include "swin_igemm_func.h"
namespace fastertransformer { namespace turbomind {
static const char* showStatus(cublasStatus_t error) static const char* showStatus(cublasStatus_t error)
{ {
...@@ -278,4 +278,4 @@ int generate_swin_igemm_config( ...@@ -278,4 +278,4 @@ int generate_swin_igemm_config(
return 0; return 0;
} }
} // namespace fastertransformer } // namespace turbomind
...@@ -16,9 +16,9 @@ ...@@ -16,9 +16,9 @@
#pragma once #pragma once
#include "src/fastertransformer/utils/cublasAlgoMap.h" #include "src/turbomind/utils/cublasAlgoMap.h"
#include "src/fastertransformer/utils/cuda_utils.h" #include "src/turbomind/utils/cuda_utils.h"
#include "src/fastertransformer/utils/gemm_test/encoder_igemm_func.h" #include "src/turbomind/utils/gemm_test/encoder_igemm_func.h"
#include <algorithm> #include <algorithm>
#include <cublasLt.h> #include <cublasLt.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#include <unistd.h> #include <unistd.h>
#include <vector> #include <vector>
namespace fastertransformer { namespace turbomind {
/* CAUTION : must match cublasLtMatmulTile_t */ /* CAUTION : must match cublasLtMatmulTile_t */
// const char* const matmulTileName[] = { // const char* const matmulTileName[] = {
...@@ -42,4 +42,4 @@ namespace fastertransformer { ...@@ -42,4 +42,4 @@ namespace fastertransformer {
int generate_swin_igemm_config( int generate_swin_igemm_config(
int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend = true); int batch_size, int seq_len, int head_num, int size_per_head, void* buffer, bool isAppend = true);
} // namespace fastertransformer } // namespace turbomind
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment