Unverified Commit 5ea40abf authored by AllentDan's avatar AllentDan Committed by GitHub
Browse files

use format-11.1 (#38)

* format-11.1

* md-link-config
parent 9bbd39b7
...@@ -18,7 +18,8 @@ ...@@ -18,7 +18,8 @@
namespace turbomind { namespace turbomind {
enum IA3_config { enum IA3_config
{
KEY_ADAPTER = 1 << 0, KEY_ADAPTER = 1 << 0,
VALUE_ADAPTER = 1 << 1, VALUE_ADAPTER = 1 << 1,
MLP_ADAPTER = 1 << 2, MLP_ADAPTER = 1 << 2,
......
...@@ -59,9 +59,9 @@ Tensor::Tensor(const MemoryType _where, ...@@ -59,9 +59,9 @@ Tensor::Tensor(const MemoryType _where,
void Tensor::parseNpyIntro(FILE*& f_ptr, uint32_t& header_len, uint32_t& start_data) void Tensor::parseNpyIntro(FILE*& f_ptr, uint32_t& header_len, uint32_t& start_data)
{ {
const char magic[] = "\x93" const char magic[] = "\x93"
"NUMPY"; "NUMPY";
char magic_test[sizeof(magic)] = "\0"; char magic_test[sizeof(magic)] = "\0";
size_t n_elems = fread((void*)magic_test, sizeof(char), sizeof(magic) - 1, f_ptr); size_t n_elems = fread((void*)magic_test, sizeof(char), sizeof(magic) - 1, f_ptr);
if (n_elems != sizeof(magic) - 1 || std::string(magic) != std::string(magic_test)) { if (n_elems != sizeof(magic) - 1 || std::string(magic) != std::string(magic_test)) {
...@@ -292,8 +292,8 @@ void Tensor::saveNpy(const std::string& filename) const ...@@ -292,8 +292,8 @@ void Tensor::saveNpy(const std::string& filename) const
cudaMemcpy(cpu_data, data, tensor_size * Tensor::getTypeSize(type), cudaMemcpyDeviceToHost); cudaMemcpy(cpu_data, data, tensor_size * Tensor::getTypeSize(type), cudaMemcpyDeviceToHost);
} }
const char magic[] = "\x93" const char magic[] = "\x93"
"NUMPY"; "NUMPY";
const uint8_t npy_major = 1; const uint8_t npy_major = 1;
const uint8_t npy_minor = 0; const uint8_t npy_minor = 0;
......
...@@ -35,7 +35,8 @@ ...@@ -35,7 +35,8 @@
namespace turbomind { namespace turbomind {
typedef enum datatype_enum { typedef enum datatype_enum
{
TYPE_INVALID, TYPE_INVALID,
TYPE_BOOL, TYPE_BOOL,
TYPE_UINT8, TYPE_UINT8,
...@@ -98,7 +99,8 @@ DataType getTensorType() ...@@ -98,7 +99,8 @@ DataType getTensorType()
} }
} }
typedef enum memorytype_enum { typedef enum memorytype_enum
{
MEMORY_CPU, MEMORY_CPU,
MEMORY_CPU_PINNED, MEMORY_CPU_PINNED,
MEMORY_GPU MEMORY_GPU
......
...@@ -20,7 +20,8 @@ ...@@ -20,7 +20,8 @@
namespace turbomind { namespace turbomind {
enum class ActivationType { enum class ActivationType
{
Gelu, Gelu,
Relu, Relu,
Silu, Silu,
......
...@@ -49,13 +49,15 @@ ...@@ -49,13 +49,15 @@
namespace turbomind { namespace turbomind {
enum class AllocatorType { enum class AllocatorType
{
CUDA, CUDA,
TF, TF,
TH TH
}; };
enum class ReallocType { enum class ReallocType
{
INCREASE, INCREASE,
REUSE, REUSE,
DECREASE, DECREASE,
......
...@@ -169,9 +169,9 @@ public: ...@@ -169,9 +169,9 @@ public:
cudaStream_t stream); cudaStream_t stream);
private: private:
int version_major_, version_minor_, version_patch_; int version_major_, version_minor_, version_patch_;
turbomind::qgmma1x1Launcher qgmmaLauncher; turbomind::qgmma1x1Launcher qgmmaLauncher;
void* cublas_workspace_qgemm_ = nullptr; void* cublas_workspace_qgemm_ = nullptr;
}; };
} // namespace turbomind } // namespace turbomind
...@@ -35,7 +35,8 @@ namespace turbomind { ...@@ -35,7 +35,8 @@ namespace turbomind {
const float FP8_E4M3_MAX = 480.0f; const float FP8_E4M3_MAX = 480.0f;
enum QUANTIZE_MODE { enum QUANTIZE_MODE
{
PER_CHANNEL, PER_CHANNEL,
PER_TENSOR, PER_TENSOR,
PER_CHANNEL_WEIGHT_PER_TENSOR_ACT PER_CHANNEL_WEIGHT_PER_TENSOR_ACT
......
...@@ -46,7 +46,8 @@ half4; ...@@ -46,7 +46,8 @@ half4;
/* **************************** type definition ***************************** */ /* **************************** type definition ***************************** */
enum CublasDataType { enum CublasDataType
{
FLOAT_DATATYPE = 0, FLOAT_DATATYPE = 0,
HALF_DATATYPE = 1, HALF_DATATYPE = 1,
BFLOAT16_DATATYPE = 2, BFLOAT16_DATATYPE = 2,
...@@ -54,7 +55,8 @@ enum CublasDataType { ...@@ -54,7 +55,8 @@ enum CublasDataType {
FP8_DATATYPE = 4 FP8_DATATYPE = 4
}; };
enum FtCudaDataType { enum FtCudaDataType
{
FP32 = 0, FP32 = 0,
FP16 = 1, FP16 = 1,
BF16 = 2, BF16 = 2,
...@@ -62,7 +64,8 @@ enum FtCudaDataType { ...@@ -62,7 +64,8 @@ enum FtCudaDataType {
FP8 = 4 FP8 = 4
}; };
enum class OperationType { enum class OperationType
{
FP32, FP32,
FP16, FP16,
BF16, BF16,
...@@ -212,7 +215,7 @@ inline void myAssert(bool result, const char* const file, int const line, std::s ...@@ -212,7 +215,7 @@ inline void myAssert(bool result, const char* const file, int const line, std::s
do { \ do { \
bool is_valid_val = (val); \ bool is_valid_val = (val); \
if (!is_valid_val) { \ if (!is_valid_val) { \
turbomind::myAssert(is_valid_val, __FILE__, __LINE__, (info)); \ turbomind::myAssert(is_valid_val, __FILE__, __LINE__, (info)); \
} \ } \
} while (0) } while (0)
......
...@@ -47,7 +47,8 @@ namespace turbomind { ...@@ -47,7 +47,8 @@ namespace turbomind {
// A wrapper of cublas or cusparse matrix operator. // A wrapper of cublas or cusparse matrix operator.
// - GEMM_OP_N = CUBLAS_OP_N or CUSPARSE_OP_N // - GEMM_OP_N = CUBLAS_OP_N or CUSPARSE_OP_N
// - GEMM_OP_T = CUBLAS_OP_T or CUSPARSE_OP_T // - GEMM_OP_T = CUBLAS_OP_T or CUSPARSE_OP_T
enum GemmOp { enum GemmOp
{
GEMM_OP_N, GEMM_OP_N,
GEMM_OP_T GEMM_OP_T
}; };
......
...@@ -639,7 +639,8 @@ void generate_gpt_gemm_config(int batch_size, ...@@ -639,7 +639,8 @@ void generate_gpt_gemm_config(int batch_size,
cudaStream_t streams[1] = {stream}; cudaStream_t streams[1] = {stream};
CHECK_CUSPARSE(cusparseLtStructuredDescriptorInit( CHECK_CUSPARSE(cusparseLtStructuredDescriptorInit(
&handle, &mat_A, m, k, m, alignment, CUDA_R_16F, order, CUSPARSELT_SPARSITY_50_PERCENT)) &handle, &mat_A, m, k, m, alignment, CUDA_R_16F, order, CUSPARSELT_SPARSITY_50_PERCENT))
CHECK_CUSPARSE(cusparseLtDenseDescriptorInit(&handle, &mat_B, k, n, k, alignment, CUDA_R_16F, order)) CHECK_CUSPARSE(
cusparseLtDenseDescriptorInit(&handle, &mat_B, k, n, k, alignment, CUDA_R_16F, order))
CHECK_CUSPARSE( CHECK_CUSPARSE(
cusparseLtDenseDescriptorInit(&handle, &mat_C, m, n, m, alignment, CUDA_R_16F, order)) cusparseLtDenseDescriptorInit(&handle, &mat_C, m, n, m, alignment, CUDA_R_16F, order))
cudaDeviceSynchronize(); cudaDeviceSynchronize();
......
...@@ -638,7 +638,8 @@ void generate_t5_gemm_config(int batch_size, ...@@ -638,7 +638,8 @@ void generate_t5_gemm_config(int batch_size,
cudaStream_t streams[1] = {stream}; cudaStream_t streams[1] = {stream};
CHECK_CUSPARSE(cusparseLtStructuredDescriptorInit( CHECK_CUSPARSE(cusparseLtStructuredDescriptorInit(
&handle, &mat_A, m, k, m, alignment, CUDA_R_16F, order, CUSPARSELT_SPARSITY_50_PERCENT)) &handle, &mat_A, m, k, m, alignment, CUDA_R_16F, order, CUSPARSELT_SPARSITY_50_PERCENT))
CHECK_CUSPARSE(cusparseLtDenseDescriptorInit(&handle, &mat_B, k, n, k, alignment, CUDA_R_16F, order)) CHECK_CUSPARSE(
cusparseLtDenseDescriptorInit(&handle, &mat_B, k, n, k, alignment, CUDA_R_16F, order))
CHECK_CUSPARSE( CHECK_CUSPARSE(
cusparseLtDenseDescriptorInit(&handle, &mat_C, m, n, m, alignment, CUDA_R_16F, order)) cusparseLtDenseDescriptorInit(&handle, &mat_C, m, n, m, alignment, CUDA_R_16F, order))
cudaDeviceSynchronize(); cudaDeviceSynchronize();
......
...@@ -91,15 +91,15 @@ void generate_xlnet_gemm_config(int batch_size, ...@@ -91,15 +91,15 @@ void generate_xlnet_gemm_config(int batch_size,
int ldc[gemm_num]; int ldc[gemm_num];
int strideC[gemm_num]; int strideC[gemm_num];
cublasOperation_t transa[gemm_num] = {CUBLAS_OP_N, cublasOperation_t transa[gemm_num] = {CUBLAS_OP_N,
CUBLAS_OP_N, CUBLAS_OP_N,
CUBLAS_OP_T, CUBLAS_OP_T,
CUBLAS_OP_T, CUBLAS_OP_T,
CUBLAS_OP_T, CUBLAS_OP_T,
CUBLAS_OP_T, CUBLAS_OP_T,
CUBLAS_OP_N, CUBLAS_OP_N,
CUBLAS_OP_T, CUBLAS_OP_T,
CUBLAS_OP_N, CUBLAS_OP_N,
CUBLAS_OP_N}; CUBLAS_OP_N};
cublasOperation_t transb[gemm_num] = {CUBLAS_OP_N}; cublasOperation_t transb[gemm_num] = {CUBLAS_OP_N};
int batchCount[gemm_num] = {1}; int batchCount[gemm_num] = {1};
char mess[gemm_num][256]; char mess[gemm_num][256];
......
...@@ -27,7 +27,8 @@ namespace turbomind { ...@@ -27,7 +27,8 @@ namespace turbomind {
class Logger { class Logger {
public: public:
enum Level { enum Level
{
TRACE = 0, TRACE = 0,
DEBUG = 10, DEBUG = 10,
INFO = 20, INFO = 20,
...@@ -40,7 +41,7 @@ public: ...@@ -40,7 +41,7 @@ public:
thread_local Logger instance; thread_local Logger instance;
return instance; return instance;
} }
Logger(Logger const&) = delete; Logger(Logger const&) = delete;
void operator=(Logger const&) = delete; void operator=(Logger const&) = delete;
template<typename... Args> template<typename... Args>
...@@ -108,8 +109,8 @@ private: ...@@ -108,8 +109,8 @@ private:
#define TM_LOG(level, ...) \ #define TM_LOG(level, ...) \
do { \ do { \
if (turbomind::Logger::getLogger().getLevel() <= level) { \ if (turbomind::Logger::getLogger().getLevel() <= level) { \
turbomind::Logger::getLogger().log(level, __VA_ARGS__); \ turbomind::Logger::getLogger().log(level, __VA_ARGS__); \
} \ } \
} while (0) } while (0)
......
...@@ -43,7 +43,8 @@ namespace turbomind { ...@@ -43,7 +43,8 @@ namespace turbomind {
namespace mpi { namespace mpi {
// A wrapper of MPI data type. MPI_TYPE_{data_type} // A wrapper of MPI data type. MPI_TYPE_{data_type}
enum MpiType { enum MpiType
{
MPI_TYPE_BYTE, MPI_TYPE_BYTE,
MPI_TYPE_CHAR, MPI_TYPE_CHAR,
MPI_TYPE_INT, MPI_TYPE_INT,
...@@ -53,7 +54,8 @@ enum MpiType { ...@@ -53,7 +54,8 @@ enum MpiType {
}; };
// A wrapper of the level of MPI thread support // A wrapper of the level of MPI thread support
enum MpiThreadSupport { enum MpiThreadSupport
{
THREAD_SINGLE, THREAD_SINGLE,
THREAD_FUNNELED, THREAD_FUNNELED,
THREAD_SERIALIZED, THREAD_SERIALIZED,
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
namespace turbomind { namespace turbomind {
enum class PromptLearningType { enum class PromptLearningType
{
no_prompt, no_prompt,
soft_prompt, soft_prompt,
prefix_prompt, prefix_prompt,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment