Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
5ea40abf
Unverified
Commit
5ea40abf
authored
Jul 04, 2023
by
AllentDan
Committed by
GitHub
Jul 04, 2023
Browse files
use format-11.1 (#38)
* format-11.1 * md-link-config
parent
9bbd39b7
Changes
35
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
54 additions
and
37 deletions
+54
-37
src/turbomind/utils/IA3.h
src/turbomind/utils/IA3.h
+2
-1
src/turbomind/utils/Tensor.cc
src/turbomind/utils/Tensor.cc
+5
-5
src/turbomind/utils/Tensor.h
src/turbomind/utils/Tensor.h
+4
-2
src/turbomind/utils/activation_types.h
src/turbomind/utils/activation_types.h
+2
-1
src/turbomind/utils/allocator.h
src/turbomind/utils/allocator.h
+4
-2
src/turbomind/utils/cublasFP8MMWrapper.h
src/turbomind/utils/cublasFP8MMWrapper.h
+2
-2
src/turbomind/utils/cuda_fp8_utils.h
src/turbomind/utils/cuda_fp8_utils.h
+2
-1
src/turbomind/utils/cuda_utils.h
src/turbomind/utils/cuda_utils.h
+7
-4
src/turbomind/utils/gemm.h
src/turbomind/utils/gemm.h
+2
-1
src/turbomind/utils/gemm_test/gpt_gemm_func.cc
src/turbomind/utils/gemm_test/gpt_gemm_func.cc
+2
-1
src/turbomind/utils/gemm_test/t5_gemm_func.cc
src/turbomind/utils/gemm_test/t5_gemm_func.cc
+2
-1
src/turbomind/utils/gemm_test/xlnet_gemm_func.cc
src/turbomind/utils/gemm_test/xlnet_gemm_func.cc
+9
-9
src/turbomind/utils/logger.h
src/turbomind/utils/logger.h
+5
-4
src/turbomind/utils/mpi_utils.h
src/turbomind/utils/mpi_utils.h
+4
-2
src/turbomind/utils/prompt_learning.h
src/turbomind/utils/prompt_learning.h
+2
-1
No files found.
src/turbomind/utils/IA3.h
View file @
5ea40abf
...
@@ -18,7 +18,8 @@
...
@@ -18,7 +18,8 @@
namespace
turbomind
{
namespace
turbomind
{
enum
IA3_config
{
enum
IA3_config
{
KEY_ADAPTER
=
1
<<
0
,
KEY_ADAPTER
=
1
<<
0
,
VALUE_ADAPTER
=
1
<<
1
,
VALUE_ADAPTER
=
1
<<
1
,
MLP_ADAPTER
=
1
<<
2
,
MLP_ADAPTER
=
1
<<
2
,
...
...
src/turbomind/utils/Tensor.cc
View file @
5ea40abf
...
@@ -59,9 +59,9 @@ Tensor::Tensor(const MemoryType _where,
...
@@ -59,9 +59,9 @@ Tensor::Tensor(const MemoryType _where,
void
Tensor
::
parseNpyIntro
(
FILE
*&
f_ptr
,
uint32_t
&
header_len
,
uint32_t
&
start_data
)
void
Tensor
::
parseNpyIntro
(
FILE
*&
f_ptr
,
uint32_t
&
header_len
,
uint32_t
&
start_data
)
{
{
const
char
magic
[]
=
"
\x93
"
const
char
magic
[]
=
"
\x93
"
"NUMPY"
;
"NUMPY"
;
char
magic_test
[
sizeof
(
magic
)]
=
"
\0
"
;
char
magic_test
[
sizeof
(
magic
)]
=
"
\0
"
;
size_t
n_elems
=
fread
((
void
*
)
magic_test
,
sizeof
(
char
),
sizeof
(
magic
)
-
1
,
f_ptr
);
size_t
n_elems
=
fread
((
void
*
)
magic_test
,
sizeof
(
char
),
sizeof
(
magic
)
-
1
,
f_ptr
);
if
(
n_elems
!=
sizeof
(
magic
)
-
1
||
std
::
string
(
magic
)
!=
std
::
string
(
magic_test
))
{
if
(
n_elems
!=
sizeof
(
magic
)
-
1
||
std
::
string
(
magic
)
!=
std
::
string
(
magic_test
))
{
...
@@ -292,8 +292,8 @@ void Tensor::saveNpy(const std::string& filename) const
...
@@ -292,8 +292,8 @@ void Tensor::saveNpy(const std::string& filename) const
cudaMemcpy
(
cpu_data
,
data
,
tensor_size
*
Tensor
::
getTypeSize
(
type
),
cudaMemcpyDeviceToHost
);
cudaMemcpy
(
cpu_data
,
data
,
tensor_size
*
Tensor
::
getTypeSize
(
type
),
cudaMemcpyDeviceToHost
);
}
}
const
char
magic
[]
=
"
\x93
"
const
char
magic
[]
=
"
\x93
"
"NUMPY"
;
"NUMPY"
;
const
uint8_t
npy_major
=
1
;
const
uint8_t
npy_major
=
1
;
const
uint8_t
npy_minor
=
0
;
const
uint8_t
npy_minor
=
0
;
...
...
src/turbomind/utils/Tensor.h
View file @
5ea40abf
...
@@ -35,7 +35,8 @@
...
@@ -35,7 +35,8 @@
namespace
turbomind
{
namespace
turbomind
{
typedef
enum
datatype_enum
{
typedef
enum
datatype_enum
{
TYPE_INVALID
,
TYPE_INVALID
,
TYPE_BOOL
,
TYPE_BOOL
,
TYPE_UINT8
,
TYPE_UINT8
,
...
@@ -98,7 +99,8 @@ DataType getTensorType()
...
@@ -98,7 +99,8 @@ DataType getTensorType()
}
}
}
}
typedef
enum
memorytype_enum
{
typedef
enum
memorytype_enum
{
MEMORY_CPU
,
MEMORY_CPU
,
MEMORY_CPU_PINNED
,
MEMORY_CPU_PINNED
,
MEMORY_GPU
MEMORY_GPU
...
...
src/turbomind/utils/activation_types.h
View file @
5ea40abf
...
@@ -20,7 +20,8 @@
...
@@ -20,7 +20,8 @@
namespace
turbomind
{
namespace
turbomind
{
enum
class
ActivationType
{
enum
class
ActivationType
{
Gelu
,
Gelu
,
Relu
,
Relu
,
Silu
,
Silu
,
...
...
src/turbomind/utils/allocator.h
View file @
5ea40abf
...
@@ -49,13 +49,15 @@
...
@@ -49,13 +49,15 @@
namespace
turbomind
{
namespace
turbomind
{
enum
class
AllocatorType
{
enum
class
AllocatorType
{
CUDA
,
CUDA
,
TF
,
TF
,
TH
TH
};
};
enum
class
ReallocType
{
enum
class
ReallocType
{
INCREASE
,
INCREASE
,
REUSE
,
REUSE
,
DECREASE
,
DECREASE
,
...
...
src/turbomind/utils/cublasFP8MMWrapper.h
View file @
5ea40abf
...
@@ -169,9 +169,9 @@ public:
...
@@ -169,9 +169,9 @@ public:
cudaStream_t
stream
);
cudaStream_t
stream
);
private:
private:
int
version_major_
,
version_minor_
,
version_patch_
;
int
version_major_
,
version_minor_
,
version_patch_
;
turbomind
::
qgmma1x1Launcher
qgmmaLauncher
;
turbomind
::
qgmma1x1Launcher
qgmmaLauncher
;
void
*
cublas_workspace_qgemm_
=
nullptr
;
void
*
cublas_workspace_qgemm_
=
nullptr
;
};
};
}
// namespace turbomind
}
// namespace turbomind
src/turbomind/utils/cuda_fp8_utils.h
View file @
5ea40abf
...
@@ -35,7 +35,8 @@ namespace turbomind {
...
@@ -35,7 +35,8 @@ namespace turbomind {
const
float
FP8_E4M3_MAX
=
480.0
f
;
const
float
FP8_E4M3_MAX
=
480.0
f
;
enum
QUANTIZE_MODE
{
enum
QUANTIZE_MODE
{
PER_CHANNEL
,
PER_CHANNEL
,
PER_TENSOR
,
PER_TENSOR
,
PER_CHANNEL_WEIGHT_PER_TENSOR_ACT
PER_CHANNEL_WEIGHT_PER_TENSOR_ACT
...
...
src/turbomind/utils/cuda_utils.h
View file @
5ea40abf
...
@@ -46,7 +46,8 @@ half4;
...
@@ -46,7 +46,8 @@ half4;
/* **************************** type definition ***************************** */
/* **************************** type definition ***************************** */
enum
CublasDataType
{
enum
CublasDataType
{
FLOAT_DATATYPE
=
0
,
FLOAT_DATATYPE
=
0
,
HALF_DATATYPE
=
1
,
HALF_DATATYPE
=
1
,
BFLOAT16_DATATYPE
=
2
,
BFLOAT16_DATATYPE
=
2
,
...
@@ -54,7 +55,8 @@ enum CublasDataType {
...
@@ -54,7 +55,8 @@ enum CublasDataType {
FP8_DATATYPE
=
4
FP8_DATATYPE
=
4
};
};
enum
FtCudaDataType
{
enum
FtCudaDataType
{
FP32
=
0
,
FP32
=
0
,
FP16
=
1
,
FP16
=
1
,
BF16
=
2
,
BF16
=
2
,
...
@@ -62,7 +64,8 @@ enum FtCudaDataType {
...
@@ -62,7 +64,8 @@ enum FtCudaDataType {
FP8
=
4
FP8
=
4
};
};
enum
class
OperationType
{
enum
class
OperationType
{
FP32
,
FP32
,
FP16
,
FP16
,
BF16
,
BF16
,
...
@@ -212,7 +215,7 @@ inline void myAssert(bool result, const char* const file, int const line, std::s
...
@@ -212,7 +215,7 @@ inline void myAssert(bool result, const char* const file, int const line, std::s
do { \
do { \
bool is_valid_val = (val); \
bool is_valid_val = (val); \
if (!is_valid_val) { \
if (!is_valid_val) { \
turbomind::myAssert(is_valid_val, __FILE__, __LINE__, (info)); \
turbomind::myAssert(is_valid_val, __FILE__, __LINE__, (info));
\
} \
} \
} while (0)
} while (0)
...
...
src/turbomind/utils/gemm.h
View file @
5ea40abf
...
@@ -47,7 +47,8 @@ namespace turbomind {
...
@@ -47,7 +47,8 @@ namespace turbomind {
// A wrapper of cublas or cusparse matrix operator.
// A wrapper of cublas or cusparse matrix operator.
// - GEMM_OP_N = CUBLAS_OP_N or CUSPARSE_OP_N
// - GEMM_OP_N = CUBLAS_OP_N or CUSPARSE_OP_N
// - GEMM_OP_T = CUBLAS_OP_T or CUSPARSE_OP_T
// - GEMM_OP_T = CUBLAS_OP_T or CUSPARSE_OP_T
enum
GemmOp
{
enum
GemmOp
{
GEMM_OP_N
,
GEMM_OP_N
,
GEMM_OP_T
GEMM_OP_T
};
};
...
...
src/turbomind/utils/gemm_test/gpt_gemm_func.cc
View file @
5ea40abf
...
@@ -639,7 +639,8 @@ void generate_gpt_gemm_config(int batch_size,
...
@@ -639,7 +639,8 @@ void generate_gpt_gemm_config(int batch_size,
cudaStream_t
streams
[
1
]
=
{
stream
};
cudaStream_t
streams
[
1
]
=
{
stream
};
CHECK_CUSPARSE
(
cusparseLtStructuredDescriptorInit
(
CHECK_CUSPARSE
(
cusparseLtStructuredDescriptorInit
(
&
handle
,
&
mat_A
,
m
,
k
,
m
,
alignment
,
CUDA_R_16F
,
order
,
CUSPARSELT_SPARSITY_50_PERCENT
))
&
handle
,
&
mat_A
,
m
,
k
,
m
,
alignment
,
CUDA_R_16F
,
order
,
CUSPARSELT_SPARSITY_50_PERCENT
))
CHECK_CUSPARSE
(
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_B
,
k
,
n
,
k
,
alignment
,
CUDA_R_16F
,
order
))
CHECK_CUSPARSE
(
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_B
,
k
,
n
,
k
,
alignment
,
CUDA_R_16F
,
order
))
CHECK_CUSPARSE
(
CHECK_CUSPARSE
(
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_C
,
m
,
n
,
m
,
alignment
,
CUDA_R_16F
,
order
))
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_C
,
m
,
n
,
m
,
alignment
,
CUDA_R_16F
,
order
))
cudaDeviceSynchronize
();
cudaDeviceSynchronize
();
...
...
src/turbomind/utils/gemm_test/t5_gemm_func.cc
View file @
5ea40abf
...
@@ -638,7 +638,8 @@ void generate_t5_gemm_config(int batch_size,
...
@@ -638,7 +638,8 @@ void generate_t5_gemm_config(int batch_size,
cudaStream_t
streams
[
1
]
=
{
stream
};
cudaStream_t
streams
[
1
]
=
{
stream
};
CHECK_CUSPARSE
(
cusparseLtStructuredDescriptorInit
(
CHECK_CUSPARSE
(
cusparseLtStructuredDescriptorInit
(
&
handle
,
&
mat_A
,
m
,
k
,
m
,
alignment
,
CUDA_R_16F
,
order
,
CUSPARSELT_SPARSITY_50_PERCENT
))
&
handle
,
&
mat_A
,
m
,
k
,
m
,
alignment
,
CUDA_R_16F
,
order
,
CUSPARSELT_SPARSITY_50_PERCENT
))
CHECK_CUSPARSE
(
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_B
,
k
,
n
,
k
,
alignment
,
CUDA_R_16F
,
order
))
CHECK_CUSPARSE
(
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_B
,
k
,
n
,
k
,
alignment
,
CUDA_R_16F
,
order
))
CHECK_CUSPARSE
(
CHECK_CUSPARSE
(
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_C
,
m
,
n
,
m
,
alignment
,
CUDA_R_16F
,
order
))
cusparseLtDenseDescriptorInit
(
&
handle
,
&
mat_C
,
m
,
n
,
m
,
alignment
,
CUDA_R_16F
,
order
))
cudaDeviceSynchronize
();
cudaDeviceSynchronize
();
...
...
src/turbomind/utils/gemm_test/xlnet_gemm_func.cc
View file @
5ea40abf
...
@@ -91,15 +91,15 @@ void generate_xlnet_gemm_config(int batch_size,
...
@@ -91,15 +91,15 @@ void generate_xlnet_gemm_config(int batch_size,
int
ldc
[
gemm_num
];
int
ldc
[
gemm_num
];
int
strideC
[
gemm_num
];
int
strideC
[
gemm_num
];
cublasOperation_t
transa
[
gemm_num
]
=
{
CUBLAS_OP_N
,
cublasOperation_t
transa
[
gemm_num
]
=
{
CUBLAS_OP_N
,
CUBLAS_OP_N
,
CUBLAS_OP_N
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_N
,
CUBLAS_OP_N
,
CUBLAS_OP_T
,
CUBLAS_OP_T
,
CUBLAS_OP_N
,
CUBLAS_OP_N
,
CUBLAS_OP_N
};
CUBLAS_OP_N
};
cublasOperation_t
transb
[
gemm_num
]
=
{
CUBLAS_OP_N
};
cublasOperation_t
transb
[
gemm_num
]
=
{
CUBLAS_OP_N
};
int
batchCount
[
gemm_num
]
=
{
1
};
int
batchCount
[
gemm_num
]
=
{
1
};
char
mess
[
gemm_num
][
256
];
char
mess
[
gemm_num
][
256
];
...
...
src/turbomind/utils/logger.h
View file @
5ea40abf
...
@@ -27,7 +27,8 @@ namespace turbomind {
...
@@ -27,7 +27,8 @@ namespace turbomind {
class
Logger
{
class
Logger
{
public:
public:
enum
Level
{
enum
Level
{
TRACE
=
0
,
TRACE
=
0
,
DEBUG
=
10
,
DEBUG
=
10
,
INFO
=
20
,
INFO
=
20
,
...
@@ -40,7 +41,7 @@ public:
...
@@ -40,7 +41,7 @@ public:
thread_local
Logger
instance
;
thread_local
Logger
instance
;
return
instance
;
return
instance
;
}
}
Logger
(
Logger
const
&
)
=
delete
;
Logger
(
Logger
const
&
)
=
delete
;
void
operator
=
(
Logger
const
&
)
=
delete
;
void
operator
=
(
Logger
const
&
)
=
delete
;
template
<
typename
...
Args
>
template
<
typename
...
Args
>
...
@@ -108,8 +109,8 @@ private:
...
@@ -108,8 +109,8 @@ private:
#define TM_LOG(level, ...) \
#define TM_LOG(level, ...) \
do { \
do { \
if (turbomind::Logger::getLogger().getLevel() <= level) { \
if (turbomind::Logger::getLogger().getLevel() <= level) {
\
turbomind::Logger::getLogger().log(level, __VA_ARGS__); \
turbomind::Logger::getLogger().log(level, __VA_ARGS__);
\
} \
} \
} while (0)
} while (0)
...
...
src/turbomind/utils/mpi_utils.h
View file @
5ea40abf
...
@@ -43,7 +43,8 @@ namespace turbomind {
...
@@ -43,7 +43,8 @@ namespace turbomind {
namespace
mpi
{
namespace
mpi
{
// A wrapper of MPI data type. MPI_TYPE_{data_type}
// A wrapper of MPI data type. MPI_TYPE_{data_type}
enum
MpiType
{
enum
MpiType
{
MPI_TYPE_BYTE
,
MPI_TYPE_BYTE
,
MPI_TYPE_CHAR
,
MPI_TYPE_CHAR
,
MPI_TYPE_INT
,
MPI_TYPE_INT
,
...
@@ -53,7 +54,8 @@ enum MpiType {
...
@@ -53,7 +54,8 @@ enum MpiType {
};
};
// A wrapper of the level of MPI thread support
// A wrapper of the level of MPI thread support
enum
MpiThreadSupport
{
enum
MpiThreadSupport
{
THREAD_SINGLE
,
THREAD_SINGLE
,
THREAD_FUNNELED
,
THREAD_FUNNELED
,
THREAD_SERIALIZED
,
THREAD_SERIALIZED
,
...
...
src/turbomind/utils/prompt_learning.h
View file @
5ea40abf
...
@@ -19,7 +19,8 @@
...
@@ -19,7 +19,8 @@
namespace
turbomind
{
namespace
turbomind
{
enum
class
PromptLearningType
{
enum
class
PromptLearningType
{
no_prompt
,
no_prompt
,
soft_prompt
,
soft_prompt
,
prefix_prompt
,
prefix_prompt
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment