Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
4d42a781
Unverified
Commit
4d42a781
authored
Jun 28, 2023
by
Li Zhang
Committed by
GitHub
Jun 28, 2023
Browse files
fix-gemm-tuning (#24)
parent
e357c71f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
9 deletions
+14
-9
examples/cpp/llama/generate_gemm_config.py
examples/cpp/llama/generate_gemm_config.py
+5
-5
src/fastertransformer/utils/gemm_test/gemm_func.cc
src/fastertransformer/utils/gemm_test/gemm_func.cc
+2
-2
src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc
src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc
+7
-2
No files found.
examples/cpp/llama/generate_gemm_config.py
View file @
4d42a781
...
...
@@ -4,15 +4,15 @@ import subprocess
import
fire
def
main
(
head_num
:
int
=
80
,
def
main
(
head_num
:
int
=
32
,
size_per_head
:
int
=
128
,
vocab_size
:
int
=
65632
,
inter_size
:
int
=
27392
,
tensor_para_size
:
int
=
8
,
vocab_size
:
int
=
32000
,
inter_size
:
int
=
11008
,
tensor_para_size
:
int
=
1
,
max_batch_size
:
int
=
64
):
for
bsz
in
range
(
1
,
max_batch_size
+
1
):
subprocess
.
call
(
f
'bin/
gpt
_gemm
{
bsz
}
1 1
{
head_num
}
{
size_per_head
}
{
inter_size
}
{
vocab_size
}
1
{
tensor_para_size
}
{
0
if
bsz
==
1
else
1
}
'
,
f
'bin/
llama
_gemm
{
bsz
}
1 1
{
head_num
}
{
size_per_head
}
{
inter_size
}
{
vocab_size
}
1
{
tensor_para_size
}
{
0
if
bsz
==
1
else
1
}
'
,
shell
=
True
)
...
...
src/fastertransformer/utils/gemm_test/gemm_func.cc
View file @
4d42a781
...
...
@@ -270,10 +270,10 @@ int LtHgemmCustomFind(cublasLtHandle_t ltHandle,
// Let's try a fixed number of combinations
int
AlgoCount
=
0
;
int
AlgoCountRestrict
=
0
;
// workspace == 0
int
maxNumTraversal
=
50
;
// max number of traversal
const
int
maxNumTraversal
=
50
;
// max number of traversal
cublasLtMatmulAlgo_t
algos
[
AlgoCombinations
];
// 0 <= workspace <= 32MB
cublasLtMatmulAlgo_t
algosRestrict
[
AlgoCombinations
];
// workspace == 0
int
kernelRepeats
=
100
;
// number of times the CUDA kernels will be run back to back
const
int
kernelRepeats
=
100
;
// number of times the CUDA kernels will be run back to back
int
nbAlgoIds
=
0
;
// Number of algorithms actually returned by
// cublasLtMatmulAlgoGetIds function.
#define ALGO_IDS 100 // Number of algorithms requested.
...
...
src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc
View file @
4d42a781
...
...
@@ -39,6 +39,7 @@ void generate_gpt_gemm_config(int batch_size,
void
*
cublas_workspace
;
void
*
buffer
;
int
workSpaceSize
;
#if 0
bool workspace_flag = std::is_same<T, half>::value;
#ifdef ENABLE_FP8
workspace_flag = workspace_flag || std::is_same<T, __nv_fp8_e4m3>::value;
...
...
@@ -46,6 +47,9 @@ void generate_gpt_gemm_config(int batch_size,
#if ENABLE_BF16
workspace_flag = workspace_flag || std::is_same<T, __nv_bfloat16>::value;
#endif
#endif
// algorithms with workspace perform worse than evaluated
const
bool
workspace_flag
=
0
;
if
(
workspace_flag
)
{
// cublas_workspace_ should be the start pointer of cudaMalloc()
// to ensure 16B alignment
...
...
@@ -310,7 +314,8 @@ void generate_gpt_gemm_config(int batch_size,
}
for
(
int
i
=
0
;
i
<
gemm_num
;
++
i
)
{
if
(
i
<=
5
)
{
// tuning of context gemm and logits gemm is not working yet
if
(
i
<=
5
||
i
==
10
)
{
continue
;
}
int
seq_len
=
i
<=
5
?
max_input_len
:
1
;
...
...
@@ -445,7 +450,7 @@ void generate_gpt_gemm_config(int batch_size,
if
((
data_type
!=
FLOAT_DATATYPE
&&
i
!=
1
&&
i
!=
2
&&
i
!=
10
)
||
data_type
==
FP8_DATATYPE
)
{
printf
(
"***cublasLt Gemm Testing Beign***
\n
"
);
// Let's try a fixed number of combinations
int
ALGO_COMBINATIONS
=
5
000
;
int
ALGO_COMBINATIONS
=
10
000
;
customMatmulPerf_t
perfResults
[
ALGO_COMBINATIONS
];
// for gpt, computeType & scaleType should be FP32
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment