Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
617e86ea
You need to sign in or sign up before continuing.
Commit
617e86ea
authored
May 28, 2024
by
gaoqiong
Browse files
修改awq相关printf提示,在cmakelist中添加ck so路径
parent
d26f4c73
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
4 additions
and
17 deletions
+4
-17
lmdeploy/turbomind/turbomind.py
lmdeploy/turbomind/turbomind.py
+1
-1
src/turbomind/models/llama/CMakeLists.txt
src/turbomind/models/llama/CMakeLists.txt
+1
-0
src/turbomind/models/llama/LlamaLinear.h
src/turbomind/models/llama/LlamaLinear.h
+0
-14
src/turbomind/models/llama/LlamaWeight.cc
src/turbomind/models/llama/LlamaWeight.cc
+2
-2
No files found.
lmdeploy/turbomind/turbomind.py
View file @
617e86ea
...
@@ -381,7 +381,7 @@ class TurboMind:
...
@@ -381,7 +381,7 @@ class TurboMind:
self
.
config
=
cfg
self
.
config
=
cfg
self
.
model_name
=
cfg
.
model_name
self
.
model_name
=
cfg
.
model_name
self
.
data_type
=
cfg
.
weight_type
self
.
data_type
=
cfg
.
weight_type
print
(
"from_workspace_cfg:"
,
cfg
)
#
print("from_workspace_cfg:",cfg)
# create model
# create model
logger
.
warning
(
f
'model_config:
\n\n
{
cfg
.
toini
()
}
'
)
logger
.
warning
(
f
'model_config:
\n\n
{
cfg
.
toini
()
}
'
)
...
...
src/turbomind/models/llama/CMakeLists.txt
View file @
617e86ea
...
@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
...
@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set
(
CMAKE_CUDA_FLAGS
"
${
CMAKE_CUDA_FLAGS
}
-fPIC"
)
set
(
CMAKE_CUDA_FLAGS
"
${
CMAKE_CUDA_FLAGS
}
-fPIC"
)
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_directories
(
Llama PUBLIC ../../../../3rdparty/
)
target_link_libraries
(
Llama PUBLIC cudart
target_link_libraries
(
Llama PUBLIC cudart
gemm_s4_f16
gemm_s4_f16
cublasMMWrapper
cublasMMWrapper
...
...
src/turbomind/models/llama/LlamaLinear.h
View file @
617e86ea
...
@@ -165,11 +165,6 @@ private:
...
@@ -165,11 +165,6 @@ private:
}
}
}
}
if
(
cublas_wrapper_
->
m_dump_switch
==
2
)
{
std
::
cout
<<
" m: "
<<
batch_size
<<
" n: "
<<
weight
.
output_dims
<<
" k: "
<<
weight
.
input_dims
<<
std
::
endl
;
PrintScale
<
T
>
(
stream_
,
output_data
,
36
,
0
,
0
,
0
);
}
sync_check_cuda_error
();
sync_check_cuda_error
();
}
}
else
{
else
{
...
@@ -249,10 +244,6 @@ private:
...
@@ -249,10 +244,6 @@ private:
else
if
(
weight
.
w4_weight_layout
==
2
)
//TN 模式padding ck
else
if
(
weight
.
w4_weight_layout
==
2
)
//TN 模式padding ck
{
{
//检查ck workspace 的空间是否足够
//检查ck workspace 的空间是否足够
if
(
batch_size
*
weight
.
output_dims
>
M_max
*
N_max
)
{
FT_CHECK_WITH_INFO
(
0
,
"error! ck workspace is not enough"
);
}
if
(
weight
.
input_dims
%
4096
==
0
)
if
(
weight
.
input_dims
%
4096
==
0
)
{
{
...
@@ -265,11 +256,6 @@ private:
...
@@ -265,11 +256,6 @@ private:
}
}
}
}
addFusedSiluActivation
(
stream_
,
output_data
,
output_tmp
,
batch_size
,
weight
.
output_dims
,
1
);
addFusedSiluActivation
(
stream_
,
output_data
,
output_tmp
,
batch_size
,
weight
.
output_dims
,
1
);
if
(
cublas_wrapper_
->
m_dump_switch
==
2
)
{
std
::
cout
<<
" m: "
<<
batch_size
<<
" n: "
<<
weight
.
output_dims
<<
" k: "
<<
weight
.
input_dims
<<
std
::
endl
;
PrintScale
<
T
>
(
stream_
,
output_data
,
36
,
0
,
0
,
0
);
}
sync_check_cuda_error
();
sync_check_cuda_error
();
}
}
else
{
else
{
...
...
src/turbomind/models/llama/LlamaWeight.cc
View file @
617e86ea
...
@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num,
...
@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num,
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
w4_weight_layout
);
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
w4_weight_layout
);
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
setenv
(
env_name
,
env_value
,
1
);
setenv
(
env_name
,
env_value
,
1
);
printf
(
"set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d
\n
"
,
w4_weight_layout
);
//
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout);
}
}
else
else
{
{
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
-
1
);
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
-
1
);
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
setenv
(
env_name
,
env_value
,
1
);
setenv
(
env_name
,
env_value
,
1
);
printf
(
"set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d
\n
"
,
w4_weight_layout
);
//
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",
-1
);
}
}
mallocWeights
();
mallocWeights
();
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment