Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
617e86ea
Commit
617e86ea
authored
May 28, 2024
by
gaoqiong
Browse files
修改awq相关printf提示,在cmakelist中添加ck so路径
parent
d26f4c73
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
4 additions
and
17 deletions
+4
-17
lmdeploy/turbomind/turbomind.py
lmdeploy/turbomind/turbomind.py
+1
-1
src/turbomind/models/llama/CMakeLists.txt
src/turbomind/models/llama/CMakeLists.txt
+1
-0
src/turbomind/models/llama/LlamaLinear.h
src/turbomind/models/llama/LlamaLinear.h
+0
-14
src/turbomind/models/llama/LlamaWeight.cc
src/turbomind/models/llama/LlamaWeight.cc
+2
-2
No files found.
lmdeploy/turbomind/turbomind.py
View file @
617e86ea
...
@@ -381,7 +381,7 @@ class TurboMind:
...
@@ -381,7 +381,7 @@ class TurboMind:
self
.
config
=
cfg
self
.
config
=
cfg
self
.
model_name
=
cfg
.
model_name
self
.
model_name
=
cfg
.
model_name
self
.
data_type
=
cfg
.
weight_type
self
.
data_type
=
cfg
.
weight_type
print
(
"from_workspace_cfg:"
,
cfg
)
#
print("from_workspace_cfg:",cfg)
# create model
# create model
logger
.
warning
(
f
'model_config:
\n\n
{
cfg
.
toini
()
}
'
)
logger
.
warning
(
f
'model_config:
\n\n
{
cfg
.
toini
()
}
'
)
...
...
src/turbomind/models/llama/CMakeLists.txt
View file @
617e86ea
...
@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
...
@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set
(
CMAKE_CUDA_FLAGS
"
${
CMAKE_CUDA_FLAGS
}
-fPIC"
)
set
(
CMAKE_CUDA_FLAGS
"
${
CMAKE_CUDA_FLAGS
}
-fPIC"
)
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_directories
(
Llama PUBLIC ../../../../3rdparty/
)
target_link_libraries
(
Llama PUBLIC cudart
target_link_libraries
(
Llama PUBLIC cudart
gemm_s4_f16
gemm_s4_f16
cublasMMWrapper
cublasMMWrapper
...
...
src/turbomind/models/llama/LlamaLinear.h
View file @
617e86ea
...
@@ -165,11 +165,6 @@ private:
...
@@ -165,11 +165,6 @@ private:
}
}
}
}
if
(
cublas_wrapper_
->
m_dump_switch
==
2
)
{
std
::
cout
<<
" m: "
<<
batch_size
<<
" n: "
<<
weight
.
output_dims
<<
" k: "
<<
weight
.
input_dims
<<
std
::
endl
;
PrintScale
<
T
>
(
stream_
,
output_data
,
36
,
0
,
0
,
0
);
}
sync_check_cuda_error
();
sync_check_cuda_error
();
}
}
else
{
else
{
...
@@ -249,10 +244,6 @@ private:
...
@@ -249,10 +244,6 @@ private:
else
if
(
weight
.
w4_weight_layout
==
2
)
//TN 模式padding ck
else
if
(
weight
.
w4_weight_layout
==
2
)
//TN 模式padding ck
{
{
//检查ck workspace 的空间是否足够
//检查ck workspace 的空间是否足够
if
(
batch_size
*
weight
.
output_dims
>
M_max
*
N_max
)
{
FT_CHECK_WITH_INFO
(
0
,
"error! ck workspace is not enough"
);
}
if
(
weight
.
input_dims
%
4096
==
0
)
if
(
weight
.
input_dims
%
4096
==
0
)
{
{
...
@@ -265,11 +256,6 @@ private:
...
@@ -265,11 +256,6 @@ private:
}
}
}
}
addFusedSiluActivation
(
stream_
,
output_data
,
output_tmp
,
batch_size
,
weight
.
output_dims
,
1
);
addFusedSiluActivation
(
stream_
,
output_data
,
output_tmp
,
batch_size
,
weight
.
output_dims
,
1
);
if
(
cublas_wrapper_
->
m_dump_switch
==
2
)
{
std
::
cout
<<
" m: "
<<
batch_size
<<
" n: "
<<
weight
.
output_dims
<<
" k: "
<<
weight
.
input_dims
<<
std
::
endl
;
PrintScale
<
T
>
(
stream_
,
output_data
,
36
,
0
,
0
,
0
);
}
sync_check_cuda_error
();
sync_check_cuda_error
();
}
}
else
{
else
{
...
...
src/turbomind/models/llama/LlamaWeight.cc
View file @
617e86ea
...
@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num,
...
@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num,
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
w4_weight_layout
);
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
w4_weight_layout
);
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
setenv
(
env_name
,
env_value
,
1
);
setenv
(
env_name
,
env_value
,
1
);
printf
(
"set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d
\n
"
,
w4_weight_layout
);
//
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout);
}
}
else
else
{
{
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
-
1
);
std
::
string
str_w4_weight_layout
=
std
::
to_string
(
-
1
);
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
const
char
*
env_value
=
str_w4_weight_layout
.
c_str
();
setenv
(
env_name
,
env_value
,
1
);
setenv
(
env_name
,
env_value
,
1
);
printf
(
"set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d
\n
"
,
w4_weight_layout
);
//
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",
-1
);
}
}
mallocWeights
();
mallocWeights
();
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment