You need to sign in or sign up before continuing.
Commit 617e86ea authored by gaoqiong's avatar gaoqiong
Browse files

修改awq相关printf提示,在cmakelist中添加ck so路径

parent d26f4c73
...@@ -381,7 +381,7 @@ class TurboMind: ...@@ -381,7 +381,7 @@ class TurboMind:
self.config = cfg self.config = cfg
self.model_name = cfg.model_name self.model_name = cfg.model_name
self.data_type = cfg.weight_type self.data_type = cfg.weight_type
print("from_workspace_cfg:",cfg) #print("from_workspace_cfg:",cfg)
# create model # create model
logger.warning(f'model_config:\n\n{cfg.toini()}') logger.warning(f'model_config:\n\n{cfg.toini()}')
......
...@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") ...@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON) #set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) #set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_directories(Llama PUBLIC ../../../../3rdparty/)
target_link_libraries(Llama PUBLIC cudart target_link_libraries(Llama PUBLIC cudart
gemm_s4_f16 gemm_s4_f16
cublasMMWrapper cublasMMWrapper
......
...@@ -165,11 +165,6 @@ private: ...@@ -165,11 +165,6 @@ private:
} }
} }
if(cublas_wrapper_->m_dump_switch==2)
{
std::cout<<" m: "<<batch_size<<" n: "<<weight.output_dims<<" k: "<<weight.input_dims<<std::endl;
PrintScale<T>(stream_,output_data,36,0,0,0);
}
sync_check_cuda_error(); sync_check_cuda_error();
} }
else { else {
...@@ -249,10 +244,6 @@ private: ...@@ -249,10 +244,6 @@ private:
else if(weight.w4_weight_layout==2) //TN 模式padding ck else if(weight.w4_weight_layout==2) //TN 模式padding ck
{ {
//检查ck workspace 的空间是否足够 //检查ck workspace 的空间是否足够
if(batch_size*weight.output_dims>M_max*N_max)
{
FT_CHECK_WITH_INFO(0, "error! ck workspace is not enough");
}
if(weight.input_dims%4096==0) if(weight.input_dims%4096==0)
{ {
...@@ -265,11 +256,6 @@ private: ...@@ -265,11 +256,6 @@ private:
} }
} }
addFusedSiluActivation(stream_,output_data,output_tmp,batch_size,weight.output_dims,1); addFusedSiluActivation(stream_,output_data,output_tmp,batch_size,weight.output_dims,1);
if(cublas_wrapper_->m_dump_switch==2)
{
std::cout<<" m: "<<batch_size<<" n: "<<weight.output_dims<<" k: "<<weight.input_dims<<std::endl;
PrintScale<T>(stream_,output_data,36,0,0,0);
}
sync_check_cuda_error(); sync_check_cuda_error();
} }
else { else {
......
...@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num, ...@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num,
std::string str_w4_weight_layout = std::to_string(w4_weight_layout); std::string str_w4_weight_layout = std::to_string(w4_weight_layout);
const char* env_value = str_w4_weight_layout.c_str(); const char* env_value = str_w4_weight_layout.c_str();
setenv(env_name,env_value , 1); setenv(env_name,env_value , 1);
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout); //printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout);
} }
else else
{ {
std::string str_w4_weight_layout = std::to_string(-1); std::string str_w4_weight_layout = std::to_string(-1);
const char* env_value = str_w4_weight_layout.c_str(); const char* env_value = str_w4_weight_layout.c_str();
setenv(env_name,env_value , 1); setenv(env_name,env_value , 1);
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout); //printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",-1);
} }
mallocWeights(); mallocWeights();
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment