Commit 617e86ea authored by gaoqiong's avatar gaoqiong
Browse files

修改awq相关printf提示,在cmakelist中添加ck so路径

parent d26f4c73
......@@ -381,7 +381,7 @@ class TurboMind:
self.config = cfg
self.model_name = cfg.model_name
self.data_type = cfg.weight_type
print("from_workspace_cfg:",cfg)
#print("from_workspace_cfg:",cfg)
# create model
logger.warning(f'model_config:\n\n{cfg.toini()}')
......
......@@ -25,6 +25,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
#set_property(TARGET Llama PROPERTY POSITION_INDEPENDENT_CODE ON)
#set_property(TARGET Llama PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_directories(Llama PUBLIC ../../../../3rdparty/)
target_link_libraries(Llama PUBLIC cudart
gemm_s4_f16
cublasMMWrapper
......
......@@ -165,11 +165,6 @@ private:
}
}
if(cublas_wrapper_->m_dump_switch==2)
{
std::cout<<" m: "<<batch_size<<" n: "<<weight.output_dims<<" k: "<<weight.input_dims<<std::endl;
PrintScale<T>(stream_,output_data,36,0,0,0);
}
sync_check_cuda_error();
}
else {
......@@ -249,10 +244,6 @@ private:
else if(weight.w4_weight_layout==2) //TN 模式padding ck
{
//检查ck workspace 的空间是否足够
if(batch_size*weight.output_dims>M_max*N_max)
{
FT_CHECK_WITH_INFO(0, "error! ck workspace is not enough");
}
if(weight.input_dims%4096==0)
{
......@@ -265,11 +256,6 @@ private:
}
}
addFusedSiluActivation(stream_,output_data,output_tmp,batch_size,weight.output_dims,1);
if(cublas_wrapper_->m_dump_switch==2)
{
std::cout<<" m: "<<batch_size<<" n: "<<weight.output_dims<<" k: "<<weight.input_dims<<std::endl;
PrintScale<T>(stream_,output_data,36,0,0,0);
}
sync_check_cuda_error();
}
else {
......
......@@ -69,14 +69,14 @@ LlamaWeight<T>::LlamaWeight(size_t head_num,
std::string str_w4_weight_layout = std::to_string(w4_weight_layout);
const char* env_value = str_w4_weight_layout.c_str();
setenv(env_name,env_value , 1);
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout);
//printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout);
}
else
{
std::string str_w4_weight_layout = std::to_string(-1);
const char* env_value = str_w4_weight_layout.c_str();
setenv(env_name,env_value , 1);
printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",w4_weight_layout);
//printf("set LMDEPLOY_WEIGHTLAYOUT_SWITCH env: %d \n",-1);
}
mallocWeights();
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment