Commit 4d897ed1 authored by zhanghj2
Browse files

优化nmz tp1性能

parent 3722ec71
...@@ -683,9 +683,9 @@ mha_fwd_kvcache_mla_fp8( ...@@ -683,9 +683,9 @@ mha_fwd_kvcache_mla_fp8(
// auto dprops = at::cuda::getCurrentDeviceProperties(); // auto dprops = at::cuda::getCurrentDeviceProperties();
// bool is_sm90 = dprops->major == 9 && dprops->minor == 0; // bool is_sm90 = dprops->major == 9 && dprops->minor == 0;
// TORCH_CHECK(is_sm90); // TORCH_CHECK(is_sm90);
static std::string FLASH_MLA_ROOT_DIR = execCommand("python -c 'import site; print(site.getsitepackages()[0])'"); // static std::string FLASH_MLA_ROOT_DIR = execCommand("python -c 'import site; print(site.getsitepackages()[0])'");
setenv("FLASH_MLA_ROOT_DIR", (FLASH_MLA_ROOT_DIR + "/flash_mla/asm/").c_str(), 1); // setenv("FLASH_MLA_ROOT_DIR", (FLASH_MLA_ROOT_DIR + "/flash_mla/asm/").c_str(), 1);
// std::cout << FLASH_MLA_ROOT_DIR << "\n"; // std::cout << FLASH_MLA_ROOT_DIR << "\n";
// exit(-1); // exit(-1);
at::Tensor vcache = vcache_.has_value() ? vcache_.value() : kcache; at::Tensor vcache = vcache_.has_value() ? vcache_.value() : kcache;
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment