Commit 6897826e authored by zhuwenwen
Browse files

update version and requirements-rocm.txt

parent 5a9db327
......@@ -191,7 +191,7 @@ set(VLLM_EXT_SRC
"csrc/attention/attention_kernels_opt.cu"
"csrc/opt/layernorm_kernels_opt.cu"
"csrc/quantization/squeezellm/quant_cuda_kernel.cu"
"csrc/quantization/gptq/q_gemm.cu"
# "csrc/quantization/gptq/q_gemm.cu"
"csrc/quantization/compressed_tensors/int8_quant_kernels.cu"
# "csrc/quantization/fp8/common.cu"
"csrc/cuda_utils_kernels.cu"
......
......@@ -215,12 +215,12 @@ void dynamic_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
void squeezellm_gemm(torch::Tensor vec, torch::Tensor mat, torch::Tensor mul,
torch::Tensor lookup_table);
torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
torch::Tensor b_gptq_qzeros,
torch::Tensor b_gptq_scales, torch::Tensor b_g_idx,
bool use_exllama, int64_t bit);
// torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
// torch::Tensor b_gptq_qzeros,
// torch::Tensor b_gptq_scales, torch::Tensor b_g_idx,
// bool use_exllama, int64_t bit);
void gptq_shuffle(torch::Tensor q_weight, torch::Tensor q_perm, int64_t bit);
// void gptq_shuffle(torch::Tensor q_weight, torch::Tensor q_perm, int64_t bit);
// void static_scaled_fp8_quant(torch::Tensor& out, torch::Tensor const& input,
// torch::Tensor const& scale);
......
......@@ -295,12 +295,12 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
#endif
// Quantized GEMM for GPTQ.
ops.def("gptq_gemm", &gptq_gemm);
ops.impl("gptq_gemm", torch::kCUDA, &gptq_gemm);
// ops.def("gptq_gemm", &gptq_gemm);
// ops.impl("gptq_gemm", torch::kCUDA, &gptq_gemm);
// Post processing for GPTQ.
ops.def("gptq_shuffle(Tensor! q_weight, Tensor q_perm, int bit) -> ()");
ops.impl("gptq_shuffle", torch::kCUDA, &gptq_shuffle);
// ops.def("gptq_shuffle(Tensor! q_weight, Tensor q_perm, int bit) -> ()");
// ops.impl("gptq_shuffle", torch::kCUDA, &gptq_shuffle);
// Quantized GEMM for SqueezeLLM.
ops.def(
......
......@@ -9,3 +9,9 @@ ray >= 2.10.0
peft
pytest-asyncio
tensorizer>=2.9.0
torch == 2.1.0
triton == 2.1.0
flash_attn == 2.0.4
xformers == 0.0.25
lmslim == 0.1.0
\ No newline at end of file
......@@ -375,13 +375,21 @@ def get_sha(root: Union[str, Path]) -> str:
def get_version_add(sha: Optional[str] = None) -> str:
vllm_root = os.path.dirname(os.path.abspath(__file__))
add_version_path = os.path.join(os.path.join(vllm_root, "vllm"), "version.py")
major, minor, *res = torch.__version__.split('.')
if add_git_version:
if sha != 'Unknown':
if sha is None:
sha = get_sha(vllm_root)
version = 'das.opt1' + sha[:7]
if (major, minor) == ('2', '1'):
version = 'das.opt1.' + sha[:7]
if (major, minor) == ('2', '3'):
version = 'das.opt2.' + sha[:7]
else:
if (major, minor) == ('2', '1'):
version = 'das.opt1'
if (major, minor) == ('2', '3'):
version = 'das.opt2'
# dtk version
if os.getenv("ROCM_PATH"):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment