修改triton 量化w8a8 config命名方式

add deps of pd-pp

修改triton 量化w8a8 config命名方式
add deps of pd-pp
9f7f976c · zhuwenwen · 81eaff62 · 9f7f976c · 9f7f976c
Commit 9f7f976c authored Nov 13, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 3 deletions

requirements/rocm.txt requirements/rocm.txt +3 -0

vllm/utils/__init__.py vllm/utils/__init__.py +4 -3

No files found.
--- a/requirements/rocm.txt
+++ b/requirements/rocm.txt
@@ -24,6 +24,9 @@ numa
 pytrie
 setuptools_scm>=8
 cmake==3.29
+quart
+fastrlock==0.8.3
+cupy==12.3.0

 torch == 2.5.1
 triton == 3.1

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -2309,9 +2309,10 @@ class W8a8GetCacheJSON:
        self.triton_json_list=[]
        self.weight_shapes=[]
        self.moe_weight_shapes=[]
-        device_name = current_platform.get_device_name().replace(" ", "_")
-        if 'K100_AI' in device_name and torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120:
-            device_name='K100_AI_120'
+        arch_name = torch.cuda.get_device_properties("cuda").gcnArchName.split(':')[0]
+        arch_cu = torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count
+        
+        device_name =arch_name+'_'+str(arch_cu)+'cu'
        self.device_name=device_name
        self.topk=1
        self.quant_method=None