Commit 57512823 authored by yangql
Browse files

Update test_quant.py

parent bf2b305e
......@@ -14,7 +14,7 @@ def gptq():
bits=4, # 4 or 8
group_size=128,
damp_percent=0.01,
desc_act=False, # setting this to False can significantly speed up inference, but perplexity may be slightly worse
desc_act=True, # setting this to False can significantly speed up inference, but perplexity may be slightly worse
static_groups=False,
sym=True,
true_sequential=True,
......@@ -46,7 +46,7 @@ def gptq():
messages.append(msg)
print('len(messages):',len(messages))
messages = messages[:5]
messages = messages[:500]
print('len(messages):',len(messages))
data = []
......@@ -65,4 +65,4 @@ def gptq():
t2 = time.time()
print(('time:{:.2f}s').format(t2-t1))
gptq()
\ No newline at end of file
gptq()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment