Commit 331ff953 authored by Casper Hansen
Browse files

Default to GEMV

parent 6534f5e6
...@@ -3,7 +3,7 @@ from transformers import AutoTokenizer ...@@ -3,7 +3,7 @@ from transformers import AutoTokenizer
model_path = 'lmsys/vicuna-7b-v1.5' model_path = 'lmsys/vicuna-7b-v1.5'
quant_path = 'vicuna-7b-v1.5-awq' quant_path = 'vicuna-7b-v1.5-awq'
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4 } quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMV" }
# Load model # Load model
model = AutoAWQForCausalLM.from_pretrained(model_path) model = AutoAWQForCausalLM.from_pretrained(model_path)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment