Commit 48be2ee2 authored by Casper Hansen's avatar Casper Hansen
Browse files

Default to GEMM

parent c58ec73b
@@ -3,7 +3,7 @@ from transformers import AutoTokenizer

 model_path = 'lmsys/vicuna-7b-v1.5'
 quant_path = 'vicuna-7b-v1.5-awq'
-quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMV" }
+quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }

 # Load model
 model = AutoAWQForCausalLM.from_pretrained(model_path)
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment