# Optional path to a checkpoint of pre-quantized 4-bit weights.
parser.add_argument(
    '--load_quant',
    type=str,
    default='/data/llm/checkpoints/vicuna-hf/vicuna-7b-awq-w4g128.pt',
    help='path to the pre-quanted 4-bit weights',
)

# Parse the command line exactly once; re-parsing yields the same namespace.
args = parser.parse_args()

# Reject unsupported configurations up front, before any further work is done.
# The model type comparison is case-insensitive; the precision must match exactly.
# NOTE(review): `assert` is stripped under `python -O`; consider `parser.error`
# if these checks must always run.
assert args.model_type.lower() in ["llama", "falcon", "mpt"], "We only support llama & falcon & mpt now"
assert args.precision in ["W4A16", "W16A16"], "We only support W4A16/W16A16 now"