model_kwargs=dict(tensor_parallel_size=num_gpus,gpu_memory_utilization=0.9),  # To evaluate a quantized model, add quantization="awq" or quantization="gptq" here.
model_kwargs=dict(tensor_parallel_size=num_gpus,gpu_memory_utilization=0.9,dtype='float16',),  # To evaluate a quantized model, add quantization="awq" or quantization="gptq" here.