; LLaMA inference configuration.
; NOTE(review): the section and key names suggest this file is read by a
; FasterTransformer-style example program - confirm the consumer before
; relying on any parser-specific behavior.
;
; Comments are kept on their own lines and use ';' only: trailing inline
; comments are not stripped by every INI parser and can end up inside the value.

; Instance-level settings: data type, parallelism sizes, and the model to load.
[ft_instance_hyperparameter]
data_type = fp16
enable_custom_all_reduce = 0
tensor_para_size = 1
pipeline_para_size = 1

; Active model. model_name matches the name of one of the per-model sections
; further down in this file.
; NOTE(review): the "<N>-gpu" suffix of model_dir presumably has to agree with
; tensor_para_size - verify before switching models.
model_name = llama_7b
model_dir = /data/models/llama-7b-infer/1-gpu

; Alternative models. To switch, comment out the active pair above and
; uncomment exactly one model_name/model_dir pair below.
; model_name = llama_13b
; model_dir = /data/models/llama-13b-hf-infer/2-gpu
; model_name = llama_33b
; model_dir = /data/models/llama-33b-hf-infer/4-gpu
; model_name = llama_65b
; model_dir = /data/models/llama-65b-hf-infer/8-gpu
; model_name = llama2_7b
; model_dir = /data/models/llama-2-7b-infer/1-gpu
; model_name = llama2_13b
; model_dir = /data/models/llama-2-13b-infer/1-gpu

; Generation settings applied to a request.
[request]
; Beam width for beam search.
beam_width = 1
; k value for top-k sampling.
top_k = 1
; p value for top-p sampling.
top_p = 0.0
; Used for sampling.
; NOTE(review): 0.0 is an unusual temperature; samplers commonly divide logits
; by it - confirm the consumer handles 0.0 (top_k = 1 may make it irrelevant).
temperature = 0.0
; Used for sampling.
repetition_penalty = 1.0
; Only one of repetition_penalty and presence_penalty is allowed.
presence_penalty = 0.0
len_penalty = 0.0
beam_search_diversity_rate = 0.0
; Determined by the request.
request_batch_size = 1
; Determined by the request.
request_output_len = 256

; Model parameters used when model_name = llama_7b.
[llama_7b]
head_num = 32
size_per_head = 128
inter_size = 11008
num_layer = 32
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 32000
start_id = 0
end_id = 1
weight_data_type = fp16

; Model parameters used when model_name = llama_13b.
[llama_13b]
head_num = 40
size_per_head = 128
inter_size = 13824
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 32000
start_id = 0
end_id = 1
weight_data_type = fp16

; Model parameters used when model_name = llama_33b.
[llama_33b]
head_num = 52
size_per_head = 128
inter_size = 17920
num_layer = 60
rotary_embedding = 128
layernorm_eps = 1e-06
vocab_size = 32000
start_id = 0
end_id = 1
weight_data_type = fp16

; Model parameters used when model_name = llama_65b.
; Note: layernorm_eps differs from the smaller llama_* sections (1e-05 here).
[llama_65b]
head_num = 64
size_per_head = 128
inter_size = 22016
num_layer = 80
rotary_embedding = 128
layernorm_eps = 1e-05
vocab_size = 32000
start_id = 0
end_id = 1
weight_data_type = fp16

; Model parameters used when model_name = llama2_7b.
; Note: the llama2_* sections use start_id = 1 / end_id = 2, unlike llama_*.
[llama2_7b]
head_num = 32
size_per_head = 128
inter_size = 11008
num_layer = 32
rotary_embedding = 128
layernorm_eps = 1e-05
vocab_size = 32000
start_id = 1
end_id = 2
weight_data_type = fp16

; Model parameters used when model_name = llama2_13b.
; Note: the llama2_* sections use start_id = 1 / end_id = 2, unlike llama_*.
[llama2_13b]
head_num = 40
size_per_head = 128
inter_size = 13824
num_layer = 40
rotary_embedding = 128
layernorm_eps = 1e-05
vocab_size = 32000
start_id = 1
end_id = 2
weight_data_type = fp16