[ft_instance_hyperparameter]
max_batch_size=1 ; used to allocate the buffers
max_seq_len=512 ; sequence length of the position embedding table; should be moved to the model hyperparameters
beam_width=1 ; beam width for beam search
top_k=5 ; k value for top-k sampling
top_p=0 ; p value for top-p sampling
temperature=1.0 ; used for sampling
repetition_penalty=1.0 ; used for sampling
len_penalty=1.0
beam_search_diversity_rate=0.0
is_half=1
enable_custom_all_reduce=0
tensor_para_size=8
pipeline_para_size=1
model_name=GLM_130B
model_dir=/home/glm-130b-sat-ft-model/
model_dtype=1 # "fp32": 0, "fp16": 1, "int8": 2, "int4": 3

[request]
request_batch_size=1 # determined by the request
request_output_len=128 # determined by the request

[GLM_130B]
head_num=96
size_per_head=128
decoder_layers=70
rotary_embedding=64
vocab_size=150528
start_id=150004
end_id=150001
inter_size=32768 # fixed by the checkpoint; changing this has no effect
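For illustration, here is a minimal Python sketch of how such a file could be loaded with the standard-library configparser. It is not part of the official tooling, and the file name config.ini is an assumption; note that inline_comment_prefixes is needed because this config mixes ";" and "#" inline comments.

import configparser

# Allow both ";" and "#" as inline comment markers, as used above.
config = configparser.ConfigParser(inline_comment_prefixes=(";", "#"))
config.read("config.ini")  # hypothetical path to the file shown above

ft = config["ft_instance_hyperparameter"]
max_batch_size = ft.getint("max_batch_size")          # 1
top_k = ft.getint("top_k")                            # 5
temperature = ft.getfloat("temperature")              # 1.0
tensor_para_size = ft.getint("tensor_para_size")      # 8

model = config["GLM_130B"]
head_num = model.getint("head_num")                   # 96
# Hidden size follows from the model hyperparameters: 96 * 128 = 12288.
hidden_size = head_num * model.getint("size_per_head")

print(max_batch_size, top_k, temperature, tensor_para_size, hidden_size)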