Unverified commit fdec650b, authored by Xu Kai, committed by GitHub
Browse files

fix test llama (#4884)

parent 08a9f76b
......@@ -38,7 +38,6 @@ def run_llama_test(test_config):
enable_tensor_parallelism=True if test_config["tp_size"] > 1 else False, inference_only=True
)
infer_engine = TPInferEngine(model, shard_config, BATCH_SIZE, MAX_INPUT_LEN, MAX_OUTPUT_LEN)
init_to_get_rotary(model.model, base=10000)
generate_kwargs = dict(max_new_tokens=MAX_OUTPUT_LEN, do_sample=False)
input_tokens = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment