CHECKPOINT_PATH=conver-4tp-model
VOCAB_FILE=gpt2-vocab.json
MERGE_FILE=gpt2-merges.txt
GPUS_PER_NODE=4
NNODES=1
DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES"

python -m torch.distributed.launch $DISTRIBUTED_ARGS tools/generate_samples_gpt.py \
       --tensor-model-parallel-size 4 \
       --num-layers 24 \
       --hidden-size 1024 \
       --load $CHECKPOINT_PATH \
       --num-attention-heads 16 \
       --max-position-embeddings 1024 \
       --tokenizer-type GPT2BPETokenizer \
       --fp16 \
       --micro-batch-size 2 \
       --seq-length 1024 \
       --out-seq-length 128 \
       --temperature 1.0 \
       --vocab-file $VOCAB_FILE \
       --merge-file $MERGE_FILE \
       --genfile gpt2-genfile.json \
       --num-samples 4 \
       --top_p 0.9 \
       --recompute
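
Once the run finishes, the generated samples land in the file passed to --genfile. A minimal sketch for inspecting them, assuming the genfile is written as JSON lines with a "text" field per sample (the exact schema can vary between Megatron-LM versions, so adjust the key if needed):

# inspect_genfile.py -- minimal sketch for reading generated samples.
# Assumption: gpt2-genfile.json is JSON-lines with a "text" field per
# sample; verify the schema against your Megatron-LM version.
import json

with open("gpt2-genfile.json") as f:
    for i, line in enumerate(f):
        line = line.strip()
        if not line:
            continue
        sample = json.loads(line)
        print(f"--- sample {i} ---")
        # Fall back to printing the whole record if "text" is absent.
        print(sample.get("text", sample))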