#!/bin/bash
#
# Launches generate.py in interactive mode through torchrun, starting
# ${MP} processes on this node.
#
# Every statement must sit on its own line: anything that shares the line
# with the shebang is treated as part of the interpreter line, so it is
# never executed as a command.
#
# The script's exit status is that of torchrun, its final command.

# Fail loudly on a misspelled/unset variable instead of passing an empty
# argument to torchrun.
set -u

# NCCL settings, exported so the processes spawned by torchrun inherit them.
export NCCL_ALGO=Ring
export NCCL_PROTO=Simple

# Process count handed to torchrun's --nproc-per-node.
# NOTE(review): presumably has to agree with the "MP8" suffix of the
# checkpoint directory below - confirm before changing.
export MP=8

# Config file handed to generate.py via --config (relative path, so it is
# resolved against the current working directory).
export CONFIG=config.json

# NOTE(review): the --ckpt-path value looks like a placeholder; point it at
# the real checkpoint directory before running.
torchrun --nproc-per-node "${MP}" generate.py \
  --ckpt-path /path/of/DeepSeek-V4-Flash-FP8-MP8 \
  --config "${CONFIG}" \
  --interactive