#include #include #include #include #include "mpi.h" #include "net.h" #include "bootstrap.h" #include "hardware_utils.h" using namespace sccl; int main(int argc, char* argv[]) { int rank, nranks; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nranks); MPI_Comm_rank(MPI_COMM_WORLD, &rank); printf("rank=%d, nranks=%d\n", rank, nranks); // // ----------------------------------------------------------------------- // // INFO(SCCL_LOG_TOPO, "Bootstrap ...\n"); // scclRankInfo_t* rank_info; // struct sccl::hardware::topology::bootstrap::BootstrapComm* comm; // SCCLCHECK(scclCalloc(&rank_info, 1)); // SCCLCHECK(scclCalloc(&comm, 1)); // rank_info->rank = rank; // rank_info->nRanks = nranks; // rank_info->localRanks = 2; // rank_info->hipDev = rank % rank_info->localRanks; // auto sccl_bootstrap = new sccl::hardware::topology::bootstrap::Bootstrap(rank_info, comm); // SCCLCHECK(sccl_bootstrap->bootstrapInitCheck()); // sccl::hardware::topology::bootstrap::printUniqueInfo(comm->unique_info); // int cuda_id; // HIPCHECK(hipGetDevice(&cuda_id)); // printf("rank=%d, cuda_id=%d\n", rank, cuda_id); MPI_Finalize(); } /* 单机执行 SCCL_DEBUG_LEVEL=ABORT mpirun --allow-run-as-root -np 4 1_mpi_init SCCL_DEBUG_LEVEL=INFO SCCL_DEBUG_SUBSYS=ALL mpirun --allow-run-as-root -np 2 1_mpi_init 跨机执行 SCCL_DEBUG_LEVEL=ABORT mpirun --allow-run-as-root --hostfile hostfile -np 16 ./1_mpi_init SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root --hostfile hostfile2 -np 4 ./1_mpi_init */