1_mpi_init.cpp 1.66 KB
Newer Older
lishen's avatar
lishen committed
1
2
3
4
5
6
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "mpi.h"
#include "net.h"
7
8
#include "bootstrap.h"
#include "hardware_utils.h"
lishen's avatar
lishen committed
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

using namespace sccl;

int main(int argc, char* argv[]) {
    int rank, nranks;
    int tag1, src, dst, cnt;

    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    printf("rank=%d, nranks=%d\n", rank, nranks);

    // ----------------------------------------------------------------------- //

    INFO(SCCL_LOG_TOPO, "Bootstrap ...\n");
27
28
    struct scclRankInfo* rank_info;
    struct sccl::hardware::topology::bootstrap::scclBootstrapComm* comm;
lishen's avatar
lishen committed
29

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
    SCCLCHECK(scclCalloc(&rank_info, 1));
    SCCLCHECK(scclCalloc(&comm, 1));

    rank_info->rank       = rank;
    rank_info->nRanks     = nranks;
    rank_info->localRanks = 2;
    rank_info->hipDev     = rank % rank_info->localRanks;

    auto sccl_bootstrap = new sccl::hardware::topology::bootstrap::scclBootstrap(rank_info, comm);
    SCCLCHECK(sccl_bootstrap->bootstrapInitCheck());

    sccl::hardware::topology::bootstrap::printUniqueInfo(comm->unique_info);

    int cuda_id;
    HIPCHECK(hipGetDevice(&cuda_id));
    printf("rank=%d, cuda_id=%d\n", rank, cuda_id);
lishen's avatar
lishen committed
46
47
48
49
50
51

    MPI_Finalize();
}

/*
Single-node execution:
SCCL_DEBUG_LEVEL=ABORT mpirun --allow-run-as-root -np 4 1_mpi_init
SCCL_DEBUG_LEVEL=INFO SCCL_DEBUG_SUBSYS=ALL mpirun --allow-run-as-root -np 2 1_mpi_init

Multi-node execution:
SCCL_DEBUG_LEVEL=ABORT mpirun --allow-run-as-root --hostfile hostfile -np 16 ./1_mpi_init
SCCL_DEBUG_LEVEL=ABORT SCCL_DEBUG_SUBSYS=BOOTSTRAP mpirun --allow-run-as-root --hostfile hostfile2 -np 4 ./1_mpi_init
*/