rings.cc 2.25 KB
Newer Older
lishen's avatar
lishen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/*************************************************************************
 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/

#include "core.h"

namespace sccl {
namespace hardware {
namespace topology {
namespace detect {

// At most MAXWIDTH values are printed on one line.
#define MAXWIDTH 20
// Nominal prefix width the line layout was designed around.
#define PREFIXLEN 15
// Nominal line length; also used as the hard cap on the printed prefix length.
#define STRLENGTH (PREFIXLEN + 5 * MAXWIDTH)
// Worst-case characters produced by one " %3d" field: the separating space,
// a sign, and 10 digits (assumes a 32-bit int; wider ints are still handled
// safely because every write below is bounded).
#define FIELDMAX 12

/**
 * Format `prefix` followed by up to MAXWIDTH entries of `values` on a single
 * line and emit it through INFO(SCCL_INIT, ...).
 *
 * Each value is appended as " %3d", so values of up to three characters form
 * aligned 4-character columns. Wider values (e.g. ranks >= 1000) are printed
 * in full; their column simply grows instead of being overwritten by the
 * following value.
 *
 * @param values  Array holding at least min(nranks, MAXWIDTH) ints.
 * @param nranks  Number of valid entries in `values`; entries beyond MAXWIDTH
 *                are not printed.
 * @param prefix  NUL-terminated text placed at the start of the line. At most
 *                STRLENGTH characters of it are printed; NULL is treated as "".
 */
void dumpLine(int* values, int nranks, const char* prefix) {
    // Large enough for a STRLENGTH-character prefix plus MAXWIDTH worst-case fields.
    char line[STRLENGTH + MAXWIDTH * FIELDMAX + 1];
    const int cap = (int)sizeof(line);

    // "%.*s" bounds the prefix, so `used` is always < cap after this call.
    int used = snprintf(line, sizeof(line), "%.*s", STRLENGTH, prefix ? prefix : "");
    if(used < 0) {
        used    = 0;
        line[0] = '\0';
    }

    for(int i = 0; i < nranks && i < MAXWIDTH; i++) {
        // Append at the running offset rather than at a fixed 4-column stride,
        // so a wide value can never be clobbered by its successor.
        int n = snprintf(line + used, (size_t)(cap - used), " %3d", values[i]);
        if(n < 0 || n >= cap - used) {
            // Encoding error or no room left: drop the partial field and stop.
            line[used] = '\0';
            break;
        }
        used += n;
    }
    INFO(SCCL_INIT, "%s", line);
}

/**
 * Build, for every ring, the ordered list of ranks as seen from `rank`, and
 * verify that each ring is a single cycle covering all ranks.
 *
 * For ring r the walk starts at `rank` and repeatedly follows
 * next[r * nranks + current], writing the nranks visited ranks to
 * rings[r * nranks] .. rings[r * nranks + nranks - 1]. When `rank` is 0 each
 * ring is also logged through dumpLine().
 *
 * @param nrings  Number of rings to build.
 * @param rings   Output array of nrings * nranks ints.
 * @param rank    Rank the walk starts from.
 * @param nranks  Number of ranks in every ring.
 * @param prev    Not read by this function.
 * @param next    Input array of nrings * nranks ints; next[r * nranks + k] is
 *                the rank that follows rank k in ring r.
 * @return scclSuccess if every ring loops back to `rank` after nranks steps
 *         and contains every rank in [0, nranks); scclInternalError (after a
 *         WARN) otherwise, including when the walk reaches a value outside
 *         [0, nranks).
 */
scclResult_t scclBuildRings(int nrings, int* rings, int rank, int nranks, int* prev, int* next) {
    for(int r = 0; r < nrings; r++) {
        int* ring           = rings + r * nranks;
        const int* ringNext = next + r * nranks;

        int current = rank;
        for(int i = 0; i < nranks; i++) {
            // `current` is about to be used as an index into `next`; reject
            // out-of-range values instead of reading outside the table.
            if(current < 0 || current >= nranks) {
                WARN("Error : ring %d has invalid rank %d at position %d (nranks %d)", r, current, i, nranks);
                return scclInternalError;
            }
            ring[i] = current;
            current = ringNext[current];
        }

        // Only rank 0 logs the ring, so only rank 0 needs the prefix.
        if(rank == 0) {
            char prefix[40];
            snprintf(prefix, sizeof(prefix), "Channel %02d/%02d : ", r, nrings);
            dumpLine(ring, nranks, prefix);
        }

        if(current != rank) {
            WARN("Error : ring %d does not loop back to start (%d != %d)", r, current, rank);
            return scclInternalError;
        }

        // Check that all ranks are there
        for(int i = 0; i < nranks; i++) {
            int found = 0;
            for(int j = 0; j < nranks; j++) {
                if(ring[j] == i) {
                    found = 1;
                    break;
                }
            }
            if(found == 0) {
                WARN("Error : ring %d does not contain rank %d", r, i);
                return scclInternalError;
            }
        }
    }
    return scclSuccess;
}

} // namespace detect
} // namespace topology
} // namespace hardware
} // namespace sccl