linkers_mpi.cpp 1.82 KB
Newer Older
1
2
3
4
/*!
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
Guolin Ke's avatar
Guolin Ke committed
5
#ifdef USE_MPI
6

Guolin Ke's avatar
Guolin Ke committed
7
8
#include "linkers.h"

9
10
#include <iostream>

Guolin Ke's avatar
Guolin Ke committed
11
12
namespace LightGBM {

Guolin Ke's avatar
Guolin Ke committed
13
/*!
 * \brief Construct the MPI-backed linkers.
 *
 * Starts MPI when it is not already running, queries the world size and this
 * process's rank, waits on a barrier for every process, and then builds the
 * Bruck and recursive-halving maps from the rank and machine count.
 * The Config argument is unnamed and unused in this implementation.
 */
Linkers::Linkers(Config) {
  // Stays false until every setup step below has completed.
  is_init_ = false;

  // MPI_Init_thread is given an empty command line.
  int mpi_argc = 0;
  char** mpi_argv = nullptr;

  // Ask MPI whether it has already been started.
  int already_started = 0;
  MPI_SAFE_CALL(MPI_Initialized(&already_started));
  if (already_started == 0) {
    // MPI is not running yet, so start it here.
    // The thread level actually granted is written to granted_level but is not inspected.
    int granted_level = 0;
    MPI_SAFE_CALL(MPI_Init_thread(&mpi_argc, &mpi_argv, MPI_THREAD_SERIALIZED, &granted_level));
  }

  MPI_SAFE_CALL(MPI_Comm_size(MPI_COMM_WORLD, &num_machines_));
  MPI_SAFE_CALL(MPI_Comm_rank(MPI_COMM_WORLD, &rank_));

  // Block until every process in MPI_COMM_WORLD has reached this point.
  MPI_SAFE_CALL(MPI_Barrier(MPI_COMM_WORLD));

  bruck_map_ = BruckMap::Construct(rank_, num_machines_);
  recursive_halving_map_ = RecursiveHalvingMap::Construct(rank_, num_machines_);

  is_init_ = true;
}

/*!
 * \brief Destructor. Intentionally performs no MPI teardown.
 */
Linkers::~Linkers() {
  // MPI_Finalize() is deliberately NOT called from here. This destructor may
  // run because only this node hit an exception, and finalizing in that
  // situation would leave all nodes hanging.
  // Finalizing or aborting MPI is handled in main() instead.
}

/*!
 * \brief Query MPI for whether it has been initialized.
 * \return true when MPI_Initialized reports a non-zero flag, false otherwise
 */
bool Linkers::IsMpiInitialized() {
  // Zero-initialized so the value is well-defined even before MPI writes it.
  int is_mpi_init = 0;
  MPI_SAFE_CALL(MPI_Initialized(&is_mpi_init));
  // Explicit comparison instead of an implicit int-to-bool conversion.
  return is_mpi_init != 0;
}

void Linkers::MpiFinalizeIfIsParallel() {
  if (IsMpiInitialized()) {
    Log::Debug("Finalizing MPI session.");
45
46
    MPI_SAFE_CALL(MPI_Finalize());
  }
Guolin Ke's avatar
Guolin Ke committed
47
48
}

49
50
51
52
53
54
55
56
57
58
59
60
/*!
 * \brief Abort the whole MPI job if MPI has been initialized.
 *
 * Anything thrown while checking the MPI state or requesting the abort is
 * caught, reported on stderr, and followed by abort() of this process.
 */
void Linkers::MpiAbortIfIsParallel() {
  try {
    if (!IsMpiInitialized()) {
      // MPI was never started, so there is nothing to abort.
      return;
    }
    // std::endl already flushes the stream.
    std::cerr << "Aborting MPI communication." << std::endl;
    MPI_SAFE_CALL(MPI_Abort(MPI_COMM_WORLD, -1));
  } catch (...) {
    // The MPI abort could not be issued; terminate this process directly.
    std::cerr << "Exception was raised before aborting MPI. Aborting process..." << std::endl;
    abort();
  }
}
Guolin Ke's avatar
Guolin Ke committed
61
62

}  // namespace LightGBM
63
#endif  // USE_MPI