cuda_stream_manager.cpp 1.83 KB
Newer Older
1
2
#include <unordered_map>
#include <mutex>
Rick Ho's avatar
Rick Ho committed
3
#include <cassert>
4
#include <thread>
Rick Ho's avatar
Rick Ho committed
5
6

#include "cuda_stream_manager.h"
7
8
9
#include <helper_cuda.h> 

// Number of CUDA streams (and matching cuBLAS handles) each manager allocates
// in setup(). stream()/handle() wrap their index modulo this value.
#define SMGR_N_STREAMS 4
Rick Ho's avatar
Rick Ho committed
10

Rick Ho's avatar
Rick Ho committed
11
/**
 * Returns one of this manager's CUDA streams.
 *
 * @param idx  Any index; it is wrapped modulo SMGR_N_STREAMS, so callers may
 *             pass an unbounded counter.
 * @return The stream stored in the wrapped slot.
 */
cudaStream_t CudaStreamManager::stream(size_t idx) {
	const size_t slot = idx % SMGR_N_STREAMS;
	return streams[slot];
}

15
16
17
18
19
20
21
/**
 * Returns one of this manager's cuBLAS handles.
 *
 * @param idx  Any index; it is wrapped modulo SMGR_N_STREAMS, so callers may
 *             pass an unbounded counter.
 * @return The handle stored in the wrapped slot.
 */
cublasHandle_t CudaStreamManager::handle(size_t idx) {
	const size_t slot = idx % SMGR_N_STREAMS;
	return handles[slot];
}


void CudaStreamManager::sync(int idx) {
	for (int i = 0; i < idx && i < SMGR_N_STREAMS; ++i) {
Rick Ho's avatar
Rick Ho committed
22
23
24
		cudaStreamSynchronize(streams[i]);
	}
}
Rick Ho's avatar
Rick Ho committed
25

26
27
28
29
30
void CudaStreamManager::setup(const int device) {
	checkCudaErrors(cudaSetDevice(device));
	streams = new cudaStream_t[SMGR_N_STREAMS];
	handles = new cublasHandle_t[SMGR_N_STREAMS];
	for (size_t i = 0; i < SMGR_N_STREAMS; ++i) {
Rick Ho's avatar
Rick Ho committed
31
		checkCudaErrors(cudaStreamCreate(streams + i));
Rick Ho's avatar
Rick Ho committed
32
33
34
		checkCudaErrors(cublasCreate(handles + i));
		cublasSetStream(handles[i], streams[i]);
	}
35
36
37
38
39
40
41
42
43
44
45
#ifdef MOE_USE_NCCL
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	ncclUniqueId uid;
	if (rank == 0) {
		ncclGetUniqueId(&uid);
	}
	MPI_Bcast(&uid, sizeof(uid), MPI_BYTE, 0, MPI_COMM_WORLD);
	NCCL_SAFE_CALL(ncclCommInitRank(&ncclcomm, size, uid, rank));
#endif
Rick Ho's avatar
Rick Ho committed
46
47
}

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/**
 * Releases the streams and cuBLAS handles created by setup().
 *
 * Each cuBLAS handle is destroyed before the stream it was bound to, so no
 * live handle ever refers to a destroyed stream. After the arrays are freed
 * the pointers are reset to nullptr, which makes a repeated destroy() a
 * no-op instead of a use-after-free / double delete.
 *
 * NOTE(review): the NCCL communicator created under MOE_USE_NCCL in setup()
 * is not released here — confirm whether that is intentional.
 */
void CudaStreamManager::destroy() {
	if (streams != nullptr && handles != nullptr) {
		for (size_t i = 0; i < SMGR_N_STREAMS; ++i) {
			checkCudaErrors(cublasDestroy(handles[i]));
			checkCudaErrors(cudaStreamDestroy(streams[i]));
		}
	}
	// delete[] on a null pointer is a no-op, so these are safe either way.
	delete[] streams;
	delete[] handles;
	streams = nullptr;
	handles = nullptr;
}

// Registry of stream managers keyed by CUDA device ordinal, and the mutex
// that getCudaStreamManager() holds for every lookup and insertion.
std::unordered_map<int, CudaStreamManager*> smgrs;
std::mutex smgr_mtx;

/**
 * Returns the stream manager for `device`, creating it on first request.
 *
 * The whole lookup-or-insert runs under `smgr_mtx`. std::unordered_map does
 * not allow a find() to run concurrently with an insert() (a rehash can
 * invalidate what the reader is traversing), so the lookup must not be done
 * outside the lock. std::lock_guard also guarantees the mutex is released
 * if constructing the manager throws.
 *
 * @param device  CUDA device ordinal.
 * @return Pointer to the manager registered for `device`; the registry keeps
 *         the pointer for later calls.
 */
CudaStreamManager* getCudaStreamManager(const int device) {
	std::lock_guard<std::mutex> guard(smgr_mtx);
	auto it = smgrs.find(device);
	if (it == smgrs.end()) {
		CudaStreamManager* smgr = new CudaStreamManager(device);
		it = smgrs.insert(std::pair<int, CudaStreamManager*>(device, smgr)).first;
	}
	return it->second;
}