"model/vscode:/vscode.git/clone" did not exist on "7d25b9e194f106e9c2a5289dfde40077c0838b7d"
cuda_stream_manager.h 945 Bytes
Newer Older
Rick Ho's avatar
Rick Ho committed
1
2
3
4
5
6
#ifndef CUDA_STREAM_MANAGER_H
#define CUDA_STREAM_MANAGER_H

#include <cuda_runtime.h>
#include <cublas_v2.h>

7
8
9
10
11
12
13
14
15
16
17
18
19
#ifdef MOE_USE_NCCL
#include <nccl.h>

/*
 * NCCL_SAFE_CALL(expr): evaluates `expr` exactly once and compares the result
 * with ncclSuccess. On mismatch it prints the source location and the numeric
 * result code to stderr, then terminates the process via exit(-1).
 *
 * The expansion uses fprintf() and exit(); the includes visible in this header
 * do not pull in <cstdio>/<cstdlib> explicitly, so the including translation
 * unit is expected to provide them.
 *
 * NOTE(review): the expansion is a bare `{ ... }` block, so writing
 * `if (c) NCCL_SAFE_CALL(x); else ...` will not parse as intended. Use it as a
 * standalone statement.
 */
#define NCCL_SAFE_CALL(nccl_call_expr) { \
	auto nccl_call_status_ = (nccl_call_expr); \
	if (ncclSuccess != nccl_call_status_) { \
		fprintf(stderr, "NCCL Error at %s:%d value %d\n", __FILE__, __LINE__, nccl_call_status_); \
		exit(-1); \
	} \
}

#endif

Rick Ho's avatar
Rick Ho committed
20
class CudaStreamManager {
Rick Ho's avatar
Rick Ho committed
21
22
23
24
public:
    int device;
    cublasHandle_t* handles;
    cudaStream_t* streams;
25
#ifdef MOE_USE_NCCL
Rick Ho's avatar
Rick Ho committed
26
	char ncclgood;
27
	ncclComm_t ncclcomm;
Rick Ho's avatar
Rick Ho committed
28
	void ensure(void*, class at::Device);
29
#endif
Rick Ho's avatar
Rick Ho committed
30

Rick Ho's avatar
Rick Ho committed
31
public:
Rick Ho's avatar
Rick Ho committed
32
    CudaStreamManager(int device_): device(device_), ncclgood(0) {
33
		this->setup(device);
34
35
    }

36
37
38
39
	void setup(int);
	void sync(int=0);
	void destroy();

Rick Ho's avatar
Rick Ho committed
40
	cudaStream_t stream(size_t=0);
41
	cublasHandle_t handle(size_t=0);
42

Rick Ho's avatar
Rick Ho committed
43
    ~CudaStreamManager() {
44
		this->destroy();
Rick Ho's avatar
Rick Ho committed
45
46
47
    }
}; 

48
// Returns a pointer to a CudaStreamManager for the given device index.
// Only the declaration is in this header; the definition decides how the
// object is created and who owns it.
// NOTE(review): confirm ownership/lifetime in the definition before freeing
// or storing the returned pointer.
CudaStreamManager* getCudaStreamManager(const int device);
Rick Ho's avatar
Rick Ho committed
49
50

#endif  // CUDA_STREAM_MANAGER_H