CUDA error at moe_cuda_kernel.cu:86 code=13(CUBLAS_STATUS_EXECUTION_FAILED)

85306aa7 · Jiezhong Qiu · 20cc924b · 85306aa7 · 85306aa7
Commit 85306aa7 authored Jan 03, 2021 by Jiezhong Qiu
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 2 deletions

pytorch/cuda/cuda_stream_manager.h +3 -0

pytorch/mem_transformer.py +2 -2

No files found.
--- a/pytorch/cuda/cuda_stream_manager.h
+++ b/pytorch/cuda/cuda_stream_manager.h
@@ -5,11 +5,14 @@
 #include <cublas_v2.h>
 #include <helper_cuda.h> 
+#include <cstdio>
 class CudaStreamManager {
 public:
    CudaStreamManager(const size_t num_expert_, const int device_) : num_expert(num_expert_), device(device_) {
        checkCudaErrors(cudaSetDevice(device));
+        printf("set device %d\n", device);
        streams = new cudaStream_t[num_expert];
        checkCudaErrors(cublasCreate(&handle));
        for (size_t i=0; i<num_expert; ++i) {

--- a/pytorch/mem_transformer.py
+++ b/pytorch/mem_transformer.py
@@ -45,7 +45,7 @@ class CustomizedMoEPositionwiseFF(nn.Module):
        self.d_inner = d_inner
        self.dropout = dropout
-        self.gate = nn.Linear(d_model, d_inner)
+        self.gate = nn.Linear(d_model, num_expert)
        self.moe1 = MOELayer(num_expert=num_expert, in_feat=d_model, out_feat=d_inner)
        self.moe2 = MOELayer(num_expert=num_expert, in_feat=d_inner, out_feat=d_model)
@@ -81,7 +81,7 @@ class CustomizedMoEPositionwiseFF(nn.Module):
        for i in range(self.top_k):
            print("top %d" % i)
            gate_idx = gate_top_k_idx[:, i].contiguous()
-            print(inp.size(), gate_idx.size())
+            print(inp.size(), gate_idx.size(), inp.device, gate_idx.device)
            x = self.moe1(inp, gate_idx)
            x = self.dropout(F.relu(x))
            # x = F.pad(x, pad=(0, 1), mode='constant', value=1.0)