Commit 85306aa7 authored by Jiezhong Qiu
Browse files

CUDA error at moe_cuda_kernel.cu:86 code=13(CUBLAS_STATUS_EXECUTION_FAILED)

parent 20cc924b
...@@ -5,11 +5,14 @@ ...@@ -5,11 +5,14 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <helper_cuda.h> #include <helper_cuda.h>
#include <cstdio>
class CudaStreamManager { class CudaStreamManager {
public: public:
CudaStreamManager(const size_t num_expert_, const int device_) : num_expert(num_expert_), device(device_) { CudaStreamManager(const size_t num_expert_, const int device_) : num_expert(num_expert_), device(device_) {
checkCudaErrors(cudaSetDevice(device)); checkCudaErrors(cudaSetDevice(device));
printf("set device %d\n", device);
streams = new cudaStream_t[num_expert]; streams = new cudaStream_t[num_expert];
checkCudaErrors(cublasCreate(&handle)); checkCudaErrors(cublasCreate(&handle));
for (size_t i=0; i<num_expert; ++i) { for (size_t i=0; i<num_expert; ++i) {
......
...@@ -45,7 +45,7 @@ class CustomizedMoEPositionwiseFF(nn.Module): ...@@ -45,7 +45,7 @@ class CustomizedMoEPositionwiseFF(nn.Module):
self.d_inner = d_inner self.d_inner = d_inner
self.dropout = dropout self.dropout = dropout
self.gate = nn.Linear(d_model, d_inner) self.gate = nn.Linear(d_model, num_expert)
self.moe1 = MOELayer(num_expert=num_expert, in_feat=d_model, out_feat=d_inner) self.moe1 = MOELayer(num_expert=num_expert, in_feat=d_model, out_feat=d_inner)
self.moe2 = MOELayer(num_expert=num_expert, in_feat=d_inner, out_feat=d_model) self.moe2 = MOELayer(num_expert=num_expert, in_feat=d_inner, out_feat=d_model)
...@@ -81,7 +81,7 @@ class CustomizedMoEPositionwiseFF(nn.Module): ...@@ -81,7 +81,7 @@ class CustomizedMoEPositionwiseFF(nn.Module):
for i in range(self.top_k): for i in range(self.top_k):
print("top %d" % i) print("top %d" % i)
gate_idx = gate_top_k_idx[:, i].contiguous() gate_idx = gate_top_k_idx[:, i].contiguous()
print(inp.size(), gate_idx.size()) print(inp.size(), gate_idx.size(), inp.device, gate_idx.device)
x = self.moe1(inp, gate_idx) x = self.moe1(inp, gate_idx)
x = self.dropout(F.relu(x)) x = self.dropout(F.relu(x))
# x = F.pad(x, pad=(0, 1), mode='constant', value=1.0) # x = F.pad(x, pad=(0, 1), mode='constant', value=1.0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment