"...text-generation-inference.git" did not exist on "8672cad2cbe9b31d82e54223d0f4815ffe426cee"
Commit c5d719cf authored by Jiezhong Qiu's avatar Jiezhong Qiu
Browse files

update

parent 79f16297
...@@ -20,8 +20,8 @@ class MOEFunction(Function): ...@@ -20,8 +20,8 @@ class MOEFunction(Function):
@staticmethod @staticmethod
def backward(ctx, grad_out): def backward(ctx, grad_out):
grad_input, grad_weight = moe_cuda.backward( grad_input, grad_weight = moe_cuda.backward(
grad_out.contiguous(), *ctx.saved_variables) grad_out.contiguous(), *ctx.saved_tensors)
return grad_input, grad_weight return grad_input, None, grad_weight
class MOELayer(nn.Module): class MOELayer(nn.Module):
...@@ -46,7 +46,7 @@ out_feat = 512 ...@@ -46,7 +46,7 @@ out_feat = 512
moe = MOELayer(num_expert, in_feat, out_feat).cuda() moe = MOELayer(num_expert, in_feat, out_feat).cuda()
input = torch.rand(batch_size, in_feat).cuda() input = torch.rand(batch_size, in_feat).cuda()
gate = torch.randint(low=0, high=num_expert, size=(batch_size, )).int().cuda() gate = torch.randint(low=0, high=num_expert, size=(batch_size, ), requires_grad=False).int().cuda()
output = moe(input, gate) output = moe(input, gate)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment