Commit 3e8c263c authored by Rick Ho
Browse files

compute capacity in gshard

parent 1aced6d8
...@@ -4,7 +4,7 @@ Layers that FMoE provides to users ...@@ -4,7 +4,7 @@ Layers that FMoE provides to users
import torch import torch
import torch.nn as nn import torch.nn as nn
from .functions import moe_prepare_forward from .functions import prepare_forward
from .functions import MOEScatter, MOEGather, MOELinear from .functions import MOEScatter, MOEGather, MOELinear
from .functions import AllGather, Slice from .functions import AllGather, Slice
from .gates import NaiveGate from .gates import NaiveGate
...@@ -112,7 +112,7 @@ def _fmoe_general_global_forward(inp, gate, expert_fn, num_expert, world_size): ...@@ -112,7 +112,7 @@ def _fmoe_general_global_forward(inp, gate, expert_fn, num_expert, world_size):
global_expert_count, global_expert_count,
fwd_expert_count, fwd_expert_count,
fwd_batch_size, fwd_batch_size,
) = moe_prepare_forward(gate, num_expert, world_size) ) = prepare_forward(gate, num_expert, world_size)
x = MOEScatter.apply( x = MOEScatter.apply(
inp, pos, inp, pos,
local_expert_count, global_expert_count, fwd_batch_size, world_size local_expert_count, global_expert_count, fwd_batch_size, world_size
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment