zero_gate.py 744 Bytes
Newer Older
Rick Ho's avatar
Rick Ho committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
r"""
Zero gate that directs all inputs to gate 0.
"""
from .base_gate import BaseGate

import torch
import torch.nn as nn
import torch.nn.functional as F


class ZeroGate(BaseGate):
    r"""
    Gate that assigns every input sample to expert index 0.

    No parameters are learned: `forward` emits an all-zero index tensor and
    a uniform score of `1 / top_k` for each of the `top_k` slots of every
    sample.
    """

    def __init__(self, _1, num_expert, world_size, top_k=2):
        r"""
        Set up the gate.

        Args:
            _1: Ignored by this gate. Presumably kept so the constructor
                signature lines up with other gates (e.g. a model
                dimension) -- confirm against the callers.
            num_expert: Forwarded unchanged to `BaseGate.__init__`.
            world_size: Forwarded unchanged to `BaseGate.__init__`.
            top_k: Number of expert slots emitted per input sample.
        """
        super().__init__(num_expert, world_size)
        self.top_k = top_k

    def forward(self, inp):
        r"""
        Route all samples to expert index 0 with uniform scores.

        Args:
            inp: Tensor whose first dimension is the number of samples.
                Only `inp.shape[0]` and `inp.device` are read.

        Returns:
            A tuple `(idx, gate_score)`:
            * `idx` -- int64 tensor of shape `(inp.shape[0] * top_k,)`,
              filled with zeros, on the same device as `inp`.
            * `gate_score` -- tensor of shape `(inp.shape[0], 1, top_k)`
              in which every entry equals `1 / top_k`.
        """
        # One slot per (sample, k) pair.
        num_slots = inp.shape[0] * self.top_k
        idx = torch.zeros(num_slots, dtype=torch.int64, device=inp.device)
        # Each of the top_k slots carries an equal share of the weight.
        gate_score = torch.ones(num_slots, device=inp.device) / self.top_k
        return idx, gate_score.reshape(-1, 1, self.top_k)