Unverified Commit 3dcc9eff authored by msbaines, committed by GitHub

[docs] add MOE to docs (#693)

parent 2c663f5a
@@ -7,6 +7,7 @@ API Reference
optim/adascale
optim/oss
optim/grad_scaler
nn/moe
nn/pipe
nn/sharded_ddp
nn/fsdp
......
Mixture Of Experts
==================
.. autoclass:: fairscale.nn.MOELayer
@@ -26,6 +26,9 @@ Components
* `Fully Sharded Data Parallel FSDP <../../en/latest/api/nn/fsdp.html>`_
* `FSDP Tips <../../en/latest/api/nn/fsdp_tips.html>`_
* Mixture-of-Experts:
* `MOE <../../en/latest/api/nn/moe.html>`_
* Optimization at scale:
* `AdaScale SGD <../../en/latest/api/optim/adascale.html>`_
......
@@ -43,13 +43,12 @@ class MOELayer(Base):
output = moe(input)
l_aux = moe.l_aux
.. Gshard_: https://arxiv.org/pdf/2006.16668.pdf
.. _Gshard: https://arxiv.org/pdf/2006.16668.pdf
Args:
gate (torch.nn.Module):
gate network
expert (torch.nn.Module):
expert network
gate: gate network
expert: expert network
group: group to use for all-to-all communication
"""
def __init__(self, gate: Module, experts: Union[Module, ModuleList], group: Optional[Any] = None) -> None:
......
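For readers skimming this diff, here is a minimal, hypothetical usage sketch of the layer whose docstring is being edited above. It is not a verified recipe from the repo: the `Top2Gate` import path, the 3-d input layout, and the convention that the gate's expert count equals `world_size * num_local_experts` are assumptions drawn from the class docstring and the GShard paper it links; the `nn.Linear` experts are illustrative stand-ins.

```python
# Hypothetical sketch of fairscale.nn.MOELayer usage (assumptions noted inline).
# MOELayer shards its experts across a process group and exchanges tokens with
# all-to-all, so torch.distributed must already be initialized (e.g. one
# process per GPU launched via torchrun) before constructing the layer.
import torch
import torch.distributed as dist
import torch.nn as nn

from fairscale.nn import MOELayer, Top2Gate  # Top2Gate import path assumed

model_dim = 512
num_local_experts = 4
world_size = dist.get_world_size()

# Gate routes each token to its top-2 experts; the expert count passed here is
# assumed to be the global count (local experts times world size).
gate = Top2Gate(model_dim, world_size * num_local_experts)

# The experts living on this rank; any nn.Module works, Linear is a stand-in.
experts = nn.ModuleList(
    [nn.Linear(model_dim, model_dim) for _ in range(num_local_experts)]
)

moe = MOELayer(gate, experts)  # group=None -> default process group

x = torch.randn(8, 16, model_dim)  # assumed (sequence, tokens, model) layout
output = moe(x)                    # tokens dispatched to experts and combined
l_aux = moe.l_aux                  # gate's auxiliary load-balancing loss
```

The `l_aux` attribute read after the forward pass is the gate's load-balancing term from the GShard paper; it is intended to be added to the task loss during training so that tokens spread evenly across experts.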