Revise variable name: rename gate_state_dict to gate_context and the Lmax_div_* metric keys to Lmax-over-*

e028f2ec · Sengxian · 69121432 · e028f2ec · e028f2ec
Commit e028f2ec authored Mar 22, 2021 by Sengxian
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 7 additions and 7 deletions

fmoe/balance.py fmoe/balance.py +5 -5

fmoe/megatron.py fmoe/megatron.py +2 -2

No files found.
--- a/fmoe/balance.py
+++ b/fmoe/balance.py
@@ -3,8 +3,8 @@ import torch.nn.functional as F
 metrics = {
    "coefficient-variation": lambda c_e: torch.std(c_e) / torch.mean(c_e),
-    "Lmax_div_Lmin": lambda c_e: (torch.max(c_e) + 1) / (torch.min(c_e) + 1),
+    "Lmax-over-Lmin": lambda c_e: (torch.max(c_e) + 1) / (torch.min(c_e) + 1),
-    "Lmax_div_Lmean": lambda c_e: torch.max(c_e) / torch.mean(c_e),
+    "Lmax-over-Lmean": lambda c_e: torch.max(c_e) / torch.mean(c_e),
 }
@@ -19,7 +19,7 @@ def update_balance_profile(
    balance_dict,
    gate_top_k_idx,
    _gate_score_top_k,
-    gate_state_dict,
+    gate_context,
    layer_idx,
    num_expert,
    balance_strategy,
@@ -34,8 +34,8 @@ def update_balance_profile(
        balance_dict[key][layer_idx] = metrics[key](c_e)
    S = gate_top_k_idx.shape[0]
    if balance_strategy == "gshard":
-        gate_score_all = gate_state_dict
+        gate_score_all = gate_context
        m_e = torch.sum(F.softmax(gate_score_all, dim=1), dim=0) / S
        balance_dict["gshard_loss"][layer_idx] = torch.sum(c_e * m_e) / num_expert / S
    elif balance_strategy == "noisy":
-        balance_dict["noisy_loss"][layer_idx] = gate_state_dict
+        balance_dict["noisy_loss"][layer_idx] = gate_context
--- a/fmoe/megatron.py
+++ b/fmoe/megatron.py
@@ -96,13 +96,13 @@ def generate_megatron_gate_hook(layer_idx, num_expert_global):
    balance_strategy = get_args().balance_strategy
-    def megatron_gate_hook(gate_top_k_idx, gate_score_top_k, gate_state_dict):
+    def megatron_gate_hook(gate_top_k_idx, gate_score_top_k, gate_context):
        global balance_dict
        update_balance_profile(
            balance_dict,
            gate_top_k_idx,
            gate_score_top_k,
-            gate_state_dict,
+            gate_context,
            layer_idx,
            num_expert_global,
            balance_strategy,