Use correct node names for param counting in auto_shard. (#830)

Fixes #827. Co-authored-by: Eugen Hotaj <ehotaj@fb.com>

Use correct node names for param counting in auto_shard. (#830)
Fixes #827. Co-authored-by: Eugen Hotaj <ehotaj@fb.com>
86c62cc9 · Eugen Hotaj · GitHub · eadfdc49 · 86c62cc9 · 86c62cc9
Unverified Commit 86c62cc9 authored Oct 26, 2021 by Eugen Hotaj Committed by GitHub Oct 26, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 5 deletions

fairscale/experimental/nn/auto_shard.py fairscale/experimental/nn/auto_shard.py +1 -2

tests/experimental/nn/test_auto_shard.py tests/experimental/nn/test_auto_shard.py +1 -3

No files found.
--- a/fairscale/experimental/nn/auto_shard.py
+++ b/fairscale/experimental/nn/auto_shard.py
@@ -52,8 +52,7 @@ def _split_nodes(traced_graph_module: torch.fx.GraphModule, shard_count: int = 3
    # Find the total number of params in the model and
    # the number of params per shard we are aiming for.
    for name, module in traced_graph_module.named_modules():
-        if "." in name:
-            continue
+        name = name.replace(".", "_")
        param_count[name] = sum([x.numel() for x in module.parameters()])
    logging.info(f"Total number of params are {param_count['']}")
    per_shard_param = param_count[""] // shard_count

--- a/tests/experimental/nn/test_auto_shard.py
+++ b/tests/experimental/nn/test_auto_shard.py
@@ -140,9 +140,7 @@ def test_dynaimc_conditionals_auto_wrapped():
    model = BranchedNetwork(features)
    sharded_model = shard_model(model, 3)
-    # TODO(ehotaj): There might be a bug in our split code because we shard the
-    # model into 10 shards even though we specify 3 shards above.
-    assert len(sharded_model) == 10
+    assert len(sharded_model) == 3
    input_ = torch.randn(3, features)
    model_output = model(input_)