Commit b6555b71 authored by mshoeybi

working

parent 6c40f892
@@ -16,25 +16,10 @@
 """Forward step utilities."""
 import torch
 from megatron.p2p_communication import recv_forward, send_forward
-from .sampling import sample
-from megatron import mpu
-import torch.nn.functional as F
-from megatron import print_rank_0
-from megatron import get_args, get_tokenizer
-from megatron.utils import get_ltor_masks_and_position_ids, unwrap_model
-from .communication import (
-    broadcast_float_list,
-    copy_from_last_to_first_pipeline_stage,
-    broadcast_from_last_pipeline_stage)
-from .tokenization import tokenize_prompts
-# These are needed to unwrap the model, would be nice to put these in megatron.utils if possible?
-from torch.nn.parallel.distributed import DistributedDataParallel as torchDDP
-from megatron.model import DistributedDataParallel as LocalDDP
-from megatron.model import Float16Module
+from megatron import get_args
 def forward_step(model, tokens, position_ids, attention_mask,
@@ -51,9 +36,7 @@ def forward_step(model, tokens, position_ids, attention_mask,
     input_tensor = recv_forward()
     # Forward pass through the model.
-    unwrapped_model = unwrap_model(
-        model, (torchDDP, LocalDDP, Float16Module))
-    unwrapped_model.set_input_tensor(input_tensor)
+    model.set_input_tensor(input_tensor)
     output_tensor = model(
         tokens, position_ids, attention_mask,
         set_inference_key_value_memory=set_inference_key_value_memory,
...
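With this change the text-generation forward step no longer unwraps the model before handing it the activations received from the previous pipeline stage; it simply calls set_input_tensor on whatever handle it holds. A minimal sketch of the resulting per-stage call path, assuming the surrounding wrappers delegate set_input_tensor as in the Float16Module hunk below (the helper name run_forward_stage and its argument list are illustrative, not part of this commit):

# Sketch only: recv_forward/send_forward are the p2p helpers imported above.
def run_forward_stage(model, tokens, position_ids, attention_mask):
    # Activations from the previous pipeline stage (None on the first stage).
    input_tensor = recv_forward()
    # No unwrap_model(model, (torchDDP, LocalDDP, Float16Module)) needed:
    # the wrapper forwards set_input_tensor to the core model.
    model.set_input_tensor(input_tensor)
    output_tensor = model(tokens, position_ids, attention_mask)
    # Hand the activations to the next pipeline stage.
    send_forward(output_tensor)
    return output_tensor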
@@ -166,6 +166,10 @@ class Float16Module(MegatronModule):
         self.float16_convertor = float16_convertor
+    def set_input_tensor(self, input_tensor):
+        return self.module.set_input_tensor(input_tensor)
     def forward(self, *inputs, **kwargs):
         if mpu.is_pipeline_first_stage():
             inputs = fp32_to_float16(inputs, self.float16_convertor)
...
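The Float16Module change is the other half of the same pattern: each wrapper around the core model exposes set_input_tensor and forwards it inward, so the outermost handle behaves like the model itself. A minimal self-contained sketch of that delegation (CoreModel and Wrapper are illustrative stand-ins; only the body of set_input_tensor mirrors the diff):

import torch

class CoreModel(torch.nn.Module):
    # Stand-in for the pipeline-parallel transformer.
    def __init__(self):
        super().__init__()
        self.input_tensor = None

    def set_input_tensor(self, input_tensor):
        # First stage receives None; later stages receive the activations
        # sent by the previous stage.
        self.input_tensor = input_tensor

class Wrapper(torch.nn.Module):
    # Stand-in for Float16Module (and, by the same pattern, the DDP wrappers).
    def __init__(self, module):
        super().__init__()
        self.module = module

    def set_input_tensor(self, input_tensor):
        # Delegate inward, as Float16Module.set_input_tensor does in this commit.
        return self.module.set_input_tensor(input_tensor)

    def forward(self, *inputs, **kwargs):
        return self.module(*inputs, **kwargs)

# Wrapped and unwrapped models now expose the same interface:
model = Wrapper(Wrapper(CoreModel()))   # e.g. DDP(Float16Module(core_model))
model.set_input_tensor(torch.zeros(1))  # reaches CoreModel without unwrap_model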