Commit 99a0c39e authored by xingjinliang's avatar xingjinliang
Browse files

同步最新代码

parent 50fe58fa
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
......@@ -5,7 +5,6 @@ from unittest import mock
import torch
from megatron.core.inference.common_inference_params import CommonInferenceParams
from megatron.core.inference.engines.mcore_engine import MCoreEngine
from megatron.core.inference.inference_request import InferenceRequest, Status
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import (
......@@ -14,8 +13,9 @@ from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper
from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import (
InferenceWrapperConfig,
)
from megatron.core.inference.text_generation_controllers.simple_text_generation_controller import (
SimpleTextGenerationController,
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
TextGenerationController,
)
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec
from megatron.core.models.gpt.gpt_model import GPTModel
......@@ -60,7 +60,7 @@ class TestMCoreEngine:
inference_wrapped_model = GPTInferenceWrapper(gpt_model, inference_wrapper_config)
self.mock_tokenizer = mock.Mock()
text_generation_controller = SimpleTextGenerationController(
text_generation_controller = TextGenerationController(
inference_wrapped_model=inference_wrapped_model, tokenizer=self.mock_tokenizer
)
......@@ -85,7 +85,7 @@ class TestMCoreEngine:
prompts = ["sample" * (i + 1) for i in range(self.batch_size)]
results: List[InferenceRequest] = self.mcore_engine.generate(
prompts, common_inference_params=CommonInferenceParams(num_tokens_to_generate=10)
prompts, sampling_params=SamplingParams(num_tokens_to_generate=10)
)
for result in results:
......@@ -110,9 +110,7 @@ class TestMCoreEngine:
prompts = ["" for i in range(self.batch_size)]
results: List[InferenceRequest] = self.mcore_engine.generate(
prompts,
add_BOS=True,
common_inference_params=CommonInferenceParams(num_tokens_to_generate=10),
prompts, add_BOS=True, sampling_params=SamplingParams(num_tokens_to_generate=10)
)
for result in results:
......
File mode changed from 100755 to 100644
from megatron.core.inference.common_inference_params import CommonInferenceParams
from megatron.core.inference.sampling_params import SamplingParams
class TestCommonInferenceParams:
class TestSamplingParams:
def test_inference_params(self):
inference_parameters = CommonInferenceParams()
inference_parameters = SamplingParams()
inference_parameters.add_attributes({"min_tokens": 45})
assert (
inference_parameters.min_tokens == 45
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment