Commit 688448db authored by silencealiang

Update code

parent a02a5490
Pipeline #2503 passed
@@ -5,18 +5,11 @@ import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir)))
from megatron.training import get_args
from megatron.training import print_rank_0
from megatron.core import mpu
from megatron.training.checkpointing import load_checkpoint
from megatron.training.initialize import initialize_megatron
from megatron.core.models.gpt import GPTModel
from megatron.training import get_model
from megatron.training.arguments import core_transformer_config_from_args
from megatron.training.yaml_arguments import core_transformer_config_from_yaml
from megatron.inference.text_generation_server import MegatronServer
from megatron.inference.text_generation import generate_and_post_process
from megatron.inference.text_generation import beam_search_and_post_process
from megatron.core.transformer.spec_utils import import_module
from megatron.core.models.gpt.gpt_layer_specs import (
get_gpt_layer_local_spec,
@@ -24,10 +17,28 @@ from megatron.core.models.gpt.gpt_layer_specs import (
)
from contextlib import nullcontext
import torch
from typing import Union
import megatron
import os
from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig
import sys
from argparse import Namespace
from megatron.core.inference.engines.abstract_engine import AbstractEngine
from megatron.core.inference.engines.mcore_engine import MCoreEngine
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import GPTInferenceWrapper
from megatron.core.inference.text_generation_controllers.simple_text_generation_controller import SimpleTextGenerationController
from megatron.core.transformer.module import MegatronModule
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir, os.path.pardir)))
from megatron.training import get_args
from megatron.training import get_tokenizer
from megatron.training.checkpointing import load_checkpoint
from megatron.core import mpu
from megatron.training.initialize import initialize_megatron
from megatron.training import get_model
def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
"""Builds the model.
@@ -84,23 +95,69 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megat
position_embedding_type=args.position_embedding_type,
rotary_percent=args.rotary_percent,
rotary_base=args.rotary_base,
rope_scaling=args.use_rope_scaling
rope_scaling=args.use_rope_scaling,
rope_scaling_factor=args.rope_scaling_factor,
)
return model
def get_inference_engine(args: Namespace, model: MegatronModule) -> AbstractEngine:
"""Get the relevant backend for running inference
This function will automatically choose the TRTLLMBackend when possible, and default to the MCore backend if the user does not specify any backend. TRTLLMBackend is not implemented yet.
Args:
args (Namespace): The user arguments parsed from command line
model (MegatronModule): The megatron model.
Returns:
AbstractEngine: The chosen engine
"""
tokenizer = get_tokenizer()
inference_wrapper_config = InferenceWrapperConfig(
hidden_size=args.hidden_size,
inference_batch_times_seqlen_threshold=args.inference_batch_times_seqlen_threshold,
fp32_residual_connection=args.fp32_residual_connection,
params_dtype=args.params_dtype,
padded_vocab_size=args.padded_vocab_size,
inference_max_seq_length=args.inference_max_seq_length,
inference_max_requests=args.inference_max_batch_size
)
inference_wrapped_model = GPTInferenceWrapper(model, inference_wrapper_config)
text_generation_controller = SimpleTextGenerationController(
inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer)
return MCoreEngine(text_generation_controller=text_generation_controller)
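# Note on the composition above: GPTInferenceWrapper adapts the Megatron model for
# step-wise decoding, SimpleTextGenerationController pairs the wrapped model with the
# tokenizer to drive sampling, and MCoreEngine schedules requests against that
# controller. The server started in __main__ below only needs the resulting engine.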
def add_text_generate_args(parser):
group = parser.add_argument_group(title='text generation')
group.add_argument("--port", type=int, default=5000,
help='port for text generation server to run on')
group.add_argument("--temperature", type=float, default=1.0,
help='Sampling temperature.')
group.add_argument("--top_k", type=int, default=1,
help='Top k sampling.')
group.add_argument("--top_p", type=float, default=0.0,
help='Top p sampling.')
group.add_argument("--return-log-probs", action='store_true', default=True,
help='Return the log probabilities of the final output tokens')
group.add_argument("--num-tokens-to-generate", type=int, default=30,
help='Number of tokens to generate for each prompt')
group.add_argument("--prompts", metavar='N', type=str, nargs='+',
help='Input prompts with each prompt within quotes and separated by space')
group.add_argument("--max-batch-size", type=int, default=8,
help='Max number of prompts to process at once')
return parser
if __name__ == "__main__":
initialize_megatron(extra_args_provider=add_text_generate_args,
args_defaults={'tokenizer_type': 'GPT2BPETokenizer',
'no_load_rng': True,
'no_load_optim': True})
args_defaults={'no_load_rng': True,
'no_load_optim': True,
'exit_on_missing_checkpoint': True})
args = get_args()
if args.num_layers_per_virtual_pipeline_stage is not None:
@@ -125,20 +182,8 @@ if __name__ == "__main__":
model = model[0]
model.eval()
inference_engine = get_inference_engine(args, model)
if mpu.is_pipeline_first_stage() and mpu.get_tensor_model_parallel_rank() == 0:
server = MegatronServer(model)
server = MegatronServer(inference_engine, args)
server.run("0.0.0.0",port=args.port)
while True:
choice = torch.tensor(1, dtype=torch.long, device='cuda')
torch.distributed.broadcast(choice, 0)
if choice.item() == 0:
try:
generate_and_post_process(model)
except ValueError as ve:
pass
elif choice.item() == 1:
try:
beam_search_and_post_process(model)
except ValueError as ve:
pass
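For reference, a minimal client sketch for querying the server started above. The endpoint path and JSON fields are assumptions based on Megatron-LM's text generation tooling (a Flask /api endpoint taking "prompts" and "tokens_to_generate"), not something introduced by this commit; adjust the host, port, and fields to match your deployment.

import json
import requests

# Hypothetical client for the MegatronServer launched above; the /api path and
# request fields are assumptions, verify them against your server version.
url = "http://localhost:5000/api"
headers = {"Content-Type": "application/json"}
payload = {"prompts": ["Hello, my name is"], "tokens_to_generate": 30}

response = requests.put(url, data=json.dumps(payload), headers=headers)
print(response.json())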
default:
interruptible: true
other:
artifacts:
paths:
- results/
when: always
image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
needs:
- job: functional:configure
pipeline: $PARENT_PIPELINE_ID
rules:
- if: $CI_PIPELINE_SOURCE == "parent_pipeline"
- if: $CI_MERGE_REQUEST_ID
script:
- export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
--model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
other --container-tag 20283570 --cluster dgxh100_coreweave
stage: unit-tests
tags: &id001
- arch/amd64
- env/prod
- origin/jet-fleet
- owner/jet-core
- purpose/jet-client
- team/megatron
timeout: 7 days
stages:
- unit-tests
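# The per-directory jobs below are instances of the same job template: each reuses the
# tag list defined on the `other` job via the `*id001` alias and differs only in the
# --test-case argument passed to launch_jet_workload.py.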
tests/unit_tests/data/:
artifacts:
paths:
- results/
when: always
image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
needs:
- job: functional:configure
pipeline: $PARENT_PIPELINE_ID
rules:
- if: $CI_PIPELINE_SOURCE == "parent_pipeline"
- if: $CI_MERGE_REQUEST_ID
script:
- export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
--model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
tests/unit_tests/data/ --container-tag 20283570 --cluster dgxh100_coreweave
stage: unit-tests
tags: *id001
timeout: 7 days
tests/unit_tests/dist_checkpointing/:
artifacts:
paths:
- results/
when: always
image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
needs:
- job: functional:configure
pipeline: $PARENT_PIPELINE_ID
rules:
- if: $CI_PIPELINE_SOURCE == "parent_pipeline"
- if: $CI_MERGE_REQUEST_ID
script:
- export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
--model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
tests/unit_tests/dist_checkpointing/ --container-tag 20283570 --cluster dgxh100_coreweave
stage: unit-tests
tags: *id001
timeout: 7 days
tests/unit_tests/distributed/:
artifacts:
paths:
- results/
when: always
image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
needs:
- job: functional:configure
pipeline: $PARENT_PIPELINE_ID
rules:
- if: $CI_PIPELINE_SOURCE == "parent_pipeline"
- if: $CI_MERGE_REQUEST_ID
script:
- export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
--model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
tests/unit_tests/distributed/ --container-tag 20283570 --cluster dgxh100_coreweave
stage: unit-tests
tags: *id001
timeout: 7 days
? tests/unit_tests/test_inference.py tests/unit_tests/test_tokenizer.py tests/unit_tests/test_utilities.py
tests/unit_tests/test_training.py
: artifacts:
paths:
- results/
when: always
image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
needs:
- job: functional:configure
pipeline: $PARENT_PIPELINE_ID
rules:
- if: $CI_PIPELINE_SOURCE == "parent_pipeline"
- if: $CI_MERGE_REQUEST_ID
script:
- export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
--model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
tests/unit_tests/test_inference.py tests/unit_tests/test_tokenizer.py tests/unit_tests/test_utilities.py
tests/unit_tests/test_training.py --container-tag 20283570 --cluster dgxh100_coreweave
stage: unit-tests
tags: *id001
timeout: 7 days