OpenDAS / Megatron-LM

Commit 688448db, authored Mar 14, 2025 by silencealiang
Commit message: Update code
Parent: a02a5490
Pipeline #2503 passed

Showing 3 changed files with 452 additions and 514 deletions
tools/retro/text_generation/retro_text_generation.py (+263, -263)
tools/run_text_generation_server.py (+189, -144)
unit-test-job-lts.yaml (+0, -107)
tools/retro/text_generation/retro_text_generation.py (view file @ 688448db)
tools/run_text_generation_server.py (view file @ 688448db)
```diff
@@ -5,18 +5,11 @@ import os
 import sys
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
-                                             os.path.pardir)))
-from megatron.training import get_args
-from megatron.training import print_rank_0
-from megatron.core import mpu
-from megatron.training.checkpointing import load_checkpoint
-from megatron.training.initialize import initialize_megatron
 from megatron.core.models.gpt import GPTModel
-from megatron.training import get_model
 from megatron.training.arguments import core_transformer_config_from_args
 from megatron.training.yaml_arguments import core_transformer_config_from_yaml
 from megatron.inference.text_generation_server import MegatronServer
-from megatron.inference.text_generation import generate_and_post_process
-from megatron.inference.text_generation import beam_search_and_post_process
 from megatron.core.transformer.spec_utils import import_module
 from megatron.core.models.gpt.gpt_layer_specs import (
     get_gpt_layer_local_spec,
@@ -24,10 +17,28 @@ from megatron.core.models.gpt.gpt_layer_specs import (
 )
 from contextlib import nullcontext
 import torch
 from typing import Union
 import megatron
+import os
+from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig
+import sys
+from argparse import Namespace
+from megatron.core.inference.engines.abstract_engine import AbstractEngine
+from megatron.core.inference.engines.mcore_engine import MCoreEngine
+from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import GPTInferenceWrapper
+from megatron.core.inference.text_generation_controllers.simple_text_generation_controller import SimpleTextGenerationController
+from megatron.core.transformer.module import MegatronModule
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                             os.path.pardir, os.path.pardir)))
+from megatron.training import get_args
+from megatron.training import get_tokenizer
+from megatron.training.checkpointing import load_checkpoint
+from megatron.core import mpu
+from megatron.training.initialize import initialize_megatron
+from megatron.training import get_model


 def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
     """Builds the model.
```
…
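An aside on the import shuffle above: the new code rebuilds `sys.path` by climbing two `os.path.pardir` levels from the script's directory. A minimal `pathlib` equivalent, my sketch rather than part of the diff:

```python
import sys
from pathlib import Path

# Same effect as os.path.abspath(os.path.join(os.path.dirname(__file__),
# os.path.pardir, os.path.pardir)): two levels above this file's directory.
sys.path.append(str(Path(__file__).resolve().parents[2]))
```

Back to the diff: the next hunk touches `model_provider`'s rope settings and adds the new engine factory.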
```diff
@@ -84,23 +95,69 @@ def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
         position_embedding_type=args.position_embedding_type,
         rotary_percent=args.rotary_percent,
         rotary_base=args.rotary_base,
-        rope_scaling=args.use_rope_scaling
+        rope_scaling=args.use_rope_scaling,
+        rope_scaling_factor=args.rope_scaling_factor,
     )

     return model


+def get_inference_engine(args: Namespace, model: MegatronModule) -> AbstractEngine:
+    """Get the relevant backend for running inference.
+
+    This function will automatically choose the TRTLLMBackend when possible,
+    and default to the MCore backend if the user does not specify any backend.
+    TRTLLMBackend is not implemented yet.
+
+    Args:
+        args (Namespace): The user arguments parsed from the command line
+        model (MegatronModule): The megatron model.
+
+    Returns:
+        AbstractBackend: The chosen backend
+    """
+    tokenizer = get_tokenizer()
+
+    inference_wrapper_config = InferenceWrapperConfig(
+        hidden_size=args.hidden_size,
+        inference_batch_times_seqlen_threshold=args.inference_batch_times_seqlen_threshold,
+        fp32_residual_connection=args.fp32_residual_connection,
+        params_dtype=args.params_dtype,
+        padded_vocab_size=args.padded_vocab_size,
+        inference_max_seq_length=args.inference_max_seq_length,
+        inference_max_requests=args.inference_max_batch_size,
+    )
+
+    inference_wrapped_model = GPTInferenceWrapper(model, inference_wrapper_config)
+    text_generation_controller = SimpleTextGenerationController(
+        inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer
+    )
+    return MCoreEngine(text_generation_controller=text_generation_controller)
```
```diff
 def add_text_generate_args(parser):
     group = parser.add_argument_group(title='text generation')

     group.add_argument("--port", type=int, default=5000,
                        help='port for text generation server to run on')
     group.add_argument("--temperature", type=float, default=1.0,
                        help='Sampling temperature.')
     group.add_argument("--top_k", type=int, default=1,
                        help='Top k sampling.')
     group.add_argument("--top_p", type=float, default=0.0,
                        help='Top p sampling.')
     group.add_argument("--return-log-probs", action='store_true', default=True,
                        help='Return the log probabilities of the final output tokens')
     group.add_argument("--num-tokens-to-generate", type=int, default=30,
                        help='Number of tokens to generate for each prompt')
     group.add_argument("--prompts", metavar='N', type=str, nargs='+',
                        help='Input prompts with each prompt within quotes and separated by space')
     group.add_argument("--max-batch-size", type=int, default=8,
                        help='Max number of prompts to process at once')
     return parser
```
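The `extra_args_provider` hook used below is ordinary argparse composition: `initialize_megatron` builds the core parser, then hands it to the provider, which adds its own group and returns it. A runnable toy version under that assumption (`build_parser` is a hypothetical stand-in for Megatron's initializer):

```python
import argparse


def add_text_generate_args(parser):
    # Same hook shape as above: take the shared parser, add a group, return it.
    group = parser.add_argument_group(title='text generation')
    group.add_argument("--port", type=int, default=5000,
                       help='port for text generation server to run on')
    group.add_argument("--temperature", type=float, default=1.0,
                       help='Sampling temperature.')
    return parser


def build_parser(extra_args_provider=None):
    # Toy stand-in for initialize_megatron's parser setup: core args first,
    # then the caller's hook layers its own group on top.
    parser = argparse.ArgumentParser(description='toy Megatron launcher')
    parser.add_argument("--hidden-size", type=int, default=1024)
    if extra_args_provider is not None:
        parser = extra_args_provider(parser)
    return parser


args = build_parser(extra_args_provider=add_text_generate_args).parse_args(
    ["--port", "8000", "--temperature", "0.7"]
)
assert args.port == 8000 and args.temperature == 0.7
```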
```diff
 if __name__ == "__main__":
     initialize_megatron(extra_args_provider=add_text_generate_args,
-                        args_defaults={'tokenizer_type': 'GPT2BPETokenizer',
-                                       'no_load_rng': True,
-                                       'no_load_optim': True})
+                        args_defaults={'no_load_rng': True,
+                                       'no_load_optim': True,
+                                       'exit_on_missing_checkpoint': True})

     args = get_args()
     if args.num_layers_per_virtual_pipeline_stage is not None:
```
…
```diff
@@ -125,20 +182,8 @@ if __name__ == "__main__":
     model = model[0]
     model.eval()
+    inference_engine = get_inference_engine(args, model)

     if mpu.is_pipeline_first_stage() and mpu.get_tensor_model_parallel_rank() == 0:
-        server = MegatronServer(model)
+        server = MegatronServer(inference_engine, args)
         server.run("0.0.0.0", port=args.port)

-    while True:
-        choice = torch.tensor(1, dtype=torch.long, device='cuda')
-        torch.distributed.broadcast(choice, 0)
-        if choice.item() == 0:
-            try:
-                generate_and_post_process(model)
-            except ValueError as ve:
-                pass
-        elif choice.item() == 1:
-            try:
-                beam_search_and_post_process(model)
-            except ValueError as ve:
-                pass
```
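In the removed loop, the rank that runs the HTTP server broadcasts an opcode when a request arrives (0 = generate, 1 = beam search), while every other rank blocks in `torch.distributed.broadcast` waiting for it, so all ranks enter the matching collective call together. A minimal single-node sketch of that rendezvous pattern on the gloo backend (`serve_or_follow` is my toy stand-in, not Megatron code):

```python
import os

import torch
import torch.distributed as dist


def serve_or_follow(rank: int, world_size: int):
    """Rank 0 decides what to run next and broadcasts an opcode;
    all other ranks block on the same broadcast."""
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    # Rank 0 chooses; the broadcast overwrites the tensor in place on all
    # other ranks, releasing them from the blocking collective.
    choice = torch.tensor(0 if rank == 0 else -1, dtype=torch.long)
    dist.broadcast(choice, src=0)

    if choice.item() == 0:
        pass  # every rank would enter the collective generation path here

    dist.destroy_process_group()
```

Each rank runs this once, e.g. via `torch.multiprocessing.spawn(serve_or_follow, args=(2,), nprocs=2)`.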
unit-test-job-lts.yaml deleted (100644 → 0, view file @ a02a5490)
```yaml
default:
  interruptible: true
other:
  artifacts:
    paths:
    - results/
    when: always
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  needs:
  - job: functional:configure
    pipeline: $PARENT_PIPELINE_ID
  rules:
  - if: $CI_PIPELINE_SOURCE == "parent_pipeline"
  - if: $CI_MERGE_REQUEST_ID
  script:
  - export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
    --model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
    other --container-tag 20283570 --cluster dgxh100_coreweave
  stage: unit-tests
  tags: &id001
  - arch/amd64
  - env/prod
  - origin/jet-fleet
  - owner/jet-core
  - purpose/jet-client
  - team/megatron
  timeout: 7 days
stages:
- unit-tests
tests/unit_tests/data/:
  artifacts:
    paths:
    - results/
    when: always
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  needs:
  - job: functional:configure
    pipeline: $PARENT_PIPELINE_ID
  rules:
  - if: $CI_PIPELINE_SOURCE == "parent_pipeline"
  - if: $CI_MERGE_REQUEST_ID
  script:
  - export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
    --model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
    tests/unit_tests/data/ --container-tag 20283570 --cluster dgxh100_coreweave
  stage: unit-tests
  tags: *id001
  timeout: 7 days
tests/unit_tests/dist_checkpointing/:
  artifacts:
    paths:
    - results/
    when: always
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  needs:
  - job: functional:configure
    pipeline: $PARENT_PIPELINE_ID
  rules:
  - if: $CI_PIPELINE_SOURCE == "parent_pipeline"
  - if: $CI_MERGE_REQUEST_ID
  script:
  - export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
    --model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
    tests/unit_tests/dist_checkpointing/ --container-tag 20283570 --cluster dgxh100_coreweave
  stage: unit-tests
  tags: *id001
  timeout: 7 days
tests/unit_tests/distributed/:
  artifacts:
    paths:
    - results/
    when: always
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  needs:
  - job: functional:configure
    pipeline: $PARENT_PIPELINE_ID
  rules:
  - if: $CI_PIPELINE_SOURCE == "parent_pipeline"
  - if: $CI_MERGE_REQUEST_ID
  script:
  - export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
    --model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
    tests/unit_tests/distributed/ --container-tag 20283570 --cluster dgxh100_coreweave
  stage: unit-tests
  tags: *id001
  timeout: 7 days
? tests/unit_tests/test_inference.py tests/unit_tests/test_tokenizer.py tests/unit_tests/test_utilities.py
  tests/unit_tests/test_training.py
: artifacts:
    paths:
    - results/
    when: always
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/mcore_utility:20283570
  needs:
  - job: functional:configure
    pipeline: $PARENT_PIPELINE_ID
  rules:
  - if: $CI_PIPELINE_SOURCE == "parent_pipeline"
  - if: $CI_MERGE_REQUEST_ID
  script:
  - export PYTHONPATH=$(pwd); python tests/test_utils/python_scripts/launch_jet_workload.py
    --model unit-tests --environment lts --n-repeat 1 --time-limit 1800 --test-case
    tests/unit_tests/test_inference.py tests/unit_tests/test_tokenizer.py tests/unit_tests/test_utilities.py
    tests/unit_tests/test_training.py --container-tag 20283570 --cluster dgxh100_coreweave
  stage: unit-tests
  tags: *id001
  timeout: 7 days
```
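One detail worth noting in the deleted file: the runner tag list is written once under the `&id001` anchor and every subsequent job reuses it through the `*id001` alias, which is why all jobs carry identical tags. A small sketch of how anchors round-trip (assumes PyYAML is installed; the `job_a`/`job_b` document is illustrative, not from the file):

```python
import yaml  # PyYAML

# An anchored sequence (&id001) and a later alias (*id001) load as the
# very same Python object, not a copy.
doc = yaml.safe_load("""
job_a:
  tags: &id001
  - arch/amd64
  - team/megatron
job_b:
  tags: *id001
""")
assert doc["job_a"]["tags"] == doc["job_b"]["tags"]
assert doc["job_a"]["tags"] is doc["job_b"]["tags"]  # literally the same list
```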