OpenDAS / Megatron-LM

Commit cb00a196
Authored Sep 29, 2021 by Jared Casper

    Merge branch 'main' into t5_pipeline_parallelism

Parents: 38a774e9, 5ab64637
Changes: 44

Showing 4 changed files with 74 additions and 68 deletions:

    megatron/training.py                  +22  -21
    tasks/zeroshot_gpt/evaluate.py         +1   -1
    tools/run_text_generation_server.py   +17  -46
    tools/text_generation_cli.py          +34   -0
megatron/training.py

@@ -193,7 +193,7 @@ def update_train_iters(args):
     print_rank_0('setting training iterations to {}'.format(args.train_iters))


-def get_model(model_provider_func, model_type):
+def get_model(model_provider_func, model_type, wrap_with_ddp=True):
     """Build the model."""
     args = get_args()
     args.model_type = model_type

@@ -272,22 +272,24 @@ def get_model(model_provider_func, model_type):
     if args.fp16 or args.bf16:
         model = [Float16Module(model_module, args) for model_module in model]

-    if args.DDP_impl == 'torch':
-        i = torch.cuda.current_device()
-        model = [torchDDP(model_module, device_ids=[i], output_device=i,
-                          process_group=mpu.get_data_parallel_group())
-                 for model_module in model]
-        return model
-
-    if args.DDP_impl == 'local':
-        model = [LocalDDP(model_module,
-                          args.accumulate_allreduce_grads_in_fp32,
-                          args.use_contiguous_buffers_in_ddp)
-                 for model_module in model]
-        return model
-
-    raise NotImplementedError('Unknown DDP implementation specified: {}. '
-                              'Exiting.'.format(args.DDP_impl))
+    if wrap_with_ddp:
+        if args.DDP_impl == 'torch':
+            i = torch.cuda.current_device()
+            model = [torchDDP(model_module, device_ids=[i], output_device=i,
+                              process_group=mpu.get_data_parallel_group())
+                     for model_module in model]
+
+        elif args.DDP_impl == 'local':
+            model = [LocalDDP(model_module,
+                              args.accumulate_allreduce_grads_in_fp32,
+                              args.use_contiguous_buffers_in_local_ddp)
+                     for model_module in model]
+
+        else:
+            raise NotImplementedError('Unknown DDP implementation specified: '
+                                      '{}. Exiting.'.format(args.DDP_impl))
+
+    return model


 def get_learning_rate_scheduler(optimizer):

@@ -380,11 +382,10 @@ def train_step(forward_step_func, data_iterator,
     timers = get_timers()

     # Set grad to zero.
-    if args.DDP_impl == 'local' and args.use_contiguous_buffers_in_ddp:
+    if args.DDP_impl == 'local' and args.use_contiguous_buffers_in_local_ddp:
         for partition in model:
             partition.zero_grad_buffer()
-    else:
-        optimizer.zero_grad()
+    optimizer.zero_grad()

     forward_backward_func = get_forward_backward_func()
     losses_reduced = forward_backward_func(

@@ -392,7 +393,7 @@ def train_step(forward_step_func, data_iterator,
                                            optimizer, timers, forward_only=False)

     # Empty unused memory
-    if args.empty_unused_memory_each_iter >= 1:
+    if args.empty_unused_memory_level >= 1:
         torch.cuda.empty_cache()

     # All-reduce if needed.

@@ -443,7 +444,7 @@ def train_step(forward_step_func, data_iterator,
         skipped_iter = 1

     # Empty unused memory
-    if args.empty_unused_memory_each_iter >= 2:
+    if args.empty_unused_memory_level >= 2:
         torch.cuda.empty_cache()

     if mpu.is_pipeline_last_stage(ignore_virtual=True):

@@ -755,7 +756,7 @@ def evaluate(forward_step_func, data_iterator, model, verbose=False):
                                           timers=None, forward_only=True)

             # Empty unused memory
-            if args.empty_unused_memory_each_iter >= 1:
+            if args.empty_unused_memory_level >= 1:
                 torch.cuda.empty_cache()

             if mpu.is_pipeline_last_stage(ignore_virtual=True):
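The substantive change in megatron/training.py is the new wrap_with_ddp flag on get_model: the DDP wrapping that used to happen unconditionally is now skipped when a caller only needs the bare model list for inference. Below is a minimal, self-contained sketch of that control flow; it uses a stand-in wrapper class instead of Megatron's torchDDP/LocalDDP so it runs in a single process, and every name in it is illustrative rather than Megatron's own.

import torch.nn as nn

class FakeDDP(nn.Module):
    """Stand-in for torchDDP / LocalDDP so the sketch runs without torch.distributed."""
    def __init__(self, module):
        super().__init__()
        self.module = module

def get_model_sketch(model_provider_func, wrap_with_ddp=True):
    # One entry per model chunk (virtual pipeline stage), as in Megatron.
    model = [model_provider_func()]
    if wrap_with_ddp:
        # Training path: each chunk gets a data-parallel wrapper.
        model = [FakeDDP(m) for m in model]
    # Inference path (wrap_with_ddp=False): the bare modules are returned.
    return model

train_model = get_model_sketch(lambda: nn.Linear(4, 4))
eval_model = get_model_sketch(lambda: nn.Linear(4, 4), wrap_with_ddp=False)
assert isinstance(train_model[0], FakeDDP)
assert isinstance(eval_model[0], nn.Linear)

This is why the two call sites updated below, tasks/zeroshot_gpt/evaluate.py and the new text-generation server, both pass wrap_with_ddp=False before loading a checkpoint.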
tasks/zeroshot_gpt/evaluate.py

@@ -205,7 +205,7 @@ def main():
             args.task))

     # Set up model and load checkpoint.
-    model = get_model(get_model_provider(eval_metric))
+    model = get_model(get_model_provider(eval_metric), wrap_with_ddp=False)

     if args.load is not None:
         _ = load_checkpoint(model, None, None)
tools/generate_samples_gpt.py → tools/run_text_generation_server.py

@@ -14,37 +14,31 @@
 # limitations under the License.

 """Sample Generate GPT"""

 import os
 import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              os.path.pardir)))
+import socket
 from megatron import get_args
 from megatron import print_rank_0
-from megatron import get_tokenizer
 from megatron import mpu
 from megatron.checkpointing import load_checkpoint
 from megatron.initialize import initialize_megatron
 from megatron.model import GPTModel
 from megatron.training import get_model
-from megatron.text_generation_utils import generate_and_write_samples_unconditional
-from megatron.text_generation_utils import generate_samples_input_from_file
-from megatron.text_generation_utils import generate_samples_interactive
+from megatron.text_generation_server import MegatronServer
+from megatron.text_generation_utils import generate
+import torch

 def model_provider(pre_process=True, post_process=True):
     """Build the model."""

     print_rank_0('building GPT model ...')
-    model = GPTModel(num_tokentypes=0, parallel_output=False,
-                     pre_process=pre_process, post_process=post_process)
+    model = GPTModel(num_tokentypes=0, parallel_output=False,
+                     pre_process=pre_process, post_process=post_process)

     return model

 def add_text_generate_args(parser):
     """Text generation arguments."""
     group = parser.add_argument_group(title='text generation')

     group.add_argument("--temperature", type=float, default=1.0,

@@ -57,26 +51,10 @@ def add_text_generate_args(parser):
                        help='Top k sampling.')
-    group.add_argument("--out-seq-length", type=int, default=1024,
-                       help='Size of the output generated text.')
-    group.add_argument("--sample-input-file", type=str, default=None,
-                       help='Get input from file instead of interactive mode, '
-                       'each line is an input.')
-    group.add_argument("--sample-output-file", type=str, default=None,
-                       help='Output file got from --sample-input-file')
-    group.add_argument("--num-samples", type=int, default=0,
-                       help='Number of samples to generate unconditionally, '
-                       'defaults to 0 and interactive conditional sampling')
-    group.add_argument("--genfile", type=str,
-                       help='Output file when generating unconditionally')
-    group.add_argument("--recompute", action='store_true',
-                       help='During generation recompute all attention '
-                       'instead of using previously computed keys/values.')

     return parser


-def main():
-    """Main program."""
-
+if __name__ == "__main__":
     initialize_megatron(extra_args_provider=add_text_generate_args,
                         args_defaults={'tokenizer_type': 'GPT2BPETokenizer',
                                        'no_load_rng': True,

@@ -86,27 +64,20 @@ def main():
     if args.num_layers_per_virtual_pipeline_stage is not None:
         print("Interleaved pipeline schedule is not yet supported for text generation.")
         exit()

-    # Set up model and load checkpoint.
-    model = get_model(model_provider)
+    # Set up model and load checkpoint
+    model = get_model(model_provider, wrap_with_ddp=False)

     if args.load is not None:
         _ = load_checkpoint(model, None, None)
-    assert len(model) == 1, "Above condition should have caught this"
     model = model[0]
-
-    # Generate samples.
-    if args.num_samples == 0:
-        args.micro_batch_size = 1
-        if args.sample_input_file != None:
-            generate_samples_input_from_file(model)
-        else:
-            generate_samples_interactive(model)
-    else:
-        generate_and_write_samples_unconditional(model)
-
-
-if __name__ == "__main__":
-
-    main()
+    if mpu.is_pipeline_first_stage() and mpu.get_tensor_model_parallel_rank() == 0:
+        server = MegatronServer(model)
+        server.run("0.0.0.0")
+
+    while True:
+        choice = torch.cuda.LongTensor(1)
+        torch.distributed.broadcast(choice, 0)
+        if choice[0].item() == 0:
+            generate(model)
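The new __main__ block coordinates ranks with a simple pattern: only the first pipeline stage's tensor-parallel rank 0 runs the MegatronServer, and it steers every other rank by broadcasting a command tensor that they block on before joining the collective generate(model) call. The following is a standalone sketch of that broadcast pattern, not Megatron code: it assumes a two-process CPU "gloo" group purely for illustration (the real script broadcasts a CUDA LongTensor), and the GENERATE/STOP codes are invented for the example.

import os
import torch
import torch.distributed as dist

GENERATE, STOP = 0, 1  # illustrative command codes; the server above only sends 0

def worker(rank, world_size):
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29511")
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    if rank == 0:
        # Rank 0 would normally react to HTTP requests; here it issues one
        # generate command and then tells the other ranks to stop.
        for code in (GENERATE, STOP):
            dist.broadcast(torch.tensor([code]), src=0)
    else:
        while True:
            cmd = torch.zeros(1, dtype=torch.long)
            dist.broadcast(cmd, src=0)   # blocks until rank 0 sends a command
            if cmd.item() == GENERATE:
                pass                     # the real loop calls generate(model) here
            else:
                break
    dist.destroy_process_group()

if __name__ == "__main__":
    torch.multiprocessing.spawn(worker, args=(2,), nprocs=2)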
tools/text_generation_cli.py  (new file, mode 100644)

@@ -0,0 +1,34 @@
+# coding=utf-8
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import sys
+import urllib2
+
+class PutRequest(urllib2.Request):
+    '''class to handling putting with urllib2'''
+    def get_method(self, *args, **kwargs):
+        return 'PUT'
+
+if __name__ == "__main__":
+    url = sys.argv[1]
+    while True:
+        sentence = raw_input("Enter prompt: ")
+        max_len = int(input("Enter number tokens output: "))
+        data = json.dumps({"sentences": [sentence], "max_len": max_len})
+        req = PutRequest(url, data, {'Content-Type': 'application/json'})
+        response = urllib2.urlopen(req)
+        resp_sentences = json.load(response)
+        print("Megatron Response: ")
+        print(resp_sentences["sentences"][0])