OpenDAS / Megatron-LM · Commits

Commit 055a673e, authored Aug 25, 2021 by rprenger
Addressing comments
Parent: 279d8320

Showing 7 changed files with 68 additions and 20 deletions (+68 -20)
Changed files:
- examples/run_cli_530B.sh (+0 -11)
- examples/run_text_generation_server_345M.sh (+32 -0)
- examples/run_text_generation_server_345M_8_tensor_parallel.sh (+32 -0)
- megatron/text_generation_server.py (+0 -0)
- megatron/text_generation_utils.py (+3 -8)
- tools/run_text_generation_server.py (+1 -1)
- tools/text_generation_cli.py (+0 -0)
examples/run_cli_530B.sh (deleted, mode 100755)

```bash
#!/bin/bash
echo "Loading model and starting server. May take several minutes"
./run_api_server_530B.sh
STATUS=1
while [ $STATUS -eq 1 ]
do
    sleep 20
    curl -s -m 20 'http://localhost:5000/generate' -X 'PUT' -H 'Content-Type: application/json; charset=UTF-8' -d '{"sentences":["Test2"], "max_len":30}' | head -n 1 | grep "HTTP/1.[01] [23].." > /dev/null
    STATUS=$?
done
python tools/run_cli.py 'http://localhost:5000/generate'
```
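The deleted wrapper combined a poll-until-ready loop with the CLI launch. The same readiness check translates naturally to Python; below is a minimal sketch, assuming the third-party `requests` package (the URL, payload shape, and 20-second cadence come from the curl loop above; everything else is illustrative):

```python
import time

import requests  # assumption: not a dependency of this repo

def wait_until_ready(url="http://localhost:5000/generate", interval=20):
    """Poll the generation endpoint until it answers without an error status."""
    while True:
        try:
            # Same probe as the curl loop: a tiny PUT request with one sentence.
            resp = requests.put(
                url,
                json={"sentences": ["Test2"], "max_len": 30},
                timeout=20,
            )
            if resp.status_code < 400:  # the curl|grep check accepted 2xx/3xx
                return
        except requests.exceptions.RequestException:
            pass  # server still loading (connection refused or timeout)
        time.sleep(interval)
```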
examples/run_api_server_530B.sh → examples/run_text_generation_server_345M.sh

```diff
 #!/bin/bash
-DISTRIBUTED_ARGS="--nproc_per_node 16 \
-                  --nnodes 3 \
+# This example will start serving the 345M model.
+DISTRIBUTED_ARGS="--nproc_per_node 1 \
+                  --nnodes 1 \
                   --node_rank 0 \
                   --master_addr localhost \
                   --master_port 6000"
 
-CHECKPOINT=<Path to checkpoint (e.g /gpt3-530b-megatron_tp16_pp3)>
+CHECKPOINT=<Path to checkpoint (e.g /345m)>
 VOCAB_FILE=<Path to vocab.json (e.g. /gpt2-vocab.json)>
 MERGE_FILE=<Path to merges.txt (e.g. /gpt2-merges.txt)>
 
 pip install flask-restful
 
-python -m torch.distributed.launch $DISTRIBUTED_ARGS tools/run_api_server.py \
-       --tensor-model-parallel-size 16 \
-       --pipeline-model-parallel-size 3 \
-       --num-layers 105 \
-       --hidden-size 20480 \
+python -m torch.distributed.launch $DISTRIBUTED_ARGS tools/run_text_generation_server.py \
+       --tensor-model-parallel-size 1 \
+       --pipeline-model-parallel-size 1 \
+       --num-layers 24 \
+       --hidden-size 1024 \
        --load ${CHECKPOINT} \
-       --num-attention-heads 128 \
-       --max-position-embeddings 2048 \
+       --num-attention-heads 16 \
+       --max-position-embeddings 1024 \
        --tokenizer-type GPT2BPETokenizer \
        --fp16 \
        --micro-batch-size 1 \
-       --seq-length 2048 \
-       --out-seq-length 2048 \
+       --seq-length 1024 \
+       --out-seq-length 1024 \
        --temperature 1.0 \
        --vocab-file $VOCAB_FILE \
        --merge-file $MERGE_FILE \
        ...
```
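Once the renamed script has the 345M server up, clients talk to the same REST endpoint the deleted wrapper probed. A hedged sketch of a single generation request (the PUT verb and the `sentences`/`max_len` fields are taken from run_cli_530B.sh above; the exact shape of the response body is not shown in this commit):

```python
import requests  # assumption: not a dependency of this repo

resp = requests.put(
    "http://localhost:5000/generate",
    json={"sentences": ["The quick brown fox"], "max_len": 30},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())  # completions for each input sentence
```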
examples/run_text_generation_server_345M_8_tensor_parallel.sh (new file, mode 100755)

```bash
#!/bin/bash
# This example will start serving the 345M model that is partitioned 8-way tensor parallel.
DISTRIBUTED_ARGS="--nproc_per_node 8 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

CHECKPOINT=<Path to checkpoint (e.g /345m)>
VOCAB_FILE=<Path to vocab.json (e.g. /gpt2-vocab.json)>
MERGE_FILE=<Path to merges.txt (e.g. /gpt2-merges.txt)>

pip install flask-restful

python -m torch.distributed.launch $DISTRIBUTED_ARGS tools/run_text_generation_server.py \
       --tensor-model-parallel-size 8 \
       --pipeline-model-parallel-size 1 \
       --num-layers 24 \
       --hidden-size 1024 \
       --load ${CHECKPOINT} \
       --num-attention-heads 16 \
       --max-position-embeddings 1024 \
       --tokenizer-type GPT2BPETokenizer \
       --fp16 \
       --micro-batch-size 1 \
       --seq-length 1024 \
       --out-seq-length 1024 \
       --temperature 1.0 \
       --vocab-file $VOCAB_FILE \
       --merge-file $MERGE_FILE \
       --top_p 0.9 \
       --seed 42
```
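Relative to the single-GPU example, the substantive change is the launch geometry: torch.distributed.launch has to spawn one process per model-parallel rank on the node. A quick sanity check of that arithmetic (the world-size formula is standard Megatron practice, not something stated in this commit):

```python
# Values from the script above.
tensor_parallel = 8    # --tensor-model-parallel-size
pipeline_parallel = 1  # --pipeline-model-parallel-size
data_parallel = 1      # a single serving replica, so no data parallelism

world_size = tensor_parallel * pipeline_parallel * data_parallel
assert world_size == 8  # must equal --nproc_per_node in DISTRIBUTED_ARGS
```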
megatron/api_server.py → megatron/text_generation_server.py (file moved)
megatron/text_generation_utils.py

```diff
@@ -121,14 +121,14 @@ def receive_generate_info():
     """
     Needs to be synced up with send_generate_info
     """
-    input_info_tensor = torch.empty(3, dtype=torch.int64, device=torch.device("cuda"))
+    input_info_tensor = torch.empty(3, dtype=torch.int64, device=torch.cuda.current_device())
     torch.distributed.broadcast(input_info_tensor, 0)
     batch_size = input_info_tensor[0].item()
     seq_len = input_info_tensor[1].item()
     max_len = input_info_tensor[2].item()
 
-    context_length_tensor = torch.empty(batch_size, dtype=torch.int64, device=torch.device("cuda"))
-    context_tokens_tensor = torch.empty(batch_size, seq_len, dtype=torch.int64, device=torch.device("cuda"))
+    context_length_tensor = torch.empty(batch_size, dtype=torch.int64, device=torch.cuda.current_device())
+    context_tokens_tensor = torch.empty(batch_size, seq_len, dtype=torch.int64, device=torch.cuda.current_device())
 
     # Send variables to all ranks
     torch.distributed.broadcast(context_length_tensor, 0)
```
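Both spellings normally resolve to the same GPU once the launcher has called `torch.cuda.set_device(local_rank)`, since an unindexed `"cuda"` device defers to the current device at allocation time; the change makes the target device explicit and matches the allocation style used on the send side. A small sketch of the two forms (illustrative only; needs a machine with at least two GPUs):

```python
import torch

torch.cuda.set_device(1)  # what a distributed launcher does per local rank

# Unindexed "cuda" resolves to the current device when the tensor is created.
a = torch.empty(3, device=torch.device("cuda"))
# current_device() returns the concrete index (an int is a valid device arg).
b = torch.empty(3, device=torch.cuda.current_device())

print(a.device, b.device)  # both cuda:1 here
```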
```diff
@@ -153,9 +153,6 @@ def synced_generate(model, context_tokens_tensor, context_length_tensor, max_len
 def generate(model, sentences=None, max_len=0):
     if torch.distributed.get_rank() == 0:
         context_tokens_tensor, context_length_tensor = tokenize_batch(sentences)
-        c = context_length_tensor[0]
-        b = context_tokens_tensor.size(0)
-        start = time.time()
         send_generate_info(context_tokens_tensor, context_length_tensor, max_len)
     else:
         context_length_tensor, context_tokens_tensor, max_len = receive_generate_info()
```
```diff
@@ -169,8 +166,6 @@ def generate(model, sentences=None, max_len=0):
         for i in range(decode_tokens.size(0)):
             decode_token = decode_tokens[i,:].cpu().numpy().tolist()
             resp_sentences.append(tokenizer.detokenize(decode_token))
-        end = time.time()
-        print(str(b) + "," + str(c) + "," + str(decode_tokens.size(1)) + "," + str(end - start), flush=True)
         return resp_sentences
 
 def switch(val1, val2, boolean):
```
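The lines removed in this hunk and the previous one read as ad-hoc latency instrumentation: the dropped print emitted batch size (`b`), first context length (`c`), decoded sequence length, and wall-clock seconds per request as one comma-separated record.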
tools/run_api_server.py → tools/run_text_generation_server.py

```diff
@@ -26,7 +26,7 @@ from megatron.checkpointing import load_checkpoint
 from megatron.initialize import initialize_megatron
 from megatron.model import GPTModel
 from megatron.training import get_model
-from megatron.api_server import MegatronServer
+from megatron.text_generation_server import MegatronServer
 from megatron.text_generation_utils import generate
 import torch
```
tools/run_cli.py → tools/text_generation_cli.py (file moved)