OpenDAS / Megatron-LM · Commits

Commit 107c29e8, authored Sep 22, 2021 by mshoeybi

    working

Parent: 2f08c0c3

Showing 2 changed files with 87 additions and 2 deletions (+87, -2):
megatron/inference/communication.py  (+49, -0)
megatron/inference/tokenization.py   (+38, -2)
megatron/inference/communication.py (new file, mode 0 → 100644)
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Communications utilities."""
import torch


def broadcast_tensor(size, dtype, tensor=None, rank=0):
    """Given size and type of a tensor on all ranks and the tensor value
    only on a specific rank, broadcast from that rank to all other ranks.
    """
    if torch.distributed.get_rank() == rank:
        assert tensor is not None
        assert tensor.is_cuda
    else:
        tensor = torch.empty(size,
                             dtype=dtype,
                             device=torch.cuda.current_device())

    torch.distributed.broadcast(tensor, rank)

    return tensor
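As a usage note (not part of this commit): broadcast_tensor is a collective, so every rank must call it with the same size, dtype, and rank arguments, and only the source rank supplies the tensor. A minimal driver sketch, assuming a NCCL process group initialized externally (e.g. via torchrun) with one GPU per rank:

# Hypothetical driver, for illustration only -- launch with e.g.
#   torchrun --nproc_per_node=2 demo.py
import torch

from megatron.inference.communication import broadcast_tensor

torch.distributed.init_process_group(backend='nccl')
torch.cuda.set_device(torch.distributed.get_rank() % torch.cuda.device_count())

# Only the source rank owns the data; all ranks agree on size and dtype.
tensor = None
if torch.distributed.get_rank() == 0:
    tensor = torch.arange(8, dtype=torch.int64,
                          device=torch.cuda.current_device())

# Collective call: after it returns, every rank holds the same 8 values.
tensor = broadcast_tensor([8], torch.int64, tensor=tensor, rank=0)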
def broadcast_int_list(size, int_list=None, rank=0):
    """Broadcast a list of integer values."""

    long_tensor = None
    if torch.distributed.get_rank() == rank:
        long_tensor = torch.tensor(int_list, dtype=torch.int64,
                                   device=torch.cuda.current_device())

    return broadcast_tensor(size, torch.int64, tensor=long_tensor, rank=rank)
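The reason broadcast_int_list exists becomes clear in tokenize_prompts below: non-source ranks cannot allocate a receive buffer for a tensor whose shape they do not know, so the shape is broadcast first as a small int64 tensor of known length. A sketch of that two-step pattern (variable names hypothetical, same process-group assumptions as above):

# Illustrative pattern, not from the commit: rank 0 holds a tensor
# whose shape the other ranks do not know in advance.
sizes_list = None
data = None
if torch.distributed.get_rank() == 0:
    data = torch.randn(4, 16, device=torch.cuda.current_device())
    sizes_list = [data.size(0), data.size(1)]

# Step 1: broadcast the shape (its length, 2, is known everywhere).
sizes = broadcast_int_list(2, int_list=sizes_list, rank=0).tolist()
# Step 2: broadcast the payload; non-source ranks can now allocate it.
data = broadcast_tensor(sizes, torch.float32, tensor=data, rank=0)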
megatron/inference/batching.py → megatron/inference/tokenization.py
...
@@ -13,16 +13,52 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Batching utilities."""
+"""Tokenization utilities."""

import torch

from megatron import get_tokenizer
+from .communication import broadcast_int_list, broadcast_tensor


-def tokenize_prompts_and_batch(prompts, tokens_to_generate):
+def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
"""Tokenize prompts and make them avaiable on all ranks."""
# On all ranks set to None so we can pass them to functions
sizes_list
=
None
prompts_tokens_cuda_long_tensor
=
None
prompts_length_cuda_long_tensor
=
None
# On the specified rank, build the above.
if
torch
.
distributed
.
get_rank
()
==
rank
:
assert
prompts
is
not
None
assert
tokens_to_generate
is
not
None
# Tensor of tokens padded and their unpadded length.
prompts_tokens_cuda_long_tensor
,
prompts_length_cuda_long_tensor
=
\
_tokenize_prompts_and_batch
(
prompts
,
tokens_to_generate
)
# We need the sizes of these tensors for the boradcast
sizes_list
=
[
prompts_tokens_cuda_long_tensor
.
size
(
0
),
# Batch size
prompts_tokens_cuda_long_tensor
.
size
(
1
)]
# Sequence lenght
# First, broadcast the sizes.
sizes_tensor
=
broadcast_int_list
(
2
,
int_list
=
sizes_list
,
rank
=
rank
)
# Now that we have the sizes, we can boradcast the tokens
# and length tensors.
sizes
=
sizes_tensor
.
tolist
()
prompts_tokens_cuda_long_tensor
=
broadcast_tensor
(
sizes
,
torch
.
int64
,
tensor
=
prompts_tokens_cuda_long_tensor
,
rank
=
rank
)
prompts_length_cuda_long_tensor
=
broadcast_tensor
(
sizes
[
0
],
torch
.
int64
,
tensor
=
prompts_length_cuda_long_tensor
,
rank
=
rank
)
return
prompts_tokens_cuda_long_tensor
,
prompts_length_cuda_long_tensor
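A hedged call-site sketch (not part of this commit): since tokenize_prompts broadcasts internally, every rank must enter it, and only the source rank passes the actual prompts. Assuming the same initialized process group as above:

# Hypothetical call site. Non-source ranks pass None and receive the
# padded token tensor and the unpadded-length tensor via broadcast.
if torch.distributed.get_rank() == 0:
    tokens, lengths = tokenize_prompts(
        prompts=['Hello world', 'Megatron-LM is'],
        tokens_to_generate=32, rank=0)
else:
    tokens, lengths = tokenize_prompts(rank=0)
# tokens is [batch, seq_len] int64 on GPU; lengths holds each prompt's
# unpadded length, as described in the function's comments.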
+def _tokenize_prompts_and_batch(prompts, tokens_to_generate):
    """Given a set of prompts and number of tokens to generate:
        - tokenize prompts
        - set the sequence length to be the max of length of prompts
...
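The body of _tokenize_prompts_and_batch is truncated in this diff view (the trailing '...'). Purely as an illustration of what its docstring describes, and emphatically not the commit's actual code, a padding routine could look like the following sketch (helper name suffixed _sketch to mark it hypothetical; it assumes Megatron's tokenizer API with tokenize() and an eod token id):

def _tokenize_prompts_and_batch_sketch(prompts, tokens_to_generate):
    # Illustrative only: the real body is elided in the diff above.
    tokenizer = get_tokenizer()
    prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]
    prompts_length = [len(tokens) for tokens in prompts_tokens]
    # Pad every prompt out to the longest prompt plus generation room.
    max_len = max(prompts_length) + tokens_to_generate
    for tokens in prompts_tokens:
        tokens.extend([tokenizer.eod] * (max_len - len(tokens)))
    device = torch.cuda.current_device()
    return (torch.tensor(prompts_tokens, dtype=torch.int64, device=device),
            torch.tensor(prompts_length, dtype=torch.int64, device=device))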