OpenDAS / text-generation-inference · Commits

Commit 295831a4
Authored Oct 08, 2022 by Olivier Dehaene

Init
Showing 3 changed files with 116 additions and 0 deletions:

server/bloom_inference/utils.py   +95 −0
server/poetry.lock                +0 −0
server/pyproject.toml             +21 −0
server/bloom_inference/utils.py (new file, mode 100644)
import os
import contextlib

import torch
import torch.distributed

from transformers.generation_logits_process import (
    LogitsProcessorList,
    TemperatureLogitsWarper,
    TopPLogitsWarper,
    TopKLogitsWarper,
)


class Sampling:
    def __call__(self, logits):
        probs = torch.nn.functional.softmax(logits, dim=-1)
        next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
        return next_tokens


class Greedy:
    def __call__(self, logits):
        return logits.argmax(dim=-1)


class NextTokenChooser:
    def __init__(self, temperature=1.0, top_k=None, top_p=None, do_sample=False):
        warpers = LogitsProcessorList()
        # the following idea is largely copied from this PR: https://github.com/huggingface/transformers/pull/5420/files
        # all samplers can be found in `generation_utils_samplers.py`
        sampling = do_sample
        if temperature is not None and temperature != 1.0:
            temperature = float(temperature)
            warpers.append(TemperatureLogitsWarper(temperature))
            sampling = True
        if top_k is not None and top_k != 0:
            warpers.append(TopKLogitsWarper(top_k=top_k))
            sampling = True
        if top_p is not None and top_p < 1.0:
            warpers.append(TopPLogitsWarper(top_p=top_p))
            sampling = True

        self.warpers = warpers
        self.choice = Sampling() if sampling else Greedy()

    def __call__(self, input_ids, scores):
        scores = self.warpers(input_ids, scores)
        next_ids = self.choice(scores)
        return next_ids.unsqueeze(-1)


class StoppingCriteria:
    def __init__(self, max_new_tokens=20):
        self.max_new_tokens = max_new_tokens
        self.current_tokens = 0

    def __call__(self, all_ids):
        self.current_tokens += 1
        if self.current_tokens >= self.max_new_tokens:
            return True
        return False


def initialize_torch_distributed():
    rank = int(os.getenv("RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))

    if torch.cuda.is_available():
        # initialize `torch.distributed`
        # Set the device id.
        assert world_size <= torch.cuda.device_count(), "Each process is one gpu"
        device = rank % torch.cuda.device_count()
        torch.cuda.set_device(device)
        backend = "nccl"
    else:
        backend = "gloo"

    # Call the init process.
    torch.distributed.init_process_group(
        backend=backend,
        world_size=world_size,
        rank=rank,
        init_method="tcp://localhost:6000",
    )

    return torch.distributed.distributed_c10d._get_default_group(), rank, world_size


@contextlib.contextmanager
def set_default_dtype(dtype):
    saved_dtype = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(saved_dtype)
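For context, a minimal sketch of how NextTokenChooser and StoppingCriteria compose in a decode loop; this is not part of the commit, and `model` and `input_ids` are hypothetical stand-ins for a causal LM and a prompt tensor:

# Usage sketch (not in this commit); `model` and `input_ids` are
# hypothetical stand-ins for a causal LM and a [batch, seq] prompt tensor.
chooser = NextTokenChooser(temperature=0.7, top_k=50, do_sample=True)
stopping_criteria = StoppingCriteria(max_new_tokens=20)

while True:
    logits = model(input_ids).logits[:, -1, :]  # scores for the last position
    next_ids = chooser(input_ids, logits)       # warp logits, then sample; shape [batch, 1]
    input_ids = torch.cat([input_ids, next_ids], dim=-1)
    if stopping_criteria(input_ids):            # True once max_new_tokens were generated
        break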
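Likewise, a rough sketch of how initialize_torch_distributed and set_default_dtype might be combined when loading a sharded model; the launcher-provided RANK/WORLD_SIZE variables and the `load_model` helper are illustrative assumptions, not part of this commit:

# Illustrative only; assumes RANK and WORLD_SIZE were exported by a launcher
# such as torchrun, and that load_model is a hypothetical sharded loader.
process_group, rank, world_size = initialize_torch_distributed()

# New tensors default to float16 inside the block; the previous default
# dtype is restored on exit, even if loading raises.
with set_default_dtype(torch.float16):
    model = load_model(process_group, rank, world_size)  # hypothetical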
server/poetry.lock (new file, mode 100644)

This diff is collapsed.
server/pyproject.toml (new file, mode 100644)
[tool.poetry]
name = "bloom-inference"
version = "0.1.0"
description = "BLOOM Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

[tool.poetry.dependencies]
python = "^3.9"
protobuf = "^4.21.7"
grpcio = "^1.49.1"
torch = "^1.12.1"
typer = "^0.6.1"
grpcio-reflection = "^1.49.1"
accelerate = "^0.12.0"

[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.49.1"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"