Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
045959cb
Commit
045959cb
authored
Apr 16, 2021
by
Mostofa Patwary
Browse files
added this function for evaluation
parent
f32a638d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
1 deletion
+40
-1
megatron/text_generation_utils.py
megatron/text_generation_utils.py
+40
-1
No files found.
megatron/text_generation_utils.py
View file @
045959cb
...
...
@@ -190,6 +190,41 @@ def generate_samples_input_from_file(model):
raw_text
=
None
context_count
+=
1
def generate_samples_eval(model, context, max_gen_length, eos_token_id,
                          do_sample):
    """Generate a continuation of ``context`` for LM evaluation.

    The generation settings are written onto the object returned by
    ``get_args()`` (``out_seq_length``, ``recompute``, ``eos_id``,
    ``greedy`` and, when sampling, ``top_p`` / ``temperature`` /
    ``top_k``) and are NOT restored afterwards.

    Args:
        model: Model handed to ``get_token_stream``; switched to eval mode.
        context: Prompt text. Its character length is used to strip the
            prompt from the detokenized output.
        max_gen_length: Number of tokens to generate beyond the tokenized
            prompt; added to the prompt token count to form
            ``args.out_seq_length``.
        eos_token_id: Value stored in ``args.eos_id``.
        do_sample: If falsy, decode greedily; otherwise sample with
            ``top_p=1.0``, ``temperature=1.0``, ``top_k=50``.

    Returns:
        The detokenized output with the first ``len(context)`` characters
        removed, or ``""`` if the token stream produced nothing.
    """
    # TODO: still need to think about how the eos token should be handled.
    args = get_args()
    tokenizer = get_tokenizer()

    # NOTE(review): slicing by len(context) assumes detokenize(tokenize(x))
    # reproduces the prompt character-for-character -- confirm for the
    # tokenizer in use.
    raw_text_len = len(context)
    model.eval()

    context_tokens = tokenizer.tokenize(context)
    args.out_seq_length = max_gen_length + len(context_tokens)
    args.recompute = True  # set this default value
    args.eos_id = eos_token_id

    # Always assign the flag: previously it was only ever set to True, so a
    # greedy call left every later do_sample=True call on the same args
    # object decoding greedily as well.
    args.greedy = not do_sample
    if do_sample:
        # Same sampling settings as Hugging Face's defaults.
        args.top_p = 1.0
        args.temperature = 1.0
        args.top_k = 50

    latest_output = None
    with torch.no_grad():
        token_stream = get_token_stream(model, [context_tokens])
        for counter, stream_item in enumerate(token_stream):
            latest_output, _ = stream_item
            if counter == args.out_seq_length:
                break

        if latest_output is None:
            # The stream yielded nothing; avoid an UnboundLocalError.
            return ""

        # Only the final step's tokens are returned, so convert and
        # detokenize once here instead of on every generation step.
        decode_tokens = latest_output[0].cpu().numpy().tolist()

    return tokenizer.detokenize(decode_tokens)[raw_text_len:]
def
generate_samples_interactive
(
model
,
print_frequency
=
24
):
...
...
@@ -438,7 +473,11 @@ def sample_sequence_batch(model, context_tokens, context_lengths,
model
.
eval
()
with
torch
.
no_grad
():
context_length
=
context_lengths
.
min
().
item
()
eos_id
=
tokenizer
.
eod
if
hasattr
(
args
,
'eos_id'
):
eos_id
=
args
.
eos_id
else
:
eos_id
=
tokenizer
.
eod
counter
=
0
org_context_length
=
context_length
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment