OpenDAS / Megatron-LM
Commit 5ff0f882, authored Jan 17, 2021 by Deepak Narayanan
GPT2->GPT in zero-shot evaluation scripts
Parent: f34cc86b
Showing 4 changed files with 8 additions and 8 deletions:

  tasks/main.py                       +1 -1
  tasks/zeroshot_gpt/datasets.py      +0 -0
  tasks/zeroshot_gpt/detokenizer.py   +0 -0
  tasks/zeroshot_gpt/evaluate.py      +7 -7
tasks/main.py
@@ -61,7 +61,7 @@ if __name__ == '__main__':
     elif args.task in ['MNLI', 'QQP']:
         from glue.finetune import main
     elif args.task in ['LAMBADA', 'WIKITEXT103']:
-        from zeroshot_gpt2.evaluate import main
+        from zeroshot_gpt.evaluate import main
     else:
         raise NotImplementedError('Task {} is not implemented.'.format(
             args.task))
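For context, tasks/main.py is a plain task dispatcher: it inspects the --task argument and imports the matching entry point, and the only change in this file is the module path for the zero-shot branch. A minimal, hypothetical sketch of the same dispatch pattern is below; only the task names and module paths come from the diff, the dispatch table and run_task helper are illustrative.

import importlib

# Illustrative dispatch table mirroring the elif chain in tasks/main.py.
_TASK_MODULES = {
    'MNLI': 'glue.finetune',
    'QQP': 'glue.finetune',
    'LAMBADA': 'zeroshot_gpt.evaluate',      # 'zeroshot_gpt2.evaluate' before this commit
    'WIKITEXT103': 'zeroshot_gpt.evaluate',
}

def run_task(task):
    # Resolve the task name to its evaluation/finetuning module and call its main().
    if task not in _TASK_MODULES:
        raise NotImplementedError('Task {} is not implemented.'.format(task))
    importlib.import_module(_TASK_MODULES[task]).main()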
tasks/zeroshot_gpt2/datasets.py → tasks/zeroshot_gpt/datasets.py
File moved
tasks/zeroshot_gpt2/detokenizer.py → tasks/zeroshot_gpt/detokenizer.py
File moved
tasks/zeroshot_gpt2/evaluate.py → tasks/zeroshot_gpt/evaluate.py

@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""GPT2 zero-shot evaluation."""
+"""GPT zero-shot evaluation."""
 
 import math
@@ -24,7 +24,7 @@ from megatron import print_rank_0, is_last_rank
 from megatron import get_tokenizer
 from megatron import mpu
 from megatron.checkpointing import load_checkpoint
-from megatron.model import GPT2Model, GPT2ModelFirstStage, GPT2ModelLastStage, GPT2ModelIntermediateStage
+from megatron.model import GPTModel, GPTModelFirstStage, GPTModelLastStage, GPTModelIntermediateStage
 from megatron.training import get_model, communicate
 from megatron.utils import get_ltor_masks_and_position_ids
 from tasks.finetune_utils import build_data_loader
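The import hunk above is where the class rename lands: GPT2Model and its pipeline-stage variants become GPTModel, GPTModelFirstStage, GPTModelLastStage, and GPTModelIntermediateStage. Any downstream script that imported the old names has to follow suit; a minimal, hedged example of the caller-side update is below. The constructor arguments are copied from this diff; everything else assumes an already-initialized Megatron environment.

# Before this commit (old class name, shown for comparison only):
#   from megatron.model import GPT2Model
# After this commit:
from megatron.model import GPTModel

# Same constructor signature as used later in this file.
model = GPTModel(num_tokentypes=0, parallel_output=True)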
@@ -47,18 +47,18 @@ def get_model_provider(eval_metric):
             raise NotImplementedError('output type for {} evaluation metric '
                                       'is not supported.'.format(eval_metric))
 
-        print_rank_0('building GPT2 model ...')
+        print_rank_0('building GPT model ...')
         if mpu.get_pipeline_model_parallel_world_size() > 1:
             # Determine model based on position of stage in pipeline.
             if mpu.is_pipeline_first_stage():
-                model = GPT2ModelFirstStage(num_tokentypes=0)
+                model = GPTModelFirstStage(num_tokentypes=0)
             elif mpu.is_pipeline_last_stage():
-                model = GPT2ModelLastStage(
+                model = GPTModelLastStage(
                     parallel_output=parallel_output, num_tokentypes=0)
             else:
-                model = GPT2ModelIntermediateStage(num_tokentypes=0)
+                model = GPTModelIntermediateStage(num_tokentypes=0)
         else:
-            model = GPT2Model(num_tokentypes=0, parallel_output=parallel_output)
+            model = GPTModel(num_tokentypes=0, parallel_output=parallel_output)
 
         return model
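The hunk above also shows the model-provider pattern the evaluation script relies on: when pipeline parallelism is active, a different model class is instantiated depending on whether the current rank holds the first, last, or an intermediate pipeline stage, and only the last stage receives parallel_output. A self-contained sketch of that selection logic follows; the Stub* classes and the explicit stage arguments are hypothetical stand-ins for Megatron's GPT*Stage models and mpu queries, and the mapping from eval_metric to parallel_output is assumed (the diff only shows the fallback error for unsupported metrics).

# Hypothetical stand-ins for GPTModelFirstStage / GPTModelLastStage /
# GPTModelIntermediateStage / GPTModel; they only record how they were built.
class StubFirstStage:
    def __init__(self):
        self.kind = 'first'

class StubLastStage:
    def __init__(self, parallel_output):
        self.kind, self.parallel_output = 'last', parallel_output

class StubIntermediateStage:
    def __init__(self):
        self.kind = 'intermediate'

class StubFullModel:
    def __init__(self, parallel_output):
        self.kind, self.parallel_output = 'full', parallel_output

def get_model_provider(eval_metric):
    # Assumption: 'loss' evaluation keeps vocab-parallel logits, other
    # metrics gather them; the diff itself only shows the error fallback.
    parallel_output = (eval_metric == 'loss')

    def model_provider(pipeline_world_size, is_first_stage, is_last_stage):
        # With pipeline parallelism, build only the slice owned by this stage.
        if pipeline_world_size > 1:
            if is_first_stage:
                return StubFirstStage()
            if is_last_stage:
                return StubLastStage(parallel_output)
            return StubIntermediateStage()
        # Without pipeline parallelism, build the full model.
        return StubFullModel(parallel_output)

    return model_provider

# Example: the rank holding the last of 4 pipeline stages during loss evaluation.
provider = get_model_provider('loss')
model = provider(pipeline_world_size=4, is_first_stage=False, is_last_stage=True)
assert model.kind == 'last' and model.parallel_output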