Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
43c9137b
Commit
43c9137b
authored
Mar 18, 2021
by
Mostofa Patwary
Browse files
Fixed based on review recommendation
parent
661553f6
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
23 additions
and
17 deletions
+23
-17
megatron/arguments.py
megatron/arguments.py
+0
-11
tasks/main.py
tasks/main.py
+14
-0
tasks/orqa/evaluate_orqa.py
tasks/orqa/evaluate_orqa.py
+0
-2
tasks/orqa/natural_questions/qa_utils.py
tasks/orqa/natural_questions/qa_utils.py
+5
-2
tasks/orqa/natural_questions/tokenizers.py
tasks/orqa/natural_questions/tokenizers.py
+4
-2
No files found.
megatron/arguments.py
View file @
43c9137b
...
@@ -636,10 +636,6 @@ def _add_data_args(parser):
...
@@ -636,10 +636,6 @@ def _add_data_args(parser):
'1) a single data path, 2) multiple datasets in the'
'1) a single data path, 2) multiple datasets in the'
'form: dataset1-weight dataset1-path dataset2-weight '
'form: dataset1-weight dataset1-path dataset2-weight '
'dataset2-path ...'
)
'dataset2-path ...'
)
group
.
add_argument
(
'--qa-data-dev'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset dev file.'
)
group
.
add_argument
(
'--qa-data-test'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset test file.'
)
group
.
add_argument
(
'--split'
,
type
=
str
,
default
=
'969, 30, 1'
,
group
.
add_argument
(
'--split'
,
type
=
str
,
default
=
'969, 30, 1'
,
help
=
'Comma-separated list of proportions for training,'
help
=
'Comma-separated list of proportions for training,'
' validation, and test split. For example the split '
' validation, and test split. For example the split '
...
@@ -743,18 +739,11 @@ def _add_biencoder_args(parser):
...
@@ -743,18 +739,11 @@ def _add_biencoder_args(parser):
'square root of hidden size'
)
'square root of hidden size'
)
# faiss index
# faiss index
group
.
add_argument
(
'--faiss-use-gpu'
,
action
=
'store_true'
,
help
=
'Whether create the FaissMIPSIndex on GPU'
)
group
.
add_argument
(
'--block-data-path'
,
type
=
str
,
default
=
None
,
group
.
add_argument
(
'--block-data-path'
,
type
=
str
,
default
=
None
,
help
=
'Where to save/load BlockData to/from'
)
help
=
'Where to save/load BlockData to/from'
)
group
.
add_argument
(
'--embedding-path'
,
type
=
str
,
default
=
None
,
group
.
add_argument
(
'--embedding-path'
,
type
=
str
,
default
=
None
,
help
=
'Where to save/load Open-Retrieval Embedding'
help
=
'Where to save/load Open-Retrieval Embedding'
' data to/from'
)
' data to/from'
)
group
.
add_argument
(
'--faiss-match'
,
type
=
str
,
default
=
'string'
,
\
choices
=
[
'regex'
,
'string'
],
help
=
"Answer matching '
\
'logic type"
)
group
.
add_argument
(
'--faiss-topk-retrievals'
,
type
=
int
,
default
=
100
,
help
=
'Number of blocks to use as top-k during retrieval'
)
# indexer
# indexer
group
.
add_argument
(
'--indexer-batch-size'
,
type
=
int
,
default
=
128
,
group
.
add_argument
(
'--indexer-batch-size'
,
type
=
int
,
default
=
128
,
...
...
tasks/main.py
View file @
43c9137b
...
@@ -47,6 +47,20 @@ def get_tasks_args(parser):
...
@@ -47,6 +47,20 @@ def get_tasks_args(parser):
help
=
'Sliding window for overlapping evaluation.'
)
help
=
'Sliding window for overlapping evaluation.'
)
group
.
add_argument
(
'--strict-lambada'
,
action
=
'store_true'
,
group
.
add_argument
(
'--strict-lambada'
,
action
=
'store_true'
,
help
=
'Use more difficult formulation of lambada.'
)
help
=
'Use more difficult formulation of lambada.'
)
# Retriever args
group
.
add_argument
(
'--qa-data-dev'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset dev file.'
)
group
.
add_argument
(
'--qa-data-test'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset test file.'
)
# Faiss arguments for retriever
group
.
add_argument
(
'--faiss-use-gpu'
,
action
=
'store_true'
,
help
=
'Whether create the FaissMIPSIndex on GPU'
)
group
.
add_argument
(
'--faiss-match'
,
type
=
str
,
default
=
'string'
,
\
choices
=
[
'regex'
,
'string'
],
help
=
"Answer matching '
\
'logic type"
)
group
.
add_argument
(
'--faiss-topk-retrievals'
,
type
=
int
,
default
=
100
,
help
=
'Number of blocks to use as top-k during retrieval'
)
return
parser
return
parser
...
...
tasks/orqa/evaluate_orqa.py
View file @
43c9137b
...
@@ -19,8 +19,6 @@ import os
...
@@ -19,8 +19,6 @@ import os
import
sys
import
sys
from
megatron
import
get_args
from
megatron
import
get_args
from
megatron.initialize
import
initialize_megatron
from
tasks.orqa.evaluate_utils
import
ORQAEvaluator
from
tasks.orqa.evaluate_utils
import
ORQAEvaluator
def
main
():
def
main
():
...
...
tasks/orqa/natural_questions/qa_utils.py
View file @
43c9137b
...
@@ -2,8 +2,11 @@
...
@@ -2,8 +2,11 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# All rights reserved.
#
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# The following code has been taken from
# https://github.com/facebookresearch/DPR, which is CC-BY-NC 4.0
# licensed as of now. More details on the license can be found
# at https://github.com/facebookresearch/DPR/blob/master/LICENSE
"""
"""
Set of utilities for Q&A results validation tasks - Retriever passage
Set of utilities for Q&A results validation tasks - Retriever passage
...
...
tasks/orqa/natural_questions/tokenizers.py
View file @
43c9137b
...
@@ -2,9 +2,11 @@
...
@@ -2,9 +2,11 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
# All rights reserved.
#
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# The following code has been taken from
# https://github.com/facebookresearch/DPR, which is CC-BY-NC 4.0
# licensed as of now. More details on the license can be found
# at https://github.com/facebookresearch/DPR/blob/master/LICENSE
"""
"""
Most of the tokenizers code here is copied from DrQA codebase to avoid adding extra dependency
Most of the tokenizers code here is copied from DrQA codebase to avoid adding extra dependency
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment