Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
43c9137b
Commit
43c9137b
authored
Mar 18, 2021
by
Mostofa Patwary
Browse files
Fixed based on review recommendation
parent
661553f6
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
23 additions
and
17 deletions
+23
-17
megatron/arguments.py
megatron/arguments.py
+0
-11
tasks/main.py
tasks/main.py
+14
-0
tasks/orqa/evaluate_orqa.py
tasks/orqa/evaluate_orqa.py
+0
-2
tasks/orqa/natural_questions/qa_utils.py
tasks/orqa/natural_questions/qa_utils.py
+5
-2
tasks/orqa/natural_questions/tokenizers.py
tasks/orqa/natural_questions/tokenizers.py
+4
-2
No files found.
megatron/arguments.py
View file @
43c9137b
...
...
@@ -636,10 +636,6 @@ def _add_data_args(parser):
'1) a single data path, 2) multiple datasets in the'
'form: dataset1-weight dataset1-path dataset2-weight '
'dataset2-path ...'
)
group
.
add_argument
(
'--qa-data-dev'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset dev file.'
)
group
.
add_argument
(
'--qa-data-test'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset test file.'
)
group
.
add_argument
(
'--split'
,
type
=
str
,
default
=
'969, 30, 1'
,
help
=
'Comma-separated list of proportions for training,'
' validation, and test split. For example the split '
...
...
@@ -743,18 +739,11 @@ def _add_biencoder_args(parser):
'square root of hidden size'
)
# faiss index
group
.
add_argument
(
'--faiss-use-gpu'
,
action
=
'store_true'
,
help
=
'Whether create the FaissMIPSIndex on GPU'
)
group
.
add_argument
(
'--block-data-path'
,
type
=
str
,
default
=
None
,
help
=
'Where to save/load BlockData to/from'
)
group
.
add_argument
(
'--embedding-path'
,
type
=
str
,
default
=
None
,
help
=
'Where to save/load Open-Retrieval Embedding'
' data to/from'
)
group
.
add_argument
(
'--faiss-match'
,
type
=
str
,
default
=
'string'
,
\
choices
=
[
'regex'
,
'string'
],
help
=
"Answer matching '
\
'logic type"
)
group
.
add_argument
(
'--faiss-topk-retrievals'
,
type
=
int
,
default
=
100
,
help
=
'Number of blocks to use as top-k during retrieval'
)
# indexer
group
.
add_argument
(
'--indexer-batch-size'
,
type
=
int
,
default
=
128
,
...
...
tasks/main.py
View file @
43c9137b
...
...
@@ -47,6 +47,20 @@ def get_tasks_args(parser):
help
=
'Sliding window for overlapping evaluation.'
)
group
.
add_argument
(
'--strict-lambada'
,
action
=
'store_true'
,
help
=
'Use more difficult formulation of lambada.'
)
# Retriever args
group
.
add_argument
(
'--qa-data-dev'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset dev file.'
)
group
.
add_argument
(
'--qa-data-test'
,
type
=
str
,
default
=
None
,
help
=
'Path to the QA dataset test file.'
)
# Faiss arguments for retriever
group
.
add_argument
(
'--faiss-use-gpu'
,
action
=
'store_true'
,
help
=
'Whether create the FaissMIPSIndex on GPU'
)
group
.
add_argument
(
'--faiss-match'
,
type
=
str
,
default
=
'string'
,
\
choices
=
[
'regex'
,
'string'
],
help
=
"Answer matching '
\
'logic type"
)
group
.
add_argument
(
'--faiss-topk-retrievals'
,
type
=
int
,
default
=
100
,
help
=
'Number of blocks to use as top-k during retrieval'
)
return
parser
...
...
tasks/orqa/evaluate_orqa.py
View file @
43c9137b
...
...
@@ -19,8 +19,6 @@ import os
import
sys
from
megatron
import
get_args
from
megatron.initialize
import
initialize_megatron
from
tasks.orqa.evaluate_utils
import
ORQAEvaluator
def
main
():
...
...
tasks/orqa/natural_questions/qa_utils.py
View file @
43c9137b
...
...
@@ -2,8 +2,11 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# The following code has been taken from
# https://github.com/facebookresearch/DPR, which is CC-BY-NC 4.0
# licensed as of now. More details on the license can be found
# at https://github.com/facebookresearch/DPR/blob/master/LICENSE
"""
Set of utilities for Q&A results validation tasks - Retriever passage
...
...
tasks/orqa/natural_questions/tokenizers.py
View file @
43c9137b
...
...
@@ -2,9 +2,11 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# The following code has been taken from
# https://github.com/facebookresearch/DPR, which is CC-BY-NC 4.0
# licensed as of now. More details on the license can be found
# at https://github.com/facebookresearch/DPR/blob/master/LICENSE
"""
Most of the tokenizers code here is copied from DrQA codebase to avoid adding extra dependency
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment