Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
613d0fe8
Commit
613d0fe8
authored
Feb 09, 2021
by
Mostofa Patwary
Browse files
started evaluation of ICT
parent
f7d96a69
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
1 deletion
+10
-1
megatron/arguments.py
megatron/arguments.py
+6
-0
tools/create_doc_index.py
tools/create_doc_index.py
+4
-1
No files found.
megatron/arguments.py
View file @
613d0fe8
...
@@ -599,6 +599,9 @@ def _add_data_args(parser):
...
@@ -599,6 +599,9 @@ def _add_data_args(parser):
'This should be exclusive of --seq-length'
)
'This should be exclusive of --seq-length'
)
group
.
add_argument
(
'--decoder-seq-length'
,
type
=
int
,
default
=
None
,
group
.
add_argument
(
'--decoder-seq-length'
,
type
=
int
,
default
=
None
,
help
=
"Maximum decoder sequence length to process."
)
help
=
"Maximum decoder sequence length to process."
)
group
.
add_argument
(
'--retriever-seq-length'
,
type
=
int
,
default
=
256
,
help
=
'Maximum sequence length for the biencoder model '
' for retriever'
)
group
.
add_argument
(
'--mask-prob'
,
type
=
float
,
default
=
0.15
,
group
.
add_argument
(
'--mask-prob'
,
type
=
float
,
default
=
0.15
,
help
=
'Probability of replacing a token with mask.'
)
help
=
'Probability of replacing a token with mask.'
)
group
.
add_argument
(
'--short-seq-prob'
,
type
=
float
,
default
=
0.1
,
group
.
add_argument
(
'--short-seq-prob'
,
type
=
float
,
default
=
0.1
,
...
@@ -686,6 +689,9 @@ def _add_biencoder_args(parser):
...
@@ -686,6 +689,9 @@ def _add_biencoder_args(parser):
help
=
'Whether create the FaissMIPSIndex on GPU'
)
help
=
'Whether create the FaissMIPSIndex on GPU'
)
group
.
add_argument
(
'--block-data-path'
,
type
=
str
,
default
=
None
,
group
.
add_argument
(
'--block-data-path'
,
type
=
str
,
default
=
None
,
help
=
'Where to save/load BlockData to/from'
)
help
=
'Where to save/load BlockData to/from'
)
group
.
add_argument
(
'--embedding-path'
,
type
=
str
,
default
=
None
,
help
=
'Where to save/load Open-Retrieval Embedding'
' data to/from'
)
# indexer
# indexer
group
.
add_argument
(
'--indexer-batch-size'
,
type
=
int
,
default
=
128
,
group
.
add_argument
(
'--indexer-batch-size'
,
type
=
int
,
default
=
128
,
...
...
tools/create_doc_index.py
View file @
613d0fe8
import
os
import
sys
import
sys
sys
.
path
.
append
(
'../'
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
os
.
path
.
pardir
)))
from
megatron.indexer
import
IndexBuilder
from
megatron.indexer
import
IndexBuilder
from
megatron.initialize
import
initialize_megatron
from
megatron.initialize
import
initialize_megatron
...
@@ -22,6 +24,7 @@ def main():
...
@@ -22,6 +24,7 @@ def main():
initialize_megatron
(
extra_args_provider
=
None
,
initialize_megatron
(
extra_args_provider
=
None
,
args_defaults
=
{
'tokenizer_type'
:
'BertWordPieceLowerCase'
})
args_defaults
=
{
'tokenizer_type'
:
'BertWordPieceLowerCase'
})
index_builder
=
IndexBuilder
()
index_builder
=
IndexBuilder
()
sys
.
exit
()
index_builder
.
build_and_save_index
()
index_builder
.
build_and_save_index
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment