Commit 613d0fe8 authored by Mostofa Patwary
Browse files

started evaluation of ICT

parent f7d96a69
...@@ -599,6 +599,9 @@ def _add_data_args(parser): ...@@ -599,6 +599,9 @@ def _add_data_args(parser):
'This should be exclusive of --seq-length') 'This should be exclusive of --seq-length')
group.add_argument('--decoder-seq-length', type=int, default=None, group.add_argument('--decoder-seq-length', type=int, default=None,
help="Maximum decoder sequence length to process.") help="Maximum decoder sequence length to process.")
group.add_argument('--retriever-seq-length', type=int, default=256,
help='Maximum sequence length for the biencoder model '
' for retriever')
group.add_argument('--mask-prob', type=float, default=0.15, group.add_argument('--mask-prob', type=float, default=0.15,
help='Probability of replacing a token with mask.') help='Probability of replacing a token with mask.')
group.add_argument('--short-seq-prob', type=float, default=0.1, group.add_argument('--short-seq-prob', type=float, default=0.1,
...@@ -686,6 +689,9 @@ def _add_biencoder_args(parser): ...@@ -686,6 +689,9 @@ def _add_biencoder_args(parser):
help='Whether create the FaissMIPSIndex on GPU') help='Whether create the FaissMIPSIndex on GPU')
group.add_argument('--block-data-path', type=str, default=None, group.add_argument('--block-data-path', type=str, default=None,
help='Where to save/load BlockData to/from') help='Where to save/load BlockData to/from')
group.add_argument('--embedding-path', type=str, default=None,
help='Where to save/load Open-Retrieval Embedding'
' data to/from')
# indexer # indexer
group.add_argument('--indexer-batch-size', type=int, default=128, group.add_argument('--indexer-batch-size', type=int, default=128,
......
import os
import sys import sys
sys.path.append('../') sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
os.path.pardir)))
from megatron.indexer import IndexBuilder from megatron.indexer import IndexBuilder
from megatron.initialize import initialize_megatron from megatron.initialize import initialize_megatron
...@@ -22,6 +24,7 @@ def main(): ...@@ -22,6 +24,7 @@ def main():
initialize_megatron(extra_args_provider=None, initialize_megatron(extra_args_provider=None,
args_defaults={'tokenizer_type': 'BertWordPieceLowerCase'}) args_defaults={'tokenizer_type': 'BertWordPieceLowerCase'})
index_builder = IndexBuilder() index_builder = IndexBuilder()
sys.exit()
index_builder.build_and_save_index() index_builder.build_and_save_index()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment