parser.add_argument("--save_dir",default='./checkpoint',type=str,help="The output directory where the model checkpoints will be written.")
parser.add_argument("--train_set",type=str,required=True,help="The full path of train_set_file.")
parser.add_argument("--test_file",type=str,required=True,help="The full path of test file")
parser.add_argument("--max_seq_length",default=128,type=int,help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.")
parser.add_argument("--batch_size",default=32,type=int,help="Batch size per GPU/CPU for training.")
parser.add_argument("--learning_rate",default=5e-5,type=float,help="The initial learning rate for Adam.")
parser.add_argument("--weight_decay",default=0.0,type=float,help="Weight decay if we apply some.")
parser.add_argument("--epochs",default=3,type=int,help="Total number of training epochs to perform.")
parser.add_argument("--warmup_proportion",default=0.0,type=float,help="Linear warmup proportion over the training process.")
parser.add_argument("--valid_steps",default=100,type=int,help="The interval steps to evaluate model performance.")
parser.add_argument("--save_steps",default=100,type=int,help="The interval steps to save checkppoints.")
parser.add_argument("--logging_steps",default=10,type=int,help="The interval steps to logging.")
parser.add_argument("--init_from_ckpt",type=str,default=None,help="The path of checkpoint to be loaded.")
parser.add_argument("--seed",type=int,default=1000,help="random seed for initialization")
parser.add_argument('--device',choices=['cpu','gpu','xpu','npu'],default="gpu",help="Select which device to train model, defaults to gpu.")
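# A minimal sketch (not part of the original script) of how the hyperparameters above are
# typically wired into an LR scheduler and optimizer in PaddleNLP training scripts.
# It assumes `args = parser.parse_args()`, plus a `model` and a `train_data_loader`
# built elsewhere (num_training_steps = len(train_data_loader) * args.epochs).
import paddle
from paddlenlp.transformers import LinearDecayWithWarmup


def build_optimizer(args, model, num_training_steps):
    # Linear warmup over `warmup_proportion` of training, then linear decay of the LR.
    lr_scheduler = LinearDecayWithWarmup(args.learning_rate, num_training_steps,
                                         args.warmup_proportion)
    # Conventionally, bias and LayerNorm parameters are excluded from weight decay.
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params)
    return optimizer, lr_scheduler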
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys

import numpy as np
import paddle
from paddle import inference

from paddlenlp.data import Pad, Tuple
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import AutoTokenizer
from paddlenlp.utils.log import logger
sys.path.append(".")
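# A minimal sketch, assuming a `tokenizer` created with AutoTokenizer.from_pretrained(...):
# the Pad and Tuple helpers imported above are typically composed into a batchify_fn that
# pads each field of a tokenized text pair and stacks the fields into batch arrays
# (query_input_ids, query_token_type_ids, title_input_ids, title_token_type_ids).
def build_batchify_fn(tokenizer):
    return Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),       # query_input_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # query_token_type_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),       # title_input_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # title_token_type_ids
    )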
# yapf: disable
parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str, required=True, help="The directory of the exported static graph model.")
parser.add_argument("--max_seq_length", default=128, type=int, help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.")
parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for inference.")
parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to run the model on; defaults to gpu.")
parser.add_argument("--input_file", type=str, required=True, help="The test set file.")
parser.add_argument('--use_tensorrt', default=False, type=eval, choices=[True, False], help='Enable TensorRT to speed up inference.')
parser.add_argument("--margin", default=0.1, type=float, help="Margin between pos_score and neg_score.")
parser.add_argument("--test_file", type=str, required=True, help="The full path of the test file.")
parser.add_argument("--max_seq_length",default=128,type=int,help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.")
parser.add_argument("--batch_size",default=32,type=int,help="Batch size per GPU/CPU for training.")
parser.add_argument('--model_name_or_path', default="ernie-3.0-medium-zh", help="The pretrained model used for training.")
parser.add_argument("--init_from_ckpt", type=str, default=None, help="The path of the checkpoint to be loaded.")
parser.add_argument("--seed", type=int, default=1000, help="Random seed for initialization.")
parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to run the model on; defaults to gpu.")
parser.add_argument("--params_path", type=str, required=True, default='./checkpoint/model_900/model_state.pdparams', help="The path to the model parameters to be loaded.")
parser.add_argument("--output_path", type=str, default='./output', help="The path where the static graph model parameters will be saved.")
parser.add_argument("--model_filename", type=str, default='inference.get_pooled_embedding.pdmodel', help="The name of the file to load the inference program from. If it is None, the default filename __model__ will be used.")
parser.add_argument("--params_filename", type=str, default='inference.get_pooled_embedding.pdiparams', help="The name of the file to load all parameters from. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.")
parser.add_argument("--input_file",type=str,required=True,help="The full path of input file")
parser.add_argument("--params_path",type=str,required=True,help="The path to model parameters to be loaded.")
parser.add_argument("--max_seq_length",default=64,type=int,help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.")
parser.add_argument("--batch_size",default=32,type=int,help="Batch size per GPU/CPU for training.")
parser.add_argument('--device',choices=['cpu','gpu'],default="gpu",help="Select which device to train model, defaults to gpu.")
args=parser.parse_args()
# yapf: enable
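# A minimal sketch, assuming the exported files under args.model_dir follow the
# "inference.get_pooled_embedding.*" naming used above: it shows how args.device and
# args.use_tensorrt typically translate into a paddle.inference predictor.
def create_paddle_predictor(args):
    model_file = os.path.join(args.model_dir, "inference.get_pooled_embedding.pdmodel")
    params_file = os.path.join(args.model_dir, "inference.get_pooled_embedding.pdiparams")
    config = inference.Config(model_file, params_file)
    if args.device == "gpu":
        # Reserve a 100 MB initial GPU memory pool on card 0.
        config.enable_use_gpu(100, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=args.batch_size,
                min_subgraph_size=5,
                precision_mode=inference.PrecisionType.Float32)
    else:
        config.disable_gpu()
    config.switch_use_feed_fetch_ops(False)
    return inference.create_predictor(config)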
def predict(model, data_loader):
    """
    Predicts the data labels.

    Args:
        model (obj:`SemanticIndexBase`): A model to extract text embedding or calculate similarity of text pair.
        data_loader (obj:`List(Example)`): The processed data ids of text pair: [query_input_ids, query_token_type_ids, title_input_ids, title_token_type_ids]

    Returns:
        results (obj:`List`): cosine similarity of text pairs.