from libai.config import LazyCall
from libai.evaluation import PPLEvaluator
from .common.models.bert import pretrain_model as model
from .common.models.graph import graph
from .common.train import train
from .common.optim import optim
from .common.data.bert_dataset import dataloader, tokenization

vocab_file = "./nlp_data/bert-base-chinese-vocab.txt"
data_prefix = "./nlp_data/data/loss_compara_content_sentence"

tokenization.tokenizer.vocab_file = vocab_file
dataloader.train.dataset[0].data_prefix = data_prefix
dataloader.train.dataset[0].indexed_dataset.data_prefix = data_prefix

# Bert-large model config
model.cfg.num_attention_heads = 40
model.cfg.hidden_size = 5120
model.cfg.intermediate_size = 5120 * 4
model.cfg.hidden_layers = 20

train.input_placement_device = "cpu"
train.dist.pipeline_num_layers = model.cfg.hidden_layers
train.amp.enabled = True

graph.auto_parallel.enabled = False

train.train_micro_batch_size = 8
train.num_accumulation_steps = 4
train.activation_checkpoint.enabled = True

train.zero_optimization.enabled = True
train.zero_optimization.stage = 1

train.dist.data_parallel_size = 12
train.dist.tensor_parallel_size = 4
train.dist.pipeline_parallel_size = 2
train.dist.custom_pipeline_stage_id = [0] * 8 + [1] * 12

for ds in dataloader.train.dataset:
    ds.max_seq_length = model.cfg.max_position_embeddings

train.evaluation.evaluator = LazyCall(PPLEvaluator)()
train.output_dir = "output/bert_output"
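
# Notes on the distributed setup above (derived from the values in this config,
# assuming LiBai's usual parallelism semantics):
#   - Total devices = data_parallel_size * tensor_parallel_size * pipeline_parallel_size
#                   = 12 * 4 * 2 = 96 GPUs.
#   - Effective global batch size = train_micro_batch_size * num_accumulation_steps
#     * data_parallel_size = 8 * 4 * 12 = 384 samples per step.
#   - custom_pipeline_stage_id places the first 8 transformer layers on pipeline
#     stage 0 and the remaining 12 on stage 1, matching hidden_layers = 20
#     (its length must equal train.dist.pipeline_num_layers).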