Commit df33344a authored by zihanl

process wow

parent 7d044e4e
@@ -3,4 +3,8 @@ __pycache__
 # Distribution / packaging
 build/
 dist/
-*.egg-info/
\ No newline at end of file
+*.egg-info/
+tensorboard/
+commands
+*.log
+logs
\ No newline at end of file
#!/bin/bash
srun -p batch_short,batch -A gpu_adlr_nlp -t 2:00:00 --nodes=1 --ntasks-per-node=16 --gres=gpu:16,gpfs:circe --job-name=interact --container-mounts=/gpfs/fs1/projects/gpu_adlr/datasets:/gpfs/fs1/projects/gpu_adlr/datasets,/home/zihanl:/home/zihanl --container-image=gitlab-master.nvidia.com/adlr/megatron-lm/pytorch-nlp-retriever-faiss:20.12-py3-devel --exclusive --pty bash
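
# Requests an interactive, exclusive 16-GPU node (2-hour limit) and drops into a
# shell inside the Megatron container, with the datasets and home directories mounted.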
#!/bin/bash
#SBATCH -p interactive -A gpu_adlr_nlp -t 1:00:00 --nodes=1 --exclusive --mem=0 --overcommit --ntasks-per-node=16 --gres=gpu:16,gpfs:circe --dependency=singleton --job-name=adlr-nlp-largelm:gpt3-357m
NAME="gpt3-357m"
DIR=`pwd`
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
mkdir -p $DIR/logs
TENSORBOARD_DIR="${DIR}/tensorboard/${NAME}"
mkdir -p ${TENSORBOARD_DIR}
DATA_PATH=/gpfs/fs1/projects/gpu_adlr/datasets/nlp/gpt2_indexed_dataset/roberta_dataset/rn_owt_sto_wiki_dedup_shuf_cleaned_0.7_text_document
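
# Flag notes (standard Megatron-LM semantics, stated here as an aid rather than
# as part of the original script):
# - 24 layers x 1024 hidden x 16 heads matches the 357M-parameter GPT
#   configuration named above.
# - --rampup-batch-size 32 32 1953125 ramps the global batch size from 32 in
#   increments of 32 over the first 1,953,125 samples before settling at
#   --global-batch-size 256.
# - --exit-interval 100 makes the job exit after 100 iterations, so this run is
#   effectively a short smoke test despite --train-samples 192000000.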
options=" \
--tensor-model-parallel-size 1 \
--pipeline-model-parallel-size 1 \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--micro-batch-size 2 \
--global-batch-size 256 \
--rampup-batch-size 32 32 1953125 \
--train-samples 192000000 \
--lr-decay-samples 166400000 \
--lr-warmup-samples 162761 \
--lr 3.0e-4 \
--min-lr 3.0e-5 \
--lr-decay-style cosine \
--log-interval 100 \
--eval-iters 50 \
--eval-interval 2000 \
--data-path ${DATA_PATH} \
--vocab-file /gpfs/fs1/projects/gpu_adlr/datasets/nlp/gpt2_indexed_dataset/bpe/gpt2-vocab.json \
--merge-file /gpfs/fs1/projects/gpu_adlr/datasets/nlp/gpt2_indexed_dataset/bpe/gpt2-merges.txt \
--save-interval 10000 \
--exit-interval 100 \
--save /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/gpt3/${NAME} \
--load /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/gpt3/${NAME} \
--split 98,2,0 \
--clip-grad 1.0 \
--weight-decay 0.1 \
--adam-beta1 0.9 \
--adam-beta2 0.95 \
--init-method-std 0.02 \
--log-params-norm \
--log-num-zeros-in-grad \
--fp16 \
--DDP-impl torch \
--tensorboard-dir ${TENSORBOARD_DIR} \
--checkpoint-activations "
run_cmd="python ${DIR}/pretrain_gpt.py ${options}"
srun -l \
--container-image "gitlab-master.nvidia.com/adlr/megatron-lm/pytorch-nlp-retriever-faiss:20.12-py3-devel" \
--container-mounts "/gpfs/fs1/projects/gpu_adlr/datasets:/gpfs/fs1/projects/gpu_adlr/datasets,/home/zihanl:/home/zihanl" \
--output=$DIR/logs/%x_%j_$DATETIME.log sh -c "${run_cmd}"
set +x
#!/bin/bash
NAME="gpt3-357m"
DIR=`pwd`
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
mkdir -p $DIR/logs
TENSORBOARD_DIR="${DIR}/tensorboard/${NAME}"
mkdir -p ${TENSORBOARD_DIR}
DATA_PATH=/gpfs/fs1/projects/gpu_adlr/datasets/nlp/gpt2_indexed_dataset/roberta_dataset/rn_owt_sto_wiki_dedup_shuf_cleaned_0.7_text_document
options=" \
--tensor-model-parallel-size 1 \
--pipeline-model-parallel-size 1 \
--num-layers 24 \
--hidden-size 1024 \
--num-attention-heads 16 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--micro-batch-size 2 \
--global-batch-size 256 \
--rampup-batch-size 32 32 1953125 \
--train-samples 192000000 \
--lr-decay-samples 166400000 \
--lr-warmup-samples 162761 \
--lr 3.0e-4 \
--min-lr 3.0e-5 \
--lr-decay-style cosine \
--log-interval 100 \
--eval-iters 50 \
--eval-interval 2000 \
--data-path ${DATA_PATH} \
--vocab-file /gpfs/fs1/projects/gpu_adlr/datasets/nlp/gpt2_indexed_dataset/bpe/gpt2-vocab.json \
--merge-file /gpfs/fs1/projects/gpu_adlr/datasets/nlp/gpt2_indexed_dataset/bpe/gpt2-merges.txt \
--save-interval 10000 \
--exit-interval 100 \
--save /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/gpt3/${NAME} \
--load /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/gpt3/${NAME} \
--split 98,2,0 \
--clip-grad 1.0 \
--weight-decay 0.1 \
--adam-beta1 0.9 \
--adam-beta2 0.95 \
--init-method-std 0.02 \
--log-params-norm \
--log-num-zeros-in-grad \
--fp16 \
--DDP-impl torch \
--tensorboard-dir ${TENSORBOARD_DIR} \
--checkpoint-activations "
run_cmd="${DIR}/pretrain_gpt.py ${options}"
GPUS_PER_NODE=16
MASTER_ADDR=localhost
MASTER_PORT=6000
NNODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
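# WORLD_SIZE (GPUS_PER_NODE * NNODES = 16) is computed for reference only;
# torch.distributed.launch derives the world size and per-process ranks from the
# --nproc_per_node and --nnodes values passed in DISTRIBUTED_ARGS.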
python -m torch.distributed.launch $DISTRIBUTED_ARGS ${run_cmd}
set +x
from src.config import get_params
from transformers import AutoTokenizer
import torch
import numpy as np
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import os
wn_lemma = WordNetLemmatizer()
stop_words = stopwords.words('english')
stop_words.append("n't")
stop_words.append("'s")
punctuations = list(string.punctuation)
punctuations.append("``")
punctuations.append("''")
stop_words_and_punctuations = stop_words + punctuations
stop_words_and_punctuations_table = {word: True for word in stop_words_and_punctuations}
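# The dict gives O(1) membership checks; it is consulted below to skip stop words
# and punctuation when scanning control-sentence words for overlap phrases.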
label_set = ["O", "B", "I"]
def read_data(input_datapath):
data = []
print("Reading data from %s" % input_datapath)
with open(input_datapath, "r") as f:
for i, line in enumerate(f):
line = line.strip()
splits = line.split("\t")
length = len(splits)
assert length == 2 or length == 4
# length is 2: dialog context + response
# length is 4: dialog context + topic + control sentence + response
if length == 2:
# dialog context + response
data.append(line)
else:
# only need dialog context + control sentence + response
data.append(splits[0] + "\t" + splits[2] + "\t" + splits[3])
return data
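
# Illustrative input lines (hypothetical examples; fields are tab-separated):
#   2 fields: "hi [SEP] any hobbies?\tI like hiking"
#   4 fields: "hi [SEP] any hobbies?\thiking\tHiking is a long walk.\tI like hiking"
# For 4-field lines, the topic (second field) is dropped and only the dialog
# context, control sentence, and response are kept.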
def write_data(output_datapath, output_data):
print("Writing data to %s" % output_datapath)
with open(output_datapath, "w") as fw:
for data_sample in output_data:
fw.write(data_sample + "\n")
def detect_entities(tokenizer, ner_model, sentence):
tokens = sentence.split()
token_ids, first_tok_masks = [tokenizer.cls_token_id], [0]
for token in tokens:
subs_ = tokenizer.tokenize(token)
assert len(subs_) > 0
token_ids.extend(tokenizer.convert_tokens_to_ids(subs_))
first_tok_masks.extend([1] + [0] * (len(subs_) - 1))
token_ids.append(tokenizer.sep_token_id)
first_tok_masks.append(0)
token_ids = torch.LongTensor([token_ids]).cuda()
predictions = ner_model(token_ids)
predictions = predictions[0].data.cpu().numpy() # (seq_len, 3)
pred_ids = list(np.argmax(predictions, axis=1))
assert len(pred_ids) == len(first_tok_masks)
preds_for_each_word = []
for pred_id, mask in zip(pred_ids, first_tok_masks):
if mask == 1:
preds_for_each_word.append(label_set[pred_id])
assert len(preds_for_each_word) == len(tokens)
# extract entities
entity_list = []
temp = []
for i, (token, pred) in enumerate(zip(tokens, preds_for_each_word)):
if pred == "O":
if len(temp) > 0:
entity_list.append(" ".join(temp))
temp = []
else:
# pred == "B" or pred == "I"
temp.append(token)
    # flush a trailing entity at the end of the sentence (otherwise it is lost)
    if len(temp) > 0:
        entity_list.append(" ".join(temp))
    return entity_list
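
# Minimal usage sketch (hypothetical sentence; assumes the fine-tuned NER model
# and tokenizer loaded in main() below):
#   detect_entities(tokenizer, ner_model, "Alan Turing lived in Cambridge")
#   # with word-level BIO tags B I O O B this returns
#   # ["Alan Turing", "Cambridge"]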
def generate_entity_control_data(tokenizer, ner_model, input_data):
# aim to generate:
    # dialog context + entity control code (optional) + relevant control sentence (containing the entity) + response
output_data = []
## TODO
n_skip, n_skip_no_overlap, n_skip_one_contain_another = 0, 0, 0
n_control, n_entity_control, n_overlap_control = 0, 0, 0
total_num_control_code = 0
for sample_idx, data_item in enumerate(tqdm(input_data)):
# # Debug only
# if sample_idx > 1000:
# break
# 1. detect entities for dialog context, control sentence and response
splits = data_item.split("\t")
if len(splits) == 2:
output_data.append(data_item)
continue
assert len(splits) == 3
last_turn = splits[0].split(" [SEP] ")[-1]
control_sent = splits[1]
response = splits[2]
if control_sent in response or response in control_sent:
            # if the whole control_sent is part of the response or vice versa, skip this data sample
n_skip += 1
n_skip_one_contain_another += 1
continue
last_turn_entities = detect_entities(tokenizer, ner_model, last_turn)
control_sent_entities = detect_entities(tokenizer, ner_model, control_sent)
response_entities = detect_entities(tokenizer, ner_model, response)
        # 2. generate control code:
        # 2.1 If one or more entities are common to last_turn, the control sentence, and the response, there is no need to use an entity as control.
        # 2.2 If an entity exists only in the control sentence and the response, use it as the control code.
        # 2.3 If there are no overlapping entities or words between the control sentence and the response, skip this data sample.
        # 2.4 If there are no overlapping entities but there are overlapping words, add each entity in the control sentence (if any) as a control code, provided it does not appear in the dialog context.
# TODO
# In general, need to trim the control sentence when it is too long.
# Need to lowercase to match?
# calculate common entity between control sentence and response
common_entity_list = []
for ctrl_entity in control_sent_entities:
for resp_entity in response_entities:
if resp_entity in ctrl_entity:
common_entity_list.append(ctrl_entity)
break
elif ctrl_entity in resp_entity:
common_entity_list.append(resp_entity)
break
if len(common_entity_list) == 0:
# calculate overlap between control sentence and response
control_word_list = control_sent.split()
response_word_list = response.split()
response_word_table = {wn_lemma.lemmatize(word): True for word in response_word_list}
overlap_phrases = []
temp = []
for word in control_word_list:
if word.lower() in stop_words_and_punctuations_table:
continue
if wn_lemma.lemmatize(word) in response_word_table:
temp.append(word)
else:
if len(temp) > 0:
if len(temp) > 4:
temp = temp[:4]
overlap_phrases.append(" ".join(temp))
                        temp = []
            # flush trailing overlapping words at the end of the control sentence
            if len(temp) > 0:
                overlap_phrases.append(" ".join(temp[:4]))
            if len(overlap_phrases) == 0:
# skip this data sample
n_skip += 1
n_skip_no_overlap += 1
continue
n_control += 1
control_code_list = []
if len(control_sent_entities) > 0:
n_entity_control += 1
                # sort control_sent_entities by length (longest first)
control_sent_entities = sorted(control_sent_entities, key=len, reverse=True)
for entity in control_sent_entities:
if entity not in last_turn:
add_flag = True
for code in control_code_list:
if entity in code:
add_flag = False
break
if add_flag:
control_code_list.append(entity)
else:
n_overlap_control += 1
                # sort overlap_phrases by length (longest first) and keep at most 3
overlap_phrases = sorted(overlap_phrases, key=len, reverse=True)[:3]
for phrase in overlap_phrases:
if phrase not in last_turn:
add_flag = True
for code in control_code_list:
if phrase in code:
                                # skip phrases already covered by an existing code
add_flag = False
break
if add_flag:
control_code_list.append(phrase)
else:
n_entity_control += 1
n_control += 1
control_code_list = []
            # sort common_entity_list by length (longest first)
common_entity_list = sorted(common_entity_list, key=len, reverse=True)
for entity in common_entity_list:
if entity not in last_turn:
add_flag = True
for code in control_code_list:
if entity in code:
add_flag = False
break
if add_flag:
control_code_list.append(entity)
total_num_control_code += len(control_code_list)
if len(control_code_list) > 0:
output_data.append(splits[0] + "\t" + " [CTRL] ".join(control_code_list) + "\t" + control_sent + "\t" + response)
else:
output_data.append(splits[0] + "\t" + control_sent + "\t" + response)
    # guard against division by zero when no control cases exist
    avg_num_control_code = total_num_control_code * 1.0 / max(n_control, 1)
    print("Number of skipped samples: %d (one contains the other: %d + no overlap: %d)" % (n_skip, n_skip_one_contain_another, n_skip_no_overlap))
    print("Total data size: %d. Number of control cases: %d (entity control: %d + overlap control: %d)" % (len(output_data), n_control, n_entity_control, n_overlap_control))
    print("Number of control codes: %d vs. number of control cases: %d (average control codes per case: %.4f)" % (total_num_control_code, n_control, avg_num_control_code))
return output_data
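
# A control case is written out as (hypothetical example):
#   dialog context \t code1 [CTRL] code2 \t control sentence \t response
# i.e. the extracted control codes, joined by " [CTRL] ", are inserted between
# the dialog context and the control sentence.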
def main(params):
# load model and tokenizer
model_saved_path = os.path.join(params.saved_folder, params.model_name+".pt")
ner_model = torch.load(model_saved_path)["model"]
ner_model.cuda()
ner_model.eval()
tokenizer = AutoTokenizer.from_pretrained(params.model_name)
# load data
datafolder = os.path.join(params.default_folder, params.infer_datafolder)
input_datapath = os.path.join(datafolder, params.infer_dataname)
output_datapath = os.path.join(datafolder, params.output_dataname)
# read input data
input_data = read_data(input_datapath)
# process data (generate entity control data)
output_data = generate_entity_control_data(tokenizer, ner_model, input_data)
# write output data
write_data(output_datapath, output_data)
if __name__ == "__main__":
params = get_params()
main(params)
\ No newline at end of file
INFO - 06/21/21 23:13:46 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:13:46 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:13:46 - 0:00:00 - The experiment will be stored in logs/conll2003/1
INFO - 06/21/21 23:25:29 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:25:29 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:25:29 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:25:29 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:29 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:25:29 - 0:00:01 - Attempting to acquire lock 22598820184656 on /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373.lock
INFO - 06/21/21 23:25:29 - 0:00:01 - Lock 22598820184656 acquired on /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373.lock
DEBUG - 06/21/21 23:25:29 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:30 - 0:00:01 - https://huggingface.co:443 "GET /roberta-large/resolve/main/config.json HTTP/1.1" 200 482
DEBUG - 06/21/21 23:25:30 - 0:00:01 - Attempting to release lock 22598820184656 on /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373.lock
INFO - 06/21/21 23:25:30 - 0:00:01 - Lock 22598820184656 released on /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373.lock
DEBUG - 06/21/21 23:25:30 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:30 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:25:30 - 0:00:01 - Attempting to acquire lock 22598820184656 on /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock
INFO - 06/21/21 23:25:30 - 0:00:01 - Lock 22598820184656 acquired on /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock
DEBUG - 06/21/21 23:25:30 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:30 - 0:00:01 - https://huggingface.co:443 "GET /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 898823
DEBUG - 06/21/21 23:25:30 - 0:00:02 - Attempting to release lock 22598820184656 on /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock
INFO - 06/21/21 23:25:30 - 0:00:02 - Lock 22598820184656 released on /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock
DEBUG - 06/21/21 23:25:30 - 0:00:02 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:31 - 0:00:02 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:25:31 - 0:00:02 - Attempting to acquire lock 22597850387840 on /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock
INFO - 06/21/21 23:25:31 - 0:00:02 - Lock 22597850387840 acquired on /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock
DEBUG - 06/21/21 23:25:31 - 0:00:02 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:31 - 0:00:02 - https://huggingface.co:443 "GET /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 456318
DEBUG - 06/21/21 23:25:31 - 0:00:02 - Attempting to release lock 22597850387840 on /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock
INFO - 06/21/21 23:25:31 - 0:00:02 - Lock 22597850387840 released on /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock
DEBUG - 06/21/21 23:25:31 - 0:00:02 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:31 - 0:00:03 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:25:31 - 0:00:03 - Attempting to acquire lock 22597850387840 on /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock
INFO - 06/21/21 23:25:31 - 0:00:03 - Lock 22597850387840 acquired on /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock
DEBUG - 06/21/21 23:25:31 - 0:00:03 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:25:32 - 0:00:03 - https://huggingface.co:443 "GET /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 1355863
DEBUG - 06/21/21 23:25:32 - 0:00:03 - Attempting to release lock 22597850387840 on /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock
INFO - 06/21/21 23:25:32 - 0:00:03 - Lock 22597850387840 released on /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock
INFO - 06/21/21 23:26:26 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:26:26 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:26:26 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:26:26 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:26:26 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:26:26 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:26:27 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:26:27 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:26:27 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:26:27 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:26:27 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:26:39 - 0:00:13 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:26:39 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:26:39 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:26:39 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:26:39 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
DEBUG - 06/21/21 23:26:39 - 0:00:13 - Attempting to acquire lock 23082502829920 on /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352.lock
INFO - 06/21/21 23:26:39 - 0:00:13 - Lock 23082502829920 acquired on /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352.lock
DEBUG - 06/21/21 23:26:39 - 0:00:13 - Starting new HTTPS connection (1): cdn-lfs.huggingface.co:443
DEBUG - 06/21/21 23:26:39 - 0:00:13 - https://cdn-lfs.huggingface.co:443 "GET /roberta-large/36a10a8b694fadf9bf4f9049d14e257e88be45313ae02d882af9e60f39b8b2e8 HTTP/1.1" 200 1425941629
DEBUG - 06/21/21 23:27:01 - 0:00:34 - Attempting to release lock 23082502829920 on /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352.lock
INFO - 06/21/21 23:27:01 - 0:00:34 - Lock 23082502829920 released on /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352.lock
INFO - 06/21/21 23:27:57 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:27:57 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:27:57 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:27:57 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:27:57 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:27:57 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:27:58 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:27:58 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:27:58 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:27:58 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:27:58 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:28:09 - 0:00:12 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:28:09 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:28:10 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:28:10 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:28:10 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:28:17 - 0:00:20 - Start NER training ...
INFO - 06/21/21 23:28:17 - 0:00:20 - ============== epoch 0 ==============
INFO - 06/21/21 23:29:45 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:29:45 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:29:45 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:29:45 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:29:45 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:29:45 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:29:45 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:29:45 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:29:46 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:29:46 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:29:46 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:29:57 - 0:00:12 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:29:57 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:29:57 - 0:00:12 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:29:57 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:29:57 - 0:00:12 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:30:04 - 0:00:19 - Start NER training ...
INFO - 06/21/21 23:30:04 - 0:00:19 - ============== epoch 0 ==============
INFO - 06/21/21 23:31:17 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:31:17 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:31:17 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:31:17 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:31:17 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:31:17 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:31:17 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:31:17 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:31:18 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:31:18 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:31:18 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:31:29 - 0:00:13 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:31:29 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:31:30 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:31:30 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:31:30 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:31:37 - 0:00:20 - Start NER training ...
INFO - 06/21/21 23:31:37 - 0:00:20 - ============== epoch 0 ==============
INFO - 06/21/21 23:33:58 - 0:02:42 - Finish training epoch 0. loss: 0.0696
INFO - 06/21/21 23:33:58 - 0:02:42 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/21/21 23:34:08 - 0:02:51 - Evaluate on Dev Set. F1: 95.5005.
INFO - 06/21/21 23:34:08 - 0:02:51 - Found better model!!
INFO - 06/21/21 23:48:39 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:48:39 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:48:39 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:48:39 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:48:39 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:48:39 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:48:40 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:48:40 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:48:40 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:48:40 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:48:40 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:48:51 - 0:00:12 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:48:51 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:48:51 - 0:00:12 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:48:51 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:48:51 - 0:00:12 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:49:00 - 0:00:21 - Start NER training ...
INFO - 06/21/21 23:49:00 - 0:00:21 - ============== epoch 0 ==============
INFO - 06/21/21 23:51:22 - 0:02:43 - Finish training epoch 0. loss: 0.0696
INFO - 06/21/21 23:51:22 - 0:02:43 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/21/21 23:51:31 - 0:02:52 - Evaluate on Dev Set. F1: 95.5005.
INFO - 06/21/21 23:51:31 - 0:02:52 - Found better model!!
INFO - 06/21/21 23:51:33 - 0:02:54 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/21/21 23:51:33 - 0:02:54 - ============== epoch 1 ==============
INFO - 06/21/21 23:53:55 - 0:05:16 - Finish training epoch 1. loss: 0.0234
INFO - 06/21/21 23:53:55 - 0:05:16 - ============== Evaluate epoch 1 on Dev Set ==============
INFO - 06/21/21 23:54:03 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:54:03 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 111
INFO - 06/21/21 23:54:03 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:54:03 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:54:04 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:54:04 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:54:04 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:54:04 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:54:04 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:54:04 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:54:05 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:54:05 - 0:05:25 - Evaluate on Dev Set. F1: 96.9048.
INFO - 06/21/21 23:54:05 - 0:05:25 - Found better model!!
INFO - 06/21/21 23:54:06 - 0:05:27 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/21/21 23:54:06 - 0:05:27 - ============== epoch 2 ==============
INFO - 06/21/21 23:54:16 - 0:00:12 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:54:16 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:54:16 - 0:00:12 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:54:16 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:54:16 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:54:24 - 0:00:20 - Start NER training ...
INFO - 06/21/21 23:54:24 - 0:00:20 - ============== epoch 0 ==============
INFO - 06/21/21 23:55:40 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:55:40 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 5e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 123456
INFO - 06/21/21 23:55:40 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:55:40 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:55:40 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:55:40 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:55:41 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:55:41 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:55:41 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:55:41 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:55:41 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:55:53 - 0:00:13 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:55:53 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:55:53 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:55:53 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:55:53 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:56:01 - 0:00:21 - Start NER training ...
INFO - 06/21/21 23:56:01 - 0:00:21 - ============== epoch 0 ==============
INFO - 06/21/21 23:56:29 - 0:07:50 - Finish training epoch 2. loss: 0.0162
INFO - 06/21/21 23:56:29 - 0:07:50 - ============== Evaluate epoch 2 on Dev Set ==============
INFO - 06/21/21 23:56:38 - 0:07:59 - Evaluate on Dev Set. F1: 97.3381.
INFO - 06/21/21 23:56:38 - 0:07:59 - Found better model!!
INFO - 06/21/21 23:56:40 - 0:08:01 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/21/21 23:56:40 - 0:08:01 - ============== epoch 3 ==============
INFO - 06/21/21 23:56:47 - 0:02:43 - Finish training epoch 0. loss: 0.0580
INFO - 06/21/21 23:56:47 - 0:02:43 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/21/21 23:56:56 - 0:02:53 - Evaluate on Dev Set. F1: 96.7327.
INFO - 06/21/21 23:56:56 - 0:02:53 - Found better model!!
INFO - 06/21/21 23:56:58 - 0:02:54 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/21/21 23:56:58 - 0:02:54 - ============== epoch 1 ==============
INFO - 06/21/21 23:58:25 - 0:02:45 - Finish training epoch 0. loss: 0.0544
INFO - 06/21/21 23:58:25 - 0:02:45 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/21/21 23:58:34 - 0:02:54 - Evaluate on Dev Set. F1: 96.8227.
INFO - 06/21/21 23:58:34 - 0:02:54 - Found better model!!
INFO - 06/21/21 23:58:36 - 0:02:56 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/21/21 23:58:36 - 0:02:56 - ============== epoch 1 ==============
INFO - 06/21/21 23:58:40 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:58:40 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 3e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 555
INFO - 06/21/21 23:58:40 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:58:40 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:40 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:58:40 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:41 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:58:41 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:41 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:58:41 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:41 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:58:57 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:58:57 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 3e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 111
INFO - 06/21/21 23:58:57 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:58:57 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:57 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:58:57 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:58 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:58:58 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:58 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:58:58 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:58:58 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:59:02 - 0:10:23 - Finish training epoch 3. loss: 0.0136
INFO - 06/21/21 23:59:02 - 0:10:23 - ============== Evaluate epoch 3 on Dev Set ==============
INFO - 06/21/21 23:59:10 - 0:00:12 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:59:10 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:10 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:59:10 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:10 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:59:12 - 0:10:33 - Evaluate on Dev Set. F1: 96.0542.
INFO - 06/21/21 23:59:12 - 0:10:33 - No better model found (1/3)
INFO - 06/21/21 23:59:12 - 0:10:33 - ============== epoch 4 ==============
INFO - 06/21/21 23:59:18 - 0:00:20 - Start NER training ...
INFO - 06/21/21 23:59:18 - 0:00:20 - ============== epoch 0 ==============
INFO - 06/21/21 23:59:21 - 0:05:18 - Finish training epoch 1. loss: 0.0190
INFO - 06/21/21 23:59:21 - 0:05:18 - ============== Evaluate epoch 1 on Dev Set ==============
INFO - 06/21/21 23:59:30 - 0:00:00 - ============ Initialized logger ============
INFO - 06/21/21 23:59:30 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 2e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 111
INFO - 06/21/21 23:59:30 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/21/21 23:59:30 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:30 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:59:30 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
INFO - 06/21/21 23:59:31 - 0:05:27 - Evaluate on Dev Set. F1: 97.1510.
INFO - 06/21/21 23:59:31 - 0:05:27 - Found better model!!
DEBUG - 06/21/21 23:59:31 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:59:31 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:31 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/21/21 23:59:31 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:31 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/21/21 23:59:32 - 0:05:29 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/21/21 23:59:32 - 0:05:29 - ============== epoch 2 ==============
INFO - 06/21/21 23:59:43 - 0:00:13 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/21/21 23:59:43 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:43 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/21/21 23:59:43 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/21/21 23:59:44 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/21/21 23:59:51 - 0:00:21 - Start NER training ...
INFO - 06/21/21 23:59:51 - 0:00:21 - ============== epoch 0 ==============
INFO - 06/22/21 00:01:00 - 0:05:20 - Finish training epoch 1. loss: 0.0229
INFO - 06/22/21 00:01:00 - 0:05:20 - ============== Evaluate epoch 1 on Dev Set ==============
INFO - 06/22/21 00:01:10 - 0:05:30 - Evaluate on Dev Set. F1: 97.0174.
INFO - 06/22/21 00:01:10 - 0:05:30 - Found better model!!
INFO - 06/22/21 00:01:12 - 0:05:31 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:01:12 - 0:05:31 - ============== epoch 2 ==============
INFO - 06/22/21 00:01:35 - 0:12:56 - Finish training epoch 4. loss: 0.0170
INFO - 06/22/21 00:01:35 - 0:12:56 - ============== Evaluate epoch 4 on Dev Set ==============
INFO - 06/22/21 00:01:40 - 0:02:43 - Finish training epoch 0. loss: 0.0544
INFO - 06/22/21 00:01:40 - 0:02:43 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/22/21 00:01:45 - 0:13:05 - Evaluate on Dev Set. F1: 97.1884.
INFO - 06/22/21 00:01:45 - 0:13:05 - No better model found (2/3)
INFO - 06/22/21 00:01:45 - 0:13:05 - ============== epoch 5 ==============
INFO - 06/22/21 00:01:50 - 0:02:53 - Evaluate on Dev Set. F1: 96.2938.
INFO - 06/22/21 00:01:50 - 0:02:53 - Found better model!!
INFO - 06/22/21 00:01:52 - 0:02:55 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:01:52 - 0:02:55 - ============== epoch 1 ==============
INFO - 06/22/21 00:01:55 - 0:07:51 - Finish training epoch 2. loss: 0.0200
INFO - 06/22/21 00:01:55 - 0:07:51 - ============== Evaluate epoch 2 on Dev Set ==============
INFO - 06/22/21 00:02:04 - 0:08:01 - Evaluate on Dev Set. F1: 96.9804.
INFO - 06/22/21 00:02:04 - 0:08:01 - No better model found (1/3)
INFO - 06/22/21 00:02:04 - 0:08:01 - ============== epoch 3 ==============
INFO - 06/22/21 00:02:13 - 0:02:42 - Finish training epoch 0. loss: 0.0547
INFO - 06/22/21 00:02:13 - 0:02:42 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/22/21 00:02:22 - 0:02:52 - Evaluate on Dev Set. F1: 97.0400.
INFO - 06/22/21 00:02:22 - 0:02:52 - Found better model!!
INFO - 06/22/21 00:02:24 - 0:02:54 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:02:24 - 0:02:54 - ============== epoch 1 ==============
INFO - 06/22/21 00:03:35 - 0:07:55 - Finish training epoch 2. loss: 0.0173
INFO - 06/22/21 00:03:35 - 0:07:55 - ============== Evaluate epoch 2 on Dev Set ==============
INFO - 06/22/21 00:03:45 - 0:08:04 - Evaluate on Dev Set. F1: 97.3191.
INFO - 06/22/21 00:03:45 - 0:08:04 - Found better model!!
INFO - 06/22/21 00:03:46 - 0:08:06 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:03:46 - 0:08:06 - ============== epoch 3 ==============
INFO - 06/22/21 00:04:07 - 0:15:28 - Finish training epoch 5. loss: 0.0083
INFO - 06/22/21 00:04:07 - 0:15:28 - ============== Evaluate epoch 5 on Dev Set ==============
INFO - 06/22/21 00:04:14 - 0:05:17 - Finish training epoch 1. loss: 0.0182
INFO - 06/22/21 00:04:14 - 0:05:17 - ============== Evaluate epoch 1 on Dev Set ==============
INFO - 06/22/21 00:04:17 - 0:15:37 - Evaluate on Dev Set. F1: 97.3169.
INFO - 06/22/21 00:04:17 - 0:15:37 - No better model found (3/3)
INFO - 06/22/21 00:04:17 - 0:15:37 - ============== Evaluate on Test Set ==============
INFO - 06/22/21 00:04:24 - 0:05:27 - Evaluate on Dev Set. F1: 97.6314.
INFO - 06/22/21 00:04:24 - 0:05:27 - Found better model!!
INFO - 06/22/21 00:04:26 - 0:15:46 - Evaluate on Test Set. F1: 95.6012.
INFO - 06/22/21 00:04:26 - 0:05:29 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:04:26 - 0:05:29 - ============== epoch 2 ==============
INFO - 06/22/21 00:04:27 - 0:10:24 - Finish training epoch 3. loss: 0.0157
INFO - 06/22/21 00:04:27 - 0:10:24 - ============== Evaluate epoch 3 on Dev Set ==============
INFO - 06/22/21 00:04:37 - 0:10:33 - Evaluate on Dev Set. F1: 97.6654.
INFO - 06/22/21 00:04:37 - 0:10:33 - Found better model!!
INFO - 06/22/21 00:04:39 - 0:10:35 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:04:39 - 0:10:35 - ============== epoch 4 ==============
INFO - 06/22/21 00:04:45 - 0:05:15 - Finish training epoch 1. loss: 0.0177
INFO - 06/22/21 00:04:45 - 0:05:15 - ============== Evaluate epoch 1 on Dev Set ==============
INFO - 06/22/21 00:04:55 - 0:05:25 - Evaluate on Dev Set. F1: 97.6093.
INFO - 06/22/21 00:04:55 - 0:05:25 - Found better model!!
INFO - 06/22/21 00:04:56 - 0:05:26 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:04:56 - 0:05:26 - ============== epoch 2 ==============
INFO - 06/22/21 00:06:10 - 0:10:30 - Finish training epoch 3. loss: 0.0439
INFO - 06/22/21 00:06:10 - 0:10:30 - ============== Evaluate epoch 3 on Dev Set ==============
INFO - 06/22/21 00:06:20 - 0:10:40 - Evaluate on Dev Set. F1: 0.0000.
INFO - 06/22/21 00:06:20 - 0:10:40 - No better model found (1/3)
INFO - 06/22/21 00:06:20 - 0:10:40 - ============== epoch 4 ==============
INFO - 06/22/21 00:06:47 - 0:07:50 - Finish training epoch 2. loss: 0.0156
INFO - 06/22/21 00:06:47 - 0:07:50 - ============== Evaluate epoch 2 on Dev Set ==============
INFO - 06/22/21 00:06:57 - 0:07:59 - Evaluate on Dev Set. F1: 97.5384.
INFO - 06/22/21 00:06:57 - 0:07:59 - No better model found (1/3)
INFO - 06/22/21 00:06:57 - 0:07:59 - ============== epoch 3 ==============
INFO - 06/22/21 00:07:02 - 0:12:59 - Finish training epoch 4. loss: 0.0127
INFO - 06/22/21 00:07:02 - 0:12:59 - ============== Evaluate epoch 4 on Dev Set ==============
INFO - 06/22/21 00:07:12 - 0:13:08 - Evaluate on Dev Set. F1: 97.4583.
INFO - 06/22/21 00:07:12 - 0:13:08 - No better model found (1/3)
INFO - 06/22/21 00:07:12 - 0:13:08 - ============== epoch 5 ==============
INFO - 06/22/21 00:07:17 - 0:07:47 - Finish training epoch 2. loss: 0.0115
INFO - 06/22/21 00:07:17 - 0:07:47 - ============== Evaluate epoch 2 on Dev Set ==============
INFO - 06/22/21 00:07:26 - 0:07:56 - Evaluate on Dev Set. F1: 97.2615.
INFO - 06/22/21 00:07:26 - 0:07:56 - No better model found (1/3)
INFO - 06/22/21 00:07:26 - 0:07:56 - ============== epoch 3 ==============
INFO - 06/22/21 00:08:43 - 0:13:03 - Finish training epoch 4. loss: 0.5637
INFO - 06/22/21 00:08:43 - 0:13:03 - ============== Evaluate epoch 4 on Dev Set ==============
INFO - 06/22/21 00:08:53 - 0:13:12 - Evaluate on Dev Set. F1: 0.0000.
INFO - 06/22/21 00:08:53 - 0:13:12 - No better model found (2/3)
INFO - 06/22/21 00:08:53 - 0:13:12 - ============== epoch 5 ==============
INFO - 06/22/21 00:09:18 - 0:10:21 - Finish training epoch 3. loss: 0.0110
INFO - 06/22/21 00:09:18 - 0:10:21 - ============== Evaluate epoch 3 on Dev Set ==============
INFO - 06/22/21 00:09:28 - 0:10:31 - Evaluate on Dev Set. F1: 97.2738.
INFO - 06/22/21 00:09:28 - 0:10:31 - No better model found (2/3)
INFO - 06/22/21 00:09:28 - 0:10:31 - ============== epoch 4 ==============
INFO - 06/22/21 00:09:35 - 0:15:31 - Finish training epoch 5. loss: 0.0132
INFO - 06/22/21 00:09:35 - 0:15:31 - ============== Evaluate epoch 5 on Dev Set ==============
INFO - 06/22/21 00:09:45 - 0:15:41 - Evaluate on Dev Set. F1: 97.4630.
INFO - 06/22/21 00:09:45 - 0:15:41 - No better model found (2/3)
INFO - 06/22/21 00:09:45 - 0:15:41 - ============== epoch 6 ==============
INFO - 06/22/21 00:09:47 - 0:10:17 - Finish training epoch 3. loss: 0.0101
INFO - 06/22/21 00:09:47 - 0:10:17 - ============== Evaluate epoch 3 on Dev Set ==============
INFO - 06/22/21 00:09:57 - 0:10:27 - Evaluate on Dev Set. F1: 97.5034.
INFO - 06/22/21 00:09:57 - 0:10:27 - No better model found (2/3)
INFO - 06/22/21 00:09:57 - 0:10:27 - ============== epoch 4 ==============
INFO - 06/22/21 00:11:16 - 0:15:36 - Finish training epoch 5. loss: 0.5620
INFO - 06/22/21 00:11:16 - 0:15:36 - ============== Evaluate epoch 5 on Dev Set ==============
INFO - 06/22/21 00:11:26 - 0:15:45 - Evaluate on Dev Set. F1: 0.0000.
INFO - 06/22/21 00:11:26 - 0:15:45 - No better model found (3/3)
INFO - 06/22/21 00:11:26 - 0:15:45 - ============== Evaluate on Test Set ==============
INFO - 06/22/21 00:11:35 - 0:15:54 - Evaluate on Test Set. F1: 0.0000.
INFO - 06/22/21 00:11:50 - 0:12:53 - Finish training epoch 4. loss: 0.0137
INFO - 06/22/21 00:11:50 - 0:12:53 - ============== Evaluate epoch 4 on Dev Set ==============
INFO - 06/22/21 00:12:00 - 0:13:02 - Evaluate on Dev Set. F1: 97.4501.
INFO - 06/22/21 00:12:00 - 0:13:02 - No better model found (3/3)
INFO - 06/22/21 00:12:00 - 0:13:02 - ============== Evaluate on Test Set ==============
INFO - 06/22/21 00:12:08 - 0:18:04 - Finish training epoch 6. loss: 0.0129
INFO - 06/22/21 00:12:08 - 0:18:04 - ============== Evaluate epoch 6 on Dev Set ==============
INFO - 06/22/21 00:12:09 - 0:13:11 - Evaluate on Test Set. F1: 95.4761.
INFO - 06/22/21 00:12:17 - 0:18:14 - Evaluate on Dev Set. F1: 97.2311.
INFO - 06/22/21 00:12:17 - 0:18:14 - No better model found (3/3)
INFO - 06/22/21 00:12:17 - 0:18:14 - ============== Evaluate on Test Set ==============
INFO - 06/22/21 00:12:19 - 0:12:48 - Finish training epoch 4. loss: 0.0074
INFO - 06/22/21 00:12:19 - 0:12:48 - ============== Evaluate epoch 4 on Dev Set ==============
INFO - 06/22/21 00:12:26 - 0:18:23 - Evaluate on Test Set. F1: 95.2934.
INFO - 06/22/21 00:12:28 - 0:12:58 - Evaluate on Dev Set. F1: 97.0406.
INFO - 06/22/21 00:12:28 - 0:12:58 - No better model found (3/3)
INFO - 06/22/21 00:12:28 - 0:12:58 - ============== Evaluate on Test Set ==============
INFO - 06/22/21 00:12:37 - 0:13:07 - Evaluate on Test Set. F1: 95.3264.
INFO - 06/22/21 00:16:11 - 0:00:00 - ============ Initialized logger ============
INFO - 06/22/21 00:16:11 - 0:00:00 - batch_size: 32
data_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003
dropout: 0.1
dump_path: logs/conll2003/1
early_stop: 3
epoch: 300
exp_id: 1
exp_name: conll2003
hidden_dim: 1024
logger_filename: train.log
lr: 3e-05
model_name: roberta-large
num_tag: 3
saved_folder: /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model
seed: 111
INFO - 06/22/21 00:16:11 - 0:00:00 - The experiment will be stored in logs/conll2003/1
DEBUG - 06/22/21 00:16:11 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/22/21 00:16:12 - 0:00:00 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/22/21 00:16:12 - 0:00:00 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/22/21 00:16:12 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/vocab.json HTTP/1.1" 200 0
DEBUG - 06/22/21 00:16:12 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/22/21 00:16:12 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/merges.txt HTTP/1.1" 200 0
DEBUG - 06/22/21 00:16:12 - 0:00:01 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/22/21 00:16:13 - 0:00:01 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/tokenizer.json HTTP/1.1" 200 0
INFO - 06/22/21 00:16:24 - 0:00:12 - conll2003 dataset: train size: 14040; dev size 3249; test size: 3452
DEBUG - 06/22/21 00:16:24 - 0:00:12 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/22/21 00:16:24 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG - 06/22/21 00:16:24 - 0:00:13 - Starting new HTTPS connection (1): huggingface.co:443
DEBUG - 06/22/21 00:16:24 - 0:00:13 - https://huggingface.co:443 "HEAD /roberta-large/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO - 06/22/21 00:16:31 - 0:00:20 - Start NER training ...
INFO - 06/22/21 00:16:31 - 0:00:20 - ============== epoch 0 ==============
INFO - 06/22/21 00:18:53 - 0:02:42 - Finish training epoch 0. loss: 0.0544
INFO - 06/22/21 00:18:53 - 0:02:42 - ============== Evaluate epoch 0 on Dev Set ==============
INFO - 06/22/21 00:19:03 - 0:02:51 - Evaluate on Dev Set. F1: 96.2938.
INFO - 06/22/21 00:19:03 - 0:02:51 - Found better model!!
INFO - 06/22/21 00:19:05 - 0:02:53 - Best model has been saved to /gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt
INFO - 06/22/21 00:19:05 - 0:02:53 - ============== epoch 1 ==============
import torch
import numpy as np
from transformers import AutoTokenizer
from tabulate import tabulate
tokenizer = AutoTokenizer.from_pretrained("roberta-large")
ner_model = torch.load("/gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model/roberta-large.pt")["model"]
ner_model.cuda()
ner_model.eval()
label_set = ["O", "B", "I"]
for step in range(100):
print("===========================================================================")
input_sent = input(">> Input:")
tokens = input_sent.split()
token_ids, first_tok_masks = [tokenizer.cls_token_id], [0]
for token in tokens:
subs_ = tokenizer.tokenize(token)
assert len(subs_) > 0
token_ids.extend(tokenizer.convert_tokens_to_ids(subs_))
first_tok_masks.extend([1] + [0] * (len(subs_) - 1))
token_ids.append(tokenizer.sep_token_id)
first_tok_masks.append(0)
token_ids = torch.LongTensor([token_ids]).cuda()
predictions = ner_model(token_ids) # (1, seq_len, 3)
predictions = predictions[0].data.cpu().numpy() # (seq_len, 3)
pred_ids = list(np.argmax(predictions, axis=1))
assert len(pred_ids) == len(first_tok_masks)
preds_for_each_word = []
for pred, mask in zip(pred_ids, first_tok_masks):
if mask == 1:
preds_for_each_word.append(label_set[pred])
assert len(preds_for_each_word) == len(tokens)
table = [tokens, preds_for_each_word]
print(tabulate(table))
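
# Example session (hypothetical input and output; tabulate prints the tokens and
# their word-level BIO tags as two aligned rows, roughly like this):
#   >> Input:I met Alan Turing in Cambridge
#   -  ---  ----  ------  --  ---------
#   I  met  Alan  Turing  in  Cambridge
#   O  O    B     I       O   B
#   -  ---  ----  ------  --  ---------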
@@ -23,6 +23,12 @@ def get_params():
parser.add_argument("--data_folder", type=str, default="/gpfs/fs1/projects/gpu_adlr/datasets/zihanl/conll2003", help="NER data folder")
parser.add_argument("--saved_folder", type=str, default="/gpfs/fs1/projects/gpu_adlr/datasets/zihanl/checkpoints/ner_model", help="NER data folder")
parser.add_argument("--default_folder", type=str, default="/gpfs/fs1/projects/gpu_adlr/datasets/zihanl")
parser.add_argument("--infer_datafolder", type=str, default="dialog_datasets/wizard_of_wikipedia/processed")
parser.add_argument("--infer_dataname", type=str, default="train.txt")
parser.add_argument("--output_dataname", type=str, default="train_entity_based_control.txt")
params = parser.parse_args()
return params
@@ -7,7 +7,6 @@ from src.trainer import NERTrainer
import torch
import numpy as np
from tqdm import tqdm
import random
def random_seed(seed):