#!/usr/bin/env bash
#
# run_pl.sh
#
# End-to-end driver: installs the Python requirements, downloads three data
# splits, preprocesses them, derives labels.txt and finally launches
# run_pl_ner.py with --do_train and --do_predict.
#
# All paths used by the script (../requirements.txt, scripts/preprocess.py,
# run_pl_ner.py) are relative, so invoke it from the directory in which those
# paths resolve.

# Abort on the first failing command and treat a pipeline as failed when any
# of its stages fails. `-u` is deliberately left off: the script expands
# environment variables (e.g. PYTHONPATH) that the caller may not have set.
set -eo pipefail

# for seqeval metrics import
pip install -r ../requirements.txt

## The relevant files are currently on a shared Google
## drive at https://drive.google.com/drive/folders/1kC0I2UGl2ltrluI9NqDjaQJGw5iliw_J
## Monitor for changes and eventually migrate to nlp dataset

#######################################
# Download one file from Google Drive and reduce it to two columns.
# Lines starting with '#' are dropped, tab-separated fields 2 and 3 are kept,
# and the remaining tab is replaced by a single space.
# Arguments: $1 - Google Drive file id
#            $2 - path of the file to write
# Returns:   non-zero when any pipeline stage fails. This includes an HTTP
#            error (curl --fail) and a download with no data lines at all
#            (grep -v exits 1 when it outputs nothing).
#######################################
# The body is a subshell so that enabling pipefail here does not leak into
# the rest of the script.
fetch_split() (
  set -o pipefail
  curl -fL "https://drive.google.com/uc?export=download&id=$1" \
    | grep -v "^#" \
    | cut -f 2,3 \
    | tr '\t' ' ' > "$2"
)

fetch_split 1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P train.txt.tmp || exit 1
fetch_split 1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm dev.txt.tmp || exit 1
fetch_split 1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH test.txt.tmp || exit 1

# Model identifier and maximum sequence length. Both are exported so that
# child processes can read them, and both are passed explicitly to
# scripts/preprocess.py below.
export MAX_LENGTH=128
export BERT_MODEL=bert-base-multilingual-cased

# Run scripts/preprocess.py on every <split>.txt.tmp and capture its stdout
# as <split>.txt. Stop immediately if preprocessing fails, rather than
# carrying on with an empty or truncated split.
for split in train dev test; do
  python3 scripts/preprocess.py "${split}.txt.tmp" "${BERT_MODEL}" "${MAX_LENGTH}" \
    > "${split}.txt" || exit 1
done

# Build the label inventory: take the second space-separated field of every
# line across all three splits, drop empty lines, and keep the sorted set of
# distinct values.
cut -d " " -f 2 train.txt dev.txt test.txt | grep -v "^$" | sort -u > labels.txt

# Training hyperparameters, exported for use by later commands.
export BATCH_SIZE=32
export NUM_EPOCHS=3
export SEED=1

# Output directory: a "germeval-model" folder inside the directory the script
# is invoked from. Expansions are quoted so a working directory containing
# spaces does not get split into several arguments.
export OUTPUT_DIR_NAME=germeval-model
export CURRENT_DIR="${PWD}"
export OUTPUT_DIR="${CURRENT_DIR}/${OUTPUT_DIR_NAME}"
mkdir -p -- "${OUTPUT_DIR}" || exit 1

# Add parent directory to python path to access lightning_base.py
# The existing PYTHONPATH is appended only when it is set and non-empty, so
# an unset variable yields "../" rather than "../:" (the trailing colon would
# add an empty path entry).
export PYTHONPATH="../${PYTHONPATH:+:${PYTHONPATH}}"

# Launch run_pl_ner.py for training followed by prediction.
# The arguments are collected in an array so each value stays a single word
# even if it contains spaces, and so no backslash line continuations are
# needed.
train_args=(
  --data_dir ./
  --labels ./labels.txt
  --model_name_or_path "${BERT_MODEL}"
  --output_dir "${OUTPUT_DIR}"
  --max_seq_length "${MAX_LENGTH}"
  --num_train_epochs "${NUM_EPOCHS}"
  --train_batch_size "${BATCH_SIZE}"
  --seed "${SEED}"
  --gpus 1
  --do_train
  --do_predict
)

# Propagate the trainer's exit status to the caller.
python3 run_pl_ner.py "${train_args[@]}" || exit $?