"experiments/pyexps/iperf_client_server_pair.py" did not exist on "13673a6f42d4dab4a6095906229ad0d5426d372a"
training_stsbenchmark.py 4.32 KB
Newer Older
Rayyyyy's avatar
Rayyyyy committed
1
2
3
4
import logging
import argparse

from datetime import datetime
Rayyyyy's avatar
Update  
Rayyyyy committed
5
6
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, losses
Rayyyyy's avatar
Rayyyyy committed
7
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
Rayyyyy's avatar
Update  
Rayyyyy committed
8
9
10
from sentence_transformers.similarity_functions import SimilarityFunction
from sentence_transformers.trainer import SentenceTransformerTrainer
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
Rayyyyy's avatar
Rayyyyy committed
11

Rayyyyy's avatar
Update  
Rayyyyy committed
12
13
# Configure the root logger at INFO level so progress messages are printed,
# each prefixed with a "YYYY-MM-DD HH:MM:SS" timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(message)s",
)
Rayyyyy's avatar
Rayyyyy committed
14

Rayyyyy's avatar
Update  
Rayyyyy committed
15
# params
Rayyyyy's avatar
Rayyyyy committed
16
17
18
19
20
21
# Command-line interface of the script. Every option has a default, so the
# script can be launched without any arguments.
parser = argparse.ArgumentParser()
parser.add_argument('--train_batch_size', type=int, default=16,
                    help='Per-device batch size (used for training and evaluation)')
parser.add_argument('--num_epochs', type=int, default=10,
                    help='Number of training epochs')
parser.add_argument('--model_name_or_path', type=str, default="bert-base-uncased",
                    help='Name or local path of the model to fine-tune')
parser.add_argument('--save_root_path', type=str, default="output", help='Model output folder')
# type=float is required: without it a value given on the command line stays a
# string (e.g. "1e-4") while the default is a float.
parser.add_argument('--lr', type=float, default=2e-05, help='Learning rate')
parser.add_argument('--eval_steps', type=int, default=-1,
                    help='Passed to the training arguments as eval_steps')
parser.add_argument('--save_steps', type=int, default=-1,
                    help='Passed to the training arguments as save_steps')
parser.add_argument('--save_total_limit', type=int, default=2,
                    help='Passed to the training arguments as save_total_limit')
parser.add_argument('--logging_steps', type=int, default=10,
                    help='Passed to the training arguments as logging_steps')
Rayyyyy's avatar
Rayyyyy committed
26
27
# Read the command line once; the values below are derived from it.
args = parser.parse_args()

# Any Hugging Face pre-trained model can be specified here, for example
# bert-base-uncased, roberta-base, xlm-roberta-base
model_name, train_batch_size, num_epochs = (
    args.model_name_or_path,
    args.train_batch_size,
    args.num_epochs,
)

# Per-run output folder: <save_root_path>/training_stsbenchmark_<model>-<timestamp>.
# "/" in the model name is replaced by "-" so it does not add path components.
output_dir = "{root}/training_stsbenchmark_{model}-{stamp}".format(
    root=args.save_root_path,
    model=model_name.replace("/", "-"),
    stamp=datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
)
Rayyyyy's avatar
Rayyyyy committed
36

Rayyyyy's avatar
Update  
Rayyyyy committed
37
38
39
# 1. Build the SentenceTransformer model. If the checkpoint is not already a
# Sentence Transformer model, one is created automatically with "mean" pooling.
model = SentenceTransformer(model_name)

# 2. Load the STSB dataset: https://huggingface.co/datasets/sentence-transformers/stsb
# The three splits are fetched in the order train, validation, test.
logging.info("Loading STSbenchmark train dataset")
train_dataset, eval_dataset, test_dataset = (
    load_dataset("sentence-transformers/stsb", split=split_name)
    for split_name in ("train", "validation", "test")
)
logging.info(train_dataset)

# 3. Define the training loss.
# CosineSimilarityLoss
# (https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss)
# needs two text columns and one similarity score column (between 0 and 1).
train_loss = losses.CosineSimilarityLoss(model=model)
# Alternative loss, kept for reference:
# train_loss = losses.CoSENTLoss(model=model)

# 4. Define an evaluator for use during training. This is useful to keep track of alongside the evaluation loss.
dev_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=eval_dataset["sentence1"],
    sentences2=eval_dataset["sentence2"],
    scores=eval_dataset["score"],
    main_similarity=SimilarityFunction.COSINE,
    name="sts-dev",
)

# 5. Define the training arguments.
# Stored under its own name so the parsed command-line namespace `args` is not
# overwritten by the training-arguments object.
training_args = SentenceTransformerTrainingArguments(
    # Required parameter:
    output_dir=output_dir,
    # Optional training parameters:
    num_train_epochs=num_epochs,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=train_batch_size,
    # Honour --lr: it was previously parsed but never passed on, so training
    # silently used the library default. float() guards against the value
    # arriving as a string from the command line.
    learning_rate=float(args.lr),
    warmup_ratio=0.1,
    fp16=True,  # Set to False if you get an error that your GPU can't run on FP16
    bf16=False,  # Set to True if you have a GPU that supports BF16
    # Optional tracking/debugging parameters:
    # NOTE(review): newer transformers releases name this option `eval_strategy`;
    # confirm against the pinned version before renaming.
    evaluation_strategy="steps",
    eval_steps=args.eval_steps,
    save_strategy="steps",
    save_steps=args.save_steps,
    save_total_limit=args.save_total_limit,
    logging_steps=args.logging_steps,
    run_name="sts",  # Will be used in W&B if `wandb` is installed
)

# 6. Create the trainer & start training
trainer = SentenceTransformerTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=train_loss,
    evaluator=dev_evaluator,
)
trainer.train()

# 7. Measure the trained model on the STS Benchmark test split using cosine
# similarity; the evaluator is given output_dir as its output path.
test_evaluator = EmbeddingSimilarityEvaluator(
    name="sts-test",
    main_similarity=SimilarityFunction.COSINE,
    sentences1=test_dataset["sentence1"],
    sentences2=test_dataset["sentence2"],
    scores=test_dataset["score"],
)
test_evaluator(model, output_path=output_dir)

# 8. Store the trained & evaluated model in the "final" subfolder of the run directory
final_output_dir = output_dir + "/final"
model.save(final_output_dir)
Rayyyyy's avatar
Rayyyyy committed
108