Unverified Commit 1f843991 authored by atturaioe, committed by GitHub

Migrate metric to Evaluate in Pytorch examples (#18369)

* Migrate metric to Evaluate in pytorch examples

* Remove unused imports
parent 25ec12ea
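The change is mechanical across the touched example scripts: every metric previously created with `datasets.load_metric(...)` is now created with `evaluate.load(...)`, the corresponding `load_metric` imports are dropped, and `evaluate` is added to the example requirements. A minimal sketch of the pattern, assuming the accuracy metric and the usual Trainer-style `compute_metrics` wiring (illustrative, not copied from any single script):

import evaluate
import numpy as np

# Before this commit: metric = datasets.load_metric("accuracy")
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    # An EvalPrediction unpacks into (predictions, label_ids); return a dict mapping names to floats.
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)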
@@ -23,3 +23,4 @@ torchvision
jiwer
librosa
torch < 1.12
+evaluate
@@ -26,6 +26,7 @@ import datasets
import numpy as np
from datasets import DatasetDict, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@@ -315,7 +316,7 @@ def main():
id2label[str(i)] = label
# Load the accuracy metric from the datasets package
-metric = datasets.load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
# `predictions` and `label_ids` fields) and has to return a dictionary string to float.
......
@@ -19,7 +19,6 @@ import sys
from dataclasses import dataclass, field
from typing import Optional
import datasets
import numpy as np
import torch
from datasets import load_dataset
@@ -34,6 +33,7 @@ from torchvision.transforms import (
ToTensor,
)
+import evaluate
import transformers
from transformers import (
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -252,7 +252,7 @@ def main():
id2label[str(i)] = label
# Load the accuracy metric from the datasets package
-metric = datasets.load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.
......
@@ -22,7 +22,7 @@ from pathlib import Path
import datasets
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from torchvision.transforms import (
CenterCrop,
@@ -35,6 +35,7 @@ from torchvision.transforms import (
)
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -415,7 +416,7 @@ def main():
accelerator.init_trackers("image_classification_no_trainer", experiment_config)
# Get the metric function
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
......
@@ -30,8 +30,9 @@ from itertools import chain
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
CONFIG_MAPPING,
@@ -492,7 +493,7 @@ def main():
logits = logits[0]
return logits.argmax(dim=-1)
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
def compute_metrics(eval_preds):
preds, labels = eval_preds
......
@@ -30,8 +30,9 @@ from itertools import chain
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
CONFIG_MAPPING,
@@ -515,7 +516,7 @@ def main():
logits = logits[0]
return logits.argmax(dim=-1)
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
def compute_metrics(eval_preds):
preds, labels = eval_preds
......
@@ -31,10 +31,11 @@ from typing import Optional, Union
import datasets
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -514,7 +515,7 @@ def main():
accelerator.init_trackers("swag_no_trainer", experiment_config)
# Metrics
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
......
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
@@ -593,7 +594,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)
......
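For the question-answering scripts the metric choice depends on whether unanswerable questions are allowed; `evaluate.load("squad_v2")` and `evaluate.load("squad")` keep the same input format as the old `load_metric` calls. A small sketch of the expected prediction/reference shapes (the id and texts are made up; for `squad_v2` each prediction additionally carries a `no_answer_probability` field):

import evaluate

metric = evaluate.load("squad")
predictions = [{"id": "56be4db0acb8001400a502ec", "prediction_text": "Denver Broncos"}]
references = [
    {"id": "56be4db0acb8001400a502ec", "answers": {"text": ["Denver Broncos"], "answer_start": [177]}}
]
# Returns a dict with "exact_match" and "f1"
print(metric.compute(predictions=predictions, references=references))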
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
from typing import Optional
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from trainer_qa import QuestionAnsweringTrainer
from transformers import (
@@ -625,7 +626,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)
......
@@ -29,10 +29,11 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -680,7 +681,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
"""
......
@@ -29,10 +29,11 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -696,7 +697,7 @@ def main():
references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
-metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")
# Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
......
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
from typing import List, Optional, Tuple
import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from trainer_seq2seq_qa import QuestionAnsweringSeq2SeqTrainer
from transformers import (
@@ -581,7 +582,7 @@ def main():
pad_to_multiple_of=8 if training_args.fp16 else None,
)
-metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
def compute_metrics(p: EvalPrediction):
return metric.compute(predictions=p.predictions, references=p.label_ids)
......
@@ -21,7 +21,6 @@ import sys
from dataclasses import dataclass, field
from typing import Optional
import datasets
import numpy as np
import torch
from datasets import load_dataset
@@ -30,6 +29,7 @@ from torch import nn
from torchvision import transforms
from torchvision.transforms import functional
+import evaluate
import transformers
from huggingface_hub import hf_hub_download
from transformers import (
@@ -337,7 +337,7 @@ def main():
label2id = {v: str(k) for k, v in id2label.items()}
# Load the mean IoU metric from the datasets package
-metric = datasets.load_metric("mean_iou")
+metric = evaluate.load("mean_iou")
# Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.
......
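The semantic-segmentation metric moves the same way: `evaluate.load("mean_iou")` takes batches of predicted and reference segmentation maps plus label-space arguments. A rough sketch with toy 2x2 maps and assumed `num_labels`/`ignore_index` values (not taken from the script):

import numpy as np
import evaluate

metric = evaluate.load("mean_iou")
predicted = [np.array([[1, 2], [0, 0]])]
ground_truth = [np.array([[1, 2], [0, 255]])]
results = metric.compute(
    predictions=predicted,
    references=ground_truth,
    num_labels=3,
    ignore_index=255,  # pixels with this label are excluded from the score
)
# results holds overall scores such as "mean_iou" alongside per-category arrays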
@@ -24,13 +24,14 @@ from pathlib import Path
import datasets
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import functional
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -500,7 +501,7 @@ def main():
args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)
# Instantiate metric
-metric = load_metric("mean_iou")
+metric = evaluate.load("mean_iou")
# We need to initialize the trackers we use, and also store our configuration.
# We initialize the trackers only on main process because `accelerator.log`
......
@@ -28,8 +28,9 @@ from typing import Dict, List, Optional, Union
import datasets
import numpy as np
import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@@ -643,7 +644,7 @@ def main():
# instantiate a data collator and the trainer
# Define evaluation metrics during training, *i.e.* word error rate, character error rate
-eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics}
# for large datasets it is advised to run the preprocessing on a
# single machine first with ``args.preprocessing_only`` since there will mostly likely
......
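The CTC speech-recognition script builds its metric dict from `--eval_metrics` (word error rate and/or character error rate), so each entry is now an `evaluate` module instead of a `datasets` one. A sketch of what those metrics compute, with made-up transcriptions:

import evaluate

wer = evaluate.load("wer")
cer = evaluate.load("cer")
predictions = ["the cat sat on the mat"]
references = ["the cat sat on a mat"]
print(wer.compute(predictions=predictions, references=references))  # word error rate as a float
print(cer.compute(predictions=predictions, references=references))  # character error rate as a float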
@@ -27,8 +27,9 @@ from typing import Any, Dict, List, Optional, Union
import datasets
import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@@ -425,7 +426,7 @@ def main():
return
# 8. Load Metric
-metric = load_metric("wer")
+metric = evaluate.load("wer")
def compute_metrics(pred):
pred_ids = pred.predictions
......
@@ -27,8 +27,9 @@ from typing import Optional
import datasets
import nltk # Here to have a nice missing dependency error message early on
import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from filelock import FileLock
from transformers import (
@@ -598,7 +599,7 @@ def main():
)
# Metric
-metric = load_metric("rouge")
+metric = evaluate.load("rouge")
def postprocess_text(preds, labels):
preds = [pred.strip() for pred in preds]
......
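The summarization scripts now pull ROUGE from `evaluate` as well; the surrounding `postprocess_text` helper (newline-separating sentences with nltk) stays, since `rougeLsum` is scored over newline-separated sentences. A minimal sketch with placeholder summaries:

import evaluate

metric = evaluate.load("rouge")
predictions = ["the cat sat on the mat"]
references = ["a cat was sitting on the mat"]
scores = metric.compute(predictions=predictions, references=references, use_stemmer=True)
# scores maps rouge types such as "rouge1", "rouge2", "rougeL", "rougeLsum" to their values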
@@ -30,10 +30,11 @@ import datasets
import nltk
import numpy as np
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -583,7 +584,7 @@ def main():
accelerator.init_trackers("summarization_no_trainer", experiment_config)
# Metric
-metric = load_metric("rouge")
+metric = evaluate.load("rouge")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
......
@@ -25,8 +25,9 @@ from typing import Optional
import datasets
import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
+import evaluate
import transformers
from transformers import (
AutoConfig,
@@ -480,9 +481,9 @@ def main():
# Get the metric function
if data_args.task_name is not None:
-metric = load_metric("glue", data_args.task_name)
+metric = evaluate.load("glue", data_args.task_name)
else:
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
# predictions and label_ids field) and has to return a dictionary string to float.
......
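For text classification the metric is either the task-specific GLUE metric or plain accuracy, mirroring the old `load_metric("glue", task)` call. A small sketch, assuming the MRPC task purely for illustration:

import evaluate

metric = evaluate.load("glue", "mrpc")
# predictions and references are class ids; MRPC reports both accuracy and F1
print(metric.compute(predictions=[0, 1, 1], references=[0, 1, 0]))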
@@ -23,10 +23,11 @@ from pathlib import Path
import datasets
import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
+import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -466,9 +467,9 @@ def main():
# Get the metric function
if args.task_name is not None:
-metric = load_metric("glue", args.task_name)
+metric = evaluate.load("glue", args.task_name)
else:
-metric = load_metric("accuracy")
+metric = evaluate.load("accuracy")
# Train!
total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
......