Unverified Commit 1f843991 authored by atturaioe, committed by GitHub

Migrate metric to Evaluate in PyTorch examples (#18369)

* Migrate metric to Evaluate in PyTorch examples

* Remove unused imports
parent 25ec12ea
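
The change is mechanical across every example below: metric loading moves from the deprecated `datasets.load_metric` to the dedicated `evaluate` library, whose `evaluate.load` returns an object with the same `compute`/`add_batch` interface. A minimal before/after sketch:

    # Before: metric loaded through the datasets package (load_metric is deprecated)
    # from datasets import load_metric
    # metric = load_metric("accuracy")

    # After: metric loaded through the evaluate library
    import evaluate

    metric = evaluate.load("accuracy")
    metric.compute(predictions=[0, 1, 1], references=[0, 1, 0])  # {'accuracy': 0.666...}
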
@@ -23,3 +23,4 @@ torchvision
 jiwer
 librosa
 torch < 1.12
+evaluate
@@ -26,6 +26,7 @@ import datasets
 import numpy as np
 from datasets import DatasetDict, load_dataset

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -315,7 +316,7 @@ def main():
         id2label[str(i)] = label

     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")

     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with
     # `predictions` and `label_ids` fields) and has to return a dictionary string to float.
...
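
For context, a sketch of how these Trainer-based examples consume the loaded metric: `compute_metrics` receives the `EvalPrediction` described in the comment above, argmaxes the logits, and delegates scoring to `metric.compute` (the argmax over the last axis is the usual move for a classification head):

    import numpy as np
    import evaluate

    metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        # EvalPrediction unpacks into (predictions, label_ids)
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)  # class index per example
        return metric.compute(predictions=predictions, references=labels)
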
@@ -19,7 +19,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional

-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -34,6 +33,7 @@ from torchvision.transforms import (
     ToTensor,
 )

+import evaluate
 import transformers
 from transformers import (
     MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
@@ -252,7 +252,7 @@ def main():
         id2label[str(i)] = label

     # Load the accuracy metric from the datasets package
-    metric = datasets.load_metric("accuracy")
+    metric = evaluate.load("accuracy")

     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
...
@@ -22,7 +22,7 @@ from pathlib import Path
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from torchvision.transforms import (
     CenterCrop,
@@ -35,6 +35,7 @@ from torchvision.transforms import (
 )
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -415,7 +416,7 @@ def main():
         accelerator.init_trackers("image_classification_no_trainer", experiment_config)

     # Get the metric function
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")

     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
...
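
The no_trainer variants fill the metric batch by batch during the eval loop instead of passing a `compute_metrics` function. A simplified sketch, assuming the script's `model`, `eval_dataloader`, `accelerator`, and `metric` objects; predictions are gathered across processes before scoring, as the Accelerate examples do:

    import torch

    model.eval()
    for batch in eval_dataloader:
        with torch.no_grad():
            outputs = model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        # Accumulate gathered predictions/labels; score once at the end
        metric.add_batch(
            predictions=accelerator.gather(predictions),
            references=accelerator.gather(batch["labels"]),
        )
    eval_metric = metric.compute()  # e.g. {'accuracy': 0.87}
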
@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional

 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -492,7 +493,7 @@ def main():
             logits = logits[0]
         return logits.argmax(dim=-1)

-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")

     def compute_metrics(eval_preds):
         preds, labels = eval_preds
...
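
In the language-modeling scripts, `preprocess_logits_for_metrics` (shown above) argmaxes on the accelerator so `compute_metrics` only ever sees token ids. A sketch of the pairing, following the causal-LM convention these examples use, where token i predicts token i + 1:

    import evaluate

    metric = evaluate.load("accuracy")

    def compute_metrics(eval_preds):
        preds, labels = eval_preds
        # preds were already argmaxed by preprocess_logits_for_metrics,
        # so shift and flatten both sides before comparing token ids.
        labels = labels[:, 1:].reshape(-1)
        preds = preds[:, :-1].reshape(-1)
        return metric.compute(predictions=preds, references=labels)
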
@@ -30,8 +30,9 @@ from itertools import chain
 from typing import Optional

 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -515,7 +516,7 @@ def main():
             logits = logits[0]
         return logits.argmax(dim=-1)

-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")

     def compute_metrics(eval_preds):
         preds, labels = eval_preds
...
@@ -31,10 +31,11 @@ from typing import Optional, Union
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -514,7 +515,7 @@ def main():
         accelerator.init_trackers("swag_no_trainer", experiment_config)

     # Metrics
-    metric = load_metric("accuracy")
+    metric = evaluate.load("accuracy")

     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
...
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional

 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -593,7 +594,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)

-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")

     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
...
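
The squad / squad_v2 metrics keep the same input contract under `evaluate.load` as under `load_metric`: predictions and references are lists of dicts keyed by example id, matching the `formatted_predictions` built above. A toy sketch with a made-up id:

    import evaluate

    metric = evaluate.load("squad")

    predictions = [{"id": "ex0", "prediction_text": "Denver Broncos"}]
    references = [{"id": "ex0", "answers": {"text": ["Denver Broncos"], "answer_start": [177]}}]

    metric.compute(predictions=predictions, references=references)
    # {'exact_match': 100.0, 'f1': 100.0}
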
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import Optional

 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from trainer_qa import QuestionAnsweringTrainer
 from transformers import (
@@ -625,7 +626,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)

-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")

     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
...
@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -680,7 +681,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)

-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")

     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
         """
...
@@ -29,10 +29,11 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -696,7 +697,7 @@ def main():
         references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
         return EvalPrediction(predictions=formatted_predictions, label_ids=references)

-    metric = load_metric("squad_v2" if args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if args.version_2_with_negative else "squad")

     # Create and fill numpy array of size len_of_validation_data * max_length_of_output_tensor
     def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
...
@@ -25,8 +25,9 @@ from dataclasses import dataclass, field
 from typing import List, Optional, Tuple

 import datasets
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from trainer_seq2seq_qa import QuestionAnsweringSeq2SeqTrainer
 from transformers import (
@@ -581,7 +582,7 @@ def main():
         pad_to_multiple_of=8 if training_args.fp16 else None,
     )

-    metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad")
+    metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")

     def compute_metrics(p: EvalPrediction):
         return metric.compute(predictions=p.predictions, references=p.label_ids)
...
@@ -21,7 +21,6 @@ import sys
 from dataclasses import dataclass, field
 from typing import Optional

-import datasets
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -30,6 +29,7 @@ from torch import nn
 from torchvision import transforms
 from torchvision.transforms import functional

+import evaluate
 import transformers
 from huggingface_hub import hf_hub_download
 from transformers import (
@@ -337,7 +337,7 @@ def main():
     label2id = {v: str(k) for k, v in id2label.items()}

     # Load the mean IoU metric from the datasets package
-    metric = datasets.load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")

     # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
...
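
mean_iou migrates the same way, but unlike accuracy its `compute` also needs the label count and ignore index. A toy sketch per the metric's documented signature (the `num_labels` and `ignore_index` values here are illustrative):

    import numpy as np
    import evaluate

    metric = evaluate.load("mean_iou")

    predicted = [np.array([[1, 2], [0, 2]])]       # one 2x2 segmentation map
    ground_truth = [np.array([[1, 2], [255, 2]])]  # 255 marks ignored pixels

    results = metric.compute(
        predictions=predicted,
        references=ground_truth,
        num_labels=3,
        ignore_index=255,
    )
    results["mean_iou"]  # also returns mean/overall accuracy and per-category scores
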
@@ -24,13 +24,14 @@ from pathlib import Path
 import datasets
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from PIL import Image
 from torch.utils.data import DataLoader
 from torchvision import transforms
 from torchvision.transforms import functional
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -500,7 +501,7 @@ def main():
         args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

     # Instantiate metric
-    metric = load_metric("mean_iou")
+    metric = evaluate.load("mean_iou")

     # We need to initialize the trackers we use, and also store our configuration.
     # We initialize the trackers only on main process because `accelerator.log`
...
@@ -28,8 +28,9 @@ from typing import Dict, List, Optional, Union
 import datasets
 import numpy as np
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -643,7 +644,7 @@ def main():
     # instantiate a data collator and the trainer

     # Define evaluation metrics during training, *i.e.* word error rate, character error rate
-    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+    eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics}

     # for large datasets it is advised to run the preprocessing on a
     # single machine first with ``args.preprocessing_only`` since there will mostly likely
...
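
Since `data_args.eval_metrics` is a list of metric names (typically ["wer"], optionally also "cer"), the dict comprehension above loads one evaluate module per name. Each speech metric then takes plain decoded strings and returns a single float; a sketch with hard-coded names:

    import evaluate

    eval_metrics = {name: evaluate.load(name) for name in ["wer", "cer"]}

    predictions = ["hello wrld"]
    references = ["hello world"]

    # One word substituted out of two -> WER 0.5; one character dropped -> small CER
    {name: m.compute(predictions=predictions, references=references) for name, m in eval_metrics.items()}
    # e.g. {'wer': 0.5, 'cer': 0.0909...}
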
@@ -27,8 +27,9 @@ from typing import Any, Dict, List, Optional, Union
 import datasets
 import torch
-from datasets import DatasetDict, load_dataset, load_metric
+from datasets import DatasetDict, load_dataset

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -425,7 +426,7 @@ def main():
         return

     # 8. Load Metric
-    metric = load_metric("wer")
+    metric = evaluate.load("wer")

     def compute_metrics(pred):
         pred_ids = pred.predictions
...
@@ -27,8 +27,9 @@ from typing import Optional
 import datasets
 import nltk  # Here to have a nice missing dependency error message early on
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from filelock import FileLock
 from transformers import (
@@ -598,7 +599,7 @@ def main():
     )

     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")

     def postprocess_text(preds, labels):
         preds = [pred.strip() for pred in preds]
...
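
The rouge metric also moves over unchanged; it scores prediction/reference string pairs (the `postprocess_text` helper that follows exists because ROUGE-Lsum expects newline-separated sentences). A toy sketch; note the exact return type of the scores has varied across evaluate versions:

    import evaluate

    metric = evaluate.load("rouge")

    result = metric.compute(
        predictions=["the cat sat on the mat"],
        references=["the cat was sitting on the mat"],
        use_stemmer=True,
    )
    # dict with rouge1 / rouge2 / rougeL / rougeLsum scores
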
@@ -30,10 +30,11 @@ import datasets
 import nltk
 import numpy as np
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -583,7 +584,7 @@ def main():
         accelerator.init_trackers("summarization_no_trainer", experiment_config)

     # Metric
-    metric = load_metric("rouge")
+    metric = evaluate.load("rouge")

     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
...
@@ -25,8 +25,9 @@ from typing import Optional
 import datasets
 import numpy as np
-from datasets import load_dataset, load_metric
+from datasets import load_dataset

+import evaluate
 import transformers
 from transformers import (
     AutoConfig,
@@ -480,9 +481,9 @@ def main():
     # Get the metric function
     if data_args.task_name is not None:
-        metric = load_metric("glue", data_args.task_name)
+        metric = evaluate.load("glue", data_args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")

     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
     # predictions and label_ids field) and has to return a dictionary string to float.
...
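
`evaluate.load` accepts the same optional config name that `load_metric` did, so the per-task GLUE metric bundles keep working. A sketch using "mrpc" as an illustrative task name:

    import evaluate

    # GLUE configs bundle the right scores per task (here accuracy + F1)
    metric = evaluate.load("glue", "mrpc")
    metric.compute(predictions=[0, 1], references=[0, 1])
    # {'accuracy': 1.0, 'f1': 1.0}
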
@@ -23,10 +23,11 @@ from pathlib import Path
 import datasets
 import torch
-from datasets import load_dataset, load_metric
+from datasets import load_dataset
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

+import evaluate
 import transformers
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -466,9 +467,9 @@ def main():
     # Get the metric function
     if args.task_name is not None:
-        metric = load_metric("glue", args.task_name)
+        metric = evaluate.load("glue", args.task_name)
     else:
-        metric = load_metric("accuracy")
+        metric = evaluate.load("accuracy")

     # Train!
     total_batch_size = args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
...