Commit 9569ee87 authored by cardy20's avatar cardy20
Browse files

apeach update

parent 82906bdc
......@@ -69,7 +69,6 @@ gpt3_translation_benchmarks = {
"wmt16": ['en-ro', 'ro-en', 'de-en', 'en-de'], # German, Romanian
}
# 28 total
selected_translation_benchmarks = {
**gpt3_translation_benchmarks,
......@@ -88,7 +87,7 @@ all_translation_benchmarks = {
# All tasks
########################################
# Register each task class in the TASK_REGISTRY below
TASK_REGISTRY = {
# GLUE
"cola": glue.CoLA,
......@@ -322,7 +321,8 @@ TASK_REGISTRY = {
"ko_en_translation": ko_translation.KoEnTranslation,
"en_ko_translation": ko_translation.EnKoTranslation,
"kohatespeech":kohatespeech.HateSpeech,
"kohatespeech_gen_bias":kohatespeech.GenderBias
"kohatespeech_gen_bias":kohatespeech.GenderBias,
"kohatespeech_apeach":kohatespeech.Apeach
}
ALL_TASKS = sorted(list(TASK_REGISTRY))
......
"""
KLUE
https://aclanthology.org/2020.socialnlp-1.4.pdf
For hate speech, they introduce hate, offensive, and none labels.
They also added binary label whether a comment contains gender bias or not.
https://aclanthology.org/2022.findings-emnlp.525
Updated on May 06 2023
APEACH is the first crowd-generated Korean evaluation dataset for hate speech detection.
"""
import numpy as np
......@@ -11,7 +12,7 @@ from lm_eval.base import Task, MultipleChoiceTask, rf
from lm_eval.metrics import macro_f1_score, mean, matthews_corrcoef, f1_score, yesno
from lm_eval.utils import general_detokenize
_CITATION ="""
_CITATION1 ="""
@inproceedings{moon-etal-2020-beep,
title = "{BEEP}! {K}orean Corpus of Online News Comments for Toxic Speech Detection",
author = "Moon, Jihyung and
......@@ -56,7 +57,7 @@ class HateSpeech(MultipleChoiceTask):
def _process_doc(self, doc):
    """Turn a raw dataset row into the query/choices/gold dict used downstream.

    NOTE(review): the third choice "오" looks truncated — presumably "혐오"
    ("hate") was intended; confirm against the dataset's label strings.
    """
    # Answer options map to ["none", "offensive", "hate"]
    options = ["없음", "공격적", "오"]
    return {
        "query": "문장: {}".format(doc["comments"]),
        "choices": options,
        "gold": doc["hate"],
    }
......@@ -135,3 +136,69 @@ class GenderBias(Task):
"macro_f1": macro_f1_score
}
# BibTeX citation for the APEACH dataset paper (Yang, Jang & Cho, Findings of EMNLP 2022).
_CITATION2 = """
@inproceedings{yang-etal-2022-apeach,
title = "{APEACH}: Attacking Pejorative Expressions with Analysis on Crowd-Generated Hate Speech Evaluation Datasets",
author = "Yang, Kichang and
Jang, Wonjun and
Cho, Won Ik",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.525",
pages = "7076--7086",
abstract = "In hate speech detection, developing training and evaluation datasets across various domains is the critical issue. Whereas, major approaches crawl social media texts and hire crowd-workers to annotate the data. Following this convention often restricts the scope of pejorative expressions to a single domain lacking generalization. Sometimes domain overlap between training corpus and evaluation set overestimate the prediction performance when pretraining language models on low-data language. To alleviate these problems in Korean, we propose APEACH that asks unspecified users to generate hate speech examples followed by minimal post-labeling. We find that APEACH can collect useful datasets that are less sensitive to the lexical overlaps between the pretraining corpus and the evaluation set, thereby properly measuring the model performance.",
}
"""
class Apeach(Task):
    """APEACH: crowd-generated Korean hate-speech evaluation dataset.

    Binary classification: given a sentence, decide whether it contains
    hate speech by comparing the loglikelihoods of the continuations
    " 아니오" (no) and " 예" (yes).
    https://aclanthology.org/2022.findings-emnlp.525
    """

    VERSION = 0
    DATASET_PATH = "jason9693/APEACH"
    DATASET_NAME = "apeach"

    def has_training_docs(self):
        # APEACH ships only a test split (evaluation-only dataset).
        return False

    def has_validation_docs(self):
        return False

    def has_test_docs(self):
        return True

    def _process_doc(self, doc):
        # Identity transform: raw rows already expose the "text" and
        # "class" fields consumed below. Defined here because test_docs
        # referenced self._process_doc without Apeach (or, as far as this
        # file shows, the Task base) ever providing one — which would
        # raise AttributeError at iteration time.
        return doc

    def test_docs(self):
        return map(self._process_doc, self.dataset["test"])

    def doc_to_text(self, doc):
        return "문장: {} 혐오 발화가 있습니까?".format(doc["text"])

    def doc_to_target(self, doc):
        # 0 -> " 아니오" (no hate), 1 -> " 예" (hate)
        return " {}".format({0: "아니오", 1: "예"}[doc["class"]])

    def construct_requests(self, doc, ctx):
        ll_no, _ = rf.loglikelihood(ctx, " 아니오")
        ll_yes, _ = rf.loglikelihood(ctx, " 예")
        return ll_no, ll_yes

    def process_results(self, doc, results):
        # results = (ll_no, ll_yes); the argmax index matches the 0/1 labels.
        pred = np.argmax(results)
        gold = doc["class"]
        return {
            "acc": pred == gold,
            "macro_f1": (gold, pred),
        }

    def higher_is_better(self):
        return {
            "acc": True,
            "macro_f1": True,
        }

    def aggregation(self):
        return {
            "acc": mean,
            "macro_f1": macro_f1_score,
        }
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment