".github/workflows/metal-ci.yml" did not exist on "b6f90d25f0853a5e92036d988a47541ff8c3c15e"
Commit 8c997e53 authored by jon-tow

Revert `tests/testdata` changes and address flake8 issues

parent d95a4333
+# Ignore test linting to avoid conflicting changes to version stability.
+exclude: ^tests/testdata/
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v4.1.0
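The new top-level `exclude` is a Python regular expression that pre-commit tests against each repo-relative file path (via `re.search`), so every hook skips the pinned test fixtures. A minimal sketch of the matching rule, with made-up paths:

```python
import re

# pre-commit treats `exclude` as a Python regex tried against the
# repo-relative path of each staged file; ^ anchors it at the path start.
exclude = re.compile(r"^tests/testdata/")

for path in ["tests/testdata/anagrams1-v0-res.json", "lm_eval/tasks/blimp.py"]:
    print(path, "->", "skipped" if exclude.search(path) else "linted")
```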
@@ -9,6 +9,7 @@ import collections
from .janitor import Janitor, word_ngrams
from .archiver import ZStdTextReader
+
# Was used for testing the evaluator decoupled from the full logic below
def get_train_overlap_stub(docs, ngrams_path, ngrams_n_size):
    simulated_overlap = 0.1
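The hunk above truncates `get_train_overlap_stub`. A plausible completion, assuming the stub simply fabricates a fixed overlap fraction so the evaluator path can be exercised without real 13-gram buckets (the sampling logic here is illustrative, not the repository's exact code):

```python
import random

def get_train_overlap_stub(docs, ngrams_path, ngrams_n_size):
    # Pretend a fixed 10% of docs overlap the training set; the two unused
    # parameters only mirror the signature of the real overlap routine.
    simulated_overlap = 0.1
    contaminated = int(len(docs) * simulated_overlap)
    return set(random.sample(range(len(docs)), contaminated))

print(get_train_overlap_stub(["doc"] * 20, "unused.ngrams", 13))
```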
@@ -11,7 +11,7 @@ try:
    import janitor_util
    JANITOR_CPP = True
-except Exception as e:
+except Exception:
    print("WARNING: C++ module could not be loaded. Janitor running in python mode")
    traceback.print_exc()
    JANITOR_CPP = False
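The `as e` binding was never read, which flake8 reports as F841 (local variable assigned but never used); `traceback.print_exc()` already pulls the active exception from interpreter state. A minimal sketch of why the binding is unnecessary:

```python
import traceback

try:
    import does_not_exist_xyz  # hypothetical optional dependency
except Exception:
    # No binding needed: print_exc() reads the in-flight exception via
    # sys.exc_info(), so dropping `as e` silences F841 without losing info.
    traceback.print_exc()
```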
@@ -22,14 +22,12 @@ from . import naturalqs
from . import sat
from . import arithmetic
from . import lambada
from . import race
from . import piqa
from . import prost
from . import mc_taco
from . import triviaqa
from . import pubmedqa
from . import sciq
from . import webqs
from . import qasper
from . import qa4mre
from . import translation
@@ -294,7 +292,7 @@ ALL_TASKS = sorted(list(TASK_REGISTRY))
def get_task(task_name):
    try:
        return TASK_REGISTRY[task_name]
-    except KeyError as e:
+    except KeyError:
        print("Available tasks:")
        pprint(TASK_REGISTRY)
        raise KeyError(f"Missing task {task_name}")
@@ -28,7 +28,7 @@ _CITATION = """
    eprint = {https://doi.org/10.1162/tacl_a_00321},
    abstract = { We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP),1 a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4\%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. }
}
-"""
+"""  # noqa: W605
class BlimpTask(Task):
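The `# noqa: W605` is needed because the `\%` inside the citation string is not a valid Python escape sequence. An alternative sketch that fixes it at the source with a raw string instead of suppressing the warning (entry abridged for illustration):

```python
# "\%" inside an ordinary (even triple-quoted) literal is an invalid escape
# sequence, hence the W605 above. A raw string carries the BibTeX verbatim
# and needs no noqa.
_CITATION = r"""
@article{blimp,
    title = {BLiMP: The Benchmark of Linguistic Minimal Pairs for English},
    abstract = {... human aggregate agreement with the labels is 96.4\% ...}
}
"""
```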
@@ -98,7 +98,7 @@ class Math(Task):
            if verbose:
                print(ss1, ss2)
            return ss1 == ss2
-        except:
+        except Exception:
            return str1 == str2

    def remove_boxed(self, s):
@@ -246,7 +246,7 @@ class Math(Task):
        # remove percentage
        string = string.replace("\\%", "")
-        string = string.replace("\%", "")
+        string = string.replace("\%", "")  # noqa: W605
        # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
        string = string.replace(" .", " 0.")
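There is a subtlety here: because `\%` is not a recognized escape, Python parses the literal `"\%"` as the same two characters as `"\\%"`, so the second `replace` repeats the first; the noqa silences the warning without changing behavior. A quick check:

```python
# Both literals denote backslash + percent, so the two replace() calls in the
# hunk above operate on the same pattern; a raw string avoids W605 entirely.
assert "\%" == "\\%" == r"\%"  # noqa: W605

answer = "50\\%"
print(answer.replace(r"\%", ""))  # -> "50"
```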
@@ -71,7 +71,7 @@ class NaturalQs(Task):
    def doc_to_target(self, doc):
        # There's a short answer and a long answer. Based on the paper, I'm using the long answer.
-        short_answer = doc["annotations"]["short_answers"][0]["text"]
+        # short_answer = doc["annotations"]["short_answers"][0]["text"]
        long_answer_start = doc["annotations"]["long_answer"][0]["start_token"]
        long_answer_end = doc["annotations"]["long_answer"][0]["end_token"]
        long_answer_span = doc["document"]["tokens"]["token"][
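The hunk is cut off mid-expression; the long answer is recovered by slicing the document's token list with the annotated start and end offsets. A self-contained sketch with a made-up doc that mirrors the Natural Questions field layout referenced above:

```python
# Hypothetical doc shaped like the fields used in doc_to_target.
doc = {
    "annotations": {"long_answer": [{"start_token": 1, "end_token": 4}]},
    "document": {"tokens": {"token": ["The", "quick", "brown", "fox", "jumps"]}},
}

start = doc["annotations"]["long_answer"][0]["start_token"]
end = doc["annotations"]["long_answer"][0]["end_token"]
print(" ".join(doc["document"]["tokens"]["token"][start:end]))  # quick brown fox
```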
@@ -23,7 +23,7 @@ _CITATION = """
    booktitle={CLEF},
    year={2013}
}
-"""
+"""  # noqa: W605
class QA4MRE(MultipleChoiceTask):
@@ -144,7 +144,7 @@ class SQuAD2(Task):
            "f1": (
                predictions,
                references,
-            ), # The F-score of predicted tokens versus the gold answer
+            ),  # The F-score of predicted tokens versus the gold answer
            "HasAns_exact": (
                predictions,
                references,
@@ -180,7 +180,7 @@ class SQuAD2(Task):
            ),  # Exact match (the normalized answer exactly match the gold answer)
            "f1": partial(
                _squad_agg, "f1"
-            ), # The F-score of predicted tokens versus the gold answer
+            ),  # The F-score of predicted tokens versus the gold answer
            "HasAns_exact": partial(
                _squad_agg, "HasAns_exact"
            ),  # Exact match (the normalized answer exactly match the gold answer)
@@ -209,7 +209,7 @@ class SQuAD2(Task):
        """
        return {
            "exact": True,  # Exact match (the normalized answer exactly match the gold answer)
-            "f1": True, # The F-score of predicted tokens versus the gold answer
+            "f1": True,  # The F-score of predicted tokens versus the gold answer
            "HasAns_exact": True,  # Exact match (the normalized answer exactly match the gold answer)
            "HasAns_f1": True,  # The F-score of predicted tokens versus the gold answer
            "NoAns_exact": True,  # Exact match (the normalized answer exactly match the gold answer)
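All three SQuAD hunks make the same one-character fix: flake8's E261 requires at least two spaces before an inline comment. Illustration:

```python
score = 0.0 # E261: only one space before the inline comment
score = 0.0  # OK: at least two spaces before the "#"
```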
@@ -390,6 +390,7 @@ class TruthfulQAGeneration(Task):
        rouge_types = ["rouge1", "rouge2", "rougeLsum"]
        scorer = rouge_scorer.RougeScorer(rouge_types)
+
        # Add newlines between sentences to correctly compute `rougeLsum`.
        def _prepare_summary(summary):
            summary = summary.replace(" . ", ".\n")
            return summary
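`rougeLsum` is the sentence-level ROUGE-L variant and treats each newline as a sentence boundary, which is why the helper rewrites `" . "` as `".\n"` before scoring. A sketch using the rouge-score package (assuming it is installed; `RougeScorer.score(target, prediction)` returns precision/recall/F per metric):

```python
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(["rougeLsum"])

def _prepare_summary(summary):
    # Same transform as the hunk above: make sentence breaks explicit
    # so rougeLsum can split on newlines.
    return summary.replace(" . ", ".\n")

ref = _prepare_summary("The cat sat . It purred .")
pred = _prepare_summary("The cat sat . It slept .")
print(scorer.score(ref, pred)["rougeLsum"].fmeasure)
```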
@@ -42,8 +42,12 @@ def compress_and_move(working_directory, output_directory, process_count):
        tasks.append(task)
    pool = TqdmMultiProcessPool(process_count)
-    on_done = lambda _: None
-    on_error = lambda _: None
+    def on_done(_):
+        return None
+
+    def on_error(_):
+        return None
+
    global_progress = tqdm(
        total=len(bucket_file_paths), dynamic_ncols=True, unit="file"
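This commit repeats the same callback fix in three scripts: flake8's E731 flags assigning a lambda to a name, and the replacement `def` is equivalent for a no-op but gives the callback a real `__name__` for tracebacks and profiler output. Sketch:

```python
# Flagged style (E731): on_done = lambda _: None
# Replacement style used throughout this commit:
def on_done(_):
    return None

def on_error(_):
    return None

# Same call shape as in the hunks:
#     results = pool.map(global_progress, tasks, on_error, on_done)
print(on_done.__name__, on_error.__name__)
```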
@@ -51,8 +51,12 @@ def get_stats():
    # Generate minhashes with pool
    tasks = [(get_file_stats, (file,)) for file in files]
-    on_done = lambda _: None
-    on_error = lambda _: None
+    def on_done(_):
+        return None
+
+    def on_error(_):
+        return None
+
    results = pool.map(global_tqdm, tasks, on_error, on_done)
    total_documents, total_size = reduce(
@@ -30,12 +30,13 @@ from tqdm_multiprocess.logger import setup_logger_tqdm
logger = logging.getLogger(__name__)
+
# Multiprocessed
def process_bucket(
    bucket_file_path, processed_directory, move_dir, tqdm_func, global_tqdm
):
-    bucket_id = re.sub("\D", "", os.path.basename(bucket_file_path))
+    bucket_id = re.sub("\D", "", os.path.basename(bucket_file_path))  # noqa: W605
    done_file = os.path.join(
        processed_directory, f"ngram_bucket_processing_{bucket_id}.done"
    )
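`"\D"` draws W605 for the same reason as `"\%"` earlier: it is not a valid string escape, even though the regex engine still receives the intended pattern. A raw string removes the ambiguity without a noqa (the file name below is made up):

```python
import os
import re

path = "/tmp/ngram_buckets/ngram_bucket_0042.sorted"  # hypothetical path
bucket_id = re.sub(r"\D", "", os.path.basename(path))  # strip non-digits
print(bucket_id)  # -> "0042"
```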
@@ -106,8 +107,13 @@ def process_sorted_buckets(working_directory, move_dir, process_count):
    ]
    global_tqdm = tqdm(total=len(bucket_file_paths), dynamic_ncols=True, unit="bucket")
-    on_done = lambda _: None
-    on_error = lambda _: None
+
+    def on_done(_):
+        return None
+
+    def on_error(_):
+        return None
+
    _ = pool.map(global_tqdm, tasks, on_error, on_done)
@@ -3,7 +3,7 @@ from collections import Counter
import shutil
import glob
-from lm_eval.decontamination.janitor import *
+from lm_eval.decontamination.janitor import Janitor, word_ngrams
from scripts.clean_training_data.generate_13_grams import do_ngrams_in_buckets
from lm_eval.decontamination.archiver import Archive, TextReader
import re
from collections import defaultdict
-from lm_eval.decontamination.janitor import *
+from lm_eval.decontamination.janitor import (
+    Janitor,
+    form_ngrams,
+    word_ngrams,
+    split_indices,
+    word_ngrams_indices,
+)
def simple_ngram(sequence, n):
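Both test-file hunks make the same flake8 F403 fix, replacing `from ... import *` with the exact names used, so the dependency on the janitor module is explicit and greppable. The truncated `simple_ngram` that follows is presumably a plain-Python reference the tests compare against `form_ngrams`; one plausible shape (an assumption, not the repository's exact code):

```python
def simple_ngram(sequence, n):
    # Naive sliding window over an already-tokenized sequence, used as a
    # ground truth when checking the janitor's optimized n-gram helpers.
    return [tuple(sequence[i : i + n]) for i in range(len(sequence) - n + 1)]

print(simple_ngram("the cat sat on the mat".split(), 3)[:2])
# -> [('the', 'cat', 'sat'), ('cat', 'sat', 'on')]
```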
-7c0c5246d3f751f39119a5629ac1d4b2c6fd2a315f78d6de9b2c387e24e3fef1
+7c0c5246d3f751f39119a5629ac1d4b2c6fd2a315f78d6de9b2c387e24e3fef1
\ No newline at end of file
-{"results": {"anagrams1": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams1": 0}}
+{"results": {"anagrams1": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams1": 0}}
\ No newline at end of file
-6700a3c44e48abe8337238dcbe3b54cf4abafe0c204c52d921e590872fbd05e7
+6700a3c44e48abe8337238dcbe3b54cf4abafe0c204c52d921e590872fbd05e7
\ No newline at end of file
-{"results": {"anagrams2": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams2": 0}}
+{"results": {"anagrams2": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams2": 0}}
\ No newline at end of file
-3a84baf2f170e138c6ce0bc9f06f905def35d705fa2b8781f10c87aef404c4cb
+3a84baf2f170e138c6ce0bc9f06f905def35d705fa2b8781f10c87aef404c4cb
\ No newline at end of file
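The reverted fixtures come in pairs: a pinned JSON of expected metrics per task version, plus a companion sha256 digest over the serialized evaluation requests, which is how accidental drift in a versioned task gets detected (the commit message's "version stability"). The exact serialization is the harness's own, so the hash below is purely illustrative:

```python
import hashlib
import json

expected = {
    "results": {"anagrams1": {"acc": 0.0, "acc_stderr": 0.0}},
    "versions": {"anagrams1": 0},
}

# Hash a canonical serialization; a stored digest that matches on re-run
# means the task's inputs/outputs did not drift between versions.
blob = json.dumps(expected, sort_keys=True).encode("utf-8")
print(hashlib.sha256(blob).hexdigest())
```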