Commit 9baa294c authored by jon-tow
Browse files
parents 4587b718 e0396a4e
import collections
import itertools
import pathlib
import numpy as np
import random
import lm_eval.metrics
import lm_eval.models
import lm_eval.tasks
import lm_eval.base
import lm_eval.decontamination
import numpy as np
from lm_eval.utils import positional_deprecated, run_task_tests
from lm_eval.decontamination.decontaminate import get_train_overlap
@positional_deprecated
......@@ -229,6 +226,8 @@ def evaluate(
# Compare all tasks/sets at once to ensure a single training set scan
if decontaminate:
from lm_eval.decontamination.decontaminate import get_train_overlap
print("Finding train/test overlap, please wait...")
overlaps = get_train_overlap(
docs_for_decontamination, decontamination_ngrams_path, limit
......
......@@ -176,7 +176,7 @@ clean_ngram_with_indices(std::string const &input, std::string const &ignore,
}
// Skip ignored characters
} else if (ignore.find(*iter) != std::string::npos) {
} else if (ignore.find(ch) != std::string::npos) {
continue;
// If it is a non-ignored character, add it to the ngram and update the
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment