Commit 9baa294c authored by jon-tow's avatar jon-tow
Browse files
parents 4587b718 e0396a4e
import collections import collections
import itertools import itertools
import pathlib import numpy as np
import random import random
import lm_eval.metrics import lm_eval.metrics
import lm_eval.models import lm_eval.models
import lm_eval.tasks import lm_eval.tasks
import lm_eval.base import lm_eval.base
import lm_eval.decontamination
import numpy as np
from lm_eval.utils import positional_deprecated, run_task_tests from lm_eval.utils import positional_deprecated, run_task_tests
from lm_eval.decontamination.decontaminate import get_train_overlap
@positional_deprecated @positional_deprecated
...@@ -229,6 +226,8 @@ def evaluate( ...@@ -229,6 +226,8 @@ def evaluate(
# Compare all tasks/sets at once to ensure a single training set scan # Compare all tasks/sets at once to ensure a single training set scan
if decontaminate: if decontaminate:
from lm_eval.decontamination.decontaminate import get_train_overlap
print("Finding train/test overlap, please wait...") print("Finding train/test overlap, please wait...")
overlaps = get_train_overlap( overlaps = get_train_overlap(
docs_for_decontamination, decontamination_ngrams_path, limit docs_for_decontamination, decontamination_ngrams_path, limit
......
...@@ -176,7 +176,7 @@ clean_ngram_with_indices(std::string const &input, std::string const &ignore, ...@@ -176,7 +176,7 @@ clean_ngram_with_indices(std::string const &input, std::string const &ignore,
} }
// Skip ignored characters // Skip ignored characters
} else if (ignore.find(*iter) != std::string::npos) { } else if (ignore.find(ch) != std::string::npos) {
continue; continue;
// If it is a non-ignored character, add it to the ngram and update the // If it is a non-ignored character, add it to the ngram and update the
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment