"vscode:/vscode.git/clone" did not exist on "e9eef9628579e4ce946dadd7b84b14808183e72b"
Commit fbeaa2c1 authored by Yu Shi Jie
Browse files

Merge branch 'mmlu-pro' of github.com:ysjprojects/lm-evaluation-harness into mmlu-pro

Resolve conflict.
parents 91b2eec6 5c7cba23
......@@ -20,13 +20,13 @@ jobs:
with:
fetch-depth: 2 # OR "2" -> To retrieve the preceding commit.
# Uses the tj-actions/changed-files@v37 action to check for changes.
# Uses the tj-actions/changed-files action to check for changes.
# Outputs provided here: https://github.com/tj-actions/changed-files#outputs
# The `files_yaml` input optionally takes a yaml string to specify filters,
# and prepends the filter name to the standard output names.
- name: Check task folders
id: changed-tasks
uses: tj-actions/changed-files@v37.1.2
uses: tj-actions/changed-files@v44.5.2
with:
# tasks checks the tasks folder and api checks the api folder for changes
files_yaml: |
......
......@@ -32,7 +32,7 @@ jobs:
env:
SKIP: "no-commit-to-branch,mypy"
uses: pre-commit/action@v3.0.0
uses: pre-commit/action@v3.0.1
# # mypy turned off for now
# - name: Lint with mypy
# run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
......
......@@ -29,8 +29,7 @@ repos:
- id: mixed-line-ending
args: [--fix=lf]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.2
rev: v0.4.8
hooks:
# Run the linter.
- id: ruff
......@@ -39,7 +38,7 @@ repos:
# Run the formatter.
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
exclude: >
......@@ -47,9 +46,9 @@ repos:
.*\.json|ignore.txt|lm_eval/tasks/.*|.*yaml|.*\.ipynb
)$
args: [--check-filenames, --check-hidden, --ignore-words=ignore.txt]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies: [".[sentencepiece,multilingual,promptsource,gptq]", "types-PyYAML", "types-requests"]
exclude: ^tests/.*$
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.5.1
# hooks:
# - id: mypy
# additional_dependencies: [".[sentencepiece,multilingual,promptsource,gptq]", "types-PyYAML", "types-requests"]
# exclude: ^tests/.*$
......@@ -67,9 +67,9 @@ class TaskConfig(dict):
training_split: Optional[str] = None
validation_split: Optional[str] = None
test_split: Optional[str] = None
fewshot_split: Optional[
str
] = None # TODO: assert that this is not None if num_fewshot > 0. (?) assert if this is the same split as the one being evaluated (?)
fewshot_split: Optional[str] = (
None # TODO: assert that this is not None if num_fewshot > 0. (?) assert if this is the same split as the one being evaluated (?)
)
# formatting / prompting options.
# see docs/advanced_task_guide.md for more info
process_docs: Optional[Callable] = None
......@@ -92,9 +92,9 @@ class TaskConfig(dict):
filter_list: Optional[Union[str, list]] = None
should_decontaminate: bool = False
doc_to_decontamination_query: Optional[str] = None
metadata: Optional[
dict
] = None # by default, not used in the code. allows for users to pass arbitrary info to tasks
metadata: Optional[dict] = (
None # by default, not used in the code. allows for users to pass arbitrary info to tasks
)
def __post_init__(self) -> None:
if self.generation_kwargs is not None:
......@@ -229,9 +229,9 @@ class Task(abc.ABC):
self._config: TaskConfig = TaskConfig({**config}) if config else TaskConfig()
self._filters = [build_filter_ensemble("none", [["take_first", None]])]
self.fewshot_rnd: Optional[
random.Random
] = None # purposely induce errors in case of improper usage
self.fewshot_rnd: Optional[random.Random] = (
None # purposely induce errors in case of improper usage
)
def download(
self,
......
......@@ -4,7 +4,6 @@ from lm_eval.api.registry import register_filter
@register_filter("decontaminate")
class DecontaminationFilter(Filter):
"""
A filter which evaluates
"""
......
......@@ -259,7 +259,7 @@ class EvaluationTracker:
path.mkdir(parents=True, exist_ok=True)
file_results_samples = path.joinpath(
f"samples_{task_name}_{self.date_id}.json"
f"samples_{task_name}_{self.date_id}.jsonl"
)
for sample in samples:
......
......@@ -307,7 +307,7 @@ please install anthropic via `pip install 'lm-eval[anthropic]'` or `pip install
# defaults to os.environ.get("ANTHROPIC_API_KEY")
self.client = anthropic.Anthropic()
self.temperature = temperature
self.max_token = max_tokens
self.max_tokens = max_tokens
self.tokenizer = self.client.get_tokenizer()
self.kwargs = kwargs
......
""" TextSynth API
"""TextSynth API
Implementation provided by Fabrice Bellard:
https://github.com/EleutherAI/lm-evaluation-harness/issues/295
......@@ -11,6 +11,7 @@ Example usage:
Homepage: https://textsynth.com/index.html
"""
import logging
import os
......
......@@ -499,7 +499,10 @@ class VLLM(TemplateLM):
def modify_gen_kwargs(kwargs: dict) -> dict:
# sampling_params
do_sample = kwargs.pop("do_sample", None)
if do_sample is False or "temperature" not in kwargs:
if do_sample is False and "temperature" not in kwargs:
eval_logger.debug(
"Got `do_sample=False` and no temperature value, setting VLLM temperature to 0.0 ..."
)
kwargs["temperature"] = 0.0
# hf defaults
kwargs["skip_special_tokens"] = kwargs.get("skip_special_tokens", False)
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
import re
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
......@@ -8,6 +8,7 @@ Requires the installation of
`pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz"`
and is included so that the bigbench dependency can be avoided.
"""
import bigbench.api.util as bb_utils
import datasets
from tqdm import tqdm
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
......
"""
"""
import re
from typing import List
......
......@@ -13,6 +13,7 @@
# limitations under the License.
"""Library of instructions."""
import collections
import json
import logging
......
......@@ -13,6 +13,7 @@
# limitations under the License.
"""Registry of all instructions."""
from lm_eval.tasks.ifeval import instructions
......
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment