Unverified Commit c4fa6fb5 authored by Hailey Schoelkopf's avatar Hailey Schoelkopf Committed by GitHub
Browse files

Merge pull request #899 from EleutherAI/fix-scripts

[Refactor] Fix error when calling `lm-eval`
parents 15f4a3ef 436b2697
# name: Tasks Modified name: Tasks Modified
# on: on:
# push: push:
# branches: branches:
# - 'big-refactor*' - 'big-refactor*'
# pull_request: pull_request:
# branches: branches:
# - 'big-refactor*' - 'big-refactor*'
# workflow_dispatch: workflow_dispatch:
# # comment/edit out the above to stop/change the triggers # comment/edit out the above to stop/change the triggers
# jobs: jobs:
# changed_files: changed_files:
# runs-on: ubuntu-latest # windows-latest || macos-latest runs-on: ubuntu-latest # windows-latest || macos-latest
# timeout-minutes: 120 timeout-minutes: 120
# name: Scan for changed tasks name: Scan for changed tasks
# steps: steps:
# - name: checkout - name: checkout
# uses: actions/checkout@v3 uses: actions/checkout@v3
# with: with:
# fetch-depth: 2 # OR "2" -> To retrieve the preceding commit. fetch-depth: 2 # OR "2" -> To retrieve the preceding commit.
# # Uses the tj-actions/changed-files@v37 action to check for changes. # Uses the tj-actions/changed-files@v37 action to check for changes.
# # Outputs provided here: https://github.com/tj-actions/changed-files#outputs # Outputs provided here: https://github.com/tj-actions/changed-files#outputs
# # The `files_yaml` input optionally takes a yaml string to specify filters, # The `files_yaml` input optionally takes a yaml string to specify filters,
# # and prepends the filter name to the standard output names. # and prepends the filter name to the standard output names.
# - name: Check task folders - name: Check task folders
# id: changed-tasks id: changed-tasks
# uses: tj-actions/changed-files@v37.1.2 uses: tj-actions/changed-files@v37.1.2
# with: with:
# # tasks checks the tasks folder and api checks the api folder for changes # tasks checks the tasks folder and api checks the api folder for changes
# files_yaml: | files_yaml: |
# tasks: tasks:
# - lm_eval/tasks/** - lm_eval/tasks/**
# api: api:
# - lm_eval/api/** - lm_eval/api/**
# write_output_files: true write_output_files: true
# # The next step is optional; the files are written to the workspace by default (above). # The next step is optional; the files are written to the workspace by default (above).
# # so it's just for debugging # so it's just for debugging
# - name: Run Tests - name: Run Tests
# if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true' if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
# run: | run: |
# echo .github/outputs/tasks_all_changed_and_modified_files.txt >> 'GITHUB_ENV' echo .github/outputs/tasks_all_changed_and_modified_files.txt >> 'GITHUB_ENV'
# echo "One or more test file(s) has changed." echo "One or more test file(s) has changed."
# echo "List of all the files that have changed: ${{ steps.changed-tasks.outputs.tasks_all_modified_files }}" echo "List of all the files that have changed: ${{ steps.changed-tasks.outputs.tasks_all_modified_files }}"
# - name: Set up Python 3.9 - name: Set up Python 3.9
# if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true' if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
# uses: actions/setup-python@v4 uses: actions/setup-python@v4
# with: with:
# python-version: 3.9 python-version: 3.9
# cache: 'pip' cache: 'pip'
# cache-dependency-path: setup.py cache-dependency-path: setup.py
# - name: Install dependencies - name: Install dependencies
# if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true' if: steps.changed-tasks.outputs.tasks_any_modified == 'true' || steps.changed-tasks.outputs.api_any_modified == 'true'
# run: | run: |
# python -m pip install --upgrade pip python -m pip install --upgrade pip
# pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
# # Install optional git dependencies # Install optional git dependencies
# # pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt # pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# # if [ -f requirements.txt ]; then pip install -r requirements.txt; fi # if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# - name: Test with pytest - name: Test with pytest
# # if new tasks are added, run tests on them # if new tasks are added, run tests on them
# if: steps.changed-tasks.outputs.tasks_any_modified == 'true' if: steps.changed-tasks.outputs.tasks_any_modified == 'true'
# run: python -m pytest tests/test_tasks.py -s -vv run: python -m pytest tests/test_tasks.py -s -vv
# # if api is modified, run tests on it # if api is modified, run tests on it
# - name: Test more tasks with pytest - name: Test more tasks with pytest
# env: env:
# API: true API: true
# if: steps.changed-tasks.outputs.api_any_modified == 'true' if: steps.changed-tasks.outputs.api_any_modified == 'true'
# run: python -m pytest tests/test_tasks.py -s -vv run: python -m pytest tests/test_tasks.py -s -vv
...@@ -43,35 +43,35 @@ jobs: ...@@ -43,35 +43,35 @@ jobs:
# # mypy turned off for now # # mypy turned off for now
# - name: Lint with mypy # - name: Lint with mypy
# run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable # run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable
# Job 2 Job 2
# testcpu: testcpu:
# name: CPU Tests name: CPU Tests
# runs-on: ubuntu-latest runs-on: ubuntu-latest
# strategy: strategy:
# matrix: matrix:
# python-version: [ "3.8", "3.9", "3.10", "3.11" ] python-version: [ "3.8", "3.9", "3.10", "3.11" ]
# timeout-minutes: 30 timeout-minutes: 30
# steps: steps:
# - name: Checkout Code - name: Checkout Code
# uses: actions/checkout@v3 uses: actions/checkout@v3
# - name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v4 uses: actions/setup-python@v4
# with: with:
# python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
# cache: pip cache: pip
# cache-dependency-path: setup.py cache-dependency-path: setup.py
# - name: Install dependencies - name: Install dependencies
# run: | run: |
# python -m pip install --upgrade pip python -m pip install --upgrade pip
# pip install -e '.[testing,anthropic,sentencepiece]' --extra-index-url https://download.pytorch.org/whl/cpu pip install -e '.[testing,anthropic,sentencepiece]' --extra-index-url https://download.pytorch.org/whl/cpu
# # Install optional git dependencies # Install optional git dependencies
# # pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt # pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# # if [ -f requirements.txt ]; then pip install -r requirements.txt; fi # if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# - name: Test with pytest - name: Test with pytest
# run: python -m pytest --showlocals -s -vv -n=auto --ignore=tests/tests_master --ignore=tests/extra run: python -m pytest --showlocals -s -vv -n=auto --ignore=tests/tests_master --ignore=tests/extra
# - name: Archive artifacts - name: Archive artifacts
# uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3
# with: with:
# name: output_results name: output_results
# path: | path: |
# test_logs/* test_logs/*
...@@ -9,8 +9,8 @@ We’d like your help to test it out! you can help by: ...@@ -9,8 +9,8 @@ We’d like your help to test it out! you can help by:
2. Porting tasks supported in the previous version of the harness to the new YAML configuration format. Please check out our [task implementation guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/big-refactor/docs/new_task_guide.md) for more information. 2. Porting tasks supported in the previous version of the harness to the new YAML configuration format. Please check out our [task implementation guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/big-refactor/docs/new_task_guide.md) for more information.
If you choose to port a task not yet completed according to [our checklist](https://github.com/EleutherAI/lm-evaluation-harness/blob/big-refactor/lm_eval/tasks/README.md), then you can contribute it by opening a PR containing [Refactor] in the name with: If you choose to port a task not yet completed according to [our checklist](https://github.com/EleutherAI/lm-evaluation-harness/blob/big-refactor/lm_eval/tasks/README.md), then you can contribute it by opening a PR containing [Refactor] in the name with:
- A command of the form `python main.py --model hf --model_args ..... --tasks <task name> ...` which will run the task in the `master` branch, and what the score is - A command of the form `python -m lm_eval --model hf --model_args ..... --tasks <task name> ...` which will run the task in the `master` branch, and what the score is
- A command of the form `python main.py --model hf --model_args ..... --tasks <task name> ...` to run the task in your PR branch to `big-refactor`, and what the resulting score is, to show that we achieve equality between the two implementations. - A command of the form `python -m lm_eval --model hf --model_args ..... --tasks <task name> ...` to run the task in your PR branch to `big-refactor`, and what the resulting score is, to show that we achieve equality between the two implementations.
Lastly, we'll no longer be accepting new feature requests beyond those that are already open to the master branch as we carry out this switch to the new version over the next week, though we will be accepting bugfixes to `master` branch and PRs to `big-refactor`. Feel free to reach out in the #lm-thunderdome channel of the EAI discord for more information. Lastly, we'll no longer be accepting new feature requests beyond those that are already open to the master branch as we carry out this switch to the new version over the next week, though we will be accepting bugfixes to `master` branch and PRs to `big-refactor`. Feel free to reach out in the #lm-thunderdome channel of the EAI discord for more information.
...@@ -67,7 +67,7 @@ To evaluate a model hosted on the [HuggingFace Hub](https://huggingface.co/model ...@@ -67,7 +67,7 @@ To evaluate a model hosted on the [HuggingFace Hub](https://huggingface.co/model
```bash ```bash
python main.py \ python -m lm_eval \
--model hf \ --model hf \
--model_args pretrained=EleutherAI/gpt-j-6B \ --model_args pretrained=EleutherAI/gpt-j-6B \
--tasks hellaswag \ --tasks hellaswag \
...@@ -78,7 +78,7 @@ python main.py \ ...@@ -78,7 +78,7 @@ python main.py \
Additional arguments can be provided to the model constructor using the `--model_args` flag. Most notably, this supports the common practice of using the `revisions` feature on the Hub to store partially trained checkpoints, or to specify the datatype for running a model: Additional arguments can be provided to the model constructor using the `--model_args` flag. Most notably, this supports the common practice of using the `revisions` feature on the Hub to store partially trained checkpoints, or to specify the datatype for running a model:
```bash ```bash
python main.py \ python -m lm_eval \
--model hf \ --model hf \
--model_args pretrained=EleutherAI/pythia-160m,revision=step100000,dtype="float" \ --model_args pretrained=EleutherAI/pythia-160m,revision=step100000,dtype="float" \
--tasks lambada_openai,hellaswag \ --tasks lambada_openai,hellaswag \
...@@ -91,7 +91,7 @@ Models that are loaded via either `transformers.AutoModelForCausalLM` (autoregre ...@@ -91,7 +91,7 @@ Models that are loaded via either `transformers.AutoModelForCausalLM` (autoregre
Batch size selection can be automated by setting the ```--batch_size``` flag to ```auto```. This will perform automatic detection of the largest batch size that will fit on your device. On tasks where there is a large difference between the longest and shortest example, it can be helpful to periodically recompute the largest batch size, to gain a further speedup. To do this, append ```:N``` to the above flag to automatically recompute the largest batch size ```N``` times. For example, to recompute the batch size 4 times, the command would be: Batch size selection can be automated by setting the ```--batch_size``` flag to ```auto```. This will perform automatic detection of the largest batch size that will fit on your device. On tasks where there is a large difference between the longest and shortest example, it can be helpful to periodically recompute the largest batch size, to gain a further speedup. To do this, append ```:N``` to the above flag to automatically recompute the largest batch size ```N``` times. For example, to recompute the batch size 4 times, the command would be:
```bash ```bash
python main.py \ python -m lm_eval \
--model hf \ --model hf \
--model_args pretrained=EleutherAI/pythia-160m,revision=step100000,dtype="float" \ --model_args pretrained=EleutherAI/pythia-160m,revision=step100000,dtype="float" \
--tasks lambada_openai,hellaswag \ --tasks lambada_openai,hellaswag \
...@@ -99,7 +99,7 @@ python main.py \ ...@@ -99,7 +99,7 @@ python main.py \
--batch_size auto:4 --batch_size auto:4
``` ```
Alternatively, you can use `lm-eval` instead of `python main.py` to call lm eval from anywhere. Alternatively, you can use `lm-eval` or `lm_eval` instead of `python -m lm_eval` to call lm eval from anywhere.
### Multi-GPU Evaluation with Hugging Face `accelerate` ### Multi-GPU Evaluation with Hugging Face `accelerate`
...@@ -108,7 +108,7 @@ To parallelize evaluation of HuggingFace models across multiple GPUs, we allow f ...@@ -108,7 +108,7 @@ To parallelize evaluation of HuggingFace models across multiple GPUs, we allow f
The first is performed by launching evaluation via the `accelerate` library as follows: The first is performed by launching evaluation via the `accelerate` library as follows:
``` ```
accelerate launch main.py \ accelerate launch -m lm_eval \
--model hf \ --model hf \
--tasks lambada_openai,arc_easy \ --tasks lambada_openai,arc_easy \
--batch_size 16 \ --batch_size 16 \
...@@ -121,7 +121,7 @@ If your model *is too large to be run on a single one of your GPUs* then you ...@@ -121,7 +121,7 @@ If your model *is too large to be run on a single one of your GPUs* then you
We also provide a second method to run these large models: use of the `parallelize` argument. We also provide a second method to run these large models: use of the `parallelize` argument.
``` ```
python main.py \ python -m lm_eval \
--model hf \ --model hf \
--model_args pretrained=EleutherAI/pythia-12b,parallelize=True --model_args pretrained=EleutherAI/pythia-12b,parallelize=True
--tasks lambada_openai,arc_easy \ --tasks lambada_openai,arc_easy \
...@@ -136,7 +136,7 @@ To pass even more advanced keyword arguments to `accelerate`, we allow for the f ...@@ -136,7 +136,7 @@ To pass even more advanced keyword arguments to `accelerate`, we allow for the f
Note that this method naively splits models across GPUs, resulting in only a single GPU performing work at any point in time, and so is much slower than launching with `accelerate launch`, possibly by a factor of the total # of GPUs. Note that this method naively splits models across GPUs, resulting in only a single GPU performing work at any point in time, and so is much slower than launching with `accelerate launch`, possibly by a factor of the total # of GPUs.
**Note that this option requires launching evaluation via `python main.py` rather than `accelerate launch main.py`.** **Note that this option requires launching evaluation via `python -m lm_eval` rather than `accelerate launch -m lm_eval`.**
To use `accelerate` with the `lm-eval` command, use To use `accelerate` with the `lm-eval` command, use
``` ```
...@@ -167,7 +167,7 @@ Our library supports language models served via the OpenAI Completions API as fo ...@@ -167,7 +167,7 @@ Our library supports language models served via the OpenAI Completions API as fo
```bash ```bash
export OPENAI_API_SECRET_KEY=YOUR_KEY_HERE export OPENAI_API_SECRET_KEY=YOUR_KEY_HERE
python main.py \ python -m lm_eval \
--model openai-completions \ --model openai-completions \
--model_args engine=davinci \ --model_args engine=davinci \
--tasks lambada_openai,hellaswag --tasks lambada_openai,hellaswag
...@@ -198,7 +198,7 @@ This will write out one text file for each task. ...@@ -198,7 +198,7 @@ This will write out one text file for each task.
To verify the data integrity of the tasks you're performing in addition to running the tasks themselves, you can use the `--check_integrity` flag: To verify the data integrity of the tasks you're performing in addition to running the tasks themselves, you can use the `--check_integrity` flag:
```bash ```bash
python main.py \ python -m lm_eval \
--model openai \ --model openai \
--model_args engine=davinci \ --model_args engine=davinci \
--tasks lambada_openai,hellaswag \ --tasks lambada_openai,hellaswag \
...@@ -209,7 +209,7 @@ python main.py \ ...@@ -209,7 +209,7 @@ python main.py \
For models loaded with the HuggingFace `transformers` library, any arguments provided via `--model_args` get passed to the relevant constructor directly. This means that anything you can do with `AutoModel` can be done with our library. For example, you can pass a local path via `pretrained=` or use models finetuned with [PEFT](https://github.com/huggingface/peft) by taking the call you would run to evaluate the base model and add `,peft=PATH` to the `model_args` argument: For models loaded with the HuggingFace `transformers` library, any arguments provided via `--model_args` get passed to the relevant constructor directly. This means that anything you can do with `AutoModel` can be done with our library. For example, you can pass a local path via `pretrained=` or use models finetuned with [PEFT](https://github.com/huggingface/peft) by taking the call you would run to evaluate the base model and add `,peft=PATH` to the `model_args` argument:
```bash ```bash
python main.py \ python -m lm_eval \
--model hf \ --model hf \
--model_args pretrained=EleutherAI/gpt-j-6b,parallelize=True,load_in_4bit=True,peft=nomic-ai/gpt4all-j-lora \ --model_args pretrained=EleutherAI/gpt-j-6b,parallelize=True,load_in_4bit=True,peft=nomic-ai/gpt4all-j-lora \
--tasks openbookqa,arc_easy,winogrande,hellaswag,arc_challenge,piqa,boolq \ --tasks openbookqa,arc_easy,winogrande,hellaswag,arc_challenge,piqa,boolq \
...@@ -219,7 +219,7 @@ python main.py \ ...@@ -219,7 +219,7 @@ python main.py \
[GPTQ](https://github.com/PanQiWei/AutoGPTQ) quantized models can be loaded by specifying their file names in `,gptq=NAME` (or `,gptq=True` for default names) in the `model_args` argument: [GPTQ](https://github.com/PanQiWei/AutoGPTQ) quantized models can be loaded by specifying their file names in `,gptq=NAME` (or `,gptq=True` for default names) in the `model_args` argument:
```bash ```bash
python main.py \ python -m lm_eval \
--model hf \ --model hf \
--model_args pretrained=model-name-or-path,gptq=model.safetensors,gptq_use_triton=True \ --model_args pretrained=model-name-or-path,gptq=model.safetensors,gptq_use_triton=True \
--tasks hellaswag --tasks hellaswag
......
...@@ -2,11 +2,11 @@ ...@@ -2,11 +2,11 @@
## Usage ## Usage
Simply add a "--decontamination_ngrams_path" when running main.py. The provided directory should contain Simply add a "--decontamination_ngrams_path" when running \__main\__.py. The provided directory should contain
the ngram files and info.json produced in "Pile Ngram Generation" further down. the ngram files and info.json produced in "Pile Ngram Generation" further down.
```bash ```bash
python main.py \ python -m lm_eval \
--model gpt2 \ --model gpt2 \
--device 0 \ --device 0 \
--tasks sciq \ --tasks sciq \
......
...@@ -4,7 +4,7 @@ This document details the interface exposed by `lm-eval` and provides details on ...@@ -4,7 +4,7 @@ This document details the interface exposed by `lm-eval` and provides details on
## Command-line Interface ## Command-line Interface
A majority of users run the library by cloning it from GitHub and running the `main.py` script. A majority of users run the library by cloning it from GitHub, installing the package as editable, and running the `python -m lm_eval` command.
Equivalently, running the library can be done via the `lm-eval` entrypoint at the command line. Equivalently, running the library can be done via the `lm-eval` entrypoint at the command line.
......
...@@ -70,9 +70,9 @@ smth smth tokenizer-agnostic ...@@ -70,9 +70,9 @@ smth smth tokenizer-agnostic
Congrats on implementing your model! Now it's time to test it out. Congrats on implementing your model! Now it's time to test it out.
To make your model usable via the command line interface to `lm-eval` using `main.py`, you'll need to tell `lm-eval` what your model's name is. To make your model usable via the command line interface to `lm-eval` using `python -m lm_eval`, you'll need to tell `lm-eval` what your model's name is.
This is done via a *decorator*, `lm_eval.api.registry.register_model`. Using `register_model()`, one can both tell the package what the model's name(s) to be used are when invoking it with `python main.py --model <name>` and alert `lm-eval` to the model's existence. This is done via a *decorator*, `lm_eval.api.registry.register_model`. Using `register_model()`, one can both tell the package what the model's name(s) to be used are when invoking it with `python -m lm_eval --model <name>` and alert `lm-eval` to the model's existence.
```python ```python
from lm_eval.api.registry import register_model from lm_eval.api.registry import register_model
......
...@@ -258,7 +258,7 @@ You can do this via adding the Python snippet ...@@ -258,7 +258,7 @@ You can do this via adding the Python snippet
from lm_eval.tasks import include_task_folder from lm_eval.tasks import include_task_folder
include_task_folder("/path/to/yaml/parent/folder") include_task_folder("/path/to/yaml/parent/folder")
``` ```
to the top of any Python file that is run or imported when performing evaluation, such as `main.py`. to the top of any Python file that is run or imported when performing evaluation, such as `\_\_main\_\_.py`.
Passing `--tasks /path/to/yaml/file` is also accepted. Passing `--tasks /path/to/yaml/file` is also accepted.
......
...@@ -12,10 +12,9 @@ from lm_eval.api.registry import ALL_TASKS ...@@ -12,10 +12,9 @@ from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger, SPACING from lm_eval.logger import eval_logger, SPACING
from lm_eval.tasks import include_task_folder from lm_eval.tasks import include_task_folder
os.environ["TOKENIZERS_PARALLELISM"] = "false" from typing import Union
def parse_eval_args() -> argparse.Namespace:
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("--model", required=True, help="Name of model e.g. `hf`") parser.add_argument("--model", required=True, help="Name of model e.g. `hf`")
parser.add_argument( parser.add_argument(
...@@ -100,8 +99,13 @@ def parse_args() -> argparse.Namespace: ...@@ -100,8 +99,13 @@ def parse_args() -> argparse.Namespace:
return parser.parse_args() return parser.parse_args()
def main() -> None: def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
args = parse_args()
if not args:
# we allow for args to be passed externally, else we parse them ourselves
args = parse_eval_args()
os.environ["TOKENIZERS_PARALLELISM"] = "false"
if args.limit: if args.limit:
eval_logger.warning( eval_logger.warning(
...@@ -213,4 +217,4 @@ def main() -> None: ...@@ -213,4 +217,4 @@ def main() -> None:
if __name__ == "__main__": if __name__ == "__main__":
main() cli_evaluate()
...@@ -47,8 +47,8 @@ lm_eval = ["**/*.yaml", "tasks/**/*"] ...@@ -47,8 +47,8 @@ lm_eval = ["**/*.yaml", "tasks/**/*"]
examples = ["**/*.yaml"] examples = ["**/*.yaml"]
[project.scripts] [project.scripts]
lm-eval = "main:main" lm-eval = "lm_eval.__main__:cli_evaluate"
lm_eval = "main:main" lm_eval = "lm_eval.__main__:cli_evaluate"
[project.urls] [project.urls]
Homepage = "https://github.com/EleutherAI/lm-evaluation-harness" Homepage = "https://github.com/EleutherAI/lm-evaluation-harness"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment