Unverified Commit 0366c74f authored by Hailey Schoelkopf's avatar Hailey Schoelkopf Committed by GitHub
Browse files

Factor out LM-specific tests (#1859)

* separate out optimum/neuralmagic tests to separate job

* fix vllm tests

* fix bug in --trust_remote_code

* use datasets.config instead intentionally

* fix remote code issue?
parent b62b9bd0
......@@ -55,13 +55,39 @@ jobs:
cache-dependency-path: pyproject.toml
- name: Install dependencies
run: |
export HF_DATASETS_TRUST_REMOTE_CODE=1
python -m pip install --upgrade pip
pip install -e '.[dev,anthropic,sentencepiece,optimum,deepsparse,sparseml]' --extra-index-url https://download.pytorch.org/whl/cpu
pip install -e '.[dev,anthropic,sentencepiece]' --extra-index-url https://download.pytorch.org/whl/cpu
# Install optional git dependencies
# pip install bleurt@https://github.com/google-research/bleurt/archive/b610120347ef22b494b6d69b4316e303f5932516.zip#egg=bleurt
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest
run: python -m pytest --showlocals -s -vv -n=auto
run: python -m pytest --showlocals -s -vv -n=auto --ignore=tests/models/test_neuralmagic.py --ignore=tests/models/test_openvino.py
- name: Archive artifacts
uses: actions/upload-artifact@v3
with:
name: output_results
path: |
test_logs/*
testmodels:
name: External LM Tests
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Set up Python 3.8
uses: actions/setup-python@v5
with:
python-version: 3.8
cache: pip
cache-dependency-path: pyproject.toml
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e '.[dev,optimum,deepsparse,sparseml]' --extra-index-url https://download.pytorch.org/whl/cpu
- name: Test with pytest
run: python -m pytest tests/models --showlocals -s -vv
- name: Archive artifacts
uses: actions/upload-artifact@v3
with:
......
from typing import List
import pytest
import torch
from lm_eval import tasks
from lm_eval.api.instance import Instance
......@@ -11,7 +10,7 @@ task_manager = tasks.TaskManager()
@pytest.mark.skip(reason="requires CUDA")
class TEST_VLLM:
class Test_VLLM:
vllm = pytest.importorskip("vllm")
try:
from lm_eval.models.vllm_causallms import VLLM
......@@ -19,7 +18,7 @@ class TEST_VLLM:
LM = VLLM(pretrained="EleutherAI/pythia-70m")
except ModuleNotFoundError:
pass
torch.use_deterministic_algorithms(True)
# torch.use_deterministic_algorithms(True)
task_list = task_manager.load_task_or_group(["arc_easy", "gsm8k", "wikitext"])
multiple_choice_task = task_list["arc_easy"] # type: ignore
multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment