Unverified Commit b0969caf authored by Yih-Dar, committed by GitHub
Browse files

Make `parallelism` for CircleCI jobs work - but keep it `1` for now (#21157)



* split tests

* test CI

* add if else
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 25533638
...@@ -15,7 +15,9 @@ ...@@ -15,7 +15,9 @@
import argparse import argparse
import copy import copy
import glob
import os import os
import random
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
...@@ -58,6 +60,8 @@ class CircleCIJob: ...@@ -58,6 +60,8 @@ class CircleCIJob:
self.pytest_options = {} self.pytest_options = {}
if isinstance(self.tests_to_run, str): if isinstance(self.tests_to_run, str):
self.tests_to_run = [self.tests_to_run] self.tests_to_run = [self.tests_to_run]
if self.parallelism is None:
self.parallelism = 1
def to_dict(self): def to_dict(self):
job = { job = {
...@@ -99,10 +103,57 @@ class CircleCIJob: ...@@ -99,10 +103,57 @@ class CircleCIJob:
f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}" f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
) )
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags) test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
if self.tests_to_run is None: if self.parallelism == 1:
test_command += " << pipeline.parameters.tests_to_run >>" if self.tests_to_run is None:
test_command += " << pipeline.parameters.tests_to_run >>"
else:
test_command += " " + " ".join(self.tests_to_run)
else: else:
test_command += " " + " ".join(self.tests_to_run) # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
tests = self.tests_to_run
if tests is None:
folder = os.environ["test_preparation_dir"]
test_file = os.path.join(folder, "filtered_test_list.txt")
if os.path.exists(test_file):
with open(test_file) as f:
tests = f.read().split(" ")
# expand the test list
if tests == ["tests"]:
tests = [os.path.join("tests", x) for x in os.listdir("tests")]
expanded_tests = []
for test in tests:
if test.endswith(".py"):
expanded_tests.append(test)
elif test == "tests/models":
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
elif test == "tests/pipelines":
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
else:
expanded_tests.append(test)
# Avoid long tests always being collected together
random.shuffle(expanded_tests)
tests = " ".join(expanded_tests)
# Each executor to run ~10 tests
n_executors = max(len(tests) // 10, 1)
# Avoid empty test list on some executor(s) or launching too many executors
if n_executors > self.parallelism:
n_executors = self.parallelism
job["parallelism"] = n_executors
# Need to be newline separated for the command `circleci tests split` below
command = f'echo {tests} | tr " " "\\n" >> tests.txt'
steps.append({"run": {"name": "Get tests", "command": command}})
command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
steps.append({"run": {"name": "Split tests", "command": command}})
steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
test_command += " $(cat splitted_tests.txt)"
if self.marker is not None: if self.marker is not None:
test_command += f" -m {self.marker}" test_command += f" -m {self.marker}"
test_command += " | tee tests_output.txt" test_command += " | tee tests_output.txt"
...@@ -156,6 +207,7 @@ torch_job = CircleCIJob( ...@@ -156,6 +207,7 @@ torch_job = CircleCIJob(
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]", "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
"pip install git+https://github.com/huggingface/accelerate", "pip install git+https://github.com/huggingface/accelerate",
], ],
parallelism=1,
pytest_num_workers=3, pytest_num_workers=3,
) )
...@@ -168,6 +220,7 @@ tf_job = CircleCIJob( ...@@ -168,6 +220,7 @@ tf_job = CircleCIJob(
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]", "pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
"pip install tensorflow_probability", "pip install tensorflow_probability",
], ],
parallelism=1,
pytest_options={"rA": None}, pytest_options={"rA": None},
) )
...@@ -179,6 +232,7 @@ flax_job = CircleCIJob( ...@@ -179,6 +232,7 @@ flax_job = CircleCIJob(
"pip install --upgrade pip", "pip install --upgrade pip",
"pip install .[flax,testing,sentencepiece,flax-speech,vision]", "pip install .[flax,testing,sentencepiece,flax-speech,vision]",
], ],
parallelism=1,
pytest_options={"rA": None}, pytest_options={"rA": None},
) )
...@@ -356,6 +410,8 @@ REPO_UTIL_TESTS = [repo_utils_job] ...@@ -356,6 +410,8 @@ REPO_UTIL_TESTS = [repo_utils_job]
def create_circleci_config(folder=None): def create_circleci_config(folder=None):
if folder is None: if folder is None:
folder = os.getcwd() folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder
jobs = [] jobs = []
all_test_file = os.path.join(folder, "test_list.txt") all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file): if os.path.exists(all_test_file):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment