Unverified Commit 1d757686 authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Reuse the cache created for latest `main` on PRs/branches if `setup.py` is not modified (#25445)



* fix

* fix

* fix

---------
Co-authored-by: default avatarydshieh <ydshieh@users.noreply.github.com>
parent 4692d261
...@@ -34,9 +34,9 @@ jobs: ...@@ -34,9 +34,9 @@ jobs:
- run: pip install -U --upgrade-strategy eager GitPython - run: pip install -U --upgrade-strategy eager GitPython
- run: pip install -U --upgrade-strategy eager . - run: pip install -U --upgrade-strategy eager .
- run: mkdir -p test_preparation - run: mkdir -p test_preparation
- run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt - run: python utils/tests_fetcher.py | tee test_preparation/tests_fetched_summary.txt
- store_artifacts: - store_artifacts:
path: ~/transformers/tests_fetched_summary.txt path: test_preparation/tests_fetched_summary.txt
- run: | - run: |
if [ -f test_list.txt ]; then if [ -f test_list.txt ]; then
cp test_list.txt test_preparation/test_list.txt cp test_list.txt test_preparation/test_list.txt
......
...@@ -18,10 +18,14 @@ import copy ...@@ -18,10 +18,14 @@ import copy
import glob import glob
import os import os
import random import random
import subprocess
import yaml
from base64 import b64encode
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
import yaml from git import Repo
COMMON_ENV_VARIABLES = { COMMON_ENV_VARIABLES = {
...@@ -64,6 +68,8 @@ class CircleCIJob: ...@@ -64,6 +68,8 @@ class CircleCIJob:
working_directory: str = "~/transformers" working_directory: str = "~/transformers"
# This should be only used for doctest job! # This should be only used for doctest job!
command_timeout: Optional[int] = None command_timeout: Optional[int] = None
# The explicit checksum to use for cache load/save
checksum: Optional[str] = None
def __post_init__(self): def __post_init__(self):
# Deal with defaults for mutable attributes. # Deal with defaults for mutable attributes.
...@@ -100,6 +106,14 @@ class CircleCIJob: ...@@ -100,6 +106,14 @@ class CircleCIJob:
job["resource_class"] = self.resource_class job["resource_class"] = self.resource_class
if self.parallelism is not None: if self.parallelism is not None:
job["parallelism"] = self.parallelism job["parallelism"] = self.parallelism
checksum = self.checksum if self.checksum is not None else '{{ checksum "setup.py" }}'
save_cache = True
if self.checksum is not None:
# `setup.py` is not modified and we are not on `main` branch
cache_branch_prefix = "main"
save_cache = False
steps = [ steps = [
"checkout", "checkout",
{"attach_workspace": {"at": "~/transformers/test_preparation"}}, {"attach_workspace": {"at": "~/transformers/test_preparation"}},
...@@ -107,7 +121,7 @@ class CircleCIJob: ...@@ -107,7 +121,7 @@ class CircleCIJob:
"restore_cache": { "restore_cache": {
"keys": [ "keys": [
# check the fully-matched cache first # check the fully-matched cache first
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}', f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}",
# try the partially-matched cache from `main` # try the partially-matched cache from `main`
f"v{self.cache_version}-{self.cache_name}-main-pip-", f"v{self.cache_version}-{self.cache_name}-main-pip-",
# try the general partially-matched cache # try the general partially-matched cache
...@@ -118,7 +132,7 @@ class CircleCIJob: ...@@ -118,7 +132,7 @@ class CircleCIJob:
{ {
"restore_cache": { "restore_cache": {
"keys": [ "keys": [
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}', f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}",
f"v{self.cache_version}-{self.cache_name}-main-site-packages-", f"v{self.cache_version}-{self.cache_name}-main-site-packages-",
f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-", f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-",
] ]
...@@ -126,10 +140,11 @@ class CircleCIJob: ...@@ -126,10 +140,11 @@ class CircleCIJob:
}, },
] ]
steps.extend([{"run": l} for l in self.install_steps]) steps.extend([{"run": l} for l in self.install_steps])
if save_cache:
steps.append( steps.append(
{ {
"save_cache": { "save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-" + '{{ checksum "setup.py" }}', "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-pip-{checksum}",
"paths": ["~/.cache/pip"], "paths": ["~/.cache/pip"],
} }
} }
...@@ -137,7 +152,7 @@ class CircleCIJob: ...@@ -137,7 +152,7 @@ class CircleCIJob:
steps.append( steps.append(
{ {
"save_cache": { "save_cache": {
"key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-" + '{{ checksum "setup.py" }}', "key": f"v{self.cache_version}-{self.cache_name}-{cache_branch_prefix}-site-packages-{checksum}",
"paths": ["~/.pyenv/versions/"], "paths": ["~/.pyenv/versions/"],
} }
} }
...@@ -531,11 +546,48 @@ REPO_UTIL_TESTS = [repo_utils_job] ...@@ -531,11 +546,48 @@ REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job] DOC_TESTS = [doc_test_job]
def get_main_setup_checksum():
PATH_TO_REPO = Path(__file__).parent.parent.resolve()
repo = Repo(PATH_TO_REPO)
current_head = repo.head.ref
main_head = repo.refs.main
setup_file_path = os.path.join(PATH_TO_REPO, "setup.py")
main_head.checkout()
proc = subprocess.Popen(["sha256sum", f"{setup_file_path}"], stdout=subprocess.PIPE)
checksum = proc.stdout.read().decode().split(" ")[0]
checksum = b64encode(bytes.fromhex(checksum)).decode()
# go back to the original branch
current_head.checkout()
return checksum
def create_circleci_config(folder=None): def create_circleci_config(folder=None):
if folder is None: if folder is None:
folder = os.getcwd() folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism) # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder os.environ["test_preparation_dir"] = folder
checksum = None
# if already on `main`, don't try to use the latest commit on `main` to avoid (rare) race condition where multiple
# commits are merged into `main`.
if os.environ.get("CIRCLE_BRANCH", "pull") != "main":
# Check if `setup.py` is modified.
summary_file = os.path.join(folder, "tests_fetched_summary.txt")
if os.path.exists(summary_file):
with open(summary_file) as f:
tests_fetched_summary = f.read()
setup_file_modifiled = "### TEST TO RUN ###\n- tests\n" in tests_fetched_summary
if not setup_file_modifiled:
# If not, we use `setup.py` of the `latest` commit on the `main` branch to compute the checksum for
# cache
checksum = get_main_setup_checksum()
jobs = [] jobs = []
all_test_file = os.path.join(folder, "test_list.txt") all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file): if os.path.exists(all_test_file):
...@@ -618,6 +670,10 @@ def create_circleci_config(folder=None): ...@@ -618,6 +670,10 @@ def create_circleci_config(folder=None):
"nightly": {"type": "boolean", "default": False}, "nightly": {"type": "boolean", "default": False},
"tests_to_run": {"type": "string", "default": test_list}, "tests_to_run": {"type": "string", "default": test_list},
} }
for job in jobs:
job.checksum = checksum
config["jobs"] = {j.job_name: j.to_dict() for j in jobs} config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
with open(os.path.join(folder, "generated_config.yml"), "w") as f: with open(os.path.join(folder, "generated_config.yml"), "w") as f:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment