Unverified commit ca3df9f0, authored by Yih-Dar, committed by GitHub
Browse files

Run doctest (in PRs) only when some doc example(s) are modified (#23387)



* fix

* fix

* update

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 17d0290e
......@@ -43,6 +43,12 @@ jobs:
else
touch test_preparation/test_list.txt
fi
- run: |
if [ -f doctest_list.txt ]; then
cp doctest_list.txt test_preparation/doctest_list.txt
else
touch test_preparation/doctest_list.txt
fi
- run: |
if [ -f test_repo_utils.txt ]; then
mv test_repo_utils.txt test_preparation/test_repo_utils.txt
......@@ -71,6 +77,8 @@ jobs:
fi
- store_artifacts:
path: test_preparation/test_list.txt
- store_artifacts:
path: test_preparation/doctest_list.txt
- store_artifacts:
path: ~/transformers/test_preparation/filtered_test_list.txt
- store_artifacts:
......
......@@ -483,7 +483,6 @@ REGULAR_TESTS = [
hub_job,
onnx_job,
exotic_models_job,
doc_test_job
]
EXAMPLES_TESTS = [
examples_torch_job,
......@@ -495,6 +494,8 @@ PIPELINE_TESTS = [
pipelines_tf_job,
]
REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
def create_circleci_config(folder=None):
if folder is None:
......@@ -552,6 +553,15 @@ def create_circleci_config(folder=None):
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
jobs.extend(EXAMPLES_TESTS)
doctest_file = os.path.join(folder, "doctest_list.txt")
if os.path.exists(doctest_file):
with open(doctest_file) as f:
doctest_list = f.read()
else:
doctest_list = []
if len(doctest_list) > 0:
jobs.extend(DOC_TESTS)
repo_util_file = os.path.join(folder, "test_repo_utils.txt")
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
jobs.extend(REPO_UTIL_TESTS)
......
......@@ -116,6 +116,26 @@ def clean_code(content):
return "\n".join(lines_to_keep)
def keep_doc_examples_only(content):
    """
    Reduce `content` to the text found between its triple-backtick fences.

    Everything outside the fenced segments is discarded. Within what is kept, anything from a `#` sign to the end of
    the line is removed, and lines that end up empty or whitespace-only are dropped.
    """
    # Splitting on the fence marker puts the fenced segments at the odd indices.
    fenced_segments = content.split("```")[1::2]
    # Wrap the result in a leading and trailing fence so it is easier to line up with the original input `content`.
    fenced_text = "```" + "```".join(fenced_segments) + "```"

    # Strip the `#` comments first, then keep only the lines that still carry something.
    stripped_lines = (re.sub("#.*$", "", raw_line) for raw_line in fenced_text.split("\n"))
    return "\n".join(kept for kept in stripped_lines if kept and not kept.isspace())
def get_all_tests():
"""
Return a list of paths to all test folders and files under `tests`. All paths are rooted at `tests`.
......@@ -162,6 +182,24 @@ def diff_is_docstring_only(repo, branching_point, filename):
return old_content_clean == new_content_clean
def diff_contains_doc_examples(repo, branching_point, filename):
    """
    Check if the diff of a file touches its doc examples.

    The file is read once with `branching_point` checked out and once as it currently is in the working directory;
    both versions are reduced with `keep_doc_examples_only` before being compared.

    Args:
        repo: Repository object; its `working_dir` is the folder `filename` is resolved against.
        branching_point: Commit handed to `checkout_commit` to obtain the old version of the file.
        filename: Path of the file, relative to the repository working directory.

    Returns:
        `bool`: `True` if the doc examples differ between the two versions, `False` otherwise.
    """
    file_path = Path(repo.working_dir) / filename
    with checkout_commit(repo, branching_point):
        with open(file_path, "r", encoding="utf-8") as f:
            old_content = f.read()

    # Read outside the checkout block: this is the version of the file currently in the working directory
    # (presumably `checkout_commit` restores the previous checkout on exit -- it is defined elsewhere in this file).
    with open(file_path, "r", encoding="utf-8") as f:
        new_content = f.read()

    return keep_doc_examples_only(old_content) != keep_doc_examples_only(new_content)
def get_diff(repo, base_commit, commits):
"""
Get's the diff between one or several commits and the head of the repository.
......@@ -216,32 +254,46 @@ def get_modified_python_files(diff_with_last_commit=False):
return get_diff(repo, repo.head.commit, parent_commits)
def get_diff_for_py_and_mdx_files(repo, base_commit, commits):
def get_diff_for_doctesting(repo, base_commit, commits):
    """
    Gets the diff between one or several commits and the head of the repository where some doc example(s) are changed.

    Args:
        repo: Repository object, forwarded to `diff_contains_doc_examples` for modified files.
        base_commit: The commit each entry of `commits` is diffed against (it is passed to `commit.diff`).
        commits: Iterable of commits to compare with `base_commit`.

    Returns:
        `List[str]`: Paths of the `.py`/`.mdx` files that were added, renamed (both the old and the new path), or
        modified in a way that touches a doc example.
    """
    print("\n### DIFF ###\n")
    code_diff = []
    for commit in commits:
        for diff_obj in commit.diff(base_commit):
            # Only added, modified and renamed entries are of interest. Filtering on the change type and on the
            # extension separately avoids mixing `and`/`or` in one condition: `and` binds tighter than `or`, so a
            # combined test without parentheses lets every `.mdx` entry through, deleted files included.
            if diff_obj.change_type not in ("A", "M", "R"):
                continue
            # Doc examples are only looked for in python and mdx files.
            if not diff_obj.b_path.endswith((".py", ".mdx")):
                continue

            # We always add new python/mdx files
            if diff_obj.change_type == "A":
                code_diff.append(diff_obj.b_path)
            # In case of renames, we'll look at the tests using both the old and new name.
            elif diff_obj.a_path != diff_obj.b_path:
                code_diff.extend([diff_obj.a_path, diff_obj.b_path])
            # Otherwise, we check modifications contain some doc example(s).
            elif diff_contains_doc_examples(repo, commit, diff_obj.b_path):
                code_diff.append(diff_obj.a_path)
            else:
                print(f"Ignoring diff in {diff_obj.b_path} as it doesn't contain any doc example.")

    return code_diff
def get_modified_python_and_mdx_files(diff_with_last_commit=False):
def get_doctest_files(diff_with_last_commit=False):
"""
Return a list of python and mdx files that have been modified between:
Return a list of python and mdx files where some doc example(s) in them have been modified between:
- the current head and the main branch if `diff_with_last_commit=False` (default)
- the current head and its parent commit otherwise.
"""
repo = Repo(PATH_TO_REPO)
test_files_to_run = [] # noqa
if not diff_with_last_commit:
print(f"main is at {repo.refs.main.commit}")
print(f"Current head is at {repo.head.commit}")
......@@ -249,23 +301,14 @@ def get_modified_python_and_mdx_files(diff_with_last_commit=False):
branching_commits = repo.merge_base(repo.refs.main, repo.head)
for commit in branching_commits:
print(f"Branching commit: {commit}")
return get_diff_for_py_and_mdx_files(repo, repo.head.commit, branching_commits)
test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, branching_commits)
else:
print(f"main is at {repo.head.commit}")
parent_commits = repo.head.commit.parents
for commit in parent_commits:
print(f"Parent commit: {commit}")
return get_diff_for_py_and_mdx_files(repo, repo.head.commit, parent_commits)
def get_doctest_files(diff_with_last_commit=False):
"""
Return a list of python and mdx files that have been modified between:
test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, parent_commits)
- the current head and the main branch if `diff_with_last_commit=False` (default)
- the current head and its parent commit otherwise.
"""
test_files_to_run = get_modified_python_and_mdx_files(diff_with_last_commit)
with open("utils/documentation_tests.txt") as fp:
documentation_tests = set(fp.read().strip().split("\n"))
# So far we don't have 100% coverage for doctest. This line will be removed once we achieve 100%.
......@@ -647,6 +690,14 @@ def infer_tests_to_run(
create_json_map(test_files_to_run, json_output_file)
doctest_list = get_doctest_files()
print(f"\n### DOCTEST TO RUN ###\n{_print_list(doctest_list)}")
if len(doctest_list) > 0:
doctest_file = Path(output_file).parent / "doctest_list.txt"
with open(doctest_file, "w", encoding="utf-8") as f:
f.write(" ".join(doctest_list))
def filter_tests(output_file, filters):
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment