"...composable_kernel_onnx.git" did not exist on "43c898f6ffe39244b6f023a565407a39ef2152bb"
Unverified Commit ca3df9f0 authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Run doctest (in PRs) only when some doc example(s) are modified (#23387)



* fix

* fix

* update

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 17d0290e
...@@ -43,6 +43,12 @@ jobs: ...@@ -43,6 +43,12 @@ jobs:
else else
touch test_preparation/test_list.txt touch test_preparation/test_list.txt
fi fi
- run: |
if [ -f doctest_list.txt ]; then
cp doctest_list.txt test_preparation/doctest_list.txt
else
touch test_preparation/doctest_list.txt
fi
- run: | - run: |
if [ -f test_repo_utils.txt ]; then if [ -f test_repo_utils.txt ]; then
mv test_repo_utils.txt test_preparation/test_repo_utils.txt mv test_repo_utils.txt test_preparation/test_repo_utils.txt
...@@ -71,6 +77,8 @@ jobs: ...@@ -71,6 +77,8 @@ jobs:
fi fi
- store_artifacts: - store_artifacts:
path: test_preparation/test_list.txt path: test_preparation/test_list.txt
- store_artifacts:
path: test_preparation/doctest_list.txt
- store_artifacts: - store_artifacts:
path: ~/transformers/test_preparation/filtered_test_list.txt path: ~/transformers/test_preparation/filtered_test_list.txt
- store_artifacts: - store_artifacts:
......
...@@ -483,7 +483,6 @@ REGULAR_TESTS = [ ...@@ -483,7 +483,6 @@ REGULAR_TESTS = [
hub_job, hub_job,
onnx_job, onnx_job,
exotic_models_job, exotic_models_job,
doc_test_job
] ]
EXAMPLES_TESTS = [ EXAMPLES_TESTS = [
examples_torch_job, examples_torch_job,
...@@ -495,6 +494,8 @@ PIPELINE_TESTS = [ ...@@ -495,6 +494,8 @@ PIPELINE_TESTS = [
pipelines_tf_job, pipelines_tf_job,
] ]
REPO_UTIL_TESTS = [repo_utils_job] REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
def create_circleci_config(folder=None): def create_circleci_config(folder=None):
if folder is None: if folder is None:
...@@ -552,6 +553,15 @@ def create_circleci_config(folder=None): ...@@ -552,6 +553,15 @@ def create_circleci_config(folder=None):
if os.path.exists(example_file) and os.path.getsize(example_file) > 0: if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
jobs.extend(EXAMPLES_TESTS) jobs.extend(EXAMPLES_TESTS)
doctest_file = os.path.join(folder, "doctest_list.txt")
if os.path.exists(doctest_file):
with open(doctest_file) as f:
doctest_list = f.read()
else:
doctest_list = []
if len(doctest_list) > 0:
jobs.extend(DOC_TESTS)
repo_util_file = os.path.join(folder, "test_repo_utils.txt") repo_util_file = os.path.join(folder, "test_repo_utils.txt")
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0: if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
jobs.extend(REPO_UTIL_TESTS) jobs.extend(REPO_UTIL_TESTS)
......
...@@ -116,6 +116,26 @@ def clean_code(content): ...@@ -116,6 +116,26 @@ def clean_code(content):
return "\n".join(lines_to_keep) return "\n".join(lines_to_keep)
def keep_doc_examples_only(content):
"""
Remove code, docstring that is not code example, empty line or comments from `content`.
"""
# Keep doc examples only by splitting on triple "`"
splits = content.split("```")
# Add leading and trailing "```" so the navigation is easier when compared to the original input `content`
content = "```" + "```".join(splits[1::2]) + "```"
# Remove empty lines and comments
lines_to_keep = []
for line in content.split("\n"):
# remove anything that is after a # sign.
line = re.sub("#.*$", "", line)
if len(line) == 0 or line.isspace():
continue
lines_to_keep.append(line)
return "\n".join(lines_to_keep)
def get_all_tests(): def get_all_tests():
""" """
Return a list of paths to all test folders and files under `tests`. All paths are rooted at `tests`. Return a list of paths to all test folders and files under `tests`. All paths are rooted at `tests`.
...@@ -162,6 +182,24 @@ def diff_is_docstring_only(repo, branching_point, filename): ...@@ -162,6 +182,24 @@ def diff_is_docstring_only(repo, branching_point, filename):
return old_content_clean == new_content_clean return old_content_clean == new_content_clean
def diff_contains_doc_examples(repo, branching_point, filename):
    """
    Check whether the doc examples of `filename` differ between `branching_point` and the current checkout.

    The file is read once with `branching_point` checked out and once in the current state of the working
    directory. Both versions are reduced with `keep_doc_examples_only` before being compared.

    Returns `True` if the reduced contents differ, `False` otherwise.
    """
    target = Path(repo.working_dir) / filename

    def _read_target():
        with open(target, "r", encoding="utf-8") as f:
            return f.read()

    # Version of the file at the branching point.
    with checkout_commit(repo, branching_point):
        previous_text = _read_target()
    # Version of the file as it is in the working directory now.
    current_text = _read_target()

    previous_examples = keep_doc_examples_only(previous_text)
    current_examples = keep_doc_examples_only(current_text)
    return previous_examples != current_examples
def get_diff(repo, base_commit, commits): def get_diff(repo, base_commit, commits):
""" """
Get's the diff between one or several commits and the head of the repository. Get's the diff between one or several commits and the head of the repository.
...@@ -216,32 +254,46 @@ def get_modified_python_files(diff_with_last_commit=False): ...@@ -216,32 +254,46 @@ def get_modified_python_files(diff_with_last_commit=False):
return get_diff(repo, repo.head.commit, parent_commits) return get_diff(repo, repo.head.commit, parent_commits)
def get_diff_for_doctesting(repo, base_commit, commits):
    """
    Gets the diff between one or several commits and the head of the repository where some doc example(s) are changed.

    Args:
        repo: The repository object, forwarded to `diff_contains_doc_examples`.
        base_commit: The commit each entry of `commits` is diffed against.
        commits: The commits to diff; each must provide `diff(base_commit)` yielding objects with
            `change_type`, `a_path` and `b_path`.

    Returns:
        The list of `.py`/`.mdx` paths that were added, renamed, or modified in their doc examples.
    """
    print("\n### DIFF ###\n")
    code_diff = []
    for commit in commits:
        for diff_obj in commit.diff(base_commit):
            # Only python/mdx files are considered. Filtering on the extension first avoids mixing `and`/`or`
            # in the change type checks below (which used to let any `.mdx` file through, deleted ones included).
            if not diff_obj.b_path.endswith((".py", ".mdx")):
                continue
            # We always add new python/mdx files
            if diff_obj.change_type == "A":
                code_diff.append(diff_obj.b_path)
            # Now for modified files
            elif diff_obj.change_type in ["M", "R"]:
                # In case of renames, we'll look at the tests using both the old and new name.
                if diff_obj.a_path != diff_obj.b_path:
                    code_diff.extend([diff_obj.a_path, diff_obj.b_path])
                # Otherwise, we check modifications contain some doc example(s).
                elif diff_contains_doc_examples(repo, commit, diff_obj.b_path):
                    code_diff.append(diff_obj.a_path)
                else:
                    print(f"Ignoring diff in {diff_obj.b_path} as it doesn't contain any doc example.")

    return code_diff
def get_modified_python_and_mdx_files(diff_with_last_commit=False): def get_doctest_files(diff_with_last_commit=False):
""" """
Return a list of python and mdx files that have been modified between: Return a list of python and mdx files where some doc example(s) in them have been modified between:
- the current head and the main branch if `diff_with_last_commit=False` (default) - the current head and the main branch if `diff_with_last_commit=False` (default)
- the current head and its parent commit otherwise. - the current head and its parent commit otherwise.
""" """
repo = Repo(PATH_TO_REPO) repo = Repo(PATH_TO_REPO)
test_files_to_run = [] # noqa
if not diff_with_last_commit: if not diff_with_last_commit:
print(f"main is at {repo.refs.main.commit}") print(f"main is at {repo.refs.main.commit}")
print(f"Current head is at {repo.head.commit}") print(f"Current head is at {repo.head.commit}")
...@@ -249,23 +301,14 @@ def get_modified_python_and_mdx_files(diff_with_last_commit=False): ...@@ -249,23 +301,14 @@ def get_modified_python_and_mdx_files(diff_with_last_commit=False):
branching_commits = repo.merge_base(repo.refs.main, repo.head) branching_commits = repo.merge_base(repo.refs.main, repo.head)
for commit in branching_commits: for commit in branching_commits:
print(f"Branching commit: {commit}") print(f"Branching commit: {commit}")
return get_diff_for_py_and_mdx_files(repo, repo.head.commit, branching_commits) test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, branching_commits)
else: else:
print(f"main is at {repo.head.commit}") print(f"main is at {repo.head.commit}")
parent_commits = repo.head.commit.parents parent_commits = repo.head.commit.parents
for commit in parent_commits: for commit in parent_commits:
print(f"Parent commit: {commit}") print(f"Parent commit: {commit}")
return get_diff_for_py_and_mdx_files(repo, repo.head.commit, parent_commits) test_files_to_run = get_diff_for_doctesting(repo, repo.head.commit, parent_commits)
def get_doctest_files(diff_with_last_commit=False):
"""
Return a list of python and mdx files that have been modified between:
- the current head and the main branch if `diff_with_last_commit=False` (default)
- the current head and its parent commit otherwise.
"""
test_files_to_run = get_modified_python_and_mdx_files(diff_with_last_commit)
with open("utils/documentation_tests.txt") as fp: with open("utils/documentation_tests.txt") as fp:
documentation_tests = set(fp.read().strip().split("\n")) documentation_tests = set(fp.read().strip().split("\n"))
# So far we don't have 100% coverage for doctest. This line will be removed once we achieve 100%. # So far we don't have 100% coverage for doctest. This line will be removed once we achieve 100%.
...@@ -647,6 +690,14 @@ def infer_tests_to_run( ...@@ -647,6 +690,14 @@ def infer_tests_to_run(
create_json_map(test_files_to_run, json_output_file) create_json_map(test_files_to_run, json_output_file)
doctest_list = get_doctest_files()
print(f"\n### DOCTEST TO RUN ###\n{_print_list(doctest_list)}")
if len(doctest_list) > 0:
doctest_file = Path(output_file).parent / "doctest_list.txt"
with open(doctest_file, "w", encoding="utf-8") as f:
f.write(" ".join(doctest_list))
def filter_tests(output_file, filters): def filter_tests(output_file, filters):
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment