Unverified commit bd908e9b, authored by Sylvain Gugger and committed by GitHub
Browse files

Fix README localizer script (#17407)

parent 4d727bd2
......@@ -312,8 +312,6 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
# This regex is used to synchronize link.
_re_capture_title_link = re.compile(r"\*\*\[([^\]]*)\]\(([^\)]*)\)\*\*")
num_models_equal = True
if len(localized_model_list) == 0:
localized_model_index = {}
else:
......@@ -325,10 +323,16 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
except AttributeError:
raise AttributeError("A model name in localized READMEs cannot be recognized.")
model_keys = [re.search(r"\*\*\[([^\]]*)", line).groups()[0] for line in model_list.strip().split("\n")]
# We exclude keys in localized README not in the main one.
readmes_match = not any([k not in model_keys for k in localized_model_index])
localized_model_index = {k: v for k, v in localized_model_index.items() if k in model_keys}
for model in model_list.strip().split("\n"):
title, model_link = _re_capture_title_link.search(model).groups()
if title not in localized_model_index:
num_models_equal = False
readmes_match = False
# Add an anchor white space behind a model description string for regex.
# If metadata cannot be captured, the English version will be directly copied.
localized_model_index[title] = _re_capture_meta.sub(_rep, model + " ")
......@@ -340,7 +344,7 @@ def convert_to_localized_md(model_list, localized_model_list, format_str):
sorted_index = sorted(localized_model_index.items(), key=lambda x: x[0].lower())
return num_models_equal, "\n".join(map(lambda x: x[1], sorted_index)) + "\n"
return readmes_match, "\n".join(map(lambda x: x[1], sorted_index)) + "\n"
def convert_readme_to_index(model_list):
......@@ -380,7 +384,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f:
readme = f.read()
new_readme = readme.replace("https://huggingface.co/transformers", "https://huggingface.co/docs/transformers")
new_readme = readme.replace(
new_readme = new_readme.replace(
"https://huggingface.co/docs/main/transformers", "https://huggingface.co/docs/transformers/main"
)
if new_readme != readme:
......@@ -412,9 +416,9 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
_format_model_list = value["format_model_list"]
localized_md_list = get_model_list(filename, _start_prompt, _end_prompt)
num_models_equal, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list)
readmes_match, converted_md_list = convert_to_localized_md(md_list, localized_md_list, _format_model_list)
converted_md_lists.append((filename, num_models_equal, converted_md_list, _start_prompt, _end_prompt))
converted_md_lists.append((filename, readmes_match, converted_md_list, _start_prompt, _end_prompt))
converted_md_list = convert_readme_to_index(md_list)
if converted_md_list != index_list:
......@@ -428,7 +432,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
)
for converted_md_list in converted_md_lists:
filename, num_models_equal, converted_md, _start_prompt, _end_prompt = converted_md_list
filename, readmes_match, converted_md, _start_prompt, _end_prompt = converted_md_list
if filename == "README.md":
continue
......@@ -438,7 +442,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
)
with open(os.path.join(REPO_PATH, filename), "w", encoding="utf-8", newline="\n") as f:
f.writelines(lines[:start_index] + [converted_md] + lines[end_index:])
elif not num_models_equal:
elif not readmes_match:
raise ValueError(
f"The model list in the README changed and the list in `{filename}` has not been updated. Run "
"`make fix-copies` to fix this."
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment