"""
Keep the Chinese docs (``zh_CN``) in sync with the English docs (``en_US``).

Should be run regularly. Files in the whitelist are kept as real files,
because they are translated manually; every other document in ``zh_CN`` is
a symbolic link to its English counterpart.

Under docs, run

    python tools/chineselink.py

Pass ``check`` as the first argument to only report what is out of sync
(raising ``ValueError`` if anything is) instead of fixing it.
"""

import hashlib
import os
import shutil
import sys
from pathlib import Path


def walk(path):
    """Recursively yield every non-directory entry found beneath ``path``."""
    for p in Path(path).iterdir():
        if p.is_dir():
            yield from walk(p)
            continue
        yield p


# Keeps files as discussed in
# https://github.com/microsoft/nni/issues/4298
# Not the recommended way of sphinx though:
# https://docs.readthedocs.io/en/stable/guides/manage-translations-sphinx.html
whitelist = [
    '_templates/index.html',  # I think no one ever remembers to update this file. Might need to rethink about this.
    'Overview.rst',
    'installation.rst',
    'Tutorial/InstallationLinux.rst',
    'Tutorial/InstallationWin.rst',
    'Tutorial/QuickStart.rst',
    'TrialExample/Trials.rst',
    'Tutorial/WebUI.rst',
    'NAS/QuickStart.rst',
    'Compression/Overview.rst',
    'Compression/QuickStart.rst',
]

# Only files with these suffixes are considered documentation.
suffix_list = [
    '.html',
    '.md',
    '.rst',
    '.ipynb',
]

# "check" mode: report problems only, never touch the file system.
pipeline_mode = len(sys.argv) > 1 and sys.argv[1] == 'check'
# Human-readable description of every file found to be out of sync.
failed_files = []


def _ends_with_toctree(rst_path):
    """Return True when ``rst_path`` ends inside a ``toctree::`` block.

    The flag is raised by any line containing ``toctree::`` and lowered by a
    later non-blank line that does not start with a space, so it is still set
    at end of file only if nothing but indented/blank lines follow the
    last toctree directive.
    """
    in_toctree = False
    with rst_path.open('r', encoding='utf-8') as f:
        for line in f:
            if in_toctree and line.strip() and not line.startswith(' '):
                in_toctree = False
            if 'toctree::' in line:
                in_toctree = True
    return in_toctree


def _relative_link(source, relative_path):
    """Return the path to ``source`` as seen from the directory of its zh_CN twin.

    ``relative_path`` is the document path relative to the language root; one
    ``..`` is needed per path component to climb back to the docs folder.
    """
    return Path(*(['..'] * len(relative_path.parts))) / source


def need_to_translate(source, target):
    """Make sure the manually translated ``target`` tracks ``source``.

    The first line of a translated file is ``.. <checksum>``, where the
    checksum is the first 32 hex digits of the SHA-256 of the English source
    it was translated from. A missing or stale target is recorded in
    ``failed_files``. Outside pipeline mode a missing target is seeded with a
    copy of the source and the checksum line is refreshed.

    Parameters
    ----------
    source : pathlib.Path
        English document.
    target : pathlib.Path
        Chinese document that is expected to be a regular (non-link) file.
    """
    # A leftover symlink is not a translation. Writing to it would modify the
    # English source it points to, so it is handled like a missing file.
    is_link = target.is_symlink()
    if is_link or not target.exists():
        failed_files.append('(missing) ' + target.as_posix())
        if pipeline_mode:
            return
        if is_link:
            target.unlink()
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(source, target)

    if target.suffix == '.html':
        return  # FIXME I don't know how to process html

    # Hash the source handed in by the caller, not a module-level variable.
    target_checksum = hashlib.sha256(source.read_bytes()).hexdigest()[:32]
    with target.open('r', encoding='utf-8') as f:
        contents = f.readlines()
    checksum = contents[0].strip()[3:] if contents else ''
    if checksum == target_checksum:
        return  # already up to date, nothing to rewrite

    failed_files.append('(out-of-date) ' + target.as_posix())
    if pipeline_mode:
        return

    firstline = '.. ' + target_checksum + '\n'
    if contents and contents[0].startswith('.. '):
        contents = [firstline] + contents[1:]
    else:
        # No checksum comment yet (or an empty file): prepend one.
        contents = [firstline, '\n'] + contents
    with target.open('w', encoding='utf-8') as f:
        f.writelines(contents)


def main():
    """Synchronise ``zh_CN`` with ``en_US``, or only verify it in pipeline mode.

    Raises
    ------
    FileNotFoundError
        If a whitelisted file does not exist under ``zh_CN``.
    ValueError
        In pipeline mode, if any file is missing, stale, wrongly linked or
        redundant.
    """
    for path in whitelist:
        if not (Path('zh_CN') / path).exists():
            raise FileNotFoundError(f'Whitelisted file is missing from zh_CN: {path}')

    # Documents ending in a toctree are content tables; they are translated
    # manually as well, so they join the whitelist.
    content_tables = [
        path.relative_to('en_US').as_posix()
        for path in walk(Path('en_US'))
        if path.suffix == '.rst' and _ends_with_toctree(path)
    ]
    print('Whitelist:', content_tables)
    translated = set(whitelist) | set(content_tables)

    for path in walk(Path('en_US')):
        relative_path = path.relative_to('en_US')
        if relative_path.as_posix().startswith('_build'):
            continue
        if path.suffix not in suffix_list:
            continue
        target_path = Path('zh_CN') / relative_path

        if relative_path.as_posix() in translated:
            # whitelist files. should be translated
            need_to_translate(path, target_path)
            print(f'Skipped linking for {path} as it is in whitelist.')
            continue

        link_path = _relative_link(path, relative_path)
        # Compare as paths so that the separator style of readlink() is irrelevant.
        if target_path.is_symlink() and Path(os.readlink(target_path)) == link_path:
            continue
        failed_files.append('(invalid link) ' + target_path.as_posix())
        if not pipeline_mode:
            target_path.parent.mkdir(parents=True, exist_ok=True)
            target_path.unlink(missing_ok=True)
            target_path.symlink_to(link_path)

    # delete redundant files
    for path in walk(Path('zh_CN')):
        if path.suffix not in suffix_list:
            continue
        relative_path = path.relative_to('zh_CN')
        if (Path('en_US') / relative_path).exists():
            continue
        failed_files.append('(redundant) ' + path.as_posix())
        if not pipeline_mode:
            print(f'Deleting {path}')
            path.unlink()

    if pipeline_mode and failed_files:
        raise ValueError(
            'The following files are not up-to-date. Please run "python3 tools/chineselink.py" under docs folder '
            'to refresh them and update their corresponding translation.\n' +
            '\n'.join([' ' + line for line in failed_files]))
    if failed_files:
        print('Updated files:', failed_files)


if __name__ == '__main__':
    main()