"""
Keep the Chinese docs in sync with the English docs. This script should be run regularly.

There is no sane way to check the contents, though. PR review should require contributors to update the
corresponding translation.
See https://github.com/microsoft/nni/issues/4298 for discussion.

Under docs, run

    python tools/chineselink.py
"""

import hashlib
import shutil
import sys
from pathlib import Path


17
def iterate_dir(path):
    """Recursively yield every non-directory entry below ``path``.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory to walk.

    Yields
    ------
    pathlib.Path
        Each entry that is not a directory, at any depth. Directories
        themselves are never yielded.
    """
    # Sort a snapshot of the listing: the traversal order becomes deterministic
    # and the walk is not disturbed when the caller creates or deletes files
    # while consuming the generator.
    for p in sorted(Path(path).iterdir()):
        if p.is_dir():
            yield from iterate_dir(p)
        else:
            yield p

24
25
26
27
28
29
30
# File suffixes that the main loop treats as documentation sources.
suffix_list = ['.html', '.md', '.rst', '.ipynb']

# Pipeline mode is enabled by passing `check` as the first command-line argument.
pipeline_mode = sys.argv[1:2] == ['check']
# Human-readable descriptions of every file found missing, out-of-date or redundant.
failed_files = []

# in case I need to change `_zh` to something else
# files = list(filter(lambda d: d.name.endswith('zh_CN.rst'), iterate_dir('source')))
# for file in files:
#     os.rename(file, file.parent / (file.name[:-7] + file.name[-4:]))

def need_to_translate(source, target):
    """Stamp ``target`` with the checksum of ``source`` and record staleness.

    The first line of a translated file is expected to be ``.. <checksum>``,
    where ``<checksum>`` is the first 32 hex digits of the SHA-256 of the
    source file. A missing or stale target is appended to the module-level
    ``failed_files`` list. Outside pipeline mode, a missing target is created
    as a copy of ``source`` and the checksum line is (re)written.

    Parameters
    ----------
    source : pathlib.Path
        The original document.
    target : pathlib.Path
        The translated counterpart of ``source``.

    Returns
    -------
    None
    """
    if not target.exists():
        failed_files.append('(missing) ' + target.as_posix())
        if pipeline_mode:
            return
        shutil.copyfile(source, target)
    if target.suffix == '.html':
        return  # FIXME I don't know how to process html

    # Hash the `source` argument itself rather than a module-level loop variable,
    # so the function works for any caller.
    source_checksum = hashlib.sha256(source.read_bytes()).hexdigest()[:32]

    # Read with an explicit encoding: translated docs contain non-ASCII text and
    # must not depend on the platform default.
    with target.open('r', encoding='utf-8') as f:
        contents = f.readlines()

    # An empty target has no checksum line; treat it as out-of-date.
    checksum = contents[0].strip()[3:] if contents else ''
    if checksum != source_checksum:
        failed_files.append('(out-of-date) ' + target.as_posix())
        if pipeline_mode:
            return

    firstline = '.. ' + source_checksum + '\n'
    if contents and contents[0].startswith('.. '):
        # Replace the existing checksum comment.
        contents = [firstline] + contents[1:]
    else:
        # No checksum comment yet: insert one followed by a blank line.
        contents = [firstline, '\n'] + contents
    with target.open('w', encoding='utf-8') as f:
        f.writelines(contents)


63
64
# Walk every file under `source` and reconcile each document with its `_zh` counterpart.
for path in iterate_dir(Path('source')):
    relative_path = path.relative_to('source')
    # Skip anything whose relative path starts with `_build` (presumably build output -- confirm).
    if relative_path.as_posix().startswith('_build'):
        continue
    if path.suffix not in suffix_list:
        continue

    if '_zh.' not in path.name:
        # Original document: if a `_zh` sibling exists, verify / refresh its checksum.
        target_path = path.parent / (path.stem + '_zh' + path.suffix)
        if target_path.exists():
            # whitelist files. should be translated
            need_to_translate(path, target_path)
            print(f'Skipped linking for {path} as it is in whitelist.')
    else:
        # Translated document: drop the trailing `_zh` from the stem to locate its original.
        source_path = path.parent / (path.stem[:-3] + path.suffix)
        if not source_path.exists():
            # delete redundant files
            # Report and log the translated file itself: that is the file that is
            # redundant and gets removed, whereas `source_path` does not exist.
            failed_files.append('(redundant) ' + path.as_posix())
            if not pipeline_mode:
                print(f'Deleting {path}')
                path.unlink()


# In pipeline mode nothing is modified; any recorded problem fails the run.
if pipeline_mode and failed_files:
    raise ValueError(
        'The following files are not up-to-date. Please run "python3 tools/chineselink.py" under docs folder '
        'to refresh them and update their corresponding translation.\n' + '\n'.join(['  ' + line for line in failed_files]))
if failed_files:
    print('Updated files:', failed_files)