md2rst.py 5.35 KB
Newer Older
qianyj's avatar
qianyj committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import argparse
import m2r
import os
import re
import shutil
from pathlib import Path


def single_line_process(line):
    """Post-process one line of converted reST and return the fixed line.

    The substitutions are applied in a fixed order and some of them rely on
    the output of earlier ones (for example, the link rewrite turns a single
    trailing underscore into a double one before the special-case
    replacements look for the double-underscore form), so they must not be
    reordered.

    :param line: a single line of text, without its trailing newline.
    :return: the rewritten line. It may contain embedded newlines where an
        image directive or a paragraph break is inserted.
    """
    if line == ' .. contents::':
        return '.. contents::'
    # Use anonymous hyperlinks (double underscore) instead of named ones.
    # https://github.com/sphinx-doc/sphinx/issues/3921
    line = re.sub(r'(`.*? <.*?>`)_', r'\1__', line)
    # inline emphasis: repair bold/italic markers that carry escaped spaces
    # or escaped asterisks.
    line = re.sub(r'\*\*\\ (.*?)\\ \*\*', r' **\1** ', line)
    line = re.sub(r'\*(.*?)\\ \*', r'*\1*', line)
    line = re.sub(r'\*\*(.*?) \*\*', r'**\1** ', line)
    line = re.sub(r'\\\*\\\*(.*?)\*\*', r'**\1**', line)
    line = re.sub(r'\\\*\\\*(.*?)\*\*\\ ', r'**\1**', line)
    line = line.replace(r'\* - `\**', r'* - `**')
    line = re.sub(r'\\\* \*\*(.*?)\*\* \(\\\*\s*(.*?)\s*\*\\ \)', r'* \1 (\2)', line)
    # Point link targets at the .rst files instead of the .md sources.
    line = re.sub(r'\<(.*)\.md(\>|#)', r'<\1.rst\2', line)
    # Drop bold markup inside in-page anchor links.
    line = re.sub(r'`\*\*(.*?)\*\* <#(.*?)>`__', r'`\1 <#\2>`__', line)
    # Move a space that ended up inside the opening bold marker to the outside
    # for a fixed list of known terms.
    line = re.sub(r'\*\* (classArgs|stop|FLOPS.*?|pruned.*?|large.*?|path|pythonPath|2D.*?|codeDirectory|ps|worker|Tuner|Assessor)\*\*',
                  r' **\1**', line)

    line = line.replace('.. code-block:::: bash', '.. code-block:: bash')
    line = line.replace('raw-html-m2r', 'raw-html')
    line = line.replace('[toc]', '.. toctree::')

    # image: an inline raw-html <img> with a zoom style becomes an image
    # directive with the same percentage as :scale:.
    line = re.sub(r'\:raw\-html\:`\<img src\=\"(.*?)\" style\=\"zoom\: ?(\d+)\%\;\" \/\>`', r'\n.. image:: \1\n   :scale: \2%', line)

    # special case (per line handling)
    line = line.replace('Nb = |Db|', r'Nb = \|Db\|')
    line = line.replace('  Here is just a small list of libraries ', '\nHere is just a small list of libraries ')
    line = line.replace('  Find the data management region in job submission page.', 'Find the data management region in job submission page.')
    line = line.replace('Tuner/InstallCustomizedTuner.md', 'Tuner/InstallCustomizedTuner')
    line = line.replace('&#10003;', ':raw-html:`&#10003;`')
    line = line.replace(' **builtinTunerName** and** classArgs**', '**builtinTunerName** and **classArgs**')
    # Raw string: the search text contains a literal backslash followed by a
    # space, which is not a valid escape sequence in a normal string literal.
    line = line.replace(r'`\ ``nnictl ss_gen`` <../Tutorial/Nnictl.rst>`__', '`nnictl ss_gen <../Tutorial/Nnictl.rst>`__')
    line = line.replace('**Step 1. Install NNI, follow the install guide `here <../Tutorial/QuickStart.rst>`__.**',
                        '**Step 1. Install NNI, follow the install guide** `here <../Tutorial/QuickStart.rst>`__.')
    line = line.replace('*Please refer to `here ', 'Please refer to `here ')
    # A line made only of '~' (a heading underline) is rewritten with '^'.
    if line == '~' * len(line):
        line = '^' * len(line)
    return line


def special_case_replace(full_text):
    """Apply whole-document literal replacements and return the new text.

    Two replacements are built in. Further ones are loaded from every file
    in the ``patches`` directory that sits next to this script: each file
    holds the text to find, a ``%%%%%%`` separator line, and the text to
    substitute. A file that does not split into exactly two parts raises
    ``ValueError`` from the unpacking.

    :param full_text: the complete converted document as one string.
    :return: ``full_text`` with every replacement applied.
    """
    substitutions = {
        'PyTorch\n"""""""': '**PyTorch**',
        'Search Space\n============': '.. role:: raw-html(raw)\n   :format: html\n\nSearch Space\n============',
    }

    patch_dir = Path(__file__).parent / 'patches'
    for patch_name in os.listdir(patch_dir):
        with open(patch_dir / patch_name) as patch_file:
            needle, replacement = patch_file.read().split('%%%%%%\n')
        substitutions[needle] = replacement

    # Plain substring replacement, in insertion order (built-ins first).
    for needle, replacement in substitutions.items():
        full_text = full_text.replace(needle, replacement)
    return full_text


def process_table(content):
    content = content.replace('------ |', '------|')
    lines = []
    for line in content.split('\n'):
        if line.startswith('  |'):
            line = line[2:]
        lines.append(line)
    return '\n'.join(lines)


def process_github_link(line):
    """Rewrite repository and documentation-site links in one line.

    * An anonymous hyperlink whose target contains ``blob/v1.9/`` or
      ``tree/v1.9/`` becomes a ``:githublink:`` role that keeps only the
      path after the version segment. Literal (double-backtick) markup
      around the link text is dropped.
    * If the line then contains ``githublink`` and is an italic
      ``*Example: ...*`` sentence, only the ``Example:`` label stays italic.
    * The ``https://nni.readthedocs.io/en/latest`` prefix is removed.

    :param line: a single line of reST text.
    :return: the rewritten line.
    """
    # The dot in the version is escaped so only the literal "v1.9" matches.
    line = re.sub(r'`(\\ ``)?([^`]*?)(``)? \<(.*?)(blob|tree)/v1\.9/(.*?)\>`__', r':githublink:`\2 <\6>`', line)
    if 'githublink' in line:
        line = re.sub(r'\*Example: (.*)\*', r'*Example:* \1', line)
    line = line.replace('https://nni.readthedocs.io/en/latest', '')
    return line


# Driver: for every Markdown file under en_US (except Release_v1.0.md), write a
# converted .rst file next to it and move the Markdown source into the same
# relative location under archive_en_US.
for root, dirs, files in os.walk('en_US'):
    root = Path(root)
    for file in files:
        if not file.endswith('.md') or file == 'Release_v1.0.md':
            continue

        with open(root / file) as f:
            md_content = f.read()

        # Only Nnictl.md gets the table clean-up before conversion.
        if file == 'Nnictl.md':
            md_content = process_table(md_content)

        out = m2r.convert(md_content)
        lines = out.split('\n')
        if lines[0] == '':
            lines = lines[1:]

        # remove code-block eval_rst: drop the directive line and the line
        # after it, then dedent the directive body so it is emitted as is.
        # Dropped lines are marked None and filtered out afterwards so that
        # indices stay stable during the scan.
        i = 0
        while i < len(lines):
            line = lines[i]
            if line.strip() == '.. code-block:: eval_rst':
                space_count = line.index('.')
                lines[i] = None
                # The directive may be the very last line; only drop the
                # following line when there is one.
                if i + 1 < len(lines):
                    lines[i + 1] = None
                if i > 0 and lines[i - 1]:
                    lines[i] = ''  # blank line
                i += 2
                # The body is indented 3 columns deeper than the directive.
                while i < len(lines) and (lines[i].startswith(' ' * (space_count + 3)) or lines[i] == ''):
                    lines[i] = lines[i][space_count + 3:]
                    i += 1
            elif line.strip() == '.. code-block' or line.strip() == '.. code-block::':
                # A code block without a language is labelled as bash; the
                # resulting '::::' form is normalised by single_line_process.
                lines[i] += ':: bash'
                i += 1
            else:
                i += 1

        lines = [kept for kept in lines if kept is not None]

        lines = list(map(single_line_process, lines))

        if file != 'Release.md':
            # githublink
            lines = list(map(process_github_link, lines))

        out = '\n'.join(lines)
        out = special_case_replace(out)

        with open(root / (Path(file).stem + '.rst'), 'w') as f:
            f.write(out)

        # back it up and remove
        moved_root = Path('archive_en_US') / root.relative_to('en_US')
        # parents=True: an ancestor directory that holds no Markdown files is
        # never visited by this branch, so it may not exist yet.
        moved_root.mkdir(parents=True, exist_ok=True)
        shutil.move(root / file, moved_root / file)