mkdoc.py 10.2 KB
Newer Older
Wenzel Jakob's avatar
Wenzel Jakob committed
1
2
3
4
5
6
7
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

8
9
10
11
12
13
import os
import sys
import platform
import re
import textwrap

Wenzel Jakob's avatar
Wenzel Jakob committed
14
15
16
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
17
18
from threading import Thread, Semaphore
from multiprocessing import cpu_count
Wenzel Jakob's avatar
Wenzel Jakob committed
19
20
21
22
23
24

# Cursor kinds that act as containers: extract() descends into their
# children and (except for the translation unit itself) appends their
# spelling to the symbol prefix.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() emits a ``__doc_*`` string constant.
PRINT_LIST = [
    # Types
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Callables
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    # Data members
    CursorKind.FIELD_DECL,
]

# Maps the symbol of a C++ operator to the word used for it in generated
# identifiers. The entries are ordered longest symbol first so that
# sanitize_name() replaces e.g. 'operator<<=' before 'operator<<' and
# 'operator<'; symbols of equal length keep the order they are listed in.
CPP_OPERATORS = OrderedDict(sorted(
    [
        ('<=', 'le'),
        ('>=', 'ge'),
        ('==', 'eq'),
        ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'),
        ('-=', 'isub'),
        ('*=', 'imul'),
        ('/=', 'idiv'),
        ('%=', 'imod'),
        ('&=', 'iand'),
        ('|=', 'ior'),
        ('^=', 'ixor'),
        ('<<=', 'ilshift'),
        ('>>=', 'irshift'),
        ('++', 'inc'),
        ('--', 'dec'),
        ('<<', 'lshift'),
        ('>>', 'rshift'),
        ('&&', 'land'),
        ('||', 'lor'),
        ('!', 'lnot'),
        ('~', 'bnot'),
        ('&', 'band'),
        ('|', 'bor'),
        ('+', 'add'),
        ('-', 'sub'),
        ('*', 'mul'),
        ('/', 'div'),
        ('%', 'mod'),
        ('<', 'lt'),
        ('>', 'gt'),
        ('=', 'assign'),
        ('()', 'call'),
    ],
    key=lambda pair: len(pair[0]),
    reverse=True))
Wenzel Jakob's avatar
Wenzel Jakob committed
55

56
57
58
# One extraction job per CPU reported by multiprocessing.cpu_count(); the
# semaphore starts with that many permits and is acquired once per
# ExtractionThread.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Number of times each sanitized identifier has been handed out so far;
# sanitize_name() uses it to append a numeric suffix to repeated names.
registered_names = {}

61

Wenzel Jakob's avatar
Wenzel Jakob committed
62
63
64
def d(s):
    """Return ``s`` as text.

    Byte strings are decoded as UTF-8. Values that are already ``str`` are
    returned unchanged, so the helper works whether the clang bindings hand
    back ``bytes`` or ``str`` (calling ``.decode`` on a ``str`` would raise
    ``AttributeError`` on Python 3).
    """
    if isinstance(s, bytes):
        return s.decode('utf8')
    return s

65

Wenzel Jakob's avatar
Wenzel Jakob committed
66
67
def sanitize_name(name):
    """Turn a prefixed C++ symbol name into a unique ``__doc_*`` identifier.

    The name is rewritten in this order: ``type-parameter-0-N`` becomes
    ``TN``; ``operator<symbol>`` becomes ``operator_<word>`` using
    CPP_OPERATORS; everything from the first ``<`` to the last ``>`` is
    dropped; every non-alphanumeric character becomes ``_``; runs of ``_``
    are collapsed and one trailing ``_`` is removed.

    The first time a given result is produced it is returned as is (with
    the ``__doc_`` prefix); every later occurrence gets ``_<count>``
    appended, where the count is tracked in the module-level
    ``registered_names`` dictionary.
    """
    ident = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        ident = ident.replace('operator%s' % symbol, 'operator_%s' % word)
    ident = re.sub('<.*>', '', ident)
    ident = ''.join(ch if ch.isalnum() else '_' for ch in ident)
    ident = re.sub('_+', '_', ident)
    ident = re.sub('_$', '', ident)

    # Count this occurrence under the un-suffixed identifier.
    occurrence = registered_names.get(ident, 0) + 1
    registered_names[ident] = occurrence
    if occurrence > 1:
        ident += '_' + str(occurrence)
    return '__doc_' + ident

81

Wenzel Jakob's avatar
Wenzel Jakob committed
82
83
84
85
def _strip_comment_markers(line):
    """Strip the C++ comment delimiters from one line of a raw comment."""
    line = line.strip()
    if line.startswith('/*'):
        line = line[2:]
        # A one-line block comment ("/** text */") opens and closes on the
        # same line, so the closing marker has to be removed here as well.
        if line.endswith('*/'):
            line = line[:-2].rstrip('*')
        line = line.lstrip('*')
    elif line.endswith('*/'):
        line = line[:-2].rstrip('*')
    elif line.startswith('///'):
        line = line[3:]
    # Leading '*' of a continuation line inside a block comment.
    if line.startswith('*'):
        line = line[1:]
    return line


def process_comment(comment):
    """Convert a raw C++ documentation comment into plain docstring text.

    The comment delimiters and the common leading indentation are removed,
    a set of Doxygen commands and HTML/TeX tags is rewritten into
    reST/Markdown-like markup, and the text is re-flowed to 70 columns.
    Text between triple-backtick fences (produced from code/endcode and
    <pre>) is kept verbatim; paragraphs following a section heading such as
    "Returns:" are indented by four spaces.

    Parameters:
        comment: the comment text including its delimiters; may be empty.

    Returns:
        The processed text without leading newlines or trailing whitespace
        (an empty string for an empty comment).
    """
    # Remove C++ comment syntax
    lines = [_strip_comment_markers(raw)
             for raw in comment.expandtabs(tabsize=4).splitlines()]

    # Remove the indentation shared by all non-empty lines
    indents = [len(line) - len(line.lstrip()) for line in lines if line]
    if indents:
        margin = min(indents)
        lines = [line[margin:] for line in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # Section commands. A leading '$' marks the replacement as a heading
    # for the re-flow step below. The trailing word boundary keeps a short
    # command from consuming the start of a longer one (e.g. "throw" inside
    # "throws"), which would leave a stray "s" in the text.
    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    result = ''
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            if not in_code_segment:
                result += '```\n'
            else:
                result += '\n```\n\n'
            in_code_segment = not in_code_segment
        elif in_code_segment:
            result += x.strip()
        else:
            # Paragraphs are separated by one or more blank lines.
            for y in re.split(r'(?: *\n *){2,}', x):
                text = re.sub(r'\s+', ' ', y).strip()
                if text.startswith('$'):
                    # Headings are never indented, even when they directly
                    # follow another heading that has no body text.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    result += wrapper.fill(text)[1:] + '\n'
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(text)
                    if len(wrapped) > 0:
                        result += wrapped + '\n\n'
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return result.rstrip().lstrip('\n')
Wenzel Jakob's avatar
Wenzel Jakob committed
190
191


192
def extract(filename, node, prefix, output):
    """Collect docstring definitions for ``node`` and its children.

    Parameters:
        filename: path of the header being processed; nodes whose location
            refers to a different file are skipped.
        node: the clang cursor to inspect.
        prefix: underscore-joined spellings of the enclosing scopes.
        output: list that receives one C++ ``static const char *`` raw
            string definition per documented entity.

    Returns:
        The number of definitions appended for this node and everything
        below it.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    count = 0
    kind = node.kind

    if kind in RECURSE_LIST:
        # The translation unit contributes nothing to the prefix; every
        # other container appends its own spelling.
        if kind == CursorKind.TRANSLATION_UNIT:
            child_prefix = prefix
        elif len(prefix) > 0:
            child_prefix = prefix + '_' + d(node.spelling)
        else:
            child_prefix = prefix + d(node.spelling)

        for child in node.get_children():
            count += extract(filename, child, child_prefix, output)

        # A container without any extracted member produces no entry of
        # its own.
        if count == 0:
            return 0

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))

        if len(node.spelling) > 0:
            separator = '_' if len(prefix) > 0 else ''
            name = sanitize_name(prefix + separator + d(node.spelling))
            # Multi-line comments start on their own line.
            joiner = '\n' if '\n' in comment else ' '
            output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                          (name, joiner, comment))
            count += 1

    return count

220

221
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    Creating an instance acquires one permit of the module-level
    ``job_semaphore`` (blocking the creating thread while none is
    available); the permit is released when ``run`` finishes, whether or
    not parsing raised.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename = filename      # header file to parse
        self.parameters = parameters  # command line arguments for clang
        self.output = output          # list receiving the definitions
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()

Wenzel Jakob's avatar
Wenzel Jakob committed
239
240
241
242
if __name__ == '__main__':
    # Arguments passed to clang for every header; command line arguments
    # starting with '-' are appended below.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        # Use the libclang and the SDK shipped with Xcode when present.
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # First sub-directory of the SDK folder; skip '-isysroot'
            # instead of crashing when the folder is empty or unreadable.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        # Standard output carries the generated header, so the usage
        # message goes to standard error.
        print('Syntax: %s [-I<path> ..] [.. a list of header files ..]'
              % sys.argv[0], file=sys.stderr)
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    for filename in filenames:
        # The constructor blocks while job_count extractions are running.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Every thread returns its permit when it is done, so holding all
    # permits means that no extraction is running any more.
    for i in range(job_count):
        job_semaphore.acquire()

    output.sort()
    for line in output:
        print(line)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')