#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-o <file>] [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

8
9
10
11
12
13
import os
import sys
import platform
import re
import textwrap

Wenzel Jakob's avatar
Wenzel Jakob committed
14
15
16
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
17
from glob import glob
18
19
from threading import Thread, Semaphore
from multiprocessing import cpu_count
Wenzel Jakob's avatar
Wenzel Jakob committed
20
21
22
23
24
25

# Cursor kinds whose children extract() descends into.  Every kind except
# TRANSLATION_UNIT also appends its spelling to the name prefix that is
# handed down to the children.
RECURSE_LIST = [
    # Pure scopes
    CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE,
    # Declarations that can contain further declarations
    CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL, CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry in `output`.
PRINT_LIST = [
    # Types and enumerators
    CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL, CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Free functions
    CursorKind.FUNCTION_DECL, CursorKind.FUNCTION_TEMPLATE,
    # Members
    CursorKind.CONVERSION_FUNCTION, CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR, CursorKind.FIELD_DECL,
]

# Maps a C++ operator token to the word used for it in generated identifiers.
# The entries are ordered longest token first so that a textual replacement
# pass (see sanitize_name) handles e.g. '<<=' before '<<' and '<'.  The sort
# is stable, so tokens of equal length keep the order in which they are
# listed here.
CPP_OPERATORS = OrderedDict(sorted(
    {
        '<=': 'le',
        '>=': 'ge',
        '==': 'eq',
        '!=': 'ne',
        '[]': 'array',
        '+=': 'iadd',
        '-=': 'isub',
        '*=': 'imul',
        '/=': 'idiv',
        '%=': 'imod',
        '&=': 'iand',
        '|=': 'ior',
        '^=': 'ixor',
        '<<=': 'ilshift',
        '>>=': 'irshift',
        '++': 'inc',
        '--': 'dec',
        '<<': 'lshift',
        '>>': 'rshift',
        '&&': 'land',
        '||': 'lor',
        '!': 'lnot',
        '~': 'bnot',
        '&': 'band',
        '|': 'bor',
        '+': 'add',
        '-': 'sub',
        '*': 'mul',
        '/': 'div',
        '%': 'mod',
        '<': 'lt',
        '>': 'gt',
        '=': 'assign',
        '()': 'call',
    }.items(),
    key=lambda pair: len(pair[0]),
    reverse=True,
))
Wenzel Jakob's avatar
Wenzel Jakob committed
56

57
58
59
# (name, filename, comment) tuples collected by extract()
output = []

# One concurrent extraction job per CPU reported by cpu_count(); the
# semaphore holds one slot per job.
job_count = cpu_count()
job_semaphore = Semaphore(value=job_count)
61

62
63
64
65
66

class NoFilenamesError(ValueError):
    """Raised by mkdoc() when its arguments contain no input filenames."""


Wenzel Jakob's avatar
Wenzel Jakob committed
67
def d(s):
    """Return `s` as a str, decoding it as UTF-8 when it is not one already."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
Wenzel Jakob's avatar
Wenzel Jakob committed
69

70

Wenzel Jakob's avatar
Wenzel Jakob committed
71
def sanitize_name(name):
    """Turn a prefixed C++ entity name into a '__doc_'-prefixed identifier.

    Steps, in order: 'type-parameter-0-N' becomes 'TN'; each 'operator<sym>'
    becomes 'operator_<word>' using CPP_OPERATORS; anything between the first
    '<' and the last '>' is dropped; every non-alphanumeric character becomes
    '_'; runs of '_' are collapsed and one trailing '_' is removed.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator' + symbol, 'operator_' + word)
    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub('_+', '_', name)
    name = re.sub('_$', '', name)
    return '__doc_' + name

80

Wenzel Jakob's avatar
Wenzel Jakob committed
81
82
83
84
def process_comment(comment):
    """Convert a raw C++ documentation comment into re-flowed plain text.

    The comment markers ('/*', '*/', '///' and a leading '*') are removed and
    the indentation shared by all non-empty lines is stripped.  A subset of
    Doxygen commands and HTML tags is then rewritten into a lightweight
    markup, and the prose is re-wrapped to 70 columns.  Text between ```
    fences is copied through without re-wrapping.

    Parameters:
        comment: the raw comment text as a str (may be empty).

    Returns:
        The processed text, without leading newlines or trailing whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    leading_spaces = None
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('///'):
            s = s[3:]
        else:
            if s.startswith('/*'):
                s = s[2:].lstrip('*')
            # Tested independently of the opener so that a one-line block
            # comment such as '/** text */' loses its closing marker too.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        if s.startswith('*'):
            s = s[1:]
        if s:
            indent = len(s) - len(s.lstrip())
            if leading_spaces is None or indent < leading_spaces:
                leading_spaces = indent
        lines.append(s)

    # Drop the indentation common to all non-empty lines
    if leading_spaces is not None:
        lines = [line[leading_spaces:] for line in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    for tag, markup in (
        ('c', r'``\1``'),
        ('a', r'*\1*'),
        ('e', r'*\1*'),
        ('em', r'*\1*'),
        ('b', r'**\1**'),
    ):
        s = re.sub(r'\\%s\s+%s' % (tag, cpp_group), markup, s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    # A leading '$' marks a heading line for the re-flow pass below
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws'
    }.items():
        # The trailing \b stops a short tag ('throw') from matching the
        # beginning of a longer one ('throws') and leaving a stray 's'.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            parts.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            # Code is copied through without re-wrapping
            parts.append(x.strip())
        else:
            # Paragraphs are separated by two or more newlines
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if wrapped.startswith('$'):
                    # Heading: emit it without the marker and indent the
                    # paragraph that follows it.
                    parts.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if wrapped:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')
Wenzel Jakob's avatar
Wenzel Jakob committed
189
190


Wenzel Jakob's avatar
Wenzel Jakob committed
191
def extract(filename, node, prefix):
    """Collect documentation entries for `node` and its descendants.

    Nodes that carry a source file different from `filename` are skipped
    (the function returns 0 for them; otherwise it returns None).  For kinds
    in RECURSE_LIST the children are visited with the node's spelling
    appended to `prefix`; for kinds in PRINT_LIST with a non-empty spelling a
    (name, filename, comment) tuple is appended to the module-level `output`.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind
    if kind in RECURSE_LIST:
        if kind == CursorKind.TRANSLATION_UNIT:
            # The translation unit itself does not contribute to the name
            child_prefix = prefix
        else:
            scopes = [prefix] if len(prefix) > 0 else []
            child_prefix = '_'.join(scopes + [d(node.spelling)])
        for child in node.get_children():
            extract(filename, child, child_prefix)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        if len(node.spelling) > 0:
            qualifier = prefix + '_' if len(prefix) > 0 else prefix
            name = sanitize_name(qualifier + d(node.spelling))
            output.append((name, filename, comment))
Wenzel Jakob's avatar
Wenzel Jakob committed
213

214

215
class ExtractionThread(Thread):
    """Thread that parses one file with libclang and extracts its comments.

    Creating an instance acquires one slot of the module-level
    `job_semaphore` (blocking until a slot is available); run() gives the
    slot back when it finishes, whether or not the extraction succeeded.
    """

    def __init__(self, filename, parameters):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        job_semaphore.acquire()

    def run(self):
        # Everything, including the progress message, is inside the try
        # block: a slot that is never released would make mkdoc() wait
        # forever when it collects all slots at the end.
        try:
            print('Processing "%s" ..' % self.filename, file=sys.stderr)
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '')
        finally:
            job_semaphore.release()

232

233
def mkdoc(args, out_file=sys.stdout):
    """Extract docstrings from C++ headers and write them as a C++ header.

    Parameters:
        args: list of strings.  Items starting with '-' are passed to the
            clang parser; every other item is treated as a file to process.
            '-x c++' is added unless '-x' is among the items, and
            '-std=c++11' is added unless an item starts with '-std='.
        out_file: text stream that receives the generated header.

    Progress messages are written to sys.stderr.

    Raises:
        NoFilenamesError: if `args` contains no filenames.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK listed; skip the option instead of failing
            # when the directory contains no SDK at all.
            sdk_names = next(os.walk(sdk_dir), (None, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif system == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(folder):
            return [int(ver)
                    for ver in re.findall(r'(?<!lib)(?<!\d)\d+', folder)]
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    output.clear()
    for filename in filenames:
        # The constructor blocks while all job slots are in use
        thr = ExtractionThread(filename, parameters)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Each worker gives its slot back when it is done, so holding every slot
    # means that no worker is running any more.
    for _ in range(job_count):
        job_semaphore.acquire()
    # Return the slots.  Without this, the next mkdoc() call in the same
    # process would block forever while creating its first worker.
    for _ in range(job_count):
        job_semaphore.release()

    # Entries that share a name are emitted in filename order; the second
    # and later ones get a numeric suffix (_2, _3, ..) to stay unique.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
329
330
331


if __name__ == '__main__':
    # Command line: every argument is forwarded to mkdoc() except the output
    # option, which is accepted as '-o<path>' or as '-o <path>'.
    args = sys.argv[1:]
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            # Safe although the list is being iterated: the loop is left
            # right after the removal.
            del args[idx]
            try:
                # After the removal, index idx holds the following argument
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                # Diagnostics go to stderr; stdout may carry the generated
                # header.
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
            break
    try:
        if out_path:
            with open(out_path, 'w') as out_file:
                mkdoc(args, out_file)
        else:
            mkdoc(args)
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)