#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-o <path>] [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

8
9
10
11
12
13
import os
import sys
import platform
import re
import textwrap

Wenzel Jakob's avatar
Wenzel Jakob committed
14
15
16
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
17
from glob import glob
18
19
from threading import Thread, Semaphore
from multiprocessing import cpu_count
Wenzel Jakob's avatar
Wenzel Jakob committed
20
21
22
23
24
25

# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

44
45
46
47
# Cursor kinds that extract() recurses into without appending their
# spelling to the name prefix handed down to their children.
PREFIX_BLACKLIST = [CursorKind.TRANSLATION_UNIT]

Wenzel Jakob's avatar
Wenzel Jakob committed
48
# Maps the symbol part of a C++ ``operator<sym>`` name to an
# identifier-safe suffix (used by sanitize_name()).
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift',
    '>>': 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul',
    '/': 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign',
    '()': 'call'
}

# Order the table longest symbol first, so that a plain in-order scan
# tries e.g. '<<=' before '<<' and '<'. The sort is stable, so symbols of
# equal length keep the order in which they are listed above.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
Wenzel Jakob's avatar
Wenzel Jakob committed
60

61
62
63
# Number of semaphore slots: one per CPU reported by cpu_count().
job_count = cpu_count()
# Counting semaphore initialised with job_count free slots.
job_semaphore = Semaphore(value=job_count)

64
65
66
67
68

class NoFilenamesError(ValueError):
    """Raised when the argument list does not contain any filename."""


Wenzel Jakob's avatar
Wenzel Jakob committed
69
def d(s):
    """Return ``s`` as ``str``.

    A ``str`` is returned unchanged; anything else is decoded as UTF-8
    via its ``decode`` method.
    """
    if isinstance(s, str):
        return s
    return s.decode('utf8')
Wenzel Jakob's avatar
Wenzel Jakob committed
71

72

Wenzel Jakob's avatar
Wenzel Jakob committed
73
def sanitize_name(name):
    """Turn a prefixed C++ name into the identifier of its docstring variable.

    Steps, in order:
      * ``type-parameter-0-N`` becomes ``TN``;
      * ``operator<sym>`` becomes ``operator_<word>`` for every entry of
        ``CPP_OPERATORS`` (tried in that table's iteration order);
      * everything from the first ``<`` to the last ``>`` is dropped;
      * every remaining non-alphanumeric character becomes ``_``, runs of
        ``_`` are collapsed and one trailing ``_`` is removed.

    Returns the result prefixed with ``__doc_``.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator%s' % symbol, 'operator_%s' % word)
    # Operators were rewritten above, so any '<...>' left is not an
    # operator symbol from the table.
    name = re.sub(r'<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub(r'_+', '_', name)
    name = re.sub(r'_$', '', name)
    return '__doc_' + name

82

Wenzel Jakob's avatar
Wenzel Jakob committed
83
84
85
86
def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    The comment delimiters (``/* ... */``, leading ``*``, ``///``) and the
    common indentation are removed, a set of Doxygen commands and simple
    HTML tags is rewritten into reST-like markup, and the prose is
    re-wrapped to 70 columns. Text between code fences is copied without
    re-wrapping.

    Parameters:
        comment: the comment text as ``str`` (may be empty).

    Returns:
        The processed text, without leading newlines or trailing
        whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:]
            # A one-line '/** text */' comment opens and closes on the same
            # line, so the terminator has to be removed here as well.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
            s = s.lstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        lines.append(s)

    # Strip the indentation shared by all non-empty lines.
    indents = [len(s) - len(s.lstrip()) for s in lines if s]
    if indents:
        margin = min(indents)
        lines = [s[margin:] for s in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # A leading '$' marks a section heading for the re-flow step below.
    for in_, out_ in {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'remarks': 'Remark',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws'
    }.items():
        # The word boundary keeps a short command from matching the start
        # of a longer one (e.g. '\throw' inside '\throws').
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper(width=70, expand_tabs=True,
                                   replace_whitespace=True,
                                   drop_whitespace=True)
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            parts.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            # Code is copied through without re-wrapping.
            parts.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if wrapped.startswith('$'):
                    # Section heading: emit it without the marker and
                    # indent the paragraph that follows it.
                    parts.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if wrapped:
                        parts.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')
Wenzel Jakob's avatar
Wenzel Jakob committed
191
192


Dan's avatar
Dan committed
193
def extract(filename, node, prefix, output):
    """Collect docstring entries for ``node`` and its descendants.

    Parameters:
        filename: path of the header being processed; nodes located in a
            different file are skipped.
        node: cursor to inspect (its ``kind``, ``spelling``,
            ``raw_comment``, ``location`` and children are read).
        prefix: underscore-joined spelling of the enclosing scopes.
        output: list that receives ``(name, filename, comment)`` tuples.

    Returns:
        ``0`` when the node belongs to another file, ``None`` otherwise.
    """
    if not (node.location.file is None or
            os.path.samefile(d(node.location.file.name), filename)):
        return 0

    if node.kind in RECURSE_LIST:
        # Children see this node's spelling as part of their prefix,
        # unless the kind is excluded via PREFIX_BLACKLIST.
        sub_prefix = prefix
        if node.kind not in PREFIX_BLACKLIST:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, sub_prefix, output)

    # Nodes without a spelling get no entry, so the comment is only
    # processed when an entry is actually recorded.
    if node.kind in PRINT_LIST and len(node.spelling) > 0:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        sub_prefix = prefix
        if len(sub_prefix) > 0:
            sub_prefix += '_'
        name = sanitize_name(sub_prefix + d(node.spelling))
        output.append((name, filename, comment))
Wenzel Jakob's avatar
Wenzel Jakob committed
214

215

216
class ExtractionThread(Thread):
    """Thread that parses one header and appends its docstrings to a list.

    Constructing an instance acquires one slot of ``job_semaphore``
    (blocking the constructing thread until a slot is free); the slot is
    released when ``run`` finishes, whether or not it raised.
    """

    def __init__(self, filename, parameters, output):
        """Store the job description and take a ``job_semaphore`` slot.

        Parameters:
            filename: header file to parse.
            parameters: argument list passed to ``index.parse``.
            output: list that ``extract`` appends result tuples to.
        """
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and extract its documentation."""
        try:
            # Inside the try so that the slot is released even if writing
            # the progress message fails.
            print('Processing "%s" ..' % self.filename, file=sys.stderr)
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            job_semaphore.release()

234

235
def read_args(args):
236
    parameters = []
Wenzel Jakob's avatar
Wenzel Jakob committed
237
    filenames = []
238
239
240
241
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')
Wenzel Jakob's avatar
Wenzel Jakob committed
242

243
    if platform.system() == 'Darwin':
244
245
246
247
248
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

249
250
251
        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

252
253
        if os.path.exists(sdk_dir):
            sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
254
            parameters.append('-isysroot')
255
            parameters.append(sysroot_dir)
256
257
258
259
260
261
262
263
264
265
266
267
268
269
    elif platform.system() == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(d):
            return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', d)]
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])
270

271
    for item in args:
Wenzel Jakob's avatar
Wenzel Jakob committed
272
273
274
275
276
277
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
278
        raise NoFilenamesError("args parameter did not contain any filenames")
Wenzel Jakob's avatar
Wenzel Jakob committed
279

280
281
282
283
284
    return parameters, filenames


def extract_all(args):
    """Extract the documentation of every header named in ``args``.

    One ``ExtractionThread`` is started per filename; the function
    returns once all of them have finished.

    Parameters:
        args: command-line arguments as accepted by ``read_args``.

    Returns:
        List of ``(name, filename, comment)`` tuples collected by the
        threads, in the order they were appended.

    Raises:
        NoFilenamesError: propagated from ``read_args``.
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the threads rather than draining job_semaphore: drained slots
    # were never handed back, so a second call in the same process would
    # block forever when creating its first thread.
    for thr in threads:
        thr.join()

    return output


def write_header(comments, out_file=None):
    """Write the C++ header that defines one string constant per comment.

    Parameters:
        comments: iterable of ``(name, filename, comment)`` tuples.
            Entries are written sorted by ``(name, filename)``; when a
            name repeats, the later occurrences get the suffixes ``_2``,
            ``_3``, ...
        out_file: text stream to write to. ``None`` (the default) means
            ``sys.stdout`` as it is at call time.
    """
    if out_file is None:
        # Resolved here rather than in the signature so that a stdout
        # replaced after import is honoured.
        out_file = sys.stdout

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            # Repeated name: number the duplicates starting at 2.
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        # Multi-line comments start on their own line.
        separator = '\n' if '\n' in comment else ' '
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, separator, comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
341
342


343
344
def mkdoc(args):
    """Run the extraction for ``args`` and write the resulting header.

    The first argument starting with ``-o`` selects the output file,
    either attached (``-o<path>``) or as the following argument
    (``-o <path>``); it is removed before the remaining arguments are
    passed to ``extract_all``. Without it the header goes to standard
    output.

    Parameters:
        args: iterable of command-line argument strings.

    Raises:
        NoFilenamesError: propagated from ``extract_all``.
        SystemExit: with status -1 when ``-o`` is last and has no path.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            try:
                # After the deletion, index idx holds the next argument.
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error, don't leave a partially-written
            # output file.
            try:
                os.unlink(out_path)
            except OSError:
                # Best effort: the file may never have been created.
                pass
            raise
    else:
        write_header(comments)


if __name__ == '__main__':
    # Script entry point: print the usage line and exit with status -1
    # when no header file was given.
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        sys.exit(-1)