#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-o <output file>] [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

8
9
10
11
12
13
import os
import sys
import platform
import re
import textwrap

Wenzel Jakob's avatar
Wenzel Jakob committed
14
15
16
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
17
from glob import glob
18
19
from threading import Thread, Semaphore
from multiprocessing import cpu_count
Wenzel Jakob's avatar
Wenzel Jakob committed
20
21
22
23
24
25

# Cursor kinds whose children are visited by extract(); every kind except
# TRANSLATION_UNIT also contributes its spelling to the name prefix.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE
]

# Cursor kinds for which extract() records a (name, filename, comment)
# entry in its output list.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL
]

# C++ operator token -> identifier-safe word that sanitize_name() substitutes
# for it (e.g. ``operator+=`` becomes ``operator_iadd``).
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
    'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
    'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
    'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
}

# Re-order so the longest tokens come first: a loop over the items then tries
# '<<=' before '<<' and '<', so a short token never clobbers a longer one.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
Wenzel Jakob's avatar
Wenzel Jakob committed
56

57
58
59
# Number of extraction jobs allowed to run at once: the CPU count reported by
# multiprocessing.cpu_count().
job_count = cpu_count()
# Counting semaphore initialised with ``job_count`` permits.
job_semaphore = Semaphore(value=job_count)

60
61
62
63
64

class NoFilenamesError(ValueError):
    """Raised by mkdoc() when its argument list names no input files."""


Wenzel Jakob's avatar
Wenzel Jakob committed
65
def d(s):
    """Return ``s`` as text.

    ``str`` values are returned unchanged; anything else is decoded from
    UTF-8 via its ``decode`` method (e.g. ``bytes``).
    """
    if isinstance(s, str):
        return s
    return s.decode('utf8')
Wenzel Jakob's avatar
Wenzel Jakob committed
67

68

Wenzel Jakob's avatar
Wenzel Jakob committed
69
def sanitize_name(name):
    """Turn a C++ entity name into a ``__doc_``-prefixed identifier.

    The transformations are applied in this order:

    1. ``type-parameter-0-N`` becomes ``TN``.
    2. ``operator<token>`` becomes ``operator_<word>`` for every entry of
       ``CPP_OPERATORS``.
    3. Anything between the first ``<`` and the last ``>`` is dropped.
    4. Every non-alphanumeric character becomes ``_``; runs of ``_`` are
       collapsed and a single trailing ``_`` is removed.

    Returns:
        The sanitized name with ``__doc_`` prepended.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    # Operators must be rewritten before '<'/'>' are interpreted as template
    # brackets below, otherwise e.g. "operator<" would lose its token.
    for token, word in CPP_OPERATORS.items():
        name = name.replace('operator%s' % token, 'operator_%s' % word)
    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub('_$', '', re.sub('_+', '_', name))
    return '__doc_' + name

78

Wenzel Jakob's avatar
Wenzel Jakob committed
79
80
81
82
def process_comment(comment):
    """Convert a raw C++/Doxygen comment into reflowed plain text.

    Processing happens in four stages:

    1. The comment delimiters (``/* ... */``, ``///``, leading ``*``) are
       stripped and the common leading indentation is removed.
    2. A fixed set of backslash-style Doxygen commands is rewritten
       (``\\c``, ``\\a``, ``\\e``, ``\\em``, ``\\b``, ``\\ingroup``,
       ``\\param``, ``\\tparam``, ``\\return(s)``, ``\\throw(s)``,
       ``\\code`` ... ``\\endcode`` and a few more).
    3. A few HTML/TeX constructs (``<tt>``, ``<pre>``, ``<em>``, ``<b>``,
       ``<li>``, ``<ul>``, ``\\f$``) are rewritten.
    4. Paragraphs are re-wrapped to 70 columns; text inside triple-backtick
       code fences is kept verbatim, and the paragraph following a section
       heading (``Parameter ...:``, ``Returns:``, ...) is indented by four
       spaces.

    Args:
        comment: The raw comment text, including its comment delimiters.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:]
            # A one-line block comment ("/** text */") opens and closes on
            # the same line, so the terminator has to be removed here too.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
            s = s.lstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if len(s) > 0:
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        lines.append(s)

    # Strip the indentation shared by all non-empty lines.
    if leading_spaces != float('inf'):
        lines = [s[leading_spaces:] for s in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    # A leading '$' marks a section heading for the re-flow stage below.
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws'
    }.items():
        # The word boundary keeps a short command (\return, \author, \throw)
        # from matching the first letters of its longer variant.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    pieces = []
    in_code_segment = False
    # The capturing group keeps the fences in the split result so that the
    # text between an opening and a closing fence can be passed through.
    for x in re.split(r'(```)', s):
        if x == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    # Section heading: emit it without the marker and indent
                    # the paragraph that follows.
                    pieces.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')
Wenzel Jakob's avatar
Wenzel Jakob committed
187
188


Dan's avatar
Dan committed
189
def extract(filename, node, prefix, output):
    """Recursively collect documentation comments below a libclang cursor.

    Args:
        filename: Path of the header being processed. Cursors whose location
            refers to a different file are skipped.
        node: The cursor to inspect.
        prefix: Underscore-joined spellings of the enclosing scopes, or ``''``
            at the top level.
        output: List that receives one ``(name, filename, comment)`` tuple
            per documented entity; it is appended to in place.

    Returns:
        ``0`` when the cursor belongs to another file, otherwise ``None``.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    if node.kind in RECURSE_LIST:
        child_prefix = prefix
        # The translation unit itself is not part of any qualified name.
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if len(child_prefix) > 0:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
        qualified = prefix
        if len(qualified) > 0:
            qualified += '_'
        # Entities without a spelling cannot be named, so they are dropped.
        if len(node.spelling) > 0:
            name = sanitize_name(qualified + d(node.spelling))
            output.append((name, filename, comment))
Wenzel Jakob's avatar
Wenzel Jakob committed
210

211

212
class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its comments.

    Constructing an instance acquires one ``job_semaphore`` permit, so the
    constructor blocks while no permit is available; the permit is released
    when ``run`` finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters, output):
        """Store the job description and reserve a job slot.

        Args:
            filename: Header file to parse.
            parameters: Argument list passed to ``index.parse``.
            output: List handed to ``extract`` to receive the results.
        """
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        # Taken here rather than in run() so that the code creating the
        # threads is throttled.
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its entries to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            # NOTE(review): the two positional flags presumably map to
            # libclang's excludeDeclarationsFromPCH / displayDiagnostics
            # arguments -- confirm against the libclang documentation.
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            job_semaphore.release()

230

231
def mkdoc(args, out_file=sys.stdout):
    """Extract docstrings from C++ headers and write them as a C++ header.

    Args:
        args: Command-line style arguments. Items starting with ``-`` are
            passed on as compiler flags; every other item is treated as a
            header file to process.
        out_file: Text stream that receives the generated header.

    Raises:
        NoFilenamesError: If ``args`` contains no header file.
    """
    parameters = []
    filenames = []
    # Default to C++11 unless the caller chose a language/standard itself.
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK listed; an empty or unreadable directory
            # simply means no sysroot is passed.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)
    elif platform.system() == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(folder):
            return [int(ver)
                    for ver in re.findall(r'(?<!lib)(?<!\d)\d+', folder)]
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    output = []
    threads = []
    for filename in filenames:
        # The constructor blocks while all job slots are taken, which
        # limits the number of concurrently running workers.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the workers instead of draining job_semaphore: draining would
    # leave the semaphore empty, and a later mkdoc() call in the same
    # process would then block forever when creating its first worker.
    for thr in threads:
        thr.join()

    # Entries that share a name (e.g. overloads) get a numeric suffix
    # starting at _2; sorting makes the numbering deterministic.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
327
328
329


if __name__ == '__main__':
    # Command-line entry point: an optional "-o<path>" / "-o <path>" selects
    # the output file; everything else is handed to mkdoc().
    args = sys.argv[1:]
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            try:
                # With a bare "-o" the path is the next argument, which now
                # sits at the position the flag occupied.
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                # Diagnostics go to stderr because stdout is the default
                # destination of the generated header.
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
            break
    try:
        if out_path:
            try:
                with open(out_path, 'w') as out_file:
                    mkdoc(args, out_file)
            except BaseException:
                # In the event of an error, don't leave a partially-written
                # output file.
                try:
                    os.unlink(out_path)
                except OSError:
                    pass
                raise
        else:
            mkdoc(args)
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)