#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  Syntax: mkdoc.py [-o <output file>] [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#
import contextlib
import os
import platform
import re
import sys
import textwrap
from collections import OrderedDict
from glob import glob
from multiprocessing import cpu_count
from threading import Semaphore, Thread

from clang import cindex
from clang.cindex import CursorKind
Wenzel Jakob's avatar
Wenzel Jakob committed
21
22
23
24
25
26

# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds that are recursed into without adding their spelling to the
# generated name prefix.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]

Wenzel Jakob's avatar
Wenzel Jakob committed
49
# Maps a C++ operator token to the identifier-safe word that replaces it in
# generated names ('operator<<' becomes 'operator_lshift').
CPP_OPERATORS = {
    '<=': 'le',
    '>=': 'ge',
    '==': 'eq',
    '!=': 'ne',
    '[]': 'array',
    '+=': 'iadd',
    '-=': 'isub',
    '*=': 'imul',
    '/=': 'idiv',
    '%=': 'imod',
    '&=': 'iand',
    '|=': 'ior',
    '^=': 'ixor',
    '<<=': 'ilshift',
    '>>=': 'irshift',
    '++': 'inc',
    '--': 'dec',
    '<<': 'lshift',
    '>>': 'rshift',
    '&&': 'land',
    '||': 'lor',
    '!': 'lnot',
    '~': 'bnot',
    '&': 'band',
    '|': 'bor',
    '+': 'add',
    '-': 'sub',
    '*': 'mul',
    '/': 'div',
    '%': 'mod',
    '<': 'lt',
    '>': 'gt',
    '=': 'assign',
    '()': 'call',
}

# Longest tokens first, so that a plain textual replacement handles '<<='
# before '<<' and '<<' before '<'.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
Wenzel Jakob's avatar
Wenzel Jakob committed
61

62
63
64
# Number of extraction jobs allowed to run at the same time: one per CPU.
job_count = cpu_count()
# Counting semaphore holding `job_count` permits; ExtractionThread takes a
# permit when it is constructed and returns it when its run() finishes.
job_semaphore = Semaphore(value=job_count)

65
66
67
68
69

class NoFilenamesError(ValueError):
    """Raised by read_args() when the arguments contain no filenames."""


Wenzel Jakob's avatar
Wenzel Jakob committed
70
def d(s):
    """Return *s* as text, decoding it as UTF-8 when it is not a ``str``."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
Wenzel Jakob's avatar
Wenzel Jakob committed
72

73

Wenzel Jakob's avatar
Wenzel Jakob committed
74
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_...`` identifier.

    Steps, in order:
      * ``type-parameter-0-N`` placeholders become ``TN``;
      * ``operator<token>`` becomes ``operator_<word>`` using CPP_OPERATORS
        (which is ordered longest token first);
      * anything between the first ``<`` and the last ``>`` is dropped;
      * every non-alphanumeric character becomes ``_``, runs of ``_`` are
        collapsed and a single trailing ``_`` is removed.

    Returns the resulting name prefixed with ``__doc_``.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for token, word in CPP_OPERATORS.items():
        name = name.replace('operator%s' % token, 'operator_%s' % word)
    # Operators are replaced first so that 'operator<' is not mistaken for
    # the start of a template argument list here.
    name = re.sub(r'<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub(r'_+', '_', name)
    name = re.sub(r'_$', '', name)
    return '__doc_' + name

83

Wenzel Jakob's avatar
Wenzel Jakob committed
84
85
86
87
def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters and the common leading indentation."""
    lines = []
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()
        if line.startswith('/*'):
            line = line[2:]
            # A one-line block comment ('/** text */') carries its
            # terminator on the same line; strip it here as well.
            if line.endswith('*/'):
                line = line[:-2].rstrip('*')
            line = line.lstrip('*')
        elif line.endswith('*/'):
            line = line[:-2].rstrip('*')
        elif line.startswith('///'):
            line = line[3:]
        # Leading '*' of a continuation line inside a block comment.
        if line.startswith('*'):
            line = line[1:]
        lines.append(line)

    # Dedent by the smallest indentation found on any non-empty line.
    indents = [len(line) - len(line.lstrip()) for line in lines if line]
    margin = min(indents) if indents else 0
    return ''.join(line[margin:] + '\n' for line in lines)


def _convert_markup(s):
    """Rewrite Doxygen commands and a few HTML/TeX tags as lightweight markup.

    Section-like commands are turned into paragraphs starting with ``$``;
    that marker is consumed later by _reflow().
    """
    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_tags = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for in_, out_ in section_tags:
        # The word boundary keeps a short tag from matching the prefix of a
        # longer one (e.g. 'throw' inside 'throws'), which would leave a
        # stray 's' in the text.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(text):
    """Re-wrap prose to 70 columns, leaving ``` code segments unwrapped."""
    wrapper = textwrap.TextWrapper(
        width=70, expand_tabs=True, replace_whitespace=True,
        drop_whitespace=True, initial_indent='', subsequent_indent='')

    chunks = []
    in_code_segment = False
    for piece in re.split(r'(```)', text):
        if piece == '```':
            chunks.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            chunks.append(piece.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for paragraph in re.split(r'(?: *\n *){2,}', piece):
                flat = re.sub(r'\s+', ' ', paragraph).strip()
                if flat.startswith('$'):
                    # Section heading: never indented, even when it follows
                    # another heading that had no body of its own.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    chunks.append(wrapper.fill(flat)[1:] + '\n')
                    # The paragraph after a heading is its indented body.
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(flat)
                    if wrapped:
                        chunks.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(chunks).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into docstring text.

    The comment delimiters and common indentation are removed, Doxygen
    commands and a handful of HTML/TeX tags are rewritten as lightweight
    markup, and the prose is re-wrapped to 70 columns (text between ```
    fences is kept as is).

    Parameters:
        comment: the raw comment text, including its delimiters; may be
            empty.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    return _reflow(_convert_markup(_strip_comment_syntax(comment)))
Wenzel Jakob's avatar
Wenzel Jakob committed
192
193


Dan's avatar
Dan committed
194
def extract(filename, node, prefix, output):
    """Collect docstring entries for *node* and, where applicable, its children.

    Parameters:
        filename: path of the header being processed; nodes whose location
            is in a different file are skipped.
        node: the cursor to inspect (its ``kind``, ``spelling``,
            ``raw_comment``, ``location`` and children are read).
        prefix: underscore-joined names of the enclosing scopes ('' at the
            top level).
        output: list that receives ``(name, filename, comment)`` tuples.

    Returns:
        0 when the node is skipped because it belongs to another file,
        otherwise None.
    """
    node_file = node.location.file
    if node_file is not None and \
            not os.path.samefile(d(node_file.name), filename):
        return 0

    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind not in PREFIX_BLACKLIST:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, sub_prefix, output)

    if node.kind in PRINT_LIST and len(node.spelling) > 0:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        sub_prefix = prefix
        if len(sub_prefix) > 0:
            sub_prefix += '_'
        name = sanitize_name(sub_prefix + d(node.spelling))
        output.append((name, filename, comment))
Wenzel Jakob's avatar
Wenzel Jakob committed
215

216

217
class ExtractionThread(Thread):
    """Thread that parses one header with libclang and extracts its comments.

    Constructing an instance acquires one permit from ``job_semaphore``, so
    the constructor blocks while all permits are taken; the permit is
    released when run() finishes, whether or not it raised.
    """

    def __init__(self, filename, parameters, output):
        """Store the job description and reserve a job slot.

        Parameters:
            filename: header file to parse.
            parameters: list of command-line arguments passed to clang.
            output: list that extract() appends result tuples to.
        """
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its docstrings to the output."""
        try:
            print('Processing "%s" ..' % self.filename, file=sys.stderr)
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            # Always hand the slot back, even if parsing failed.
            job_semaphore.release()

235

236
def read_args(args):
    """Split command-line arguments into clang parameters and filenames.

    Default parameters are added first: ``-x c++`` unless ``-x`` is among
    *args*, and ``-std=c++11`` unless a ``-std=`` option is. On macOS and
    Linux the function also tries to locate libclang and a suitable system
    include directory. Every argument starting with ``-`` is then appended
    to the parameters; everything else is treated as a filename.

    Parameters:
        args: iterable container of command-line argument strings.

    Returns:
        A ``(parameters, filenames)`` tuple of lists.

    Raises:
        NoFilenamesError: if *args* contains no filenames.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    def version_key(path):
        # Numeric components of a path, compared as integers so that
        # version 10 sorts after version 9. Digits that directly follow
        # 'lib' (as in 'lib64') are not part of a version.
        return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', path)]

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK directory listed, if there is any.
            sdk_names = next(os.walk(sdk_dir), (None, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif system == 'Linux':
        # ctypes.util.find_library does not find `libclang` for all clang
        # versions and distributions. LLVM switched to a monolithic setup
        # that includes everything under /usr/lib/llvm-{version_number}/
        # We therefore glob for the library and select the highest version.
        # When nothing is found the cindex default lookup is left in place.
        library_files = glob("/usr/lib/llvm-*/lib/libclang.so")
        if library_files:
            cindex.Config.set_library_file(
                max(library_files, key=version_key))

        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=version_key)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames


def extract_all(args):
    """Extract the documentation of every header named in *args*.

    One ExtractionThread is started per filename; the number running at
    once is limited by the job semaphore those threads acquire.

    Parameters:
        args: command-line arguments, as accepted by read_args().

    Returns:
        The list of ``(name, filename, comment)`` tuples collected by all
        threads.

    Raises:
        NoFilenamesError: propagated from read_args().
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the threads rather than draining the semaphore: every permit is
    # back in place afterwards, so this function can be called again.
    for thr in threads:
        thr.join()

    return output


def write_header(comments, out_file=None):
    """Write the extracted docstrings as a C++ header.

    Parameters:
        comments: iterable of ``(name, filename, comment)`` tuples. Entries
            are written sorted by name, then filename; when a name occurs
            more than once, the later occurrences get the suffixes ``_2``,
            ``_3``, ...
        out_file: text stream to write to. Defaults to ``sys.stdout``,
            looked up at call time.
    """
    if out_file is None:
        out_file = sys.stdout

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            # Repeated name (e.g. an overload): disambiguate with a counter.
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        # Multi-line comments start on their own line after the '='.
        separator = '\n' if '\n' in comment else ' '
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, separator, comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
349
350


351
352
def mkdoc(args):
    """Run the extraction for *args* and write the resulting header.

    The first argument starting with ``-o`` selects the output file, given
    either attached (``-ofile``) or as the following argument
    (``-o file``). Without it the header is written to standard output.
    All remaining arguments are passed to extract_all().

    Parameters:
        args: iterable of command-line argument strings (not modified).

    Raises:
        SystemExit: with code -1 when ``-o`` is given without a file name.
        NoFilenamesError: propagated from extract_all().
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            out_path = arg[2:]
            if not out_path:
                # Detached form: the file name is the next argument, which
                # now sits at position idx.
                if idx >= len(args):
                    print("-o flag requires an argument", file=sys.stderr)
                    sys.exit(-1)
                out_path = args.pop(idx)
            break

    comments = extract_all(args)

    if not out_path:
        write_header(comments)
        return

    try:
        with open(out_path, 'w', encoding='utf-8') as out_file:
            write_header(comments, out_file)
    except BaseException:
        # In the event of an error, don't leave a partially-written
        # output file. Removal is best effort; the original error is
        # re-raised either way.
        with contextlib.suppress(OSError):
            os.unlink(out_path)
        raise


if __name__ == '__main__':
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        # Usage goes to stderr: stdout may be the destination of the
        # generated header.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)