mkdoc.py 10.1 KB
Newer Older
Wenzel Jakob's avatar
Wenzel Jakob committed
1
2
3
4
5
6
7
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

8
9
10
11
12
13
import os
import sys
import platform
import re
import textwrap

Wenzel Jakob's avatar
Wenzel Jakob committed
14
15
16
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
17
18
from threading import Thread, Semaphore
from multiprocessing import cpu_count
Wenzel Jakob's avatar
Wenzel Jakob committed
19
20
21
22
23
24

# Cursor kinds whose children extract() descends into. Except for the
# translation unit, the spelling of such a node is appended to the name
# prefix that is passed down to its children.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    # Types and enumerations
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Functions and members
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Maps the symbol of a C++ operator to the word that replaces it when
# sanitize_name() turns 'operator<symbol>' into part of an identifier.
#
# '^' and '->' are listed explicitly: without them 'operator^' would lose
# its symbol entirely and 'operator->' would be rewritten through the '-'
# entry, colliding with 'operator-'.
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift',
    '>>': 'rshift', '&&': 'land', '||': 'lor', '->': 'arrow', '!': 'lnot',
    '~': 'bnot', '&': 'band', '|': 'bor', '^': 'bxor', '+': 'add',
    '-': 'sub', '*': 'mul', '/': 'div', '%': 'mod', '<': 'lt', '>': 'gt',
    '=': 'assign', '()': 'call',
}

# Order the table by decreasing symbol length so that iterating over it tries
# e.g. '<<=' before '<<' and '<'; otherwise a shorter symbol would match the
# beginning of a longer operator first.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))

# One semaphore slot per CPU: ExtractionThread takes a slot in its
# constructor and gives it back when its run() method finishes.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)

# Collected (name, filename, comment) tuples, appended to by extract().
output = []
60

Wenzel Jakob's avatar
Wenzel Jakob committed
61
62
63
def d(s):
    """Return *s* as text, decoding it as UTF-8 when it is a byte string.

    Values that are not ``bytes`` (in particular ``str``) are returned
    unchanged, so the helper works whether the caller hands it encoded or
    already decoded text.
    """
    return s.decode('utf8') if isinstance(s, bytes) else s

64

Wenzel Jakob's avatar
Wenzel Jakob committed
65
def sanitize_name(name):
    """Turn a (prefixed) C++ spelling into a ``__doc_`` identifier.

    Steps, in order: ``type-parameter-0-N`` becomes ``TN``; every
    ``operator<symbol>`` listed in CPP_OPERATORS becomes ``operator_<word>``;
    everything from the first ``<`` to the last ``>`` is dropped; each
    remaining non-alphanumeric character becomes ``_``; runs of ``_`` are
    collapsed and one trailing ``_`` is removed.

    Returns:
        The resulting identifier prefixed with ``__doc_``.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)

    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator' + symbol, 'operator_' + word)

    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)

    # Collapse underscore runs first so at most one can remain at the end.
    name = re.sub('_+', '_', name)
    if name.endswith('_'):
        name = name[:-1]

    return '__doc_' + name

74

Wenzel Jakob's avatar
Wenzel Jakob committed
75
76
77
78
def process_comment(comment):
    """Convert a raw C++ doc comment into plain, re-flowed docstring text.

    The comment markers (``/* .. */``, ``///`` and a leading ``*``) and the
    common indentation are removed, a subset of Doxygen commands and HTML
    tags is rewritten into lightweight markup, and prose paragraphs are
    re-wrapped to 70 columns. Text between triple-backtick fences is only
    stripped of surrounding whitespace, not re-wrapped.

    Args:
        comment: Text of the comment, including its C++ comment markers.

    Returns:
        The converted text without leading blank lines or trailing
        whitespace (an empty string if nothing is left).
    """
    # Remove C++ comment syntax
    lines = []
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('///'):
            s = s[3:]
        else:
            # Opening and closing markers are checked independently so that
            # a one-line '/** text */' loses both of them.
            if s.startswith('/*'):
                s = s[2:].lstrip('*')
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        if s.startswith('*'):
            s = s[1:]
        lines.append(s)

    # Strip the indentation shared by all non-empty lines
    indents = [len(s) - len(s.lstrip()) for s in lines if len(s) > 0]
    if indents:
        margin = min(indents)
        lines = [s[margin:] for s in lines]
    s = ''.join(line + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    # A paragraph starting with '$' is a section heading; the re-flow step
    # below removes the marker and indents the paragraph that follows it.
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_tags = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remark'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for in_, out_ in section_tags:
        # '\b' keeps a short tag from matching the start of a longer one
        # ('\return' inside '\returns'), which would leave a stray 's'.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper(width=70, expand_tabs=True,
                                   replace_whitespace=True,
                                   drop_whitespace=True)
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    parts = []
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            parts.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            parts.append(x.strip())
        else:
            # Paragraphs are separated by at least one blank line
            for y in re.split(r'(?: *\n *){2,}', x):
                text = re.sub(r'\s+', ' ', y).strip()
                if text.startswith('$'):
                    # Headings are never indented, even when they directly
                    # follow another heading; test the text before wrapping
                    # so that the indent cannot hide the '$' marker.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    parts.append(wrapper.fill(text)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(text) > 0:
                        parts.append(wrapper.fill(text) + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(parts).rstrip().lstrip('\n')
Wenzel Jakob's avatar
Wenzel Jakob committed
183
184


Wenzel Jakob's avatar
Wenzel Jakob committed
185
def extract(filename, node, prefix):
    """Collect docstring entries for *node* and the declarations below it.

    Nodes located in a file other than *filename* are skipped (the function
    returns 0 for them). For node kinds in RECURSE_LIST the children are
    visited with the node's spelling appended to *prefix*; for node kinds in
    PRINT_LIST with a non-empty spelling, a ``(name, filename, comment)``
    tuple is appended to the module-level ``output`` list.

    Args:
        filename: Path of the header being processed.
        node: Cursor to inspect.
        prefix: Underscore-separated names of the enclosing scopes.
    """
    source = node.location.file
    if source is not None and \
            not os.path.samefile(d(source.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        if kind == CursorKind.TRANSLATION_UNIT:
            # The translation unit itself contributes nothing to the name
            child_prefix = prefix
        else:
            child_prefix = (prefix + '_' if prefix else prefix) + \
                d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        if len(node.spelling) > 0:
            qualified = (prefix + '_' if prefix else '') + d(node.spelling)
            output.append((sanitize_name(qualified), filename, comment))
Wenzel Jakob's avatar
Wenzel Jakob committed
207

208

209
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its doc comments.

    Creating an instance takes one slot of ``job_semaphore`` (blocking the
    creating thread while none is free); the slot is given back when run()
    finishes, whether or not it raised.
    """

    def __init__(self, filename, parameters):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        # Taken here rather than in run() so that the constructor itself
        # blocks until a slot is free.
        job_semaphore.acquire()

    def run(self):
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            # NOTE(review): in the libclang C API the two flags are
            # excludeDeclarationsFromPCH and displayDiagnostics - confirm.
            handle = cindex.conf.lib.clang_createIndex(False, True)
            clang_index = cindex.Index(handle)
            unit = clang_index.parse(self.filename, self.parameters)
            extract(self.filename, unit.cursor, '')
        finally:
            job_semaphore.release()

Wenzel Jakob's avatar
Wenzel Jakob committed
226
227
228
229
if __name__ == '__main__':
    # Arguments starting with '-' are passed on to clang; all other
    # arguments are header files to process. The generated C++ header is
    # written to stdout, progress and error messages go to stderr.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK found below sdk_dir as the system root; skip
            # the option when the directory has no sub-directories instead
            # of failing on the lookup.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        # Reported on stderr so that it cannot end up in the generated
        # header; sys.exit() is used because the bare exit() helper is only
        # installed by the site module.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output.clear()
    for filename in filenames:
        # The constructor blocks while all job slots are in use
        thr = ExtractionThread(filename, parameters)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Every running job holds one slot, so taking all of them only succeeds
    # once the last job has finished.
    for i in range(job_count):
        job_semaphore.acquire()

    # Entries are sorted by name and then file name; repeated names get a
    # numeric suffix starting at _2.
    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(output, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment))

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')