mkdoc.py 8.33 KB
Newer Older
Wenzel Jakob's avatar
Wenzel Jakob committed
1
2
3
4
5
6
7
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

8
9
10
11
12
13
import os
import sys
import platform
import re
import textwrap

Wenzel Jakob's avatar
Wenzel Jakob committed
14
15
16
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict
17
18
from threading import Thread, Semaphore
from multiprocessing import cpu_count
Wenzel Jakob's avatar
Wenzel Jakob committed
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT, CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() emits a docstring constant.
PRINT_LIST = [
    CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL, CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CXX_METHOD, CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Maps a C++ operator token to the mnemonic used in generated identifiers
# (e.g. ``operator+=`` becomes ``operator_iadd``).
# NOTE(review): there is no entry for '^' or '()'; sanitize_name() turns the
# punctuation of such operators into underscores instead.
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor',
    '<<=': 'ilshift', '>>=': 'irshift',
    '++': 'inc', '--': 'dec',
    '<<': 'lshift', '>>': 'rshift',
    '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor',
    '+': 'add', '-': 'sub', '*': 'mul', '/': 'div', '%': 'mod',
    '<': 'lt', '>': 'gt', '=': 'assign',
}

# Longest tokens first, so that e.g. 'operator<<=' is substituted before
# 'operator<<' or 'operator<' get a chance to match its prefix.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(), key=lambda item: -len(item[0])))
Wenzel Jakob's avatar
Wenzel Jakob committed
51

52
53
54
# One extraction slot per CPU reported by multiprocessing; the semaphore is
# acquired when an ExtractionThread is created and released when it finishes.
job_count = cpu_count()
job_semaphore = Semaphore(value=job_count)

Wenzel Jakob's avatar
Wenzel Jakob committed
55
56
# Sanitized identifier -> how many times sanitize_name() has produced it.
registered_names = {}

57

Wenzel Jakob's avatar
Wenzel Jakob committed
58
59
60
def d(s):
    """Return ``s`` as text.

    Byte strings are decoded as UTF-8; values that are already ``str`` are
    returned unchanged, so the helper works whether the caller hands over
    ``bytes`` or ``str``.
    """
    return s if isinstance(s, str) else s.decode('utf8')

61

Wenzel Jakob's avatar
Wenzel Jakob committed
62
63
64
65
def sanitize_name(name):
    """Turn a prefixed C++ name into a unique ``__doc_*`` identifier.

    Steps, in order:

    1. ``operator<token>`` is rewritten to ``operator_<mnemonic>`` using
       ``CPP_OPERATORS`` (iterated in its stored, longest-token-first order).
    2. Everything from the first ``<`` to the last ``>`` is dropped.
    3. Every non-alphanumeric character becomes ``_``; runs of underscores
       are collapsed and one trailing underscore is removed.
    4. If the resulting name was produced before, ``_<n>`` is appended,
       where ``n`` is the running count recorded in ``registered_names``.

    Returns the name prefixed with ``__doc_``.

    NOTE(review): extract() calls this from ExtractionThread workers and the
    counter update below is not synchronized, so which duplicate receives
    which suffix depends on call order.
    """
    for token, mnemonic in CPP_OPERATORS.items():
        name = name.replace('operator%s' % token, 'operator_%s' % mnemonic)

    name = re.sub(r'<.*>', '', name)
    name = ''.join('_' if not ch.isalnum() else ch for ch in name)
    name = re.sub(r'_+', '_', name)
    name = re.sub(r'_$', '', name)

    # The dict is only mutated, never rebound, so no ``global`` is needed.
    seen = registered_names.get(name, 0) + 1
    registered_names[name] = seen
    if seen > 1:
        name += '_' + str(seen)
    return '__doc_' + name

76

Wenzel Jakob's avatar
Wenzel Jakob committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
def process_comment(comment):
    """Convert a raw C++/Doxygen comment into plain docstring text.

    The comment markers (``/*``, ``*/``, ``///`` and leading ``*``) are
    removed, a fixed set of Doxygen commands and HTML/TeX tags is rewritten
    to reST-style markup, and the text is re-flowed paragraph by paragraph
    to 75 columns. A paragraph that follows a section heading such as
    ``Returns:`` or ``Parameter ``x``:`` is indented by four spaces.

    Parameters:
        comment: the raw comment text (``str``), possibly empty.

    Returns:
        The processed text with trailing whitespace removed.
    """
    # Remove C++ comment syntax
    stripped_lines = []
    for line in comment.splitlines():
        line = line.strip()
        if line.startswith('/*'):
            line = line[2:]
            # A one-line block comment ("/** text */") opens and closes on
            # the same line, so its terminator has to be removed here too.
            if line.endswith('*/'):
                line = line[:-2].rstrip('* \t')
            line = line.lstrip('* \t')
        elif line.endswith('*/'):
            line = line[:-2].rstrip('* \t')
        elif line.startswith('///'):
            line = line[3:]
        if line.startswith('*'):
            line = line[1:]
        stripped_lines.append(line.strip() + '\n')
    s = ''.join(stripped_lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)

    # Section commands become "$Heading:" paragraphs; the '$' marker is
    # consumed by the re-flow step below.
    section_tags = {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'remarks': 'Remark',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws',
    }
    for in_, out_ in section_tags.items():
        # The word boundary keeps a short tag (e.g. 'throw') from matching
        # the front of a longer one ('throws') and leaving a stray letter.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\b\s*', r'\n\n', s)
    s = re.sub(r'\\brief\b\s*', r'', s)
    s = re.sub(r'\\short\b\s*', r'', s)
    s = re.sub(r'\\ref\b\s*', r'', s)

    # HTML/TeX tags
    s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
    s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
    s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)
    s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.width = 75
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    chunks = []
    for paragraph in re.split(r'\n{2,}', s):
        wrapped = wrapper.fill(paragraph.strip())
        if wrapped.startswith('$'):
            # Section heading: drop the marker and indent what follows.
            chunks.append(wrapped[1:] + '\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ' ' * 4
        else:
            chunks.append(wrapped + '\n\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(chunks).rstrip()


154
def extract(filename, node, prefix, output):
    """Recursively collect docstring constants for ``node`` and its children.

    Parameters:
        filename: path of the header being processed; nodes whose location
            names a different file (per ``os.path.samefile``) are skipped.
        node: the cursor to inspect (its ``kind``, ``location``,
            ``spelling``, ``raw_comment`` and ``get_children()`` are used).
        prefix: underscore-joined names of the enclosing scopes, or ``''``.
        output: list that receives one C++ ``static const char *`` raw
            string definition per documented entity.

    Returns:
        The number of entries appended for this node and its descendants.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    num_extracted = 0
    if node.kind in RECURSE_LIST:
        # The translation unit itself contributes nothing to the prefix.
        child_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            num_extracted += extract(filename, child, child_prefix, output)
        # A scope that yielded no entries is not emitted itself either.
        if num_extracted == 0:
            return 0

    if node.kind in PRINT_LIST:
        raw_comment = node.raw_comment
        comment = process_comment(
            d(raw_comment) if raw_comment is not None else '')
        qualified = prefix + '_' if prefix else ''
        name = sanitize_name(qualified + d(node.spelling))
        # Multi-line comments start on their own line after the '='.
        separator = '\n' if '\n' in comment else ' '
        output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
                      (name, separator, comment))
        num_extracted += 1
    return num_extracted

181

182
class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings.

    Constructing an instance acquires ``job_semaphore`` and therefore blocks
    the constructing thread until a slot is free; the slot is released when
    ``run()`` finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters, output):
        """Store the job description and reserve a job slot.

        Parameters:
            filename: path of the header to parse.
            parameters: list of compiler arguments passed to the parser.
            output: shared list that extract() appends results to.
        """
        Thread.__init__(self)
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its docstrings to the output."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            # NOTE(review): the two flags are presumably libclang's
            # (excludeDeclarationsFromPCH, displayDiagnostics) — confirm.
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            job_semaphore.release()

Wenzel Jakob's avatar
Wenzel Jakob committed
200
201
202
203
if __name__ == '__main__':
    # Command-line driver: arguments starting with '-' are forwarded to the
    # parser, everything else is treated as a header file to process. The
    # generated C++ header is written to stdout, progress goes to stderr.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK directory listed, if there is one; an empty
            # SDKs folder simply means no -isysroot is passed.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        sys.exit(-1)

    # Header preamble. DOC(a, b, ...) expands to __doc_a_b_...; every name
    # must be joined with '##_##' (the 5-argument form included).
    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                              x
#define __COUNT(_1, _2, _3, _4, _5, COUNT, ...)  COUNT
#define __VA_SIZE(...)                           __EXPAND(__COUNT(__VA_ARGS__, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                             a ## b
#define __CAT2(a, b)                             __CAT1(a, b)
#define __DOC1(n1)                               __doc_##n1
#define __DOC2(n1, n2)                           __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                       __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                   __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)               __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define DOC(...)                                 __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''')

    output = []
    for filename in filenames:
        # The constructor blocks until a job slot is available.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Taking every slot succeeds only once all workers have released theirs.
    for _ in range(job_count):
        job_semaphore.acquire()

    output.sort()
    for entry in output:
        print(entry)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''')