mkdoc.py 6.64 KB
Newer Older
Wenzel Jakob's avatar
Wenzel Jakob committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/env python3
#
#  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
#
#  Extract documentation from C++ header files to use it in Python bindings
#

import os, sys, platform, re, textwrap
from clang import cindex
from clang.cindex import CursorKind
from collections import OrderedDict

# On macOS ('Darwin'), use the libclang found under /opt/llvm if that file
# exists: the clang bindings are told to load the library from its directory.
# When the file is missing, nothing is configured here.
if platform.system() == 'Darwin':
    libclang = '/opt/llvm/lib/libclang.dylib'
    if os.path.exists(libclang):
        cindex.Config.set_library_path(os.path.dirname(libclang))

# Cursor kinds whose children are visited by extract(); every kind except
# TRANSLATION_UNIT also contributes its name to the prefix of nested entries.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() prints a docstring variable.
PRINT_LIST = [
    # Types
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.CLASS_TEMPLATE,
    # Functions and methods
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    # Data members
    CursorKind.FIELD_DECL,
]

# Maps the symbol of an overloadable C++ operator to the word that replaces it
# in generated identifiers, e.g. 'operator==' becomes 'operator_eq'.
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift',
    '>>': 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul',
    '/': 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign',
    # Without these, 'operator()' and 'operator^' keep characters that are
    # not valid in an identifier, and 'operator->' is spelled 'operator_sub_'.
    '()': 'call', '->': 'arrow', '^': 'bxor',
}
# Longest symbols first, so that e.g. '<<=' is substituted before '<<' and '<'
# get a chance to match a part of it.
CPP_OPERATORS = OrderedDict(sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))

# Number of times each sanitized name has been produced so far; sanitize_name()
# uses it to give repeated names (such as overloads) a numeric suffix.
registered_names = dict()

def d(s):
    """Return ``s`` as text, decoding it as UTF-8 if it is a byte string.

    Values that are not ``bytes`` (in particular ``str``) are returned
    unchanged, so the helper can be applied no matter whether the caller
    holds encoded or already decoded text.
    """
    if isinstance(s, bytes):
        return s.decode('utf8')
    return s

def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a unique ``__doc_*`` identifier.

    Operator names are spelled out using ``CPP_OPERATORS``, every remaining
    character that is not a letter, digit or underscore is replaced by an
    underscore, and ``__doc_`` is prepended. A name that was already
    produced before receives a numeric suffix (``_2``, ``_3``, ...), which is
    tracked in the module-level ``registered_names`` dictionary.

    :param name: The name to convert.
    :return: The sanitized identifier.
    """
    for k, v in CPP_OPERATORS.items():
        name = name.replace('operator%s' % k, 'operator_%s' % v)
    # Covers template brackets, spaces and commas, and additionally anything
    # else that would not be valid in an identifier ('::', '*', '&', '(', ...).
    name = re.sub(r'\W', '_', name)
    if name in registered_names:
        registered_names[name] += 1
        name += '_' + str(registered_names[name])
    else:
        registered_names[name] = 1
    return '__doc_' + name

def process_comment(comment):
    """Convert a raw C++ documentation comment into plain docstring text.

    The comment markers (``/*``, ``*/``, ``///`` and a leading ``*``) are
    removed from every line, a set of Doxygen commands and the HTML tags
    ``tt``, ``em`` and ``b`` are rewritten (inline markup, or headings such as
    ``Returns:`` followed by an indented paragraph), and the text is
    re-wrapped to 75 columns.

    :param comment: The comment text including its C++ comment markers.
    :return: The converted text with trailing whitespace removed.
    """
    stripped = []

    # Remove C++ comment syntax
    for s in comment.splitlines():
        s = s.strip()
        opens_block = s.startswith('/*')
        if opens_block:
            s = s[2:]
        # Checked independently of the opener, so that a one-line comment
        # such as '/** text */' loses its terminator as well.
        if s.endswith('*/'):
            s = s[:-2].rstrip('* \t')
        if opens_block:
            s = s.lstrip('* \t')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        stripped.append(s.strip() + '\n')

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = ''.join(stripped)
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    # A leading '$' marks a heading for the re-flow step further below
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group), r'\n\n$Parameter "\2":\n\n', s)

    # The trailing word boundary keeps a short tag from matching the start of
    # a longer one (e.g. 'throw' inside 'throws'), which would leave a stray
    # 's' at the beginning of the paragraph.
    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remarks'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    # HTML tags
    s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
    s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
    s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.width = 75
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    paragraphs = []
    for x in re.split(r'\n{2,}', s):
        wrapped = wrapper.fill(x.strip())
        if len(wrapped) > 0 and wrapped[0] == '$':
            # Heading: drop the marker and indent the paragraph that follows
            paragraphs.append(wrapped[1:] + '\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ' '*4
        else:
            paragraphs.append(wrapped + '\n\n')
            wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(paragraphs).rstrip()


def extract(filename, node, prefix):
    """Print docstring variables for ``node`` and everything nested in it.

    Nodes located in a file other than ``filename`` are ignored. For kinds in
    ``RECURSE_LIST`` the children are processed first, with the node's name
    appended to ``prefix``; for kinds in ``PRINT_LIST`` a
    ``static const char *__doc_...`` definition holding the processed comment
    is written to standard output.

    :param filename: Path of the header that is being processed.
    :param node: The clang cursor to inspect.
    :param prefix: Underscore-separated names of the enclosing scopes
        (empty at the top level).
    :return: The number of definitions printed for this node and its children.
    """
    num_extracted = 0
    if not (node.location.file is None or os.path.samefile(d(node.location.file.name), filename)):
        return 0
    if node.kind in RECURSE_LIST:
        sub_prefix = prefix
        if node.kind != CursorKind.TRANSLATION_UNIT:
            if len(sub_prefix) > 0:
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for i in node.get_children():
            num_extracted += extract(filename, i, sub_prefix)
        # A scope in which nothing was printed is skipped altogether
        if num_extracted == 0:
            return 0
    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
        # Only insert the separator when there is an enclosing scope: the
        # generated DOC(name) macro expands to '__doc_name', so a top-level
        # entity must not end up as '__doc__name'.
        qualified = d(node.spelling)
        if len(prefix) > 0:
            qualified = prefix + '_' + qualified
        name = sanitize_name(qualified)
        print('\nstatic const char *%s = %sR"doc(%s)doc";' % (name, '\n' if '\n' in comment else '', comment))
        num_extracted += 1
    return num_extracted

if __name__ == '__main__':
    # Compiler flags handed to clang; every command line argument starting
    # with '-' is appended, all other arguments are taken as header files.
    parameters = ['-x', 'c++', '-std=c++11']
    filenames = []

    for item in sys.argv[1:]:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit() rather than the exit() helper, which is only installed
        # by the 'site' module
        sys.exit(-1)

    # Preamble of the generated header. DOC(a, b, ...) selects __DOC<N> by
    # its number of arguments and pastes them into '__doc_a_b_...'; each
    # argument has to be joined with '##' on both sides to be substituted.
    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __COUNT(_1, _2, _3, _4, _5, COUNT, ...)  COUNT
#define __VA_SIZE(...)                           __COUNT(__VA_ARGS__, 5, 4, 3, 2, 1)
#define __CAT1(a, b)                             a ## b
#define __CAT2(a, b)                             __CAT1(a, b)
#define __DOC1(n1)                               __doc_##n1
#define __DOC2(n1, n2)                           __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                       __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                   __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)               __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define DOC(...)                                 __CAT2(__DOC, __VA_SIZE(__VA_ARGS__))(__VA_ARGS__)''')
    for filename in filenames:
        # Progress goes to stderr so that stdout only carries the header
        print('Processing "%s"..' % filename, file = sys.stderr)
        # NOTE(review): the two flags presumably correspond to libclang's
        # excludeDeclarationsFromPCH and displayDiagnostics -- confirm
        index = cindex.Index(cindex.conf.lib.clang_createIndex(False, True))
        tu = index.parse(filename, parameters)
        extract(filename, tu.cursor, '')