parameter_generator.py 14.3 KB
Newer Older
1
# coding: utf-8
2
3
4
"""Helper script for generating config file and parameters list.

This script generates LightGBM/src/io/config_auto.cpp file
5
6
7
8
with list of all parameters, aliases table and other routines
along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file.
"""
9

10
import re
11
from collections import defaultdict
12
from pathlib import Path
13
from typing import Dict, List, Tuple
Guolin Ke's avatar
Guolin Ke committed
14
15


16
def get_parameter_infos(config_hpp: Path) -> Tuple[List[Tuple[str, int]], List[List[Dict[str, List]]]]:
    """Parse config header file.

    Lines inside a ``#pragma region <...> Parameters`` section are interpreted as follows:

    - ``// key = value`` comments are collected under ``key`` for the next declared member
      (``desc`` and ``descl2`` are both stored under ``"desc"`` as ``("l1", value)`` /
      ``("l2", value)`` tuples);
    - any other non-empty line is treated as a member declaration and closes the
      description of the current member.

    Parameters
    ----------
    config_hpp : pathlib.Path
        Path to the config header file.

    Returns
    -------
    infos : tuple
        Tuple with names and content of sections:
        a list of ``(section name, nesting level)`` pairs and, aligned with it,
        a list holding for each section the list of per-parameter dictionaries.
    """
    nvcc_guards = {"#ifndef __NVCC__", "#endif  // __NVCC__"}
    cur_key = None
    key_lvl = 0
    cur_info: Dict[str, List] = {}
    keys: List[Tuple[str, int]] = []
    member_infos: List[List[Dict[str, List]]] = []
    # The encoding is given explicitly so that parsing does not depend on the platform locale.
    with open(config_hpp, encoding="utf-8") as config_hpp_file:
        for raw_line in config_hpp_file:
            line = raw_line.strip()
            if line in nvcc_guards:
                continue
            if "#pragma region Parameters" in line:
                # The outermost region only wraps the sections; it carries no data itself.
                continue
            if "#pragma region" in line and "Parameters" in line:
                key_lvl += 1
                cur_key = line.split("region")[1].strip()
                keys.append((cur_key, key_lvl))
                member_infos.append([])
                continue
            if "#pragma endregion" in line:
                key_lvl -= 1
                cur_key = None
                continue
            if cur_key is None:
                # Lines outside of any parameters section are ignored.
                continue
            if line.startswith("//"):
                key, _, val = line[2:].partition("=")
                key = key.strip()
                val = val.strip()
                if key == "desc":
                    cur_info.setdefault("desc", []).append(("l1", val))
                elif key == "descl2":
                    cur_info.setdefault("desc", []).append(("l2", val))
                else:
                    cur_info.setdefault(key, []).append(val)
            elif line:
                has_eqsgn = False
                tokens = line.split("=")
                if len(tokens) == 2:
                    # "type name = value;" -> the default is the right-hand side without the last character.
                    if "default" not in cur_info:
                        cur_info["default"] = [tokens[1][:-1].strip()]
                    has_eqsgn = True
                tokens = line.split()
                cur_info["inner_type"] = [tokens[0].strip()]
                if "name" not in cur_info:
                    if has_eqsgn:
                        cur_info["name"] = [tokens[1].strip()]
                    else:
                        # "type name;" -> drop the last character of the second token.
                        cur_info["name"] = [tokens[1][:-1].strip()]
                member_infos[-1].append(cur_info)
                cur_info = {}

    return keys, member_infos
Guolin Ke's avatar
Guolin Ke committed
87
88


89
def get_names(infos: List[List[Dict[str, List]]]) -> List[str]:
    """Collect the names of all parameters, section by section.

    Parameters
    ----------
    infos : list
        Content of the config header file.

    Returns
    -------
    names : list
        Names of all parameters.
    """
    return [param["name"][0] for section in infos for param in section]


109
def get_alias(infos: List[List[Dict[str, List]]]) -> List[Tuple[str, str]]:
    """Collect (alias, parameter name) pairs for every parameter that declares aliases.

    Parameters
    ----------
    infos : list
        Content of the config header file.

    Returns
    -------
    pairs : list
        List of tuples (param alias, param name).
    """
    pairs = []
    for section in infos:
        for param in section:
            if "alias" not in param:
                continue
            target = param["name"][0]
            # Aliases are stored as one comma-separated string.
            pairs.extend((one_alias.strip(), target) for one_alias in param["alias"][0].split(","))
    return pairs


133
def parse_check(check: str, reverse: bool = False) -> Tuple[str, str]:
    """Split a constraint such as ``">=0.0"`` into its value and comparison sign.

    Parameters
    ----------
    check : str
        String representation of the constraint.
    reverse : bool, optional (default=False)
        Whether to reverse the sign of the constraint.

    Returns
    -------
    pair : tuple
        Parsed constraint in the form of tuple (value, sign).
    """
    # The sign is either one or two characters long: probe the shorter one first
    # and fall back to the longer one when the remainder is not a number.
    sign_len = 1
    try:
        float(check[sign_len:])
    except ValueError:
        sign_len = 2
        float(check[sign_len:])
    value = check[sign_len:]
    sign = check[:sign_len]
    if not reverse:
        return value, sign
    flipped = {"<": ">", ">": "<", "<=": ">=", ">=": "<="}
    return value, flipped[sign]


161
def set_one_var_from_string(name: str, param_type: str, checks: List[str]) -> str:
    """Build the C++ snippet that reads one parameter from the params map.

    Parameters
    ----------
    name : str
        Name of the parameter.
    param_type : str
        Type of the parameter.
    checks : list
        Constraints of the parameter.

    Returns
    -------
    ret : str
        Lines of auto config file with getting and checks of one parameter value.
    """
    if "vector" in param_type:
        # Vector parameters are read as a string and split on commas; checks are not emitted for them.
        element_type = param_type.split("<")[1][:-1]
        if element_type == "std::string":
            assignment = f"    {name} = Common::Split(tmp_str.c_str(), ',');\n"
        else:
            assignment = f"    {name} = Common::StringToArray<{element_type}>(tmp_str, ',');\n"
        return f'  if (GetString(params, "{name}", &tmp_str)) {{\n' + assignment + "  }\n\n"

    getters = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"}
    comparators = {"<": "LT", ">": "GT", "<=": "LE", ">=": "GE"}
    pieces = [f'  {getters[param_type]}(params, "{name}", &{name});\n']
    for constraint in checks:
        value, sign = parse_check(constraint)
        pieces.append(f"  CHECK_{comparators[sign]}({name}, {value});\n")
    pieces.append("\n")
    return "".join(pieces)


199
def gen_parameter_description(
    sections: List[Tuple[str, int]], descriptions: List[List[Dict[str, List]]], params_rst: Path
) -> None:
    """Write descriptions of parameters to the documentation file.

    Everything between the ``.. start params list`` and ``.. end params list``
    markers of the file is replaced with the freshly generated list;
    the text before and after the markers is kept as is.

    Parameters
    ----------
    sections : list
        Names of parameters sections.
    descriptions : list
        Structured descriptions of parameters.
    params_rst : pathlib.Path
        Path to the file with parameters documentation.

    Raises
    ------
    ValueError
        If one of the markers is missing from the documentation file.
        The file is left untouched in this case.
    """
    start_marker = ".. start params list\n\n"
    end_marker = "\n\n.. end params list"
    params_to_write = []
    lvl_mapper = {1: "-", 2: "~"}
    for (section_name, section_lvl), section_params in zip(sections, descriptions):
        heading_sign = lvl_mapper[section_lvl]
        params_to_write.append(f"{section_name}\n{heading_sign * len(section_name)}")
        for param_desc in section_params:
            name = param_desc["name"][0]
            default_raw = param_desc["default"][0]
            # Drop surrounding quotes unless that would leave nothing (e.g. the default is "").
            default = default_raw.strip('"') or default_raw
            # Prefer an explicitly documented type and reduce e.g. "std::vector<int>" to "int".
            param_type = param_desc.get("type", param_desc["inner_type"])[0].split(":")[-1].split("<")[-1].strip(">")
            options = param_desc.get("options", [])
            if options:
                opts = "``, ``".join([x.strip() for x in options[0].split(",")])
                options_str = f", options: ``{opts}``"
            else:
                options_str = ""
            aliases = param_desc.get("alias", [])
            if aliases:
                aliases_joined = "``, ``".join([x.strip() for x in aliases[0].split(",")])
                aliases_str = f", aliases: ``{aliases_joined}``"
            else:
                aliases_str = ""
            # Sorting puts a "<..." check before a ">..." one, so a pair reads as "low < name < high".
            checks = sorted(param_desc.get("check", []))
            checks_len = len(checks)
            if checks_len > 1:
                number1, sign1 = parse_check(checks[0])
                number2, sign2 = parse_check(checks[1], reverse=True)
                checks_str = f", constraints: ``{number2} {sign2} {name} {sign1} {number1}``"
            elif checks_len == 1:
                number, sign = parse_check(checks[0])
                checks_str = f", constraints: ``{name} {sign} {number}``"
            else:
                checks_str = ""
            main_desc = f'-  ``{name}`` :raw-html:`<a id="{name}" title="Permalink to this parameter" href="#{name}">&#x1F517;&#xFE0E;</a>`, default = ``{default}``, type = {param_type}{options_str}{aliases_str}{checks_str}'
            params_to_write.append(main_desc)
            # "l1"/"l2" descriptions are indented by 3 and 6 spaces respectively.
            params_to_write.extend([f"{' ' * 3 * int(desc[0][-1])}-  {desc[1]}" for desc in param_desc["desc"]])

    # The encoding is given explicitly so that the result does not depend on the platform locale.
    with open(params_rst, encoding="utf-8") as original_params_file:
        all_lines = original_params_file.read()
    before, start_sep, _ = all_lines.partition(start_marker)
    _, end_sep, after = all_lines.partition(end_marker)
    if not start_sep or not end_sep:
        # Without both markers the generated list would be appended to the whole original text.
        raise ValueError(
            f"Cannot find '.. start params list' and/or '.. end params list' markers in {params_rst}"
        )

    with open(params_rst, "w", encoding="utf-8") as new_params_file:
        new_params_file.write("".join([before, start_sep, "\n\n".join(params_to_write), end_sep, after]))


263
def gen_parameter_code(
    config_hpp: Path, config_out_cpp: Path
) -> Tuple[List[Tuple[str, int]], List[List[Dict[str, List]]]]:
    """Generate auto config file.

    The generated C++ source defines, in this order: ``Config::alias_table``,
    ``Config::parameter_set``, ``Config::GetMembersFromString``,
    ``Config::SaveMembersToString``, ``Config::parameter2aliases`` and
    ``Config::ParameterTypes``.

    Parameters
    ----------
    config_hpp : pathlib.Path
        Path to the config header file.
    config_out_cpp : pathlib.Path
        Path to the auto config file.

    Returns
    -------
    infos : tuple
        Tuple with names and content of sections.
    """
    keys, infos = get_parameter_infos(config_hpp)
    names = get_names(infos)
    alias = get_alias(infos)
    names_with_aliases: Dict[str, List[str]] = defaultdict(list)
    # Sections are irrelevant for the generated code, so work on a flat list of parameters.
    all_params = [param for section in infos for param in section]

    # Pieces of the output are collected here and joined once instead of growing a string.
    chunks: List[str] = [
        r"""/*!
 * Copyright (c) 2018 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 *
 * \note
 * This file is auto generated by LightGBM\helpers\parameter_generator.py from LightGBM\include\LightGBM\config.h file.
 */
""",
        "#include<LightGBM/config.h>\nnamespace LightGBM {\n",
    ]

    # alias table
    chunks.append("const std::unordered_map<std::string, std::string>& Config::alias_table() {\n")
    chunks.append("  static std::unordered_map<std::string, std::string> aliases({\n")
    for alias_name, param_name in alias:
        chunks.append(f'  {{"{alias_name}", "{param_name}"}},\n')
        names_with_aliases[param_name].append(alias_name)
    chunks.append("  });\n")
    chunks.append("  return aliases;\n")
    chunks.append("}\n\n")

    # names
    chunks.append("const std::unordered_set<std::string>& Config::parameter_set() {\n")
    chunks.append("  static std::unordered_set<std::string> params({\n")
    chunks.extend(f'  "{name}",\n' for name in names)
    chunks.append("  });\n")
    chunks.append("  return params;\n")
    chunks.append("}\n\n")

    # from strings
    chunks.append(
        "void Config::GetMembersFromString(const std::unordered_map<std::string, std::string>& params) {\n"
    )
    chunks.append('  std::string tmp_str = "";\n')
    for param in all_params:
        if "[no-automatically-extract]" in param:
            continue
        chunks.append(
            set_one_var_from_string(param["name"][0], param["inner_type"][0], param.get("check", []))
        )
    # tails: drop the blank line left by the last parameter before closing the function
    chunks = ["".join(chunks).strip(), "\n}\n\n"]

    chunks.append("std::string Config::SaveMembersToString() const {\n")
    chunks.append("  std::stringstream str_buf;\n")
    for param in all_params:
        if "[no-save]" in param:
            continue
        param_type = param["inner_type"][0]
        name = param["name"][0]
        if "vector" in param_type:
            if "int8" in param_type:
                chunks.append(
                    f'  str_buf << "[{name}: " << Common::Join(Common::ArrayCast<int8_t, int>({name}), ",") << "]\\n";\n'
                )
            else:
                chunks.append(f'  str_buf << "[{name}: " << Common::Join({name}, ",") << "]\\n";\n')
        else:
            chunks.append(f'  str_buf << "[{name}: " << {name} << "]\\n";\n')
    # tails
    chunks.append("  return str_buf.str();\n")
    chunks.append("}\n\n")

    chunks.append(
        "const std::unordered_map<std::string, std::vector<std::string>>& Config::parameter2aliases() {\n"
        "  static std::unordered_map<std::string, std::vector<std::string>> map({"
    )
    for name in names:
        chunks.append('\n    {"' + name + '", ')
        param_aliases = names_with_aliases.get(name)
        if param_aliases:
            chunks.append('{"' + '", "'.join(param_aliases) + '"}},')
        else:
            chunks.append("{}},")
    chunks.append("\n  });\n  return map;\n}\n\n")

    chunks.append(
        "const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {\n"
        "  static std::unordered_map<std::string, std::string> map({"
    )
    int_t_pat = re.compile(r"int\d+_t")
    # the following are stored as comma separated strings but are arrays in the wrappers
    overrides = {
        "categorical_feature": "vector<int>",
        "ignore_column": "vector<int>",
        "interaction_constraints": "vector<vector<int>>",
    }
    for param in all_params:
        name = param["name"][0]
        if name == "task":
            continue
        if name in overrides:
            param_type = overrides[name]
        else:
            # Fixed-width integer types are reported as plain "int", and the "std::" prefix is dropped.
            param_type = int_t_pat.sub("int", param["inner_type"][0]).replace("std::", "")
        chunks.append('\n    {"' + name + '", "' + param_type + '"},')
    chunks.append("\n  });\n  return map;\n}\n\n")

    chunks.append("}  // namespace LightGBM\n")

    # The encoding is given explicitly so that the output does not depend on the platform locale.
    with open(config_out_cpp, "w", encoding="utf-8") as config_out_cpp_file:
        config_out_cpp_file.write("".join(chunks))

    return keys, infos

Guolin Ke's avatar
Guolin Ke committed
394
395

if __name__ == "__main__":
    # All inputs and outputs are located relative to the parent of this script's directory.
    base_dir = Path(__file__).absolute().parent.parent
    sections, descriptions = gen_parameter_code(
        base_dir / "include" / "LightGBM" / "config.h",
        base_dir / "src" / "io" / "config_auto.cpp",
    )
    gen_parameter_description(sections, descriptions, base_dir / "docs" / "Parameters.rst")