Add C API function that returns all parameter names with their aliases (#4829)

* add C API function that returns all param names with aliases
* add C API function that returns all param names with aliases
* add R code
* test R code
* remove debug CI
* fix R lint
* refactor
* run CI
* fix R
* fix
* revert CI checks
* revert changes in docs
* Try to make function `const`
  Co-authored-by: James Lamb <jaylamb20@gmail.com>
* add `const` in cpp file
* address review comments and sync with `master`
  Co-authored-by: James Lamb <jaylamb20@gmail.com>

Add C API function that returns all parameter names with their aliases (#4829)
* add C API function that returns all param names with aliases
* add C API function that returns all param names with aliases
* add R code
* test R code
* remove debug CI
* fix R lint
* refactor
* run CI
* fix R
* fix
* revert CI checks
* revert changes in docs
* Try to make function `const`
  Co-authored-by: James Lamb <jaylamb20@gmail.com>
* add `const` in cpp file
* address review comments and sync with `master`
  Co-authored-by: James Lamb <jaylamb20@gmail.com>
cf38071b · Nikita Titov · GitHub · 946817a5 · cf38071b · cf38071b
Unverified Commit cf38071b authored Dec 03, 2021 by Nikita Titov Committed by GitHub Dec 02, 2021
11 changed files
--- a/R-package/R/aliases.R
+++ b/R-package/R/aliases.R
@@ -7,85 +7,30 @@
 # [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character
 #          vector of corresponding aliases.
 .DATASET_PARAMETERS <- function() {
-    return(
-        list(
-            "bin_construct_sample_cnt" = c(
+    all_aliases <- .PARAMETER_ALIASES()
+    return(all_aliases[c(
        "bin_construct_sample_cnt"
-                , "subsample_for_bin"
-            )
-            , "categorical_feature" = c(
-                "categorical_feature"
-                , "cat_feature"
-                , "categorical_column"
-                , "cat_column"
-                , "categorical_features"
-            )
-            , "data_random_seed" = c(
-                "data_random_seed"
-                , "data_seed"
-            )
-            , "enable_bundle" = c(
-                "enable_bundle"
-                , "is_enable_bundle"
-                , "bundle"
-            )
-            , "feature_pre_filter" = "feature_pre_filter"
-            , "forcedbins_filename" = "forcedbins_filename"
-            , "group_column" = c(
-                "group_column"
-                , "group"
-                , "group_id"
-                , "query_column"
-                , "query"
-                , "query_id"
-            )
-            , "header" = c(
-                "header"
-                , "has_header"
-            )
-            , "ignore_column" = c(
-                "ignore_column"
-                , "ignore_feature"
-                , "blacklist"
-            )
-            , "is_enable_sparse" = c(
-                "is_enable_sparse"
-                , "is_sparse"
-                , "enable_sparse"
-                , "sparse"
-            )
-            , "label_column" = c(
-                "label_column"
-                , "label"
-            )
-            , "linear_tree" = c(
-                "linear_tree"
-                , "linear_trees"
-            )
-            , "max_bin" = c(
-                "max_bin"
-                , "max_bins"
-            )
-            , "max_bin_by_feature" = "max_bin_by_feature"
-            , "min_data_in_bin" = "min_data_in_bin"
-            , "pre_partition" = c(
-                "pre_partition"
-                , "is_pre_partition"
-            )
-            , "precise_float_parser" = "precise_float_parser"
-            , "two_round" = c(
-                "two_round"
-                , "two_round_loading"
-                , "use_two_round_loading"
-            )
-            , "use_missing" = "use_missing"
-            , "weight_column" = c(
-                "weight_column"
-                , "weight"
-            )
-            , "zero_as_missing" = "zero_as_missing"
-        )
-    )
+        , "categorical_feature"
+        , "data_random_seed"
+        , "enable_bundle"
+        , "feature_pre_filter"
+        , "forcedbins_filename"
+        , "group_column"
+        , "header"
+        , "ignore_column"
+        , "is_enable_sparse"
+        , "label_column"
+        , "linear_tree"
+        , "max_bin"
+        , "max_bin_by_feature"
+        , "min_data_in_bin"
+        , "pre_partition"
+        , "precise_float_parser"
+        , "two_round"
+        , "use_missing"
+        , "weight_column"
+        , "zero_as_missing"
+    )])
 }

 # [description] List of respected parameter aliases. Wrapped in a function to take advantage of
@@ -93,33 +38,16 @@
 # [return] A named list, where each key is a main LightGBM parameter and each value is a character
 #          vector of corresponding aliases.
 .PARAMETER_ALIASES <- function() {
-    learning_params <- list(
-        "boosting" = c(
-            "boosting"
-            , "boost"
-            , "boosting_type"
-        )
-        , "early_stopping_round" = c(
-            "early_stopping_round"
-            , "early_stopping_rounds"
-            , "early_stopping"
-            , "n_iter_no_change"
-        )
-        , "num_iterations" = c(
-            "num_iterations"
-            , "num_iteration"
-            , "n_iter"
-            , "num_tree"
-            , "num_trees"
-            , "num_round"
-            , "num_rounds"
-            , "nrounds"
-            , "num_boost_round"
-            , "n_estimators"
-            , "max_iter"
+    params_to_aliases <- jsonlite::fromJSON(
+        .Call(
+            LGBM_DumpParamAliases_R
        )
    )
-    return(c(learning_params, .DATASET_PARAMETERS()))
+    for (main_name in names(params_to_aliases)) {
+        aliases_with_main_name <- c(main_name, unlist(params_to_aliases[[main_name]]))
+        params_to_aliases[[main_name]] <- aliases_with_main_name
+    }
+    return(params_to_aliases)
 }

 # [description]

--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -872,6 +872,26 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
  R_API_END();
 }

+// Returns the parameter-alias JSON produced by LGBM_DumpParamAliases() as an
+// R character vector of length 1. Takes no arguments.
+SEXP LGBM_DumpParamAliases_R() {
+  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  R_API_BEGIN();
+  // first attempt uses a 1024 * 1024 byte buffer
+  const int64_t initial_len = 1024 * 1024;
+  int64_t required_len = 0;
+  std::vector<char> json_buf(initial_len);
+  CHECK_CALL(LGBM_DumpParamAliases(initial_len, &required_len, json_buf.data()));
+  if (required_len > initial_len) {
+    // the reported length exceeds what the first buffer could hold: grow it and call again
+    json_buf.resize(required_len);
+    CHECK_CALL(LGBM_DumpParamAliases(required_len, &required_len, json_buf.data()));
+  }
+  SEXP aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
+  SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(json_buf.data(), &cont_token));
+  // releases both protected objects: cont_token and aliases_str
+  UNPROTECT(2);
+  return aliases_str;
+  R_API_END();
+}
+
 // .Call() calls
 static const R_CallMethodDef CallEntries[] = {
  {"LGBM_HandleIsNull_R"              , (DL_FUNC) &LGBM_HandleIsNull_R              , 1},
@@ -916,6 +936,7 @@ static const R_CallMethodDef CallEntries[] = {
  {"LGBM_BoosterSaveModel_R"          , (DL_FUNC) &LGBM_BoosterSaveModel_R          , 4},
  {"LGBM_BoosterSaveModelToString_R"  , (DL_FUNC) &LGBM_BoosterSaveModelToString_R  , 3},
  {"LGBM_BoosterDumpModel_R"          , (DL_FUNC) &LGBM_BoosterDumpModel_R          , 3},
+  {"LGBM_DumpParamAliases_R"          , (DL_FUNC) &LGBM_DumpParamAliases_R          , 0},
  {NULL, NULL, 0}
 };


--- a/R-package/src/lightgbm_R.h
+++ b/R-package/src/lightgbm_R.h
@@ -596,4 +596,10 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
  SEXP feature_importance_type
 );

+/*!
+* \brief Dump parameter aliases to JSON
+* \note Takes no arguments. In the JSON, each key is a main parameter name and each value
+*       is an array (possibly empty) of that parameter's aliases.
+* \return R character vector (length=1) with aliases JSON
+*/
+LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();
+
 #endif  // LIGHTGBM_R_H_
--- a/R-package/tests/testthat/test_parameters.R
+++ b/R-package/tests/testthat/test_parameters.R
@@ -50,6 +50,7 @@ context("parameter aliases")
 test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
  param_aliases <- .PARAMETER_ALIASES()
  expect_identical(class(param_aliases), "list")
+  expect_true(length(param_aliases) > 100L)
  expect_true(is.character(names(param_aliases)))
  expect_true(is.character(param_aliases[["boosting"]]))
  expect_true(is.character(param_aliases[["early_stopping_round"]]))
@@ -58,6 +59,7 @@ test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where
  expect_true(length(names(param_aliases)) == length(param_aliases))
  expect_true(all(sapply(param_aliases, is.character)))
  expect_true(length(unique(names(param_aliases))) == length(param_aliases))
+  expect_equal(sort(param_aliases[["task"]]), c("task", "task_type"))
 })

 test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", {

--- a/helpers/parameter_generator.py
+++ b/helpers/parameter_generator.py
@@ -6,6 +6,7 @@ with list of all parameters, aliases table and other routines
 along with parameters description in LightGBM/docs/Parameters.rst file
 from the information in LightGBM/include/LightGBM/config.h file.
 """
+from collections import defaultdict
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

@@ -291,6 +292,7 @@ def gen_parameter_code(
    keys, infos = get_parameter_infos(config_hpp)
    names = get_names(infos)
    alias = get_alias(infos)
+    names_with_aliases = defaultdict(list)
    str_to_write = r"""/*!
 * Copyright (c) 2018 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
@@ -306,6 +308,7 @@ def gen_parameter_code(

    for pair in alias:
        str_to_write += f'  {{"{pair[0]}", "{pair[1]}"}},\n'
+        names_with_aliases[pair[1]].append(pair[0])
    str_to_write += "  });\n"
    str_to_write += "  return aliases;\n"
    str_to_write += "}\n\n"
@@ -353,6 +356,21 @@ def gen_parameter_code(
    # tails
    str_to_write += "  return str_buf.str();\n"
    str_to_write += "}\n\n"
+
+    str_to_write += "const std::string Config::DumpAliases() {\n"
+    str_to_write += "  std::stringstream str_buf;\n"
+    str_to_write += '  str_buf << "{";\n'
+    for idx, name in enumerate(names):
+        if idx > 0:
+            str_to_write += ', ";\n'
+        aliases = '\\", \\"'.join([alias for alias in names_with_aliases[name]])
+        aliases = f'[\\"{aliases}\\"]' if aliases else '[]'
+        str_to_write += f'  str_buf << "\\"{name}\\": {aliases}'
+    str_to_write += '";\n'
+    str_to_write += '  str_buf << "}";\n'
+    str_to_write += "  return str_buf.str();\n"
+    str_to_write += "}\n\n"
+
    str_to_write += "}  // namespace LightGBM\n"
    with open(config_out_cpp, "w") as config_out_cpp_file:
        config_out_cpp_file.write(str_to_write)

--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -52,6 +52,17 @@ typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */
 */
 LIGHTGBM_C_EXPORT const char* LGBM_GetLastError();

+/*!
+ * \brief Dump all parameter names with their aliases to JSON.
+ * \note The JSON is a single object: each key is a main parameter name and each value is an
+ *       array (possibly empty) of its aliases; the main name itself is not repeated in the array.
+ * \param buffer_len Size in bytes of the buffer behind ``out_str``; if ``buffer_len < out_len``,
+ *                   nothing is copied and you should re-allocate the buffer and call again
+ * \param[out] out_len Number of bytes required for the output, including the terminating null character
+ * \param[out] out_str Pre-allocated buffer that receives the null-terminated JSON string
+ * \return 0 when succeed, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
+                                            int64_t* out_len,
+                                            char* out_str);
+
 /*!
 * \brief Register a callback function for log redirecting.
 * \param callback The callback function to register

--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -1041,6 +1041,7 @@ struct Config {
  static const std::unordered_set<std::string>& parameter_set();
  std::vector<std::vector<double>> auc_mu_weights_matrix;
  std::vector<std::vector<int>> interaction_constraints_vector;
+  static const std::string DumpAliases();

 private:
  void CheckParamConflict();

--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -325,106 +325,47 @@ class LGBMDeprecationWarning(UserWarning):


 class _ConfigAliases:
-    aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt",
-                                            "subsample_for_bin"},
-               "boosting": {"boosting",
-                            "boosting_type",
-                            "boost"},
-               "categorical_feature": {"categorical_feature",
-                                       "cat_feature",
-                                       "categorical_column",
-                                       "cat_column",
-                                       "categorical_features"},
-               "data_random_seed": {"data_random_seed",
-                                    "data_seed"},
-               "early_stopping_round": {"early_stopping_round",
-                                        "early_stopping_rounds",
-                                        "early_stopping",
-                                        "n_iter_no_change"},
-               "enable_bundle": {"enable_bundle",
-                                 "is_enable_bundle",
-                                 "bundle"},
-               "eval_at": {"eval_at",
-                           "ndcg_eval_at",
-                           "ndcg_at",
-                           "map_eval_at",
-                           "map_at"},
-               "group_column": {"group_column",
-                                "group",
-                                "group_id",
-                                "query_column",
-                                "query",
-                                "query_id"},
-               "header": {"header",
-                          "has_header"},
-               "ignore_column": {"ignore_column",
-                                 "ignore_feature",
-                                 "blacklist"},
-               "is_enable_sparse": {"is_enable_sparse",
-                                    "is_sparse",
-                                    "enable_sparse",
-                                    "sparse"},
-               "label_column": {"label_column",
-                                "label"},
-               "linear_tree": {"linear_tree",
-                               "linear_trees"},
-               "local_listen_port": {"local_listen_port",
-                                     "local_port",
-                                     "port"},
-               "machines": {"machines",
-                            "workers",
-                            "nodes"},
-               "max_bin": {"max_bin",
-                           "max_bins"},
-               "metric": {"metric",
-                          "metrics",
-                          "metric_types"},
-               "num_class": {"num_class",
-                             "num_classes"},
-               "num_iterations": {"num_iterations",
-                                  "num_iteration",
-                                  "n_iter",
-                                  "num_tree",
-                                  "num_trees",
-                                  "num_round",
-                                  "num_rounds",
-                                  "nrounds",
-                                  "num_boost_round",
-                                  "n_estimators",
-                                  "max_iter"},
-               "num_machines": {"num_machines",
-                                "num_machine"},
-               "num_threads": {"num_threads",
-                               "num_thread",
-                               "nthread",
-                               "nthreads",
-                               "n_jobs"},
-               "objective": {"objective",
-                             "objective_type",
-                             "app",
-                             "application",
-                             "loss"},
-               "pre_partition": {"pre_partition",
-                                 "is_pre_partition"},
-               "tree_learner": {"tree_learner",
-                                "tree",
-                                "tree_type",
-                                "tree_learner_type"},
-               "two_round": {"two_round",
-                             "two_round_loading",
-                             "use_two_round_loading"},
-               "weight_column": {"weight_column",
-                                 "weight"}}
+    # lazy evaluation to allow import without dynamic library, e.g., for docs generation
+    aliases = None
+
+    @staticmethod
+    def _get_all_param_aliases() -> Dict[str, Set[str]]:
+        """Fetch every parameter name together with its aliases from the native library.
+
+        Returns
+        -------
+        aliases : dict
+            Maps each main parameter name to a set containing that name plus the aliases
+            listed for it in the JSON written by ``LGBM_DumpParamAliases``.
+        """
+        def _dump(capacity):
+            # one call into the library with a freshly allocated buffer of ``capacity`` bytes
+            buf = ctypes.create_string_buffer(capacity)
+            buf_ptr = ctypes.c_char_p(*[ctypes.addressof(buf)])
+            reported_len = ctypes.c_int64(0)
+            _safe_call(_LIB.LGBM_DumpParamAliases(
+                ctypes.c_int64(capacity),
+                ctypes.byref(reported_len),
+                buf_ptr))
+            return buf, reported_len.value
+
+        initial_capacity = 1 << 20
+        string_buffer, needed_len = _dump(initial_capacity)
+        # the first buffer was too small: call again with exactly the reported size
+        if needed_len > initial_capacity:
+            string_buffer, _ = _dump(needed_len)
+
+        def _add_main_name(obj):
+            # the JSON arrays hold aliases only, so add the main name to its own set
+            return {name: set(alias_list) | {name} for name, alias_list in obj.items()}
+
+        return json.loads(string_buffer.value.decode('utf-8'), object_hook=_add_main_name)

    @classmethod
-    def get(cls, *args):
+    def get(cls, *args) -> Set[str]:
+        if cls.aliases is None:
+            cls.aliases = cls._get_all_param_aliases()
        ret = set()
        for i in args:
            ret |= cls.aliases.get(i, {i})
        return ret

    @classmethod
-    def get_by_alias(cls, *args):
+    def get_by_alias(cls, *args) -> Set[str]:
+        if cls.aliases is None:
+            cls.aliases = cls._get_all_param_aliases()
        ret = set(args)
        for arg in args:
            for aliases in cls.aliases.values():

--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -888,6 +888,18 @@ const char* LGBM_GetLastError() {
  return LastErrorMsg();
 }

+// Writes the JSON from Config::DumpAliases() into out_str when it fits in buffer_len bytes,
+// and always stores the number of bytes needed in *out_len.
+int LGBM_DumpParamAliases(int64_t buffer_len,
+                          int64_t* out_len,
+                          char* out_str) {
+  API_BEGIN();
+  const std::string json = Config::DumpAliases();
+  // the reported length counts the terminating '\0' as well
+  const int64_t needed = static_cast<int64_t>(json.size()) + 1;
+  *out_len = needed;
+  if (buffer_len >= needed) {
+    // copy only when the caller's buffer can hold the whole string
+    std::memcpy(out_str, json.c_str(), needed);
+  }
+  API_END();
+}
+
 int LGBM_RegisterLogCallback(void (*callback)(const char*)) {
  API_BEGIN();
  Log::ResetCallBack(callback);

--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -756,4 +756,143 @@ std::string Config::SaveMembersToString() const {
  return str_buf.str();
 }

+const std::string Config::DumpAliases() {
+  std::stringstream str_buf;
+  str_buf << "{";
+  str_buf << "\"config\": [\"config_file\"], ";
+  str_buf << "\"task\": [\"task_type\"], ";
+  str_buf << "\"objective\": [\"objective_type\", \"app\", \"application\", \"loss\"], ";
+  str_buf << "\"boosting\": [\"boosting_type\", \"boost\"], ";
+  str_buf << "\"data\": [\"train\", \"train_data\", \"train_data_file\", \"data_filename\"], ";
+  str_buf << "\"valid\": [\"test\", \"valid_data\", \"valid_data_file\", \"test_data\", \"test_data_file\", \"valid_filenames\"], ";
+  str_buf << "\"num_iterations\": [\"num_iteration\", \"n_iter\", \"num_tree\", \"num_trees\", \"num_round\", \"num_rounds\", \"nrounds\", \"num_boost_round\", \"n_estimators\", \"max_iter\"], ";
+  str_buf << "\"learning_rate\": [\"shrinkage_rate\", \"eta\"], ";
+  str_buf << "\"num_leaves\": [\"num_leaf\", \"max_leaves\", \"max_leaf\", \"max_leaf_nodes\"], ";
+  str_buf << "\"tree_learner\": [\"tree\", \"tree_type\", \"tree_learner_type\"], ";
+  str_buf << "\"num_threads\": [\"num_thread\", \"nthread\", \"nthreads\", \"n_jobs\"], ";
+  str_buf << "\"device_type\": [\"device\"], ";
+  str_buf << "\"seed\": [\"random_seed\", \"random_state\"], ";
+  str_buf << "\"deterministic\": [], ";
+  str_buf << "\"force_col_wise\": [], ";
+  str_buf << "\"force_row_wise\": [], ";
+  str_buf << "\"histogram_pool_size\": [\"hist_pool_size\"], ";
+  str_buf << "\"max_depth\": [], ";
+  str_buf << "\"min_data_in_leaf\": [\"min_data_per_leaf\", \"min_data\", \"min_child_samples\", \"min_samples_leaf\"], ";
+  str_buf << "\"min_sum_hessian_in_leaf\": [\"min_sum_hessian_per_leaf\", \"min_sum_hessian\", \"min_hessian\", \"min_child_weight\"], ";
+  str_buf << "\"bagging_fraction\": [\"sub_row\", \"subsample\", \"bagging\"], ";
+  str_buf << "\"pos_bagging_fraction\": [\"pos_sub_row\", \"pos_subsample\", \"pos_bagging\"], ";
+  str_buf << "\"neg_bagging_fraction\": [\"neg_sub_row\", \"neg_subsample\", \"neg_bagging\"], ";
+  str_buf << "\"bagging_freq\": [\"subsample_freq\"], ";
+  str_buf << "\"bagging_seed\": [\"bagging_fraction_seed\"], ";
+  str_buf << "\"feature_fraction\": [\"sub_feature\", \"colsample_bytree\"], ";
+  str_buf << "\"feature_fraction_bynode\": [\"sub_feature_bynode\", \"colsample_bynode\"], ";
+  str_buf << "\"feature_fraction_seed\": [], ";
+  str_buf << "\"extra_trees\": [\"extra_tree\"], ";
+  str_buf << "\"extra_seed\": [], ";
+  str_buf << "\"early_stopping_round\": [\"early_stopping_rounds\", \"early_stopping\", \"n_iter_no_change\"], ";
+  str_buf << "\"first_metric_only\": [], ";
+  str_buf << "\"max_delta_step\": [\"max_tree_output\", \"max_leaf_output\"], ";
+  str_buf << "\"lambda_l1\": [\"reg_alpha\", \"l1_regularization\"], ";
+  str_buf << "\"lambda_l2\": [\"reg_lambda\", \"lambda\", \"l2_regularization\"], ";
+  str_buf << "\"linear_lambda\": [], ";
+  str_buf << "\"min_gain_to_split\": [\"min_split_gain\"], ";
+  str_buf << "\"drop_rate\": [\"rate_drop\"], ";
+  str_buf << "\"max_drop\": [], ";
+  str_buf << "\"skip_drop\": [], ";
+  str_buf << "\"xgboost_dart_mode\": [], ";
+  str_buf << "\"uniform_drop\": [], ";
+  str_buf << "\"drop_seed\": [], ";
+  str_buf << "\"top_rate\": [], ";
+  str_buf << "\"other_rate\": [], ";
+  str_buf << "\"min_data_per_group\": [], ";
+  str_buf << "\"max_cat_threshold\": [], ";
+  str_buf << "\"cat_l2\": [], ";
+  str_buf << "\"cat_smooth\": [], ";
+  str_buf << "\"max_cat_to_onehot\": [], ";
+  str_buf << "\"top_k\": [\"topk\"], ";
+  str_buf << "\"monotone_constraints\": [\"mc\", \"monotone_constraint\", \"monotonic_cst\"], ";
+  str_buf << "\"monotone_constraints_method\": [\"monotone_constraining_method\", \"mc_method\"], ";
+  str_buf << "\"monotone_penalty\": [\"monotone_splits_penalty\", \"ms_penalty\", \"mc_penalty\"], ";
+  str_buf << "\"feature_contri\": [\"feature_contrib\", \"fc\", \"fp\", \"feature_penalty\"], ";
+  str_buf << "\"forcedsplits_filename\": [\"fs\", \"forced_splits_filename\", \"forced_splits_file\", \"forced_splits\"], ";
+  str_buf << "\"refit_decay_rate\": [], ";
+  str_buf << "\"cegb_tradeoff\": [], ";
+  str_buf << "\"cegb_penalty_split\": [], ";
+  str_buf << "\"cegb_penalty_feature_lazy\": [], ";
+  str_buf << "\"cegb_penalty_feature_coupled\": [], ";
+  str_buf << "\"path_smooth\": [], ";
+  str_buf << "\"interaction_constraints\": [], ";
+  str_buf << "\"verbosity\": [\"verbose\"], ";
+  str_buf << "\"input_model\": [\"model_input\", \"model_in\"], ";
+  str_buf << "\"output_model\": [\"model_output\", \"model_out\"], ";
+  str_buf << "\"saved_feature_importance_type\": [], ";
+  str_buf << "\"snapshot_freq\": [\"save_period\"], ";
+  str_buf << "\"linear_tree\": [\"linear_trees\"], ";
+  str_buf << "\"max_bin\": [\"max_bins\"], ";
+  str_buf << "\"max_bin_by_feature\": [], ";
+  str_buf << "\"min_data_in_bin\": [], ";
+  str_buf << "\"bin_construct_sample_cnt\": [\"subsample_for_bin\"], ";
+  str_buf << "\"data_random_seed\": [\"data_seed\"], ";
+  str_buf << "\"is_enable_sparse\": [\"is_sparse\", \"enable_sparse\", \"sparse\"], ";
+  str_buf << "\"enable_bundle\": [\"is_enable_bundle\", \"bundle\"], ";
+  str_buf << "\"use_missing\": [], ";
+  str_buf << "\"zero_as_missing\": [], ";
+  str_buf << "\"feature_pre_filter\": [], ";
+  str_buf << "\"pre_partition\": [\"is_pre_partition\"], ";
+  str_buf << "\"two_round\": [\"two_round_loading\", \"use_two_round_loading\"], ";
+  str_buf << "\"header\": [\"has_header\"], ";
+  str_buf << "\"label_column\": [\"label\"], ";
+  str_buf << "\"weight_column\": [\"weight\"], ";
+  str_buf << "\"group_column\": [\"group\", \"group_id\", \"query_column\", \"query\", \"query_id\"], ";
+  str_buf << "\"ignore_column\": [\"ignore_feature\", \"blacklist\"], ";
+  str_buf << "\"categorical_feature\": [\"cat_feature\", \"categorical_column\", \"cat_column\", \"categorical_features\"], ";
+  str_buf << "\"forcedbins_filename\": [], ";
+  str_buf << "\"save_binary\": [\"is_save_binary\", \"is_save_binary_file\"], ";
+  str_buf << "\"precise_float_parser\": [], ";
+  str_buf << "\"parser_config_file\": [], ";
+  str_buf << "\"start_iteration_predict\": [], ";
+  str_buf << "\"num_iteration_predict\": [], ";
+  str_buf << "\"predict_raw_score\": [\"is_predict_raw_score\", \"predict_rawscore\", \"raw_score\"], ";
+  str_buf << "\"predict_leaf_index\": [\"is_predict_leaf_index\", \"leaf_index\"], ";
+  str_buf << "\"predict_contrib\": [\"is_predict_contrib\", \"contrib\"], ";
+  str_buf << "\"predict_disable_shape_check\": [], ";
+  str_buf << "\"pred_early_stop\": [], ";
+  str_buf << "\"pred_early_stop_freq\": [], ";
+  str_buf << "\"pred_early_stop_margin\": [], ";
+  str_buf << "\"output_result\": [\"predict_result\", \"prediction_result\", \"predict_name\", \"prediction_name\", \"pred_name\", \"name_pred\"], ";
+  str_buf << "\"convert_model_language\": [], ";
+  str_buf << "\"convert_model\": [\"convert_model_file\"], ";
+  str_buf << "\"objective_seed\": [], ";
+  str_buf << "\"num_class\": [\"num_classes\"], ";
+  str_buf << "\"is_unbalance\": [\"unbalance\", \"unbalanced_sets\"], ";
+  str_buf << "\"scale_pos_weight\": [], ";
+  str_buf << "\"sigmoid\": [], ";
+  str_buf << "\"boost_from_average\": [], ";
+  str_buf << "\"reg_sqrt\": [], ";
+  str_buf << "\"alpha\": [], ";
+  str_buf << "\"fair_c\": [], ";
+  str_buf << "\"poisson_max_delta_step\": [], ";
+  str_buf << "\"tweedie_variance_power\": [], ";
+  str_buf << "\"lambdarank_truncation_level\": [], ";
+  str_buf << "\"lambdarank_norm\": [], ";
+  str_buf << "\"label_gain\": [], ";
+  str_buf << "\"metric\": [\"metrics\", \"metric_types\"], ";
+  str_buf << "\"metric_freq\": [\"output_freq\"], ";
+  str_buf << "\"is_provide_training_metric\": [\"training_metric\", \"is_training_metric\", \"train_metric\"], ";
+  str_buf << "\"eval_at\": [\"ndcg_eval_at\", \"ndcg_at\", \"map_eval_at\", \"map_at\"], ";
+  str_buf << "\"multi_error_top_k\": [], ";
+  str_buf << "\"auc_mu_weights\": [], ";
+  str_buf << "\"num_machines\": [\"num_machine\"], ";
+  str_buf << "\"local_listen_port\": [\"local_port\", \"port\"], ";
+  str_buf << "\"time_out\": [], ";
+  str_buf << "\"machine_list_filename\": [\"machine_list_file\", \"machine_list\", \"mlist\"], ";
+  str_buf << "\"machines\": [\"workers\", \"nodes\"], ";
+  str_buf << "\"gpu_platform_id\": [], ";
+  str_buf << "\"gpu_device_id\": [], ";
+  str_buf << "\"gpu_use_dp\": [], ";
+  str_buf << "\"num_gpu\": []";
+  str_buf << "}";
+  return str_buf.str();
+}
+
 }  // namespace LightGBM
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -569,3 +569,13 @@ def test_smoke_custom_parser(tmp_path):
    with pytest.raises(lgb.basic.LightGBMError,
                       match="Cannot find parser class 'dummy', please register first or check config format"):
        data.construct()
+
+
+def test_param_aliases():
+    """Check the alias table reported by the library: container types, size and a known lookup."""
+    # `_ConfigAliases.aliases` starts as None and is only filled by `get()` / `get_by_alias()`,
+    # so read the table from the library directly instead of relying on an earlier call
+    aliases = lgb.basic._ConfigAliases._get_all_param_aliases()
+    assert isinstance(aliases, dict)
+    assert len(aliases) > 100
+    assert all(isinstance(i, set) for i in aliases.values())
+    assert all(len(i) >= 1 for i in aliases.values())
+    # every main parameter name is a member of its own alias set
+    assert all(k in v for k, v in aliases.items())
+    assert lgb.basic._ConfigAliases.get('config', 'task') == {'config', 'config_file', 'task', 'task_type'}