Unverified commit aab8fc18, authored by Nikita Titov, committed by GitHub
Browse files

fix param aliases (#4387)

parent 0701a32d
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# [description] List of respected parameter aliases specific to lgb.Dataset. Wrapped in a function to # [description] List of respected parameter aliases specific to lgb.Dataset. Wrapped in a function to
# take advantage of lazy evaluation (so it doesn't matter what order # take advantage of lazy evaluation (so it doesn't matter what order
# R sources files during installation). # R sources files during installation).
# [return] A named list, where each key is a parameter relevant to lgb.DataSet and each value is a character # [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character
# vector of corresponding aliases. # vector of corresponding aliases.
.DATASET_PARAMETERS <- function() { .DATASET_PARAMETERS <- function() {
return( return(
...@@ -57,6 +57,10 @@ ...@@ -57,6 +57,10 @@
"label_column" "label_column"
, "label" , "label"
) )
, "linear_tree" = c(
"linear_tree"
, "linear_trees"
)
, "max_bin" = "max_bin" , "max_bin" = "max_bin"
, "max_bin_by_feature" = "max_bin_by_feature" , "max_bin_by_feature" = "max_bin_by_feature"
, "min_data_in_bin" = "min_data_in_bin" , "min_data_in_bin" = "min_data_in_bin"
...@@ -64,6 +68,7 @@ ...@@ -64,6 +68,7 @@
"pre_partition" "pre_partition"
, "is_pre_partition" , "is_pre_partition"
) )
, "precise_float_parser" = "precise_float_parser"
, "two_round" = c( , "two_round" = c(
"two_round" "two_round"
, "two_round_loading" , "two_round_loading"
......
...@@ -139,28 +139,6 @@ Core Parameters ...@@ -139,28 +139,6 @@ Core Parameters
- **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations - **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
- ``linear_tree`` :raw-html:`<a id="linear_tree" title="Permalink to this parameter" href="#linear_tree">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``linear_trees``
- fit piecewise linear gradient boosting tree
- tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant
- the linear model at each leaf includes all the numerical features in that leaf's branch
- categorical features are used for splits as normal but are not used in the linear models
- missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
- it is recommended to rescale data before training so that features have similar mean and standard deviation
- **Note**: only works with CPU and ``serial`` tree learner
- **Note**: ``regression_l1`` objective is not supported with linear tree boosting
- **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
- **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
- ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename`` - ``data`` :raw-html:`<a id="data" title="Permalink to this parameter" href="#data">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename``
- path of training data, LightGBM will train from this data - path of training data, LightGBM will train from this data
...@@ -672,6 +650,28 @@ IO Parameters ...@@ -672,6 +650,28 @@ IO Parameters
Dataset Parameters Dataset Parameters
~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~
- ``linear_tree`` :raw-html:`<a id="linear_tree" title="Permalink to this parameter" href="#linear_tree">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``linear_trees``
- fit piecewise linear gradient boosting tree
- tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant
- the linear model at each leaf includes all the numerical features in that leaf's branch
- categorical features are used for splits as normal but are not used in the linear models
- missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
- it is recommended to rescale data before training so that features have similar mean and standard deviation
- **Note**: only works with CPU and ``serial`` tree learner
- **Note**: ``regression_l1`` objective is not supported with linear tree boosting
- **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
- **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
- ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, constraints: ``max_bin > 1`` - ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, constraints: ``max_bin > 1``
- max number of bins that feature values will be bucketed in - max number of bins that feature values will be bucketed in
......
...@@ -149,19 +149,6 @@ struct Config { ...@@ -149,19 +149,6 @@ struct Config {
// descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations
std::string boosting = "gbdt"; std::string boosting = "gbdt";
// alias = linear_trees
// desc = fit piecewise linear gradient boosting tree
// descl2 = tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant
// descl2 = the linear model at each leaf includes all the numerical features in that leaf's branch
// descl2 = categorical features are used for splits as normal but are not used in the linear models
// descl2 = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
// descl2 = it is recommended to rescale data before training so that features have similar mean and standard deviation
// descl2 = **Note**: only works with CPU and ``serial`` tree learner
// descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting
// descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
// descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
bool linear_tree = false;
// alias = train, train_data, train_data_file, data_filename // alias = train, train_data, train_data_file, data_filename
// desc = path of training data, LightGBM will train from this data // desc = path of training data, LightGBM will train from this data
// desc = **Note**: can be used only in CLI version // desc = **Note**: can be used only in CLI version
...@@ -586,6 +573,19 @@ struct Config { ...@@ -586,6 +573,19 @@ struct Config {
#pragma region Dataset Parameters #pragma region Dataset Parameters
// alias = linear_trees
// desc = fit piecewise linear gradient boosting tree
// descl2 = tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant
// descl2 = the linear model at each leaf includes all the numerical features in that leaf's branch
// descl2 = categorical features are used for splits as normal but are not used in the linear models
// descl2 = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R
// descl2 = it is recommended to rescale data before training so that features have similar mean and standard deviation
// descl2 = **Note**: only works with CPU and ``serial`` tree learner
// descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting
// descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM
// descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
bool linear_tree = false;
// check = >1 // check = >1
// desc = max number of bins that feature values will be bucketed in // desc = max number of bins that feature values will be bucketed in
// desc = small number of bins may reduce training accuracy but may increase general power (deal with over-fitting) // desc = small number of bins may reduce training accuracy but may increase general power (deal with over-fitting)
......
...@@ -311,6 +311,8 @@ class _ConfigAliases: ...@@ -311,6 +311,8 @@ class _ConfigAliases:
"sparse"}, "sparse"},
"label_column": {"label_column", "label_column": {"label_column",
"label"}, "label"},
"linear_tree": {"linear_tree",
"linear_trees"},
"local_listen_port": {"local_listen_port", "local_listen_port": {"local_listen_port",
"local_port", "local_port",
"port"}, "port"},
...@@ -1144,6 +1146,7 @@ class Dataset: ...@@ -1144,6 +1146,7 @@ class Dataset:
"max_bin_by_feature", "max_bin_by_feature",
"min_data_in_bin", "min_data_in_bin",
"pre_partition", "pre_partition",
"precise_float_parser",
"two_round", "two_round",
"use_missing", "use_missing",
"weight_column", "weight_column",
...@@ -3180,7 +3183,11 @@ class Booster: ...@@ -3180,7 +3183,11 @@ class Booster:
_safe_call(_LIB.LGBM_BoosterGetLinear( _safe_call(_LIB.LGBM_BoosterGetLinear(
self.handle, self.handle,
ctypes.byref(out_is_linear))) ctypes.byref(out_is_linear)))
new_params = deepcopy(self.params) new_params = _choose_param_value(
main_param_name="linear_tree",
params=self.params,
default_value=None
)
new_params["linear_tree"] = out_is_linear.value new_params["linear_tree"] = out_is_linear.value
train_set = Dataset(data, label, silent=True, params=new_params) train_set = Dataset(data, label, silent=True, params=new_params)
new_params['refit_decay_rate'] = decay_rate new_params['refit_decay_rate'] = decay_rate
......
...@@ -287,9 +287,13 @@ class Booster { ...@@ -287,9 +287,13 @@ class Booster {
"You need to set `feature_pre_filter=false` to dynamically change " "You need to set `feature_pre_filter=false` to dynamically change "
"the `min_data_in_leaf`."); "the `min_data_in_leaf`.");
} }
if (new_param.count("linear_tree") && (new_config.linear_tree != old_config.linear_tree)) { if (new_param.count("linear_tree") && new_config.linear_tree != old_config.linear_tree) {
Log::Fatal("Cannot change linear_tree after constructed Dataset handle."); Log::Fatal("Cannot change linear_tree after constructed Dataset handle.");
} }
if (new_param.count("precise_float_parser") &&
new_config.precise_float_parser != old_config.precise_float_parser) {
Log::Fatal("Cannot change precise_float_parser after constructed Dataset handle.");
}
} }
void ResetConfig(const char* parameters) { void ResetConfig(const char* parameters) {
......
...@@ -16,7 +16,6 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() { ...@@ -16,7 +16,6 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
{"application", "objective"}, {"application", "objective"},
{"boosting_type", "boosting"}, {"boosting_type", "boosting"},
{"boost", "boosting"}, {"boost", "boosting"},
{"linear_trees", "linear_tree"},
{"train", "data"}, {"train", "data"},
{"train_data", "data"}, {"train_data", "data"},
{"train_data_file", "data"}, {"train_data_file", "data"},
...@@ -106,6 +105,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() { ...@@ -106,6 +105,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
{"model_output", "output_model"}, {"model_output", "output_model"},
{"model_out", "output_model"}, {"model_out", "output_model"},
{"save_period", "snapshot_freq"}, {"save_period", "snapshot_freq"},
{"linear_trees", "linear_tree"},
{"subsample_for_bin", "bin_construct_sample_cnt"}, {"subsample_for_bin", "bin_construct_sample_cnt"},
{"data_seed", "data_random_seed"}, {"data_seed", "data_random_seed"},
{"is_sparse", "is_enable_sparse"}, {"is_sparse", "is_enable_sparse"},
...@@ -176,7 +176,6 @@ const std::unordered_set<std::string>& Config::parameter_set() { ...@@ -176,7 +176,6 @@ const std::unordered_set<std::string>& Config::parameter_set() {
"task", "task",
"objective", "objective",
"boosting", "boosting",
"linear_tree",
"data", "data",
"valid", "valid",
"num_iterations", "num_iterations",
...@@ -241,6 +240,7 @@ const std::unordered_set<std::string>& Config::parameter_set() { ...@@ -241,6 +240,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
"output_model", "output_model",
"saved_feature_importance_type", "saved_feature_importance_type",
"snapshot_freq", "snapshot_freq",
"linear_tree",
"max_bin", "max_bin",
"max_bin_by_feature", "max_bin_by_feature",
"min_data_in_bin", "min_data_in_bin",
...@@ -309,8 +309,6 @@ const std::unordered_set<std::string>& Config::parameter_set() { ...@@ -309,8 +309,6 @@ const std::unordered_set<std::string>& Config::parameter_set() {
void Config::GetMembersFromString(const std::unordered_map<std::string, std::string>& params) { void Config::GetMembersFromString(const std::unordered_map<std::string, std::string>& params) {
std::string tmp_str = ""; std::string tmp_str = "";
GetBool(params, "linear_tree", &linear_tree);
GetString(params, "data", &data); GetString(params, "data", &data);
if (GetString(params, "valid", &tmp_str)) { if (GetString(params, "valid", &tmp_str)) {
...@@ -483,6 +481,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str ...@@ -483,6 +481,8 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
GetInt(params, "snapshot_freq", &snapshot_freq); GetInt(params, "snapshot_freq", &snapshot_freq);
GetBool(params, "linear_tree", &linear_tree);
GetInt(params, "max_bin", &max_bin); GetInt(params, "max_bin", &max_bin);
CHECK_GT(max_bin, 1); CHECK_GT(max_bin, 1);
...@@ -634,7 +634,6 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str ...@@ -634,7 +634,6 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
std::string Config::SaveMembersToString() const { std::string Config::SaveMembersToString() const {
std::stringstream str_buf; std::stringstream str_buf;
str_buf << "[linear_tree: " << linear_tree << "]\n";
str_buf << "[data: " << data << "]\n"; str_buf << "[data: " << data << "]\n";
str_buf << "[valid: " << Common::Join(valid, ",") << "]\n"; str_buf << "[valid: " << Common::Join(valid, ",") << "]\n";
str_buf << "[num_iterations: " << num_iterations << "]\n"; str_buf << "[num_iterations: " << num_iterations << "]\n";
...@@ -693,6 +692,7 @@ std::string Config::SaveMembersToString() const { ...@@ -693,6 +692,7 @@ std::string Config::SaveMembersToString() const {
str_buf << "[interaction_constraints: " << interaction_constraints << "]\n"; str_buf << "[interaction_constraints: " << interaction_constraints << "]\n";
str_buf << "[verbosity: " << verbosity << "]\n"; str_buf << "[verbosity: " << verbosity << "]\n";
str_buf << "[saved_feature_importance_type: " << saved_feature_importance_type << "]\n"; str_buf << "[saved_feature_importance_type: " << saved_feature_importance_type << "]\n";
str_buf << "[linear_tree: " << linear_tree << "]\n";
str_buf << "[max_bin: " << max_bin << "]\n"; str_buf << "[max_bin: " << max_bin << "]\n";
str_buf << "[max_bin_by_feature: " << Common::Join(max_bin_by_feature, ",") << "]\n"; str_buf << "[max_bin_by_feature: " << Common::Join(max_bin_by_feature, ",") << "]\n";
str_buf << "[min_data_in_bin: " << min_data_in_bin << "]\n"; str_buf << "[min_data_in_bin: " << min_data_in_bin << "]\n";
......
...@@ -2345,6 +2345,7 @@ def test_dataset_update_params(): ...@@ -2345,6 +2345,7 @@ def test_dataset_update_params():
"ignore_column": 0, "ignore_column": 0,
"min_data_in_leaf": 10, "min_data_in_leaf": 10,
"linear_tree": False, "linear_tree": False,
"precise_float_parser": True,
"verbose": -1} "verbose": -1}
unchangeable_params = {"max_bin": 150, unchangeable_params = {"max_bin": 150,
"max_bin_by_feature": [30, 5], "max_bin_by_feature": [30, 5],
...@@ -2366,7 +2367,8 @@ def test_dataset_update_params(): ...@@ -2366,7 +2367,8 @@ def test_dataset_update_params():
"ignore_column": 1, "ignore_column": 1,
"forcedbins_filename": "/some/path/forcedbins.json", "forcedbins_filename": "/some/path/forcedbins.json",
"min_data_in_leaf": 2, "min_data_in_leaf": 2,
"linear_tree": True} "linear_tree": True,
"precise_float_parser": False}
X = np.random.random((100, 2)) X = np.random.random((100, 2))
y = np.random.random(100) y = np.random.random(100)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment