Commit 3abff370 authored by Guolin Ke's avatar Guolin Ke
Browse files

Support disabling missing value handling.

parent 348c2b51
......@@ -128,6 +128,8 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s
* `max_bin`, default=`255`, type=int
* Maximum number of bins that feature values will be bucketed into. A small number of bins may reduce training accuracy but may improve generalization (helps deal with over-fitting).
* LightGBM will automatically compress memory according to `max_bin`. For example, LightGBM will use `uint8_t` for feature values if `max_bin=255`.
* `min_data_in_bin`, default=`5`, type=int
* Minimum number of data points inside one bin; use this to avoid one-data-one-bin (which may cause over-fitting).
* `data_random_seed`, default=`1`, type=int
* Random seed for data partitioning in parallel learning (not including feature parallel).
* `output_model`, default=`LightGBM_model.txt`, type=string, alias=`model_output`,`model_out`
......@@ -190,6 +192,8 @@ The parameter format is `key1=value1 key2=value2 ... ` . And parameters can be s
* `num_iteration_predict`, default=`-1`, type=int
* Only used in the prediction task; specifies how many trained iterations will be used in prediction.
* `<= 0` means no limit
* `use_missing`, default=`true`, type=bool
* Setting this to `false` will disable the special handling of missing values.
## Objective parameters
......
......@@ -208,6 +208,8 @@ public:
int gpu_device_id = -1;
/*! \brief Set to true to use double precision math on GPU (default using single precision) */
bool gpu_use_dp = false;
/*! \brief Set to false to disable the handling of missing values */
bool use_missing = true;
LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params) override;
};
......
......@@ -328,6 +328,7 @@ void TreeConfig::Set(const std::unordered_map<std::string, std::string>& params)
GetInt(params, "gpu_platform_id", &gpu_platform_id);
GetInt(params, "gpu_device_id", &gpu_device_id);
GetBool(params, "gpu_use_dp", &gpu_use_dp);
GetBool(params, "use_missing", &use_missing);
}
......
......@@ -82,15 +82,18 @@ public:
double gain_shift = GetLeafSplitGain(sum_gradient, sum_hessian,
meta_->tree_config->lambda_l1, meta_->tree_config->lambda_l2);
double min_gain_shift = gain_shift + meta_->tree_config->min_gain_to_split;
FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_gain_shift, output, 0);
// Zero is not in leftmost or rightmost
if (static_cast<int>(meta_->default_bin) > 0 && static_cast<int>(meta_->default_bin) < meta_->num_bin - 1) {
if (meta_->tree_config->use_missing) {
FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_gain_shift, output, 0);
// Zero is not in leftmost or rightmost
if (static_cast<int>(meta_->default_bin) > 0 && static_cast<int>(meta_->default_bin) < meta_->num_bin - 1) {
FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_gain_shift, output, meta_->default_bin);
}
if (meta_->num_bin > 2) {
FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_gain_shift, output, meta_->num_bin - 1);
}
} else {
FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_gain_shift, output, meta_->default_bin);
}
if (meta_->num_bin > 2) {
FindBestThresholdSequence(sum_gradient, sum_hessian, num_data, min_gain_shift, output, meta_->num_bin - 1);
}
output->gain -= min_gain_shift;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment