Unverified commit 5dcd4be9, authored by Nikita Titov and committed by GitHub
Browse files

[python] handle params aliases centralized (#2489)

* handle aliases centralized

* convert aliases dict to class
parent fbf95687
...@@ -170,6 +170,57 @@ class LightGBMError(Exception): ...@@ -170,6 +170,57 @@ class LightGBMError(Exception):
pass pass
class _ConfigAliases(object):
aliases = {"boosting": {"boosting",
"boosting_type",
"boost"},
"categorical_feature": {"categorical_feature",
"cat_feature",
"categorical_column",
"cat_column"},
"early_stopping_round": {"early_stopping_round",
"early_stopping_rounds",
"early_stopping",
"n_iter_no_change"},
"eval_at": {"eval_at",
"ndcg_eval_at",
"ndcg_at",
"map_eval_at",
"map_at"},
"header": {"header",
"has_header"},
"machines": {"machines",
"workers",
"nodes"},
"metric": {"metric",
"metrics",
"metric_types"},
"num_class": {"num_class",
"num_classes"},
"num_iterations": {"num_iterations",
"num_iteration",
"n_iter",
"num_tree",
"num_trees",
"num_round",
"num_rounds",
"num_boost_round",
"n_estimators"},
"objective": {"objective",
"objective_type",
"app",
"application"},
"verbosity": {"verbosity",
"verbose"}}
@classmethod
def get(cls, *args):
ret = set()
for i in args:
ret |= cls.aliases.get(i, set())
return ret
MAX_INT32 = (1 << 31) - 1 MAX_INT32 = (1 << 31) - 1
"""Macro definition of data type in C API of LightGBM""" """Macro definition of data type in C API of LightGBM"""
...@@ -741,8 +792,7 @@ class Dataset(object): ...@@ -741,8 +792,7 @@ class Dataset(object):
data_has_header = False data_has_header = False
if isinstance(data, string_type): if isinstance(data, string_type):
# check data has header or not # check data has header or not
if self.params.get("has_header", False) or self.params.get("header", False): data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
data_has_header = True
init_score = predictor.predict(data, init_score = predictor.predict(data,
raw_score=True, raw_score=True,
data_has_header=data_has_header, data_has_header=data_has_header,
...@@ -793,7 +843,7 @@ class Dataset(object): ...@@ -793,7 +843,7 @@ class Dataset(object):
'Please use {0} argument of the Dataset constructor to pass this parameter.' 'Please use {0} argument of the Dataset constructor to pass this parameter.'
.format(key)) .format(key))
# user can set verbose with params, it has higher priority # user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent: if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1 params["verbose"] = -1
# get categorical features # get categorical features
if categorical_feature is not None: if categorical_feature is not None:
...@@ -810,10 +860,10 @@ class Dataset(object): ...@@ -810,10 +860,10 @@ class Dataset(object):
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature" raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
.format(type(name).__name__, name)) .format(type(name).__name__, name))
if categorical_indices: if categorical_indices:
if "categorical_feature" in params or "categorical_column" in params: for cat_alias in _ConfigAliases.get("categorical_feature"):
warnings.warn('categorical_feature in param dict is overridden.') if cat_alias in params:
params.pop("categorical_feature", None) warnings.warn('{} in param dict is overridden.'.format(cat_alias))
params.pop("categorical_column", None) params.pop(cat_alias, None)
params['categorical_column'] = sorted(categorical_indices) params['categorical_column'] = sorted(categorical_indices)
params_str = param_dict_to_str(params) params_str = param_dict_to_str(params)
...@@ -1259,7 +1309,9 @@ class Dataset(object): ...@@ -1259,7 +1309,9 @@ class Dataset(object):
""" """
if predictor is self._predictor: if predictor is self._predictor:
return self return self
if self.data is not None or (self.used_indices is not None and self.reference is not None and self.reference.data is not None): if self.data is not None or (self.used_indices is not None
and self.reference is not None
and self.reference.data is not None):
self._predictor = predictor self._predictor = predictor
return self._free_handle() return self._free_handle()
else: else:
...@@ -1634,7 +1686,7 @@ class Booster(object): ...@@ -1634,7 +1686,7 @@ class Booster(object):
self.best_score = {} self.best_score = {}
params = {} if params is None else copy.deepcopy(params) params = {} if params is None else copy.deepcopy(params)
# user can set verbose with params, it has higher priority # user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent: if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
params["verbose"] = -1 params["verbose"] = -1
if train_set is not None: if train_set is not None:
# Training task # Training task
...@@ -1643,7 +1695,7 @@ class Booster(object): ...@@ -1643,7 +1695,7 @@ class Booster(object):
.format(type(train_set).__name__)) .format(type(train_set).__name__))
params_str = param_dict_to_str(params) params_str = param_dict_to_str(params)
# set network if necessary # set network if necessary
for alias in ["machines", "workers", "nodes"]: for alias in _ConfigAliases.get("machines"):
if alias in params: if alias in params:
machines = params[alias] machines = params[alias]
if isinstance(machines, string_type): if isinstance(machines, string_type):
...@@ -1863,7 +1915,7 @@ class Booster(object): ...@@ -1863,7 +1915,7 @@ class Booster(object):
self : Booster self : Booster
Booster with new parameters. Booster with new parameters.
""" """
if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')): if any(metric_alias in params for metric_alias in _ConfigAliases.get("metric")):
self.__need_reload_eval_info = True self.__need_reload_eval_info = True
params_str = param_dict_to_str(params) params_str = param_dict_to_str(params)
if params_str: if params_str:
......
...@@ -7,6 +7,7 @@ import collections ...@@ -7,6 +7,7 @@ import collections
import warnings import warnings
from operator import gt, lt from operator import gt, lt
from .basic import _ConfigAliases
from .compat import range_ from .compat import range_
...@@ -130,9 +131,7 @@ def reset_parameter(**kwargs): ...@@ -130,9 +131,7 @@ def reset_parameter(**kwargs):
def _callback(env): def _callback(env):
new_parameters = {} new_parameters = {}
for key, value in kwargs.items(): for key, value in kwargs.items():
if key in ['num_class', 'num_classes', if key in _ConfigAliases.get("num_class", "boosting", "metric"):
'boosting', 'boost', 'boosting_type',
'metric', 'metrics', 'metric_types']:
raise RuntimeError("Cannot reset {} during training".format(repr(key))) raise RuntimeError("Cannot reset {} during training".format(repr(key)))
if isinstance(value, list): if isinstance(value, list):
if len(value) != env.end_iteration - env.begin_iteration: if len(value) != env.end_iteration - env.begin_iteration:
...@@ -184,10 +183,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True): ...@@ -184,10 +183,8 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
first_metric = [''] first_metric = ['']
def _init(env): def _init(env):
enabled[0] = not any((boost_alias in env.params enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
and env.params[boost_alias] == 'dart') for boost_alias in ('boosting', in _ConfigAliases.get("boosting"))
'boosting_type',
'boost'))
if not enabled[0]: if not enabled[0]:
warnings.warn('Early stopping is not available in dart mode') warnings.warn('Early stopping is not available in dart mode')
return return
......
...@@ -11,7 +11,7 @@ from operator import attrgetter ...@@ -11,7 +11,7 @@ from operator import attrgetter
import numpy as np import numpy as np
from . import callback from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold, from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
string_type, integer_types, range_, zip_) string_type, integer_types, range_, zip_)
...@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100, ...@@ -140,14 +140,15 @@ def train(params, train_set, num_boost_round=100,
# create predictor first # create predictor first
params = copy.deepcopy(params) params = copy.deepcopy(params)
if fobj is not None: if fobj is not None:
for obj_alias in _ConfigAliases.get("objective"):
params.pop(obj_alias, None)
params['objective'] = 'none' params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", for alias in _ConfigAliases.get("num_iterations"):
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
if alias in params: if alias in params:
num_boost_round = params.pop(alias) num_boost_round = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
break break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]: for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params: if alias in params:
early_stopping_rounds = params.pop(alias) early_stopping_rounds = params.pop(alias)
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
...@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -314,7 +315,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
flatted_group = np.zeros(num_data, dtype=np.int32) flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group) folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
else: else:
if 'objective' in params and params['objective'] == 'lambdarank': if any(params.get(obj_alias, "") == "lambdarank" for obj_alias in _ConfigAliases.get("objective")):
if not SKLEARN_INSTALLED: if not SKLEARN_INSTALLED:
raise LightGBMError('Scikit-learn is required for lambdarank cv.') raise LightGBMError('Scikit-learn is required for lambdarank cv.')
# lambdarank task, split according to groups # lambdarank task, split according to groups
...@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100, ...@@ -495,14 +496,15 @@ def cv(params, train_set, num_boost_round=100,
params = copy.deepcopy(params) params = copy.deepcopy(params)
if fobj is not None: if fobj is not None:
for obj_alias in _ConfigAliases.get("objective"):
params.pop(obj_alias, None)
params['objective'] = 'none' params['objective'] = 'none'
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", for alias in _ConfigAliases.get("num_iterations"):
"num_round", "num_rounds", "num_boost_round", "n_estimators"]:
if alias in params: if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
num_boost_round = params.pop(alias) num_boost_round = params.pop(alias)
break break
for alias in ["early_stopping_round", "early_stopping_rounds", "early_stopping", "n_iter_no_change"]: for alias in _ConfigAliases.get("early_stopping_round"):
if alias in params: if alias in params:
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias)) warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
early_stopping_rounds = params.pop(alias) early_stopping_rounds = params.pop(alias)
...@@ -523,6 +525,8 @@ def cv(params, train_set, num_boost_round=100, ...@@ -523,6 +525,8 @@ def cv(params, train_set, num_boost_round=100,
.set_categorical_feature(categorical_feature) .set_categorical_feature(categorical_feature)
if metrics is not None: if metrics is not None:
for metric_alias in _ConfigAliases.get("metric"):
params.pop(metric_alias, None)
params['metric'] = metrics params['metric'] = metrics
results = collections.defaultdict(list) results = collections.defaultdict(list)
......
...@@ -5,7 +5,7 @@ from __future__ import absolute_import ...@@ -5,7 +5,7 @@ from __future__ import absolute_import
import numpy as np import numpy as np
from .basic import Dataset, LightGBMError from .basic import Dataset, LightGBMError, _ConfigAliases
from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength, _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
...@@ -489,15 +489,21 @@ class LGBMModel(_LGBMModelBase): ...@@ -489,15 +489,21 @@ class LGBMModel(_LGBMModelBase):
evals_result = {} evals_result = {}
params = self.get_params() params = self.get_params()
# user can set verbose with kwargs, it has higher priority # user can set verbose with kwargs, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and self.silent: if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and self.silent:
params['verbose'] = -1 params['verbose'] = -1
params.pop('silent', None) params.pop('silent', None)
params.pop('importance_type', None) params.pop('importance_type', None)
params.pop('n_estimators', None) params.pop('n_estimators', None)
params.pop('class_weight', None) params.pop('class_weight', None)
for alias in _ConfigAliases.get('objective'):
params.pop(alias, None)
if self._n_classes is not None and self._n_classes > 2: if self._n_classes is not None and self._n_classes > 2:
for alias in _ConfigAliases.get('num_class'):
params.pop(alias, None)
params['num_class'] = self._n_classes params['num_class'] = self._n_classes
if hasattr(self, '_eval_at'): if hasattr(self, '_eval_at'):
for alias in _ConfigAliases.get('eval_at'):
params.pop(alias, None)
params['eval_at'] = self._eval_at params['eval_at'] = self._eval_at
params['objective'] = self._objective params['objective'] = self._objective
if self._fobj: if self._fobj:
...@@ -518,7 +524,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -518,7 +524,7 @@ class LGBMModel(_LGBMModelBase):
elif isinstance(self, LGBMRanker): elif isinstance(self, LGBMRanker):
original_metric = "ndcg" original_metric = "ndcg"
# overwrite default metric by explicitly set metric # overwrite default metric by explicitly set metric
for metric_alias in ['metric', 'metrics', 'metric_types']: for metric_alias in _ConfigAliases.get("metric"):
if metric_alias in params: if metric_alias in params:
original_metric = params.pop(metric_alias) original_metric = params.pop(metric_alias)
# concatenate metric from params (or default if not provided in params) and eval_metric # concatenate metric from params (or default if not provided in params) and eval_metric
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment