Commit 9bbfffe6 authored by Nikita Titov's avatar Nikita Titov Committed by Guolin Ke
Browse files

do not modify users' params (#1722)

parent 52dbc572
...@@ -13,10 +13,11 @@ from tempfile import NamedTemporaryFile ...@@ -13,10 +13,11 @@ from tempfile import NamedTemporaryFile
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
from .compat import (DataFrame, LGBMDeprecationWarning, Series, from .compat import (DataFrame, Series,
decode_string, integer_types, decode_string, string_type,
integer_types, numeric_types,
json, json_default_with_numpy, json, json_default_with_numpy,
numeric_types, range_, zip_, string_type) range_, zip_)
from .libpath import find_lib_path from .libpath import find_lib_path
...@@ -1470,7 +1471,7 @@ class Booster(object): ...@@ -1470,7 +1471,7 @@ class Booster(object):
self.__set_objective_to_none = False self.__set_objective_to_none = False
self.best_iteration = -1 self.best_iteration = -1
self.best_score = {} self.best_score = {}
params = {} if params is None else params params = {} if params is None else copy.deepcopy(params)
# user can set verbose with params, it has higher priority # user can set verbose with params, it has higher priority
if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent: if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
params["verbose"] = -1 params["verbose"] = -1
...@@ -1540,7 +1541,7 @@ class Booster(object): ...@@ -1540,7 +1541,7 @@ class Booster(object):
self.model_from_string(params['model_str']) self.model_from_string(params['model_str'])
else: else:
raise TypeError('Need at least one training dataset or model file to create booster instance') raise TypeError('Need at least one training dataset or model file to create booster instance')
self.params = params.copy() self.params = params
def __del__(self): def __del__(self):
try: try:
...@@ -2139,7 +2140,7 @@ class Booster(object): ...@@ -2139,7 +2140,7 @@ class Booster(object):
result : numpy array result : numpy array
Prediction result. Prediction result.
""" """
predictor = self._to_predictor(kwargs) predictor = self._to_predictor(copy.deepcopy(kwargs))
if num_iteration is None: if num_iteration is None:
num_iteration = self.best_iteration num_iteration = self.best_iteration
return predictor.predict(data, num_iteration, return predictor.predict(data, num_iteration,
...@@ -2169,7 +2170,7 @@ class Booster(object): ...@@ -2169,7 +2170,7 @@ class Booster(object):
""" """
if self.__set_objective_to_none: if self.__set_objective_to_none:
raise LightGBMError('Cannot refit due to null objective function.') raise LightGBMError('Cannot refit due to null objective function.')
predictor = self._to_predictor(kwargs) predictor = self._to_predictor(copy.deepcopy(kwargs))
leaf_preds = predictor.predict(data, -1, pred_leaf=True) leaf_preds = predictor.predict(data, -1, pred_leaf=True)
nrow, ncol = leaf_preds.shape nrow, ncol = leaf_preds.shape
train_set = Dataset(data, label, silent=True) train_set = Dataset(data, label, silent=True)
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
import copy
import warnings import warnings
from operator import attrgetter from operator import attrgetter
...@@ -12,7 +13,7 @@ import numpy as np ...@@ -12,7 +13,7 @@ import numpy as np
from . import callback from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold, from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold,
integer_types, range_, zip_, string_type) string_type, integer_types, range_, zip_)
def train(params, train_set, num_boost_round=100, def train(params, train_set, num_boost_round=100,
...@@ -104,6 +105,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -104,6 +105,7 @@ def train(params, train_set, num_boost_round=100,
The trained Booster model. The trained Booster model.
""" """
# create predictor first # create predictor first
params = copy.deepcopy(params)
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]: "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
if alias in params: if alias in params:
...@@ -417,6 +419,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -417,6 +419,7 @@ def cv(params, train_set, num_boost_round=100,
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
raise TypeError("Traninig only accepts Dataset object") raise TypeError("Traninig only accepts Dataset object")
params = copy.deepcopy(params)
for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees", for alias in ["num_iterations", "num_iteration", "n_iter", "num_tree", "num_trees",
"num_round", "num_rounds", "num_boost_round", "n_estimators"]: "num_round", "num_rounds", "num_boost_round", "n_estimators"]:
if alias in params: if alias in params:
......
...@@ -11,7 +11,7 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, ...@@ -11,7 +11,7 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength, _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckConsistentLength,
_LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight, _LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
argc_, range_, string_type, DataFrame, LGBMDeprecationWarning) argc_, range_, string_type, DataFrame)
from .engine import train from .engine import train
......
...@@ -438,7 +438,8 @@ class TestEngine(unittest.TestCase): ...@@ -438,7 +438,8 @@ class TestEngine(unittest.TestCase):
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
# shuffle = False, override metric in params # shuffle = False, override metric in params
params_with_metric = {'metric': 'l2', 'verbose': -1} params_with_metric = {'metric': 'l2', 'verbose': -1}
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, nfold=3, stratified=False, shuffle=False, cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
nfold=3, stratified=False, shuffle=False,
metrics='l1', verbose_eval=False) metrics='l1', verbose_eval=False)
self.assertIn('l1-mean', cv_res) self.assertIn('l1-mean', cv_res)
self.assertNotIn('l2-mean', cv_res) self.assertNotIn('l2-mean', cv_res)
...@@ -452,10 +453,10 @@ class TestEngine(unittest.TestCase): ...@@ -452,10 +453,10 @@ class TestEngine(unittest.TestCase):
# self defined folds # self defined folds
tss = TimeSeriesSplit(3) tss = TimeSeriesSplit(3)
folds = tss.split(X_train) folds = tss.split(X_train)
cv_res_gen = lgb.cv(params, lgb_train, num_boost_round=10, folds=folds, cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
metrics='l2', verbose_eval=False) verbose_eval=False)
cv_res_obj = lgb.cv(params, lgb_train, num_boost_round=10, folds=tss, cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
metrics='l2', verbose_eval=False) verbose_eval=False)
np.testing.assert_almost_equal(cv_res_gen['l2-mean'], cv_res_obj['l2-mean']) np.testing.assert_almost_equal(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
# lambdarank # lambdarank
X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
...@@ -464,21 +465,21 @@ class TestEngine(unittest.TestCase): ...@@ -464,21 +465,21 @@ class TestEngine(unittest.TestCase):
'../../examples/lambdarank/rank.train.query')) '../../examples/lambdarank/rank.train.query'))
params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3} params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
lgb_train = lgb.Dataset(X_train, y_train, group=q_train) lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
# ... with NDCG (default) metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['ndcg@3-mean']).any())
# ... with l2 metric # ... with l2 metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3, cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
metrics='l2', verbose_eval=False) metrics='l2', verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2) self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any()) self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
# ... with NDCG (default) metric
cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
verbose_eval=False)
self.assertEqual(len(cv_res_lambda), 2)
self.assertFalse(np.isnan(cv_res_lambda['ndcg@3-mean']).any())
# self defined folds with lambdarank # self defined folds with lambdarank
cv_res_lambda_obj = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, cv_res_lambda_obj = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10,
folds=GroupKFold(n_splits=3), folds=GroupKFold(n_splits=3),
metrics='l2', verbose_eval=False) verbose_eval=False)
np.testing.assert_almost_equal(cv_res_lambda['l2-mean'], cv_res_lambda_obj['l2-mean']) np.testing.assert_almost_equal(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])
def test_feature_name(self): def test_feature_name(self):
X, y = load_boston(True) X, y = load_boston(True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment