"tests/vscode:/vscode.git/clone" did not exist on "939a02482bda2ff5e15887549a8ddfbe9865b087"
Unverified commit 25e32e94, authored by José Morales, committed by GitHub
Browse files

[python-package] add `validate_features` argument to `refit()` (#5331)

add validate_features to refit
parent dc590725
...@@ -3602,6 +3602,7 @@ class Booster: ...@@ -3602,6 +3602,7 @@ class Booster:
categorical_feature='auto', categorical_feature='auto',
dataset_params=None, dataset_params=None,
free_raw_data=True, free_raw_data=True,
validate_features=False,
**kwargs **kwargs
): ):
"""Refit the existing Booster by new data. """Refit the existing Booster by new data.
...@@ -3645,6 +3646,9 @@ class Booster: ...@@ -3645,6 +3646,9 @@ class Booster:
Other parameters for Dataset ``data``. Other parameters for Dataset ``data``.
free_raw_data : bool, optional (default=True) free_raw_data : bool, optional (default=True)
If True, raw data is freed after constructing inner Dataset for ``data``. If True, raw data is freed after constructing inner Dataset for ``data``.
validate_features : bool, optional (default=False)
If True, ensure that the features used to refit the model match the original ones.
Used only if data is pandas DataFrame.
**kwargs **kwargs
Other parameters for refit. Other parameters for refit.
These parameters will be passed to ``predict`` method. These parameters will be passed to ``predict`` method.
...@@ -3659,7 +3663,7 @@ class Booster: ...@@ -3659,7 +3663,7 @@ class Booster:
if dataset_params is None: if dataset_params is None:
dataset_params = {} dataset_params = {}
predictor = self._to_predictor(deepcopy(kwargs)) predictor = self._to_predictor(deepcopy(kwargs))
leaf_preds = predictor.predict(data, -1, pred_leaf=True) leaf_preds = predictor.predict(data, -1, pred_leaf=True, validate_features=validate_features)
nrow, ncol = leaf_preds.shape nrow, ncol = leaf_preds.shape
out_is_linear = ctypes.c_int(0) out_is_linear = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetLinear( _safe_call(_LIB.LGBM_BoosterGetLinear(
......
...@@ -3657,3 +3657,10 @@ def test_validate_features(): ...@@ -3657,3 +3657,10 @@ def test_validate_features():
# check that disabling the check doesn't raise the error # check that disabling the check doesn't raise the error
bst.predict(df2, validate_features=False) bst.predict(df2, validate_features=False)
# try to refit with a different feature
with pytest.raises(lgb.basic.LightGBMError, match="Expected 'x3' at position 2 but found 'z'"):
bst.refit(df2, y, validate_features=True)
# check that disabling the check doesn't raise the error
bst.refit(df2, y, validate_features=False)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment