"googlemock/git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "78579756a80619ae06e8850796ed95bc6043a92d"
Unverified Commit 668bf5da authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[python-package] deprecate support for H2O 'datatable' (#6670)


Co-authored-by: default avatarNikita Titov <nekit94-08@mail.ru>
parent 5b675461
...@@ -35,7 +35,7 @@ The LightGBM Python module can load data from: ...@@ -35,7 +35,7 @@ The LightGBM Python module can load data from:
- LibSVM (zero-based) / TSV / CSV format text file - LibSVM (zero-based) / TSV / CSV format text file
- NumPy 2D array(s), pandas DataFrame, H2O DataTable's Frame, SciPy sparse matrix - NumPy 2D array(s), pandas DataFrame, H2O DataTable's Frame (deprecated), SciPy sparse matrix
- LightGBM binary file - LightGBM binary file
......
...@@ -559,6 +559,15 @@ class LGBMDeprecationWarning(FutureWarning): ...@@ -559,6 +559,15 @@ class LGBMDeprecationWarning(FutureWarning):
pass pass
def _emit_datatable_deprecation_warning() -> None:
    """Emit a ``LGBMDeprecationWarning`` about H2O 'datatable' inputs.

    Called from every code path that accepts a ``datatable`` Frame, so users
    see the deprecation notice regardless of entry point.  ``stacklevel=2``
    attributes the warning to the caller's call site rather than this helper.
    """
    warnings.warn(
        (
            "Support for 'datatable' in LightGBM is deprecated, and will be removed in a future release. "
            "To avoid this warning, convert 'datatable' inputs to a supported format "
            "(for example, use the 'to_numpy()' method)."
        ),
        category=LGBMDeprecationWarning,
        stacklevel=2,
    )
class _ConfigAliases: class _ConfigAliases:
# lazy evaluation to allow import without dynamic library, e.g., for docs generation # lazy evaluation to allow import without dynamic library, e.g., for docs generation
aliases = None aliases = None
...@@ -1086,7 +1095,7 @@ class _InnerPredictor: ...@@ -1086,7 +1095,7 @@ class _InnerPredictor:
Parameters Parameters
---------- ----------
data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame or scipy.sparse data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame (deprecated) or scipy.sparse
Data source for prediction. Data source for prediction.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
...@@ -1200,6 +1209,7 @@ class _InnerPredictor: ...@@ -1200,6 +1209,7 @@ class _InnerPredictor:
predict_type=predict_type, predict_type=predict_type,
) )
elif isinstance(data, dt_DataTable): elif isinstance(data, dt_DataTable):
_emit_datatable_deprecation_warning()
preds, nrow = self.__pred_for_np2d( preds, nrow = self.__pred_for_np2d(
mat=data.to_numpy(), mat=data.to_numpy(),
start_iteration=start_iteration, start_iteration=start_iteration,
...@@ -1766,7 +1776,7 @@ class Dataset: ...@@ -1766,7 +1776,7 @@ class Dataset:
Parameters Parameters
---------- ----------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence, list of numpy array or pyarrow Table data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence, list of numpy array or pyarrow Table
Data source of Dataset. Data source of Dataset.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None)
...@@ -2172,6 +2182,7 @@ class Dataset: ...@@ -2172,6 +2182,7 @@ class Dataset:
elif isinstance(data, Sequence): elif isinstance(data, Sequence):
self.__init_from_seqs([data], ref_dataset) self.__init_from_seqs([data], ref_dataset)
elif isinstance(data, dt_DataTable): elif isinstance(data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset) self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset)
else: else:
try: try:
...@@ -2598,7 +2609,7 @@ class Dataset: ...@@ -2598,7 +2609,7 @@ class Dataset:
Parameters Parameters
---------- ----------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence or list of numpy array
Data source of Dataset. Data source of Dataset.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file. If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM) or a LightGBM Dataset binary file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array, pyarrow ChunkedArray or None, optional (default=None)
...@@ -3255,7 +3266,7 @@ class Dataset: ...@@ -3255,7 +3266,7 @@ class Dataset:
Returns Returns
------- -------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array or None data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence or list of numpy array or None
Raw data used in the Dataset construction. Raw data used in the Dataset construction.
""" """
if self._handle is None: if self._handle is None:
...@@ -3268,6 +3279,7 @@ class Dataset: ...@@ -3268,6 +3279,7 @@ class Dataset:
elif isinstance(self.data, pd_DataFrame): elif isinstance(self.data, pd_DataFrame):
self.data = self.data.iloc[self.used_indices].copy() self.data = self.data.iloc[self.used_indices].copy()
elif isinstance(self.data, dt_DataTable): elif isinstance(self.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = self.data[self.used_indices, :] self.data = self.data[self.used_indices, :]
elif isinstance(self.data, Sequence): elif isinstance(self.data, Sequence):
self.data = self.data[self.used_indices] self.data = self.data[self.used_indices]
...@@ -3456,6 +3468,7 @@ class Dataset: ...@@ -3456,6 +3468,7 @@ class Dataset:
elif isinstance(other.data, pd_DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = np.hstack((self.data, other.data.values)) self.data = np.hstack((self.data, other.data.values))
elif isinstance(other.data, dt_DataTable): elif isinstance(other.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = np.hstack((self.data, other.data.to_numpy())) self.data = np.hstack((self.data, other.data.to_numpy()))
else: else:
self.data = None self.data = None
...@@ -3466,6 +3479,7 @@ class Dataset: ...@@ -3466,6 +3479,7 @@ class Dataset:
elif isinstance(other.data, pd_DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format) self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format)
elif isinstance(other.data, dt_DataTable): elif isinstance(other.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format) self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format)
else: else:
self.data = None self.data = None
...@@ -3483,10 +3497,12 @@ class Dataset: ...@@ -3483,10 +3497,12 @@ class Dataset:
elif isinstance(other.data, pd_DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = concat((self.data, other.data), axis=1, ignore_index=True) self.data = concat((self.data, other.data), axis=1, ignore_index=True)
elif isinstance(other.data, dt_DataTable): elif isinstance(other.data, dt_DataTable):
_emit_datatable_deprecation_warning()
self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())), axis=1, ignore_index=True) self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())), axis=1, ignore_index=True)
else: else:
self.data = None self.data = None
elif isinstance(self.data, dt_DataTable): elif isinstance(self.data, dt_DataTable):
_emit_datatable_deprecation_warning()
if isinstance(other.data, np.ndarray): if isinstance(other.data, np.ndarray):
self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data))) self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data)))
elif isinstance(other.data, scipy.sparse.spmatrix): elif isinstance(other.data, scipy.sparse.spmatrix):
...@@ -4688,7 +4704,7 @@ class Booster: ...@@ -4688,7 +4704,7 @@ class Booster:
Parameters Parameters
---------- ----------
data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame or scipy.sparse data : str, pathlib.Path, numpy array, pandas DataFrame, pyarrow Table, H2O DataTable's Frame (deprecated) or scipy.sparse
Data source for prediction. Data source for prediction.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
...@@ -4769,7 +4785,7 @@ class Booster: ...@@ -4769,7 +4785,7 @@ class Booster:
Parameters Parameters
---------- ----------
data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequence or list of numpy array data : str, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, Sequence, list of Sequence or list of numpy array
Data source for refit. Data source for refit.
If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM). If str or pathlib.Path, it represents the path to a text file (CSV, TSV, or LibSVM).
label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array or pyarrow ChunkedArray label : list, numpy 1-D array, pandas Series / one-column DataFrame, pyarrow Array or pyarrow ChunkedArray
......
...@@ -1043,7 +1043,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -1043,7 +1043,7 @@ class LGBMModel(_LGBMModelBase):
fit.__doc__ = ( fit.__doc__ = (
_lgbmmodel_doc_fit.format( _lgbmmodel_doc_fit.format(
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]", X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
y_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples]", y_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples]",
sample_weight_shape="numpy array, pandas Series, list of int or float of shape = [n_samples] or None, optional (default=None)", sample_weight_shape="numpy array, pandas Series, list of int or float of shape = [n_samples] or None, optional (default=None)",
init_score_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) or shape = [n_samples, n_classes] (for multi-class task) or None, optional (default=None)", init_score_shape="numpy array, pandas DataFrame, pandas Series, list of int or float of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task) or shape = [n_samples, n_classes] (for multi-class task) or None, optional (default=None)",
...@@ -1120,7 +1120,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -1120,7 +1120,7 @@ class LGBMModel(_LGBMModelBase):
predict.__doc__ = _lgbmmodel_doc_predict.format( predict.__doc__ = _lgbmmodel_doc_predict.format(
description="Return the predicted value for each sample.", description="Return the predicted value for each sample.",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]", X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
output_name="predicted_result", output_name="predicted_result",
predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]", predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]",
X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]", X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
...@@ -1509,7 +1509,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel): ...@@ -1509,7 +1509,7 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
predict_proba.__doc__ = _lgbmmodel_doc_predict.format( predict_proba.__doc__ = _lgbmmodel_doc_predict.format(
description="Return the predicted probability for each class for each sample.", description="Return the predicted probability for each class for each sample.",
X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]", X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame (deprecated), scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
output_name="predicted_probability", output_name="predicted_probability",
predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]", predicted_result_shape="array-like of shape = [n_samples] or shape = [n_samples, n_classes]",
X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]", X_leaves_shape="array-like of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment