Commit 066720ef (unverified), authored by Nikita Titov, committed by GitHub
Browse files

[python] use better names for imported classes from extra libraries (#3862)

parent 9eeac3c7
...@@ -14,7 +14,7 @@ from typing import Any, Dict ...@@ -14,7 +14,7 @@ from typing import Any, Dict
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
from .compat import PANDAS_INSTALLED, DataFrame, Series, concat, is_dtype_sparse, DataTable from .compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat, is_dtype_sparse, dt_DataTable
from .libpath import find_lib_path from .libpath import find_lib_path
...@@ -140,7 +140,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'): ...@@ -140,7 +140,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
return data.astype(dtype=dtype, copy=False) return data.astype(dtype=dtype, copy=False)
elif is_1d_list(data): elif is_1d_list(data):
return np.array(data, dtype=dtype, copy=False) return np.array(data, dtype=dtype, copy=False)
elif isinstance(data, Series): elif isinstance(data, pd_Series):
if _get_bad_pandas_dtypes([data.dtypes]): if _get_bad_pandas_dtypes([data.dtypes]):
raise ValueError('Series.dtypes must be int, float or bool') raise ValueError('Series.dtypes must be int, float or bool')
return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well
...@@ -493,7 +493,7 @@ def _get_bad_pandas_dtypes(dtypes): ...@@ -493,7 +493,7 @@ def _get_bad_pandas_dtypes(dtypes):
def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorical): def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorical):
if isinstance(data, DataFrame): if isinstance(data, pd_DataFrame):
if len(data.shape) != 2 or data.shape[0] < 1: if len(data.shape) != 2 or data.shape[0] < 1:
raise ValueError('Input data must be 2 dimensional and non empty.') raise ValueError('Input data must be 2 dimensional and non empty.')
if feature_name == 'auto' or feature_name is None: if feature_name == 'auto' or feature_name is None:
...@@ -537,7 +537,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica ...@@ -537,7 +537,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
def _label_from_pandas(label): def _label_from_pandas(label):
if isinstance(label, DataFrame): if isinstance(label, pd_DataFrame):
if len(label.columns) > 1: if len(label.columns) > 1:
raise ValueError('DataFrame for label cannot have multiple columns') raise ValueError('DataFrame for label cannot have multiple columns')
if _get_bad_pandas_dtypes(label.dtypes): if _get_bad_pandas_dtypes(label.dtypes):
...@@ -720,7 +720,7 @@ class _InnerPredictor: ...@@ -720,7 +720,7 @@ class _InnerPredictor:
except BaseException: except BaseException:
raise ValueError('Cannot convert data list to numpy array.') raise ValueError('Cannot convert data list to numpy array.')
preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type)
elif isinstance(data, DataTable): elif isinstance(data, dt_DataTable):
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
else: else:
try: try:
...@@ -1258,7 +1258,7 @@ class Dataset: ...@@ -1258,7 +1258,7 @@ class Dataset:
self.__init_from_np2d(data, params_str, ref_dataset) self.__init_from_np2d(data, params_str, ref_dataset)
elif isinstance(data, list) and len(data) > 0 and all(isinstance(x, np.ndarray) for x in data): elif isinstance(data, list) and len(data) > 0 and all(isinstance(x, np.ndarray) for x in data):
self.__init_from_list_np2d(data, params_str, ref_dataset) self.__init_from_list_np2d(data, params_str, ref_dataset)
elif isinstance(data, DataTable): elif isinstance(data, dt_DataTable):
self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset) self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset)
else: else:
try: try:
...@@ -1939,9 +1939,9 @@ class Dataset: ...@@ -1939,9 +1939,9 @@ class Dataset:
if self.data is not None: if self.data is not None:
if isinstance(self.data, np.ndarray) or scipy.sparse.issparse(self.data): if isinstance(self.data, np.ndarray) or scipy.sparse.issparse(self.data):
self.data = self.data[self.used_indices, :] self.data = self.data[self.used_indices, :]
elif isinstance(self.data, DataFrame): elif isinstance(self.data, pd_DataFrame):
self.data = self.data.iloc[self.used_indices].copy() self.data = self.data.iloc[self.used_indices].copy()
elif isinstance(self.data, DataTable): elif isinstance(self.data, dt_DataTable):
self.data = self.data[self.used_indices, :] self.data = self.data[self.used_indices, :]
else: else:
_log_warning("Cannot subset {} type of raw data.\n" _log_warning("Cannot subset {} type of raw data.\n"
...@@ -2061,9 +2061,9 @@ class Dataset: ...@@ -2061,9 +2061,9 @@ class Dataset:
self.data = np.hstack((self.data, other.data)) self.data = np.hstack((self.data, other.data))
elif scipy.sparse.issparse(other.data): elif scipy.sparse.issparse(other.data):
self.data = np.hstack((self.data, other.data.toarray())) self.data = np.hstack((self.data, other.data.toarray()))
elif isinstance(other.data, DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = np.hstack((self.data, other.data.values)) self.data = np.hstack((self.data, other.data.values))
elif isinstance(other.data, DataTable): elif isinstance(other.data, dt_DataTable):
self.data = np.hstack((self.data, other.data.to_numpy())) self.data = np.hstack((self.data, other.data.to_numpy()))
else: else:
self.data = None self.data = None
...@@ -2071,39 +2071,39 @@ class Dataset: ...@@ -2071,39 +2071,39 @@ class Dataset:
sparse_format = self.data.getformat() sparse_format = self.data.getformat()
if isinstance(other.data, np.ndarray) or scipy.sparse.issparse(other.data): if isinstance(other.data, np.ndarray) or scipy.sparse.issparse(other.data):
self.data = scipy.sparse.hstack((self.data, other.data), format=sparse_format) self.data = scipy.sparse.hstack((self.data, other.data), format=sparse_format)
elif isinstance(other.data, DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format) self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format)
elif isinstance(other.data, DataTable): elif isinstance(other.data, dt_DataTable):
self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format) self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format)
else: else:
self.data = None self.data = None
elif isinstance(self.data, DataFrame): elif isinstance(self.data, pd_DataFrame):
if not PANDAS_INSTALLED: if not PANDAS_INSTALLED:
raise LightGBMError("Cannot add features to DataFrame type of raw data " raise LightGBMError("Cannot add features to DataFrame type of raw data "
"without pandas installed") "without pandas installed")
if isinstance(other.data, np.ndarray): if isinstance(other.data, np.ndarray):
self.data = concat((self.data, DataFrame(other.data)), self.data = concat((self.data, pd_DataFrame(other.data)),
axis=1, ignore_index=True) axis=1, ignore_index=True)
elif scipy.sparse.issparse(other.data): elif scipy.sparse.issparse(other.data):
self.data = concat((self.data, DataFrame(other.data.toarray())), self.data = concat((self.data, pd_DataFrame(other.data.toarray())),
axis=1, ignore_index=True) axis=1, ignore_index=True)
elif isinstance(other.data, DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = concat((self.data, other.data), self.data = concat((self.data, other.data),
axis=1, ignore_index=True) axis=1, ignore_index=True)
elif isinstance(other.data, DataTable): elif isinstance(other.data, dt_DataTable):
self.data = concat((self.data, DataFrame(other.data.to_numpy())), self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())),
axis=1, ignore_index=True) axis=1, ignore_index=True)
else: else:
self.data = None self.data = None
elif isinstance(self.data, DataTable): elif isinstance(self.data, dt_DataTable):
if isinstance(other.data, np.ndarray): if isinstance(other.data, np.ndarray):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data))) self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data)))
elif scipy.sparse.issparse(other.data): elif scipy.sparse.issparse(other.data):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.toarray()))) self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.toarray())))
elif isinstance(other.data, DataFrame): elif isinstance(other.data, pd_DataFrame):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.values))) self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.values)))
elif isinstance(other.data, DataTable): elif isinstance(other.data, dt_DataTable):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy()))) self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy())))
else: else:
self.data = None self.data = None
else: else:
...@@ -2496,7 +2496,7 @@ class Booster: ...@@ -2496,7 +2496,7 @@ class Booster:
tree_index=tree['tree_index'], tree_index=tree['tree_index'],
feature_names=feature_names)) feature_names=feature_names))
return DataFrame(model_list, columns=model_list[0].keys()) return pd_DataFrame(model_list, columns=model_list[0].keys())
def set_train_data_name(self, name): def set_train_data_name(self, name):
"""Set the name to the training Dataset. """Set the name to the training Dataset.
...@@ -3345,7 +3345,7 @@ class Booster: ...@@ -3345,7 +3345,7 @@ class Booster:
ret = np.column_stack((bin_edges[1:], hist)) ret = np.column_stack((bin_edges[1:], hist))
ret = ret[ret[:, 1] > 0] ret = ret[ret[:, 1] > 0]
if PANDAS_INSTALLED: if PANDAS_INSTALLED:
return DataFrame(ret, columns=['SplitValue', 'Count']) return pd_DataFrame(ret, columns=['SplitValue', 'Count'])
else: else:
return ret return ret
else: else:
......
...@@ -3,18 +3,20 @@ ...@@ -3,18 +3,20 @@
"""pandas""" """pandas"""
try: try:
from pandas import Series, DataFrame, concat from pandas import concat
from pandas import Series as pd_Series
from pandas import DataFrame as pd_DataFrame
from pandas.api.types import is_sparse as is_dtype_sparse from pandas.api.types import is_sparse as is_dtype_sparse
PANDAS_INSTALLED = True PANDAS_INSTALLED = True
except ImportError: except ImportError:
PANDAS_INSTALLED = False PANDAS_INSTALLED = False
class Series: class pd_Series:
"""Dummy class for pandas.Series.""" """Dummy class for pandas.Series."""
pass pass
class DataFrame: class pd_DataFrame:
"""Dummy class for pandas.DataFrame.""" """Dummy class for pandas.DataFrame."""
pass pass
...@@ -40,15 +42,15 @@ except ImportError: ...@@ -40,15 +42,15 @@ except ImportError:
try: try:
import datatable import datatable
if hasattr(datatable, "Frame"): if hasattr(datatable, "Frame"):
DataTable = datatable.Frame dt_DataTable = datatable.Frame
else: else:
DataTable = datatable.DataTable dt_DataTable = datatable.DataTable
DATATABLE_INSTALLED = True DATATABLE_INSTALLED = True
except ImportError: except ImportError:
DATATABLE_INSTALLED = False DATATABLE_INSTALLED = False
class DataTable: class dt_DataTable:
"""Dummy class for DataTable.""" """Dummy class for datatable.DataTable."""
pass pass
...@@ -128,6 +130,6 @@ except ImportError: ...@@ -128,6 +130,6 @@ except ImportError:
pass pass
class dask_Frame: class dask_Frame:
"""Dummy class for ddask.dataframe._Frame.""" """Dummy class for dask.dataframe._Frame."""
pass pass
...@@ -16,7 +16,7 @@ import numpy as np ...@@ -16,7 +16,7 @@ import numpy as np
import scipy.sparse as ss import scipy.sparse as ss
from .basic import _choose_param_value, _ConfigAliases, _LIB, _log_warning, _safe_call, LightGBMError from .basic import _choose_param_value, _ConfigAliases, _LIB, _log_warning, _safe_call, LightGBMError
from .compat import (PANDAS_INSTALLED, DataFrame, Series, concat, from .compat import (PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat,
SKLEARN_INSTALLED, SKLEARN_INSTALLED,
DASK_INSTALLED, dask_Frame, dask_Array, delayed, Client, default_client, get_worker, wait) DASK_INSTALLED, dask_Frame, dask_Array, delayed, Client, default_client, get_worker, wait)
from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker
...@@ -105,7 +105,7 @@ def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], loc ...@@ -105,7 +105,7 @@ def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], loc
def _concat(seq): def _concat(seq):
if isinstance(seq[0], np.ndarray): if isinstance(seq[0], np.ndarray):
return np.concatenate(seq, axis=0) return np.concatenate(seq, axis=0)
elif isinstance(seq[0], (DataFrame, Series)): elif isinstance(seq[0], (pd_DataFrame, pd_Series)):
return concat(seq, axis=0) return concat(seq, axis=0)
elif isinstance(seq[0], ss.spmatrix): elif isinstance(seq[0], ss.spmatrix):
return ss.vstack(seq, format='csr') return ss.vstack(seq, format='csr')
...@@ -304,7 +304,7 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group ...@@ -304,7 +304,7 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group
def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, **kwargs): def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, **kwargs):
data = part.values if isinstance(part, DataFrame) else part data = part.values if isinstance(part, pd_DataFrame) else part
if data.shape[0] == 0: if data.shape[0] == 0:
result = np.array([]) result = np.array([])
...@@ -325,11 +325,11 @@ def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, * ...@@ -325,11 +325,11 @@ def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, *
**kwargs **kwargs
) )
if isinstance(part, DataFrame): if isinstance(part, pd_DataFrame):
if pred_proba or pred_contrib: if pred_proba or pred_contrib:
result = DataFrame(result, index=part.index) result = pd_DataFrame(result, index=part.index)
else: else:
result = Series(result, index=part.index, name='predictions') result = pd_Series(result, index=part.index, name='predictions')
return result return result
...@@ -361,7 +361,7 @@ def _predict(model, data, raw_score=False, pred_proba=False, pred_leaf=False, pr ...@@ -361,7 +361,7 @@ def _predict(model, data, raw_score=False, pred_proba=False, pred_leaf=False, pr
------- -------
predicted_result : dask array of shape = [n_samples] or shape = [n_samples, n_classes] predicted_result : dask array of shape = [n_samples] or shape = [n_samples, n_classes]
The predicted values. The predicted values.
X_leaves : dask arrayof shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes] X_leaves : dask array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]
If ``pred_leaf=True``, the predicted leaf of every tree for each sample. If ``pred_leaf=True``, the predicted leaf of every tree for each sample.
X_SHAP_values : dask array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects X_SHAP_values : dask array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects
If ``pred_contrib=True``, the feature contributions for each sample. If ``pred_contrib=True``, the feature contributions for each sample.
......
...@@ -11,7 +11,7 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, ...@@ -11,7 +11,7 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight, _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight,
_LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight, _LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
DataFrame, DataTable) pd_DataFrame, dt_DataTable)
from .engine import train from .engine import train
...@@ -560,7 +560,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -560,7 +560,7 @@ class LGBMModel(_LGBMModelBase):
params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric
params['metric'] = [metric for metric in params['metric'] if metric is not None] params['metric'] = [metric for metric in params['metric'] if metric is not None]
if not isinstance(X, (DataFrame, DataTable)): if not isinstance(X, (pd_DataFrame, dt_DataTable)):
_X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2) _X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
if sample_weight is not None: if sample_weight is not None:
sample_weight = _LGBMCheckSampleWeight(sample_weight, _X) sample_weight = _LGBMCheckSampleWeight(sample_weight, _X)
...@@ -695,7 +695,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -695,7 +695,7 @@ class LGBMModel(_LGBMModelBase):
""" """
if self._n_features is None: if self._n_features is None:
raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.") raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.")
if not isinstance(X, (DataFrame, DataTable)): if not isinstance(X, (pd_DataFrame, dt_DataTable)):
X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False) X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
n_features = X.shape[1] n_features = X.shape[1]
if self._n_features != n_features: if self._n_features != n_features:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment