"vscode:/vscode.git/clone" did not exist on "d5c2c55682cdf52b9371dbead9bd551e2acbffdc"
Unverified Commit 066720ef authored by Nikita Titov, committed by GitHub
Browse files

[python] use better names for imported classes from extra libraries (#3862)

parent 9eeac3c7
......@@ -14,7 +14,7 @@ from typing import Any, Dict
import numpy as np
import scipy.sparse
from .compat import PANDAS_INSTALLED, DataFrame, Series, concat, is_dtype_sparse, DataTable
from .compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat, is_dtype_sparse, dt_DataTable
from .libpath import find_lib_path
......@@ -140,7 +140,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
return data.astype(dtype=dtype, copy=False)
elif is_1d_list(data):
return np.array(data, dtype=dtype, copy=False)
elif isinstance(data, Series):
elif isinstance(data, pd_Series):
if _get_bad_pandas_dtypes([data.dtypes]):
raise ValueError('Series.dtypes must be int, float or bool')
return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well
......@@ -493,7 +493,7 @@ def _get_bad_pandas_dtypes(dtypes):
def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorical):
if isinstance(data, DataFrame):
if isinstance(data, pd_DataFrame):
if len(data.shape) != 2 or data.shape[0] < 1:
raise ValueError('Input data must be 2 dimensional and non empty.')
if feature_name == 'auto' or feature_name is None:
......@@ -537,7 +537,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
def _label_from_pandas(label):
if isinstance(label, DataFrame):
if isinstance(label, pd_DataFrame):
if len(label.columns) > 1:
raise ValueError('DataFrame for label cannot have multiple columns')
if _get_bad_pandas_dtypes(label.dtypes):
......@@ -720,7 +720,7 @@ class _InnerPredictor:
except BaseException:
raise ValueError('Cannot convert data list to numpy array.')
preds, nrow = self.__pred_for_np2d(data, start_iteration, num_iteration, predict_type)
elif isinstance(data, DataTable):
elif isinstance(data, dt_DataTable):
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
else:
try:
......@@ -1258,7 +1258,7 @@ class Dataset:
self.__init_from_np2d(data, params_str, ref_dataset)
elif isinstance(data, list) and len(data) > 0 and all(isinstance(x, np.ndarray) for x in data):
self.__init_from_list_np2d(data, params_str, ref_dataset)
elif isinstance(data, DataTable):
elif isinstance(data, dt_DataTable):
self.__init_from_np2d(data.to_numpy(), params_str, ref_dataset)
else:
try:
......@@ -1939,9 +1939,9 @@ class Dataset:
if self.data is not None:
if isinstance(self.data, np.ndarray) or scipy.sparse.issparse(self.data):
self.data = self.data[self.used_indices, :]
elif isinstance(self.data, DataFrame):
elif isinstance(self.data, pd_DataFrame):
self.data = self.data.iloc[self.used_indices].copy()
elif isinstance(self.data, DataTable):
elif isinstance(self.data, dt_DataTable):
self.data = self.data[self.used_indices, :]
else:
_log_warning("Cannot subset {} type of raw data.\n"
......@@ -2061,9 +2061,9 @@ class Dataset:
self.data = np.hstack((self.data, other.data))
elif scipy.sparse.issparse(other.data):
self.data = np.hstack((self.data, other.data.toarray()))
elif isinstance(other.data, DataFrame):
elif isinstance(other.data, pd_DataFrame):
self.data = np.hstack((self.data, other.data.values))
elif isinstance(other.data, DataTable):
elif isinstance(other.data, dt_DataTable):
self.data = np.hstack((self.data, other.data.to_numpy()))
else:
self.data = None
......@@ -2071,39 +2071,39 @@ class Dataset:
sparse_format = self.data.getformat()
if isinstance(other.data, np.ndarray) or scipy.sparse.issparse(other.data):
self.data = scipy.sparse.hstack((self.data, other.data), format=sparse_format)
elif isinstance(other.data, DataFrame):
elif isinstance(other.data, pd_DataFrame):
self.data = scipy.sparse.hstack((self.data, other.data.values), format=sparse_format)
elif isinstance(other.data, DataTable):
elif isinstance(other.data, dt_DataTable):
self.data = scipy.sparse.hstack((self.data, other.data.to_numpy()), format=sparse_format)
else:
self.data = None
elif isinstance(self.data, DataFrame):
elif isinstance(self.data, pd_DataFrame):
if not PANDAS_INSTALLED:
raise LightGBMError("Cannot add features to DataFrame type of raw data "
"without pandas installed")
if isinstance(other.data, np.ndarray):
self.data = concat((self.data, DataFrame(other.data)),
self.data = concat((self.data, pd_DataFrame(other.data)),
axis=1, ignore_index=True)
elif scipy.sparse.issparse(other.data):
self.data = concat((self.data, DataFrame(other.data.toarray())),
self.data = concat((self.data, pd_DataFrame(other.data.toarray())),
axis=1, ignore_index=True)
elif isinstance(other.data, DataFrame):
elif isinstance(other.data, pd_DataFrame):
self.data = concat((self.data, other.data),
axis=1, ignore_index=True)
elif isinstance(other.data, DataTable):
self.data = concat((self.data, DataFrame(other.data.to_numpy())),
elif isinstance(other.data, dt_DataTable):
self.data = concat((self.data, pd_DataFrame(other.data.to_numpy())),
axis=1, ignore_index=True)
else:
self.data = None
elif isinstance(self.data, DataTable):
elif isinstance(self.data, dt_DataTable):
if isinstance(other.data, np.ndarray):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data)))
self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data)))
elif scipy.sparse.issparse(other.data):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.toarray())))
elif isinstance(other.data, DataFrame):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.values)))
elif isinstance(other.data, DataTable):
self.data = DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy())))
self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.toarray())))
elif isinstance(other.data, pd_DataFrame):
self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.values)))
elif isinstance(other.data, dt_DataTable):
self.data = dt_DataTable(np.hstack((self.data.to_numpy(), other.data.to_numpy())))
else:
self.data = None
else:
......@@ -2496,7 +2496,7 @@ class Booster:
tree_index=tree['tree_index'],
feature_names=feature_names))
return DataFrame(model_list, columns=model_list[0].keys())
return pd_DataFrame(model_list, columns=model_list[0].keys())
def set_train_data_name(self, name):
"""Set the name to the training Dataset.
......@@ -3345,7 +3345,7 @@ class Booster:
ret = np.column_stack((bin_edges[1:], hist))
ret = ret[ret[:, 1] > 0]
if PANDAS_INSTALLED:
return DataFrame(ret, columns=['SplitValue', 'Count'])
return pd_DataFrame(ret, columns=['SplitValue', 'Count'])
else:
return ret
else:
......
......@@ -3,18 +3,20 @@
"""pandas"""
try:
    # Optional pandas dependency: import under "pd_"-prefixed aliases so that
    # call sites make the originating library explicit (pd_Series vs dt_DataTable).
    from pandas import concat
    from pandas import Series as pd_Series
    from pandas import DataFrame as pd_DataFrame
    from pandas.api.types import is_sparse as is_dtype_sparse
    PANDAS_INSTALLED = True
except ImportError:
    # pandas is absent: expose placeholder classes so isinstance() checks
    # elsewhere in the package are always well-defined and simply never match.
    PANDAS_INSTALLED = False

    class pd_Series:
        """Dummy class for pandas.Series."""
        pass

    class pd_DataFrame:
        """Dummy class for pandas.DataFrame."""
        pass
......@@ -40,15 +42,15 @@ except ImportError:
try:
    # Optional datatable dependency. Older releases exposed the frame class as
    # ``DataTable``; newer ones renamed it to ``Frame`` — prefer the new name
    # when present so both versions are supported.
    import datatable
    if hasattr(datatable, "Frame"):
        dt_DataTable = datatable.Frame
    else:
        dt_DataTable = datatable.DataTable
    DATATABLE_INSTALLED = True
except ImportError:
    # datatable is absent: expose a placeholder class so isinstance() checks
    # elsewhere in the package are always well-defined and simply never match.
    DATATABLE_INSTALLED = False

    class dt_DataTable:
        """Dummy class for datatable.DataTable."""
        pass
......@@ -128,6 +130,6 @@ except ImportError:
pass
class dask_Frame:
    """Dummy class for dask.dataframe._Frame."""
    pass
......@@ -16,7 +16,7 @@ import numpy as np
import scipy.sparse as ss
from .basic import _choose_param_value, _ConfigAliases, _LIB, _log_warning, _safe_call, LightGBMError
from .compat import (PANDAS_INSTALLED, DataFrame, Series, concat,
from .compat import (PANDAS_INSTALLED, pd_DataFrame, pd_Series, concat,
SKLEARN_INSTALLED,
DASK_INSTALLED, dask_Frame, dask_Array, delayed, Client, default_client, get_worker, wait)
from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker
......@@ -105,7 +105,7 @@ def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], loc
def _concat(seq):
if isinstance(seq[0], np.ndarray):
return np.concatenate(seq, axis=0)
elif isinstance(seq[0], (DataFrame, Series)):
elif isinstance(seq[0], (pd_DataFrame, pd_Series)):
return concat(seq, axis=0)
elif isinstance(seq[0], ss.spmatrix):
return ss.vstack(seq, format='csr')
......@@ -304,7 +304,7 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group
def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, **kwargs):
data = part.values if isinstance(part, DataFrame) else part
data = part.values if isinstance(part, pd_DataFrame) else part
if data.shape[0] == 0:
result = np.array([])
......@@ -325,11 +325,11 @@ def _predict_part(part, model, raw_score, pred_proba, pred_leaf, pred_contrib, *
**kwargs
)
if isinstance(part, DataFrame):
if isinstance(part, pd_DataFrame):
if pred_proba or pred_contrib:
result = DataFrame(result, index=part.index)
result = pd_DataFrame(result, index=part.index)
else:
result = Series(result, index=part.index, name='predictions')
result = pd_Series(result, index=part.index, name='predictions')
return result
......@@ -361,7 +361,7 @@ def _predict(model, data, raw_score=False, pred_proba=False, pred_leaf=False, pr
-------
predicted_result : dask array of shape = [n_samples] or shape = [n_samples, n_classes]
The predicted values.
X_leaves : dask array of shape = [n_samples, n_trees] or shape = [n_samples, n_trees * n_classes]
If ``pred_leaf=True``, the predicted leaf of every tree for each sample.
X_SHAP_values : dask array of shape = [n_samples, n_features + 1] or shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects
If ``pred_contrib=True``, the feature contributions for each sample.
......
......@@ -11,7 +11,7 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight,
_LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
DataFrame, DataTable)
pd_DataFrame, dt_DataTable)
from .engine import train
......@@ -560,7 +560,7 @@ class LGBMModel(_LGBMModelBase):
params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric
params['metric'] = [metric for metric in params['metric'] if metric is not None]
if not isinstance(X, (DataFrame, DataTable)):
if not isinstance(X, (pd_DataFrame, dt_DataTable)):
_X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
if sample_weight is not None:
sample_weight = _LGBMCheckSampleWeight(sample_weight, _X)
......@@ -695,7 +695,7 @@ class LGBMModel(_LGBMModelBase):
"""
if self._n_features is None:
raise LGBMNotFittedError("Estimator not fitted, call `fit` before exploiting the model.")
if not isinstance(X, (DataFrame, DataTable)):
if not isinstance(X, (pd_DataFrame, dt_DataTable)):
X = _LGBMCheckArray(X, accept_sparse=True, force_all_finite=False)
n_features = X.shape[1]
if self._n_features != n_features:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment