Unverified commit 46278af5, authored by superlaut and committed by GitHub
Browse files

[python-package] replace .values usage with .to_numpy() (#5612)

parent 73531662
...@@ -602,7 +602,16 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica ...@@ -602,7 +602,16 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
df_dtypes = [dtype.type for dtype in data.dtypes] df_dtypes = [dtype.type for dtype in data.dtypes]
df_dtypes.append(np.float32) # so that the target dtype considers floats df_dtypes.append(np.float32) # so that the target dtype considers floats
target_dtype = np.find_common_type(df_dtypes, []) target_dtype = np.find_common_type(df_dtypes, [])
data = data.astype(target_dtype, copy=False).values try:
# most common case (no nullable dtypes)
data = data.to_numpy(dtype=target_dtype, copy=False)
except TypeError:
# 1.0 <= pd version < 1.1 and nullable dtypes, least common case
# raises an error because the array is cast to type(pd.NA) and there's no na_value argument
data = data.astype(target_dtype, copy=False).values
except ValueError:
# data has nullable dtypes, but we can specify na_value argument and copy will be made
data = data.to_numpy(dtype=target_dtype, na_value=np.nan)
else: else:
if feature_name == 'auto': if feature_name == 'auto':
feature_name = None feature_name = None
...@@ -2291,7 +2300,17 @@ class Dataset: ...@@ -2291,7 +2300,17 @@ class Dataset:
if len(label.columns) > 1: if len(label.columns) > 1:
raise ValueError('DataFrame for label cannot have multiple columns') raise ValueError('DataFrame for label cannot have multiple columns')
_check_for_bad_pandas_dtypes(label.dtypes) _check_for_bad_pandas_dtypes(label.dtypes)
label_array = np.ravel(label.values.astype(np.float32, copy=False)) try:
# most common case (no nullable dtypes)
label = label.to_numpy(dtype=np.float32, copy=False)
except TypeError:
# 1.0 <= pd version < 1.1 and nullable dtypes, least common case
# raises an error because the array is cast to type(pd.NA) and there's no na_value argument
label = label.astype(np.float32, copy=False).values
except ValueError:
# data has nullable dtypes, but we can specify na_value argument and copy will be made
label = label.to_numpy(dtype=np.float32, na_value=np.nan)
label_array = np.ravel(label)
else: else:
label_array = _list_to_1d_numpy(label, name='label') label_array = _list_to_1d_numpy(label, name='label')
self.set_field('label', label_array) self.set_field('label', label_array)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment