Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
76c0077a
Unverified
Commit
76c0077a
authored
Feb 12, 2023
by
James Lamb
Committed by
GitHub
Feb 12, 2023
Browse files
[python-package] remove some inner function definitions (#5704)
parent
771bad8c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
263 additions
and
168 deletions
+263
-168
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+263
-168
No files found.
python-package/lightgbm/basic.py
View file @
76c0077a
...
...
@@ -75,6 +75,10 @@ _INFO_METHOD_NAME = "info"
_WARNING_METHOD_NAME
=
"warning"
def
_has_method
(
logger
:
Any
,
method_name
:
str
)
->
bool
:
return
callable
(
getattr
(
logger
,
method_name
,
None
))
def
register_logger
(
logger
:
Any
,
info_method_name
:
str
=
"info"
,
warning_method_name
:
str
=
"warning"
)
->
None
:
...
...
@@ -89,9 +93,6 @@ def register_logger(
warning_method_name : str, optional (default="warning")
Method used to log warning messages.
"""
def
_has_method
(
logger
:
Any
,
method_name
:
str
)
->
bool
:
return
callable
(
getattr
(
logger
,
method_name
,
None
))
if
not
_has_method
(
logger
,
info_method_name
)
or
not
_has_method
(
logger
,
warning_method_name
):
raise
TypeError
(
f
"Logger must provide '
{
info_method_name
}
' and '
{
warning_method_name
}
' method"
...
...
@@ -323,6 +324,14 @@ def _json_default_with_numpy(obj: Any) -> Any:
return
obj
def
_to_string
(
x
:
Union
[
int
,
float
,
str
,
List
])
->
str
:
if
isinstance
(
x
,
list
):
val_list
=
","
.
join
(
str
(
val
)
for
val
in
x
)
return
f
"[
{
val_list
}
]"
else
:
return
str
(
x
)
def
_param_dict_to_str
(
data
:
Optional
[
Dict
[
str
,
Any
]])
->
str
:
"""Convert Python dictionary to string, which is passed to C API."""
if
data
is
None
or
not
data
:
...
...
@@ -330,12 +339,7 @@ def _param_dict_to_str(data: Optional[Dict[str, Any]]) -> str:
pairs
=
[]
for
key
,
val
in
data
.
items
():
if
isinstance
(
val
,
(
list
,
tuple
,
set
))
or
_is_numpy_1d_array
(
val
):
def
to_string
(
x
):
if
isinstance
(
x
,
list
):
return
f
"[
{
','
.
join
(
map
(
str
,
x
))
}
]"
else
:
return
str
(
x
)
pairs
.
append
(
f
"
{
key
}
=
{
','
.
join
(
map
(
to_string
,
val
))
}
"
)
pairs
.
append
(
f
"
{
key
}
=
{
','
.
join
(
map
(
_to_string
,
val
))
}
"
)
elif
isinstance
(
val
,
(
str
,
Path
,
_NUMERIC_TYPES
))
or
_is_numeric
(
val
):
pairs
.
append
(
f
"
{
key
}
=
{
val
}
"
)
elif
val
is
not
None
:
...
...
@@ -564,19 +568,19 @@ def _c_int_array(data):
return
(
ptr_data
,
type_data
,
data
)
# return `data` to avoid the temporary copy is freed
def
_
check_for_bad_pandas_dtypes
(
pandas_dtypes_series
:
pd_Series
)
->
None
:
def
_
is_allowed_numpy_dtype
(
dtype
)
->
bool
:
float128
=
getattr
(
np
,
'float128'
,
type
(
None
))
return
(
issubclass
(
dtype
,
(
np
.
integer
,
np
.
floating
,
np
.
bool_
))
and
not
issubclass
(
dtype
,
(
np
.
timedelta64
,
float128
))
)
def
is_allowed_numpy_dtype
(
dtype
):
return
(
issubclass
(
dtype
,
(
np
.
integer
,
np
.
floating
,
np
.
bool_
))
and
not
issubclass
(
dtype
,
(
np
.
timedelta64
,
float128
))
)
def
_check_for_bad_pandas_dtypes
(
pandas_dtypes_series
:
pd_Series
)
->
None
:
bad_pandas_dtypes
=
[
f
'
{
column_name
}
:
{
pandas_dtype
}
'
for
column_name
,
pandas_dtype
in
pandas_dtypes_series
.
items
()
if
not
is_allowed_numpy_dtype
(
pandas_dtype
.
type
)
if
not
_
is_allowed_numpy_dtype
(
pandas_dtype
.
type
)
]
if
bad_pandas_dtypes
:
raise
ValueError
(
'pandas dtypes must be int, float or bool.
\n
'
...
...
@@ -934,40 +938,53 @@ class _InnerPredictor:
ctypes
.
byref
(
n_preds
)))
return
n_preds
.
value
def
__pred_for_np2d
(
self
,
mat
,
start_iteration
,
num_iteration
,
predict_type
):
def
__inner_predict_np2d
(
self
,
mat
:
np
.
ndarray
,
start_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
,
preds
:
Optional
[
np
.
ndarray
]
)
->
Tuple
[
np
.
ndarray
,
int
]:
if
mat
.
dtype
==
np
.
float32
or
mat
.
dtype
==
np
.
float64
:
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
mat
.
dtype
,
copy
=
False
)
else
:
# change non-float data to float data, need to copy
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
if
preds
is
None
:
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
raise
ValueError
(
"Wrong length of pre-allocated predict array"
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
self
.
handle
,
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int32
(
mat
.
shape
[
0
]),
ctypes
.
c_int32
(
mat
.
shape
[
1
]),
ctypes
.
c_int
(
_C_API_IS_ROW_MAJOR
),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
return
preds
,
mat
.
shape
[
0
]
def
__pred_for_np2d
(
self
,
mat
:
np
.
ndarray
,
start_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
)
->
Tuple
[
np
.
ndarray
,
int
]:
"""Predict for a 2-D numpy matrix."""
if
len
(
mat
.
shape
)
!=
2
:
raise
ValueError
(
'Input numpy.ndarray or list must be 2 dimensional'
)
def
inner_predict
(
mat
,
start_iteration
,
num_iteration
,
predict_type
,
preds
=
None
):
if
mat
.
dtype
==
np
.
float32
or
mat
.
dtype
==
np
.
float64
:
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
mat
.
dtype
,
copy
=
False
)
else
:
# change non-float data to float data, need to copy
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
if
preds
is
None
:
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
raise
ValueError
(
"Wrong length of pre-allocated predict array"
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
self
.
handle
,
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int32
(
mat
.
shape
[
0
]),
ctypes
.
c_int32
(
mat
.
shape
[
1
]),
ctypes
.
c_int
(
_C_API_IS_ROW_MAJOR
),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
return
preds
,
mat
.
shape
[
0
]
nrow
=
mat
.
shape
[
0
]
if
nrow
>
_MAX_INT32
:
sections
=
np
.
arange
(
start
=
_MAX_INT32
,
stop
=
nrow
,
step
=
_MAX_INT32
)
...
...
@@ -978,13 +995,34 @@ class _InnerPredictor:
for
chunk
,
(
start_idx_pred
,
end_idx_pred
)
in
zip
(
np
.
array_split
(
mat
,
sections
),
zip
(
n_preds_sections
,
n_preds_sections
[
1
:])):
# avoid memory consumption by arrays concatenation operations
inner_predict
(
chunk
,
start_iteration
,
num_iteration
,
predict_type
,
preds
[
start_idx_pred
:
end_idx_pred
])
self
.
__inner_predict_np2d
(
mat
=
chunk
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
,
preds
=
preds
[
start_idx_pred
:
end_idx_pred
]
)
return
preds
,
nrow
else
:
return
inner_predict
(
mat
,
start_iteration
,
num_iteration
,
predict_type
)
return
self
.
__inner_predict_np2d
(
mat
=
mat
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
,
preds
=
None
)
def
__create_sparse_native
(
self
,
cs
,
out_shape
,
out_ptr_indptr
,
out_ptr_indices
,
out_ptr_data
,
indptr_type
,
data_type
,
is_csr
=
True
):
def
__create_sparse_native
(
self
,
cs
:
Union
[
scipy
.
sparse
.
csc_matrix
,
scipy
.
sparse
.
csr_matrix
],
out_shape
,
out_ptr_indptr
,
out_ptr_indices
,
out_ptr_data
,
indptr_type
,
data_type
,
is_csr
:
bool
):
# create numpy array from output arrays
data_indices_len
=
out_shape
[
0
]
indptr_len
=
out_shape
[
1
]
...
...
@@ -1029,84 +1067,110 @@ class _InnerPredictor:
return
cs_output_matrices
[
0
]
return
cs_output_matrices
def
__pred_for_csr
(
self
,
csr
,
start_iteration
,
num_iteration
,
predict_type
):
"""Predict for a CSR data."""
def
inner_predict
(
csr
,
start_iteration
,
num_iteration
,
predict_type
,
preds
=
None
):
nrow
=
len
(
csr
.
indptr
)
-
1
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
if
preds
is
None
:
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
raise
ValueError
(
"Wrong length of pre-allocated predict array"
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csr
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csr
.
data
)
assert
csr
.
shape
[
1
]
<=
_MAX_INT32
csr_indices
=
csr
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForCSR
(
self
.
handle
,
ptr_indptr
,
ctypes
.
c_int
(
type_ptr_indptr
),
csr_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int64
(
len
(
csr
.
indptr
)),
ctypes
.
c_int64
(
len
(
csr
.
data
)),
ctypes
.
c_int64
(
csr
.
shape
[
1
]),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
return
preds
,
nrow
def
__inner_predict_csr
(
self
,
csr
:
scipy
.
sparse
.
csr_matrix
,
start_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
,
preds
:
Optional
[
np
.
ndarray
]
)
->
Tuple
[
np
.
ndarray
,
int
]:
nrow
=
len
(
csr
.
indptr
)
-
1
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
if
preds
is
None
:
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
raise
ValueError
(
"Wrong length of pre-allocated predict array"
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
def
inner_predict_sparse
(
csr
,
start_iteration
,
num_iteration
,
predict_type
):
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csr
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csr
.
data
)
csr_indices
=
csr
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
matrix_type
=
_C_API_MATRIX_TYPE_CSR
if
type_ptr_indptr
==
_C_API_DTYPE_INT32
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
else
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int64
)()
out_ptr_indices
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
if
type_ptr_data
==
_C_API_DTYPE_FLOAT32
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_float
)()
else
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_double
)()
out_shape
=
np
.
empty
(
2
,
dtype
=
np
.
int64
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictSparseOutput
(
self
.
handle
,
ptr_indptr
,
ctypes
.
c_int
(
type_ptr_indptr
),
csr_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int64
(
len
(
csr
.
indptr
)),
ctypes
.
c_int64
(
len
(
csr
.
data
)),
ctypes
.
c_int64
(
csr
.
shape
[
1
]),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
c_int
(
matrix_type
),
out_shape
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
)),
ctypes
.
byref
(
out_ptr_indptr
),
ctypes
.
byref
(
out_ptr_indices
),
ctypes
.
byref
(
out_ptr_data
)))
matrices
=
self
.
__create_sparse_native
(
csr
,
out_shape
,
out_ptr_indptr
,
out_ptr_indices
,
out_ptr_data
,
type_ptr_indptr
,
type_ptr_data
,
is_csr
=
True
)
nrow
=
len
(
csr
.
indptr
)
-
1
return
matrices
,
nrow
ptr_indptr
,
type_ptr_indptr
,
_
=
_c_int_array
(
csr
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csr
.
data
)
assert
csr
.
shape
[
1
]
<=
_MAX_INT32
csr_indices
=
csr
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForCSR
(
self
.
handle
,
ptr_indptr
,
ctypes
.
c_int
(
type_ptr_indptr
),
csr_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int64
(
len
(
csr
.
indptr
)),
ctypes
.
c_int64
(
len
(
csr
.
data
)),
ctypes
.
c_int64
(
csr
.
shape
[
1
]),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
byref
(
out_num_preds
),
preds
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))))
if
n_preds
!=
out_num_preds
.
value
:
raise
ValueError
(
"Wrong length for predict results"
)
return
preds
,
nrow
def
__inner_predict_csr_sparse
(
self
,
csr
:
scipy
.
sparse
.
csr_matrix
,
start_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
):
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csr
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csr
.
data
)
csr_indices
=
csr
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
matrix_type
=
_C_API_MATRIX_TYPE_CSR
if
type_ptr_indptr
==
_C_API_DTYPE_INT32
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
else
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int64
)()
out_ptr_indices
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
if
type_ptr_data
==
_C_API_DTYPE_FLOAT32
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_float
)()
else
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_double
)()
out_shape
=
np
.
empty
(
2
,
dtype
=
np
.
int64
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictSparseOutput
(
self
.
handle
,
ptr_indptr
,
ctypes
.
c_int
(
type_ptr_indptr
),
csr_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int64
(
len
(
csr
.
indptr
)),
ctypes
.
c_int64
(
len
(
csr
.
data
)),
ctypes
.
c_int64
(
csr
.
shape
[
1
]),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
c_int
(
matrix_type
),
out_shape
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
)),
ctypes
.
byref
(
out_ptr_indptr
),
ctypes
.
byref
(
out_ptr_indices
),
ctypes
.
byref
(
out_ptr_data
)))
matrices
=
self
.
__create_sparse_native
(
cs
=
csr
,
out_shape
=
out_shape
,
out_ptr_indptr
=
out_ptr_indptr
,
out_ptr_indices
=
out_ptr_indices
,
out_ptr_data
=
out_ptr_data
,
indptr_type
=
type_ptr_indptr
,
data_type
=
type_ptr_data
,
is_csr
=
True
)
nrow
=
len
(
csr
.
indptr
)
-
1
return
matrices
,
nrow
def
__pred_for_csr
(
self
,
csr
,
start_iteration
,
num_iteration
,
predict_type
):
"""Predict for a CSR data."""
if
predict_type
==
_C_API_PREDICT_CONTRIB
:
return
inner_predict_sparse
(
csr
,
start_iteration
,
num_iteration
,
predict_type
)
return
self
.
__inner_predict_csr_sparse
(
csr
=
csr
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
nrow
=
len
(
csr
.
indptr
)
-
1
if
nrow
>
_MAX_INT32
:
sections
=
[
0
]
+
list
(
np
.
arange
(
start
=
_MAX_INT32
,
stop
=
nrow
,
step
=
_MAX_INT32
))
+
[
nrow
]
...
...
@@ -1117,57 +1181,88 @@ class _InnerPredictor:
for
(
start_idx
,
end_idx
),
(
start_idx_pred
,
end_idx_pred
)
in
zip
(
zip
(
sections
,
sections
[
1
:]),
zip
(
n_preds_sections
,
n_preds_sections
[
1
:])):
# avoid memory consumption by arrays concatenation operations
inner_predict
(
csr
[
start_idx
:
end_idx
],
start_iteration
,
num_iteration
,
predict_type
,
preds
[
start_idx_pred
:
end_idx_pred
])
self
.
__inner_predict_csr
(
csr
=
csr
[
start_idx
:
end_idx
],
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
,
preds
=
preds
[
start_idx_pred
:
end_idx_pred
]
)
return
preds
,
nrow
else
:
return
inner_predict
(
csr
,
start_iteration
,
num_iteration
,
predict_type
)
return
self
.
__inner_predict_csr
(
csr
=
csr
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
,
preds
=
None
)
def
__inner_predict_sparse_csc
(
self
,
csc
,
start_iteration
,
num_iteration
,
predict_type
):
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csc
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csc
.
data
)
csc_indices
=
csc
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
matrix_type
=
_C_API_MATRIX_TYPE_CSC
if
type_ptr_indptr
==
_C_API_DTYPE_INT32
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
else
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int64
)()
out_ptr_indices
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
if
type_ptr_data
==
_C_API_DTYPE_FLOAT32
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_float
)()
else
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_double
)()
out_shape
=
np
.
empty
(
2
,
dtype
=
np
.
int64
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictSparseOutput
(
self
.
handle
,
ptr_indptr
,
ctypes
.
c_int
(
type_ptr_indptr
),
csc_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int64
(
len
(
csc
.
indptr
)),
ctypes
.
c_int64
(
len
(
csc
.
data
)),
ctypes
.
c_int64
(
csc
.
shape
[
0
]),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
c_int
(
matrix_type
),
out_shape
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
)),
ctypes
.
byref
(
out_ptr_indptr
),
ctypes
.
byref
(
out_ptr_indices
),
ctypes
.
byref
(
out_ptr_data
)))
matrices
=
self
.
__create_sparse_native
(
cs
=
csc
,
out_shape
=
out_shape
,
out_ptr_indptr
=
out_ptr_indptr
,
out_ptr_indices
=
out_ptr_indices
,
out_ptr_data
=
out_ptr_data
,
indptr_type
=
type_ptr_indptr
,
data_type
=
type_ptr_data
,
is_csr
=
False
)
nrow
=
csc
.
shape
[
0
]
return
matrices
,
nrow
def
__pred_for_csc
(
self
,
csc
,
start_iteration
,
num_iteration
,
predict_type
):
"""Predict for a CSC data."""
def
inner_predict_sparse
(
csc
,
start_iteration
,
num_iteration
,
predict_type
):
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csc
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csc
.
data
)
csc_indices
=
csc
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
matrix_type
=
_C_API_MATRIX_TYPE_CSC
if
type_ptr_indptr
==
_C_API_DTYPE_INT32
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
else
:
out_ptr_indptr
=
ctypes
.
POINTER
(
ctypes
.
c_int64
)()
out_ptr_indices
=
ctypes
.
POINTER
(
ctypes
.
c_int32
)()
if
type_ptr_data
==
_C_API_DTYPE_FLOAT32
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_float
)()
else
:
out_ptr_data
=
ctypes
.
POINTER
(
ctypes
.
c_double
)()
out_shape
=
np
.
empty
(
2
,
dtype
=
np
.
int64
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictSparseOutput
(
self
.
handle
,
ptr_indptr
,
ctypes
.
c_int
(
type_ptr_indptr
),
csc_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ctypes
.
c_int
(
type_ptr_data
),
ctypes
.
c_int64
(
len
(
csc
.
indptr
)),
ctypes
.
c_int64
(
len
(
csc
.
data
)),
ctypes
.
c_int64
(
csc
.
shape
[
0
]),
ctypes
.
c_int
(
predict_type
),
ctypes
.
c_int
(
start_iteration
),
ctypes
.
c_int
(
num_iteration
),
_c_str
(
self
.
pred_parameter
),
ctypes
.
c_int
(
matrix_type
),
out_shape
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
)),
ctypes
.
byref
(
out_ptr_indptr
),
ctypes
.
byref
(
out_ptr_indices
),
ctypes
.
byref
(
out_ptr_data
)))
matrices
=
self
.
__create_sparse_native
(
csc
,
out_shape
,
out_ptr_indptr
,
out_ptr_indices
,
out_ptr_data
,
type_ptr_indptr
,
type_ptr_data
,
is_csr
=
False
)
nrow
=
csc
.
shape
[
0
]
return
matrices
,
nrow
nrow
=
csc
.
shape
[
0
]
if
nrow
>
_MAX_INT32
:
return
self
.
__pred_for_csr
(
csc
.
tocsr
(),
start_iteration
,
num_iteration
,
predict_type
)
if
predict_type
==
_C_API_PREDICT_CONTRIB
:
return
inner_predict_sparse
(
csc
,
start_iteration
,
num_iteration
,
predict_type
)
return
self
.
__inner_predict_sparse_csc
(
csc
=
csc
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
...
...
@@ -4162,7 +4257,7 @@ class Booster:
ret
.
append
((
data_name
,
eval_name
,
val
,
is_higher_better
))
return
ret
def
__inner_predict
(
self
,
data_idx
:
int
):
def
__inner_predict
(
self
,
data_idx
:
int
)
->
np
.
ndarray
:
"""Predict for training and validation dataset."""
if
data_idx
>=
self
.
__num_dataset
:
raise
ValueError
(
"Data_idx should be smaller than number of dataset"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment