Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
e8cdc2c9
Unverified
Commit
e8cdc2c9
authored
Feb 26, 2023
by
James Lamb
Committed by
GitHub
Feb 26, 2023
Browse files
[python-package] add more type hints in basic.py (#5729)
parent
77132aa7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
97 additions
and
29 deletions
+97
-29
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+97
-29
No files found.
python-package/lightgbm/basic.py
View file @
e8cdc2c9
...
@@ -275,7 +275,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') -
...
@@ -275,7 +275,7 @@ def _data_to_2d_numpy(data: Any, dtype: type = np.float32, name: str = 'list') -
"It should be list of lists, numpy 2-D array or pandas DataFrame"
)
"It should be list of lists, numpy 2-D array or pandas DataFrame"
)
def
_cfloat32_array_to_numpy
(
cptr
:
Any
,
length
:
int
)
->
np
.
ndarray
:
def
_cfloat32_array_to_numpy
(
cptr
:
"ctypes._Pointer"
,
length
:
int
)
->
np
.
ndarray
:
"""Convert a ctypes float pointer array to a numpy array."""
"""Convert a ctypes float pointer array to a numpy array."""
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_float
)):
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_float
)):
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
...
@@ -283,7 +283,7 @@ def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
...
@@ -283,7 +283,7 @@ def _cfloat32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise
RuntimeError
(
'Expected float pointer'
)
raise
RuntimeError
(
'Expected float pointer'
)
def
_cfloat64_array_to_numpy
(
cptr
:
Any
,
length
:
int
)
->
np
.
ndarray
:
def
_cfloat64_array_to_numpy
(
cptr
:
"ctypes._Pointer"
,
length
:
int
)
->
np
.
ndarray
:
"""Convert a ctypes double pointer array to a numpy array."""
"""Convert a ctypes double pointer array to a numpy array."""
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_double
)):
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_double
)):
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
...
@@ -291,7 +291,7 @@ def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
...
@@ -291,7 +291,7 @@ def _cfloat64_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise
RuntimeError
(
'Expected double pointer'
)
raise
RuntimeError
(
'Expected double pointer'
)
def
_cint32_array_to_numpy
(
cptr
:
Any
,
length
:
int
)
->
np
.
ndarray
:
def
_cint32_array_to_numpy
(
cptr
:
"ctypes._Pointer"
,
length
:
int
)
->
np
.
ndarray
:
"""Convert a ctypes int pointer array to a numpy array."""
"""Convert a ctypes int pointer array to a numpy array."""
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_int32
)):
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_int32
)):
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
...
@@ -299,7 +299,7 @@ def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
...
@@ -299,7 +299,7 @@ def _cint32_array_to_numpy(cptr: Any, length: int) -> np.ndarray:
raise
RuntimeError
(
'Expected int32 pointer'
)
raise
RuntimeError
(
'Expected int32 pointer'
)
def
_cint64_array_to_numpy
(
cptr
:
Any
,
length
:
int
)
->
np
.
ndarray
:
def
_cint64_array_to_numpy
(
cptr
:
"ctypes._Pointer"
,
length
:
int
)
->
np
.
ndarray
:
"""Convert a ctypes int pointer array to a numpy array."""
"""Convert a ctypes int pointer array to a numpy array."""
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_int64
)):
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_int64
)):
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
return
np
.
ctypeslib
.
as_array
(
cptr
,
shape
=
(
length
,)).
copy
()
...
@@ -902,26 +902,56 @@ class _InnerPredictor:
...
@@ -902,26 +902,56 @@ class _InnerPredictor:
preds
=
np
.
loadtxt
(
f
.
name
,
dtype
=
np
.
float64
)
preds
=
np
.
loadtxt
(
f
.
name
,
dtype
=
np
.
float64
)
nrow
=
preds
.
shape
[
0
]
nrow
=
preds
.
shape
[
0
]
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
start_iteration
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
=
data
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csc_matrix
):
elif
isinstance
(
data
,
scipy
.
sparse
.
csc_matrix
):
preds
,
nrow
=
self
.
__pred_for_csc
(
data
,
start_iteration
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csc
(
csc
=
data
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
elif
isinstance
(
data
,
np
.
ndarray
):
elif
isinstance
(
data
,
np
.
ndarray
):
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
start_iteration
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_np2d
(
mat
=
data
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
elif
isinstance
(
data
,
list
):
elif
isinstance
(
data
,
list
):
try
:
try
:
data
=
np
.
array
(
data
)
data
=
np
.
array
(
data
)
except
BaseException
:
except
BaseException
:
raise
ValueError
(
'Cannot convert data list to numpy array.'
)
raise
ValueError
(
'Cannot convert data list to numpy array.'
)
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
start_iteration
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_np2d
(
mat
=
data
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
elif
isinstance
(
data
,
dt_DataTable
):
elif
isinstance
(
data
,
dt_DataTable
):
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
.
to_numpy
(),
start_iteration
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_np2d
(
mat
=
data
.
to_numpy
(),
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
else
:
else
:
try
:
try
:
_log_warning
(
'Converting data to scipy sparse matrix.'
)
_log_warning
(
'Converting data to scipy sparse matrix.'
)
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
except
BaseException
:
except
BaseException
:
raise
TypeError
(
f
'Cannot predict data for type
{
type
(
data
).
__name__
}
'
)
raise
TypeError
(
f
'Cannot predict data for type
{
type
(
data
).
__name__
}
'
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
,
start_iteration
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
=
csr
,
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
if
pred_leaf
:
if
pred_leaf
:
preds
=
preds
.
astype
(
np
.
int32
)
preds
=
preds
.
astype
(
np
.
int32
)
is_sparse
=
scipy
.
sparse
.
issparse
(
preds
)
or
isinstance
(
preds
,
list
)
is_sparse
=
scipy
.
sparse
.
issparse
(
preds
)
or
isinstance
(
preds
,
list
)
...
@@ -932,7 +962,13 @@ class _InnerPredictor:
...
@@ -932,7 +962,13 @@ class _InnerPredictor:
raise
ValueError
(
f
'Length of predict result (
{
preds
.
size
}
) cannot be divide nrow (
{
nrow
}
)'
)
raise
ValueError
(
f
'Length of predict result (
{
preds
.
size
}
) cannot be divide nrow (
{
nrow
}
)'
)
return
preds
return
preds
def
__get_num_preds
(
self
,
start_iteration
,
num_iteration
,
nrow
,
predict_type
):
def
__get_num_preds
(
self
,
start_iteration
:
int
,
num_iteration
:
int
,
nrow
:
int
,
predict_type
:
int
)
->
int
:
"""Get size of prediction result."""
"""Get size of prediction result."""
if
nrow
>
_MAX_INT32
:
if
nrow
>
_MAX_INT32
:
raise
LightGBMError
(
'LightGBM cannot perform prediction for data '
raise
LightGBMError
(
'LightGBM cannot perform prediction for data '
...
@@ -962,7 +998,12 @@ class _InnerPredictor:
...
@@ -962,7 +998,12 @@ class _InnerPredictor:
else
:
# change non-float data to float data, need to copy
else
:
# change non-float data to float data, need to copy
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
data
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
nrow
=
mat
.
shape
[
0
],
predict_type
=
predict_type
)
if
preds
is
None
:
if
preds
is
None
:
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
...
@@ -1026,14 +1067,14 @@ class _InnerPredictor:
...
@@ -1026,14 +1067,14 @@ class _InnerPredictor:
def
__create_sparse_native
(
def
__create_sparse_native
(
self
,
self
,
cs
:
Union
[
scipy
.
sparse
.
csc_matrix
,
scipy
.
sparse
.
csr_matrix
],
cs
:
Union
[
scipy
.
sparse
.
csc_matrix
,
scipy
.
sparse
.
csr_matrix
],
out_shape
,
out_shape
:
np
.
ndarray
,
out_ptr_indptr
,
out_ptr_indptr
:
"ctypes._Pointer"
,
out_ptr_indices
,
out_ptr_indices
:
"ctypes._Pointer"
,
out_ptr_data
,
out_ptr_data
:
"ctypes._Pointer"
,
indptr_type
,
indptr_type
:
int
,
data_type
,
data_type
:
int
,
is_csr
:
bool
is_csr
:
bool
):
)
->
Union
[
List
[
scipy
.
sparse
.
csc_matrix
],
List
[
scipy
.
sparse
.
csr_matrix
]]
:
# create numpy array from output arrays
# create numpy array from output arrays
data_indices_len
=
out_shape
[
0
]
data_indices_len
=
out_shape
[
0
]
indptr_len
=
out_shape
[
1
]
indptr_len
=
out_shape
[
1
]
...
@@ -1087,7 +1128,12 @@ class _InnerPredictor:
...
@@ -1087,7 +1128,12 @@ class _InnerPredictor:
preds
:
Optional
[
np
.
ndarray
]
preds
:
Optional
[
np
.
ndarray
]
)
->
Tuple
[
np
.
ndarray
,
int
]:
)
->
Tuple
[
np
.
ndarray
,
int
]:
nrow
=
len
(
csr
.
indptr
)
-
1
nrow
=
len
(
csr
.
indptr
)
-
1
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
nrow
=
nrow
,
predict_type
=
predict_type
)
if
preds
is
None
:
if
preds
is
None
:
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
elif
len
(
preds
.
shape
)
!=
1
or
len
(
preds
)
!=
n_preds
:
...
@@ -1126,7 +1172,7 @@ class _InnerPredictor:
...
@@ -1126,7 +1172,7 @@ class _InnerPredictor:
start_iteration
:
int
,
start_iteration
:
int
,
num_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
predict_type
:
int
):
)
->
Tuple
[
Union
[
List
[
scipy
.
sparse
.
csc_matrix
],
List
[
scipy
.
sparse
.
csr_matrix
]],
int
]
:
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csr
.
indptr
)
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csr
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csr
.
data
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csr
.
data
)
csr_indices
=
csr
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
csr_indices
=
csr
.
indices
.
astype
(
np
.
int32
,
copy
=
False
)
...
@@ -1173,7 +1219,13 @@ class _InnerPredictor:
...
@@ -1173,7 +1219,13 @@ class _InnerPredictor:
nrow
=
len
(
csr
.
indptr
)
-
1
nrow
=
len
(
csr
.
indptr
)
-
1
return
matrices
,
nrow
return
matrices
,
nrow
def
__pred_for_csr
(
self
,
csr
,
start_iteration
,
num_iteration
,
predict_type
):
def
__pred_for_csr
(
self
,
csr
:
scipy
.
sparse
.
csr_matrix
,
start_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
)
->
Tuple
[
np
.
ndarray
,
int
]:
"""Predict for a CSR data."""
"""Predict for a CSR data."""
if
predict_type
==
_C_API_PREDICT_CONTRIB
:
if
predict_type
==
_C_API_PREDICT_CONTRIB
:
return
self
.
__inner_predict_csr_sparse
(
return
self
.
__inner_predict_csr_sparse
(
...
@@ -1211,10 +1263,10 @@ class _InnerPredictor:
...
@@ -1211,10 +1263,10 @@ class _InnerPredictor:
def
__inner_predict_sparse_csc
(
def
__inner_predict_sparse_csc
(
self
,
self
,
csc
,
csc
:
scipy
.
sparse
.
csc_matrix
,
start_iteration
,
start_iteration
:
int
,
num_iteration
,
num_iteration
:
int
,
predict_type
predict_type
:
int
):
):
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csc
.
indptr
)
ptr_indptr
,
type_ptr_indptr
,
__
=
_c_int_array
(
csc
.
indptr
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csc
.
data
)
ptr_data
,
type_ptr_data
,
_
=
_c_float_array
(
csc
.
data
)
...
@@ -1262,11 +1314,22 @@ class _InnerPredictor:
...
@@ -1262,11 +1314,22 @@ class _InnerPredictor:
nrow
=
csc
.
shape
[
0
]
nrow
=
csc
.
shape
[
0
]
return
matrices
,
nrow
return
matrices
,
nrow
def
__pred_for_csc
(
self
,
csc
,
start_iteration
,
num_iteration
,
predict_type
):
def
__pred_for_csc
(
self
,
csc
:
scipy
.
sparse
.
csc_matrix
,
start_iteration
:
int
,
num_iteration
:
int
,
predict_type
:
int
)
->
Tuple
[
np
.
ndarray
,
int
]:
"""Predict for a CSC data."""
"""Predict for a CSC data."""
nrow
=
csc
.
shape
[
0
]
nrow
=
csc
.
shape
[
0
]
if
nrow
>
_MAX_INT32
:
if
nrow
>
_MAX_INT32
:
return
self
.
__pred_for_csr
(
csc
.
tocsr
(),
start_iteration
,
num_iteration
,
predict_type
)
return
self
.
__pred_for_csr
(
csr
=
csc
.
tocsr
(),
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
)
if
predict_type
==
_C_API_PREDICT_CONTRIB
:
if
predict_type
==
_C_API_PREDICT_CONTRIB
:
return
self
.
__inner_predict_sparse_csc
(
return
self
.
__inner_predict_sparse_csc
(
csc
=
csc
,
csc
=
csc
,
...
@@ -1274,7 +1337,12 @@ class _InnerPredictor:
...
@@ -1274,7 +1337,12 @@ class _InnerPredictor:
num_iteration
=
num_iteration
,
num_iteration
=
num_iteration
,
predict_type
=
predict_type
predict_type
=
predict_type
)
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
,
num_iteration
,
nrow
,
predict_type
)
n_preds
=
self
.
__get_num_preds
(
start_iteration
=
start_iteration
,
num_iteration
=
num_iteration
,
nrow
=
nrow
,
predict_type
=
predict_type
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
preds
=
np
.
empty
(
n_preds
,
dtype
=
np
.
float64
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment