Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
5b539788
"...git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "742d72f8bb051105484fd5cca11620493ffb0b2b"
Commit
5b539788
authored
Dec 01, 2016
by
Guolin Ke
Browse files
fix some pep8 check
parent
1a8c23ed
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
133 deletions
+146
-133
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+93
-84
python-package/lightgbm/callback.py
python-package/lightgbm/callback.py
+2
-5
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+36
-34
python-package/lightgbm/libpath.py
python-package/lightgbm/libpath.py
+3
-3
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+12
-7
No files found.
python-package/lightgbm/basic.py
View file @
5b539788
...
@@ -4,8 +4,6 @@ from __future__ import absolute_import
...
@@ -4,8 +4,6 @@ from __future__ import absolute_import
import
sys
import
sys
import
os
import
os
import
ctypes
import
ctypes
import
collections
import
re
import
tempfile
import
tempfile
import
numpy
as
np
import
numpy
as
np
...
@@ -59,7 +57,7 @@ def is_1d_list(data):
...
@@ -59,7 +57,7 @@ def is_1d_list(data):
if
not
isinstance
(
data
,
list
):
if
not
isinstance
(
data
,
list
):
return
False
return
False
if
len
(
data
)
>
0
:
if
len
(
data
)
>
0
:
if
not
isinstance
(
data
[
0
],
(
int
,
float
,
bool
)
):
if
not
isinstance
(
data
[
0
],
(
int
,
float
,
bool
)):
return
False
return
False
return
True
return
True
...
@@ -108,29 +106,29 @@ def param_dict_to_str(data):
...
@@ -108,29 +106,29 @@ def param_dict_to_str(data):
if
is_str
(
val
):
if
is_str
(
val
):
pairs
.
append
(
str
(
key
)
+
'='
+
str
(
val
))
pairs
.
append
(
str
(
key
)
+
'='
+
str
(
val
))
elif
isinstance
(
val
,
(
list
,
tuple
)):
elif
isinstance
(
val
,
(
list
,
tuple
)):
pairs
.
append
(
str
(
key
)
+
'='
+
','
.
join
(
map
(
str
,
val
)))
pairs
.
append
(
str
(
key
)
+
'='
+
','
.
join
(
map
(
str
,
val
)))
elif
isinstance
(
val
,
(
int
,
float
,
bool
)):
elif
isinstance
(
val
,
(
int
,
float
,
bool
)):
pairs
.
append
(
str
(
key
)
+
'='
+
str
(
val
))
pairs
.
append
(
str
(
key
)
+
'='
+
str
(
val
))
else
:
else
:
raise
TypeError
(
'unknow type of parameter:%s , got:%s'
%
(
key
,
type
(
val
).
__name__
))
raise
TypeError
(
'unknow type of parameter:%s , got:%s'
%
(
key
,
type
(
val
).
__name__
))
return
' '
.
join
(
pairs
)
return
' '
.
join
(
pairs
)
"""marco definition of data type in c_api of LightGBM"""
"""marco definition of data type in c_api of LightGBM"""
C_API_DTYPE_FLOAT32
=
0
C_API_DTYPE_FLOAT32
=
0
C_API_DTYPE_FLOAT64
=
1
C_API_DTYPE_FLOAT64
=
1
C_API_DTYPE_INT32
=
2
C_API_DTYPE_INT32
=
2
C_API_DTYPE_INT64
=
3
C_API_DTYPE_INT64
=
3
"""Matric is row major in python"""
"""Matric is row major in python"""
C_API_IS_ROW_MAJOR
=
1
C_API_IS_ROW_MAJOR
=
1
C_API_PREDICT_NORMAL
=
0
C_API_PREDICT_NORMAL
=
0
C_API_PREDICT_RAW_SCORE
=
1
C_API_PREDICT_RAW_SCORE
=
1
C_API_PREDICT_LEAF_INDEX
=
2
C_API_PREDICT_LEAF_INDEX
=
2
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
"weight"
:
C_API_DTYPE_FLOAT32
,
"weight"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"group"
:
C_API_DTYPE_INT32
,
"group"
:
C_API_DTYPE_INT32
}
}
def
c_float_array
(
data
):
def
c_float_array
(
data
):
"""Convert numpy array / list to c float array."""
"""Convert numpy array / list to c float array."""
...
@@ -144,7 +142,8 @@ def c_float_array(data):
...
@@ -144,7 +142,8 @@ def c_float_array(data):
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
type_data
=
C_API_DTYPE_FLOAT64
type_data
=
C_API_DTYPE_FLOAT64
else
:
else
:
raise
TypeError
(
"expected np.float32 or np.float64, met type({})"
.
format
(
data
.
dtype
))
raise
TypeError
(
"expected np.float32 or np.float64, met type({})"
.
format
(
data
.
dtype
))
else
:
else
:
raise
TypeError
(
"Unknow type({})"
.
format
(
type
(
data
).
__name__
))
raise
TypeError
(
"Unknow type({})"
.
format
(
type
(
data
).
__name__
))
return
(
ptr_data
,
type_data
)
return
(
ptr_data
,
type_data
)
...
@@ -161,7 +160,8 @@ def c_int_array(data):
...
@@ -161,7 +160,8 @@ def c_int_array(data):
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
))
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
))
type_data
=
C_API_DTYPE_INT64
type_data
=
C_API_DTYPE_INT64
else
:
else
:
raise
TypeError
(
"expected np.int32 or np.int64, met type({})"
.
format
(
data
.
dtype
))
raise
TypeError
(
"expected np.int32 or np.int64, met type({})"
.
format
(
data
.
dtype
))
else
:
else
:
raise
TypeError
(
"Unknow type({})"
.
format
(
type
(
data
).
__name__
))
raise
TypeError
(
"Unknow type({})"
.
format
(
type
(
data
).
__name__
))
return
(
ptr_data
,
type_data
)
return
(
ptr_data
,
type_data
)
...
@@ -169,13 +169,13 @@ def c_int_array(data):
...
@@ -169,13 +169,13 @@ def c_int_array(data):
class
Predictor
(
object
):
class
Predictor
(
object
):
""""A Predictor of LightGBM.
""""A Predictor of LightGBM.
"""
"""
def
__init__
(
self
,
model_file
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
def
__init__
(
self
,
model_file
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
"""Initialize the Predictor.
"""Initialize the Predictor.
Parameters
Parameters
----------
----------
model_file : string
model_file : string
Path to the model file.
Path to the model file.
"""
"""
self
.
handle
=
ctypes
.
c_void_p
()
self
.
handle
=
ctypes
.
c_void_p
()
self
.
__is_manage_handle
=
True
self
.
__is_manage_handle
=
True
...
@@ -191,7 +191,7 @@ class Predictor(object):
...
@@ -191,7 +191,7 @@ class Predictor(object):
self
.
handle
,
self
.
handle
,
ctypes
.
byref
(
out_num_class
)))
ctypes
.
byref
(
out_num_class
)))
self
.
num_class
=
out_num_class
.
value
self
.
num_class
=
out_num_class
.
value
self
.
__num_total_iteration
=
out_num_iterations
.
value
self
.
__num_total_iteration
=
out_num_iterations
.
value
elif
booster_handle
is
not
None
:
elif
booster_handle
is
not
None
:
self
.
__is_manage_handle
=
is_manage_handle
self
.
__is_manage_handle
=
is_manage_handle
self
.
handle
=
booster_handle
self
.
handle
=
booster_handle
...
@@ -204,7 +204,7 @@ class Predictor(object):
...
@@ -204,7 +204,7 @@ class Predictor(object):
_safe_call
(
_LIB
.
LGBM_BoosterGetCurrentIteration
(
_safe_call
(
_LIB
.
LGBM_BoosterGetCurrentIteration
(
self
.
handle
,
self
.
handle
,
ctypes
.
byref
(
out_num_iterations
)))
ctypes
.
byref
(
out_num_iterations
)))
self
.
__num_total_iteration
=
out_num_iterations
.
value
self
.
__num_total_iteration
=
out_num_iterations
.
value
else
:
else
:
raise
TypeError
(
'Need Model file to create a booster'
)
raise
TypeError
(
'Need Model file to create a booster'
)
...
@@ -213,7 +213,9 @@ class Predictor(object):
...
@@ -213,7 +213,9 @@ class Predictor(object):
_safe_call
(
_LIB
.
LGBM_BoosterFree
(
self
.
handle
))
_safe_call
(
_LIB
.
LGBM_BoosterFree
(
self
.
handle
))
def
predict
(
self
,
data
,
num_iteration
=-
1
,
raw_score
=
False
,
pred_leaf
=
False
,
data_has_header
=
False
,
is_reshape
=
True
):
def
predict
(
self
,
data
,
num_iteration
=-
1
,
raw_score
=
False
,
pred_leaf
=
False
,
data_has_header
=
False
,
is_reshape
=
True
):
"""
"""
Predict logic
Predict logic
...
@@ -222,23 +224,24 @@ class Predictor(object):
...
@@ -222,23 +224,24 @@ class Predictor(object):
data : string/numpy array/scipy.sparse
data : string/numpy array/scipy.sparse
Data source for prediction
Data source for prediction
When data is string type, it represents the path of txt file,
When data is string type, it represents the path of txt file,
num_iteration :
num_iteration :
int
used iteration for prediction
used iteration for prediction
raw_score : bool
raw_score : bool
True for predict raw score
True for predict raw score
pred_leaf : bool
pred_leaf : bool
True for predict leaf index
True for predict leaf index
data_has_header : bool
data_has_header : bool
Used for txt data
Used for txt data
is_reshape : bool
is_reshape : bool
True for reshape to [nrow, ...]
True for reshape to [nrow, ...]
Returns
Returns
-------
-------
Prediction result
Prediction result
"""
"""
if
isinstance
(
data
,
Dataset
):
if
isinstance
(
data
,
Dataset
):
raise
TypeError
(
"cannot use Dataset instance for prediction, please use raw data instead"
)
raise
TypeError
(
"cannot use Dataset instance for prediction,
\
please use raw data instead"
)
predict_type
=
C_API_PREDICT_NORMAL
predict_type
=
C_API_PREDICT_NORMAL
if
raw_score
:
if
raw_score
:
predict_type
=
C_API_PREDICT_RAW_SCORE
predict_type
=
C_API_PREDICT_RAW_SCORE
...
@@ -251,12 +254,12 @@ class Predictor(object):
...
@@ -251,12 +254,12 @@ class Predictor(object):
tmp_pred_fname
=
tempfile
.
NamedTemporaryFile
(
prefix
=
"lightgbm_tmp_pred_"
).
name
tmp_pred_fname
=
tempfile
.
NamedTemporaryFile
(
prefix
=
"lightgbm_tmp_pred_"
).
name
_safe_call
(
_LIB
.
LGBM_BoosterPredictForFile
(
_safe_call
(
_LIB
.
LGBM_BoosterPredictForFile
(
self
.
handle
,
self
.
handle
,
c_str
(
data
),
c_str
(
data
),
int_data_has_header
,
int_data_has_header
,
predict_type
,
predict_type
,
num_iteration
,
num_iteration
,
c_str
(
tmp_pred_fname
)))
c_str
(
tmp_pred_fname
)))
tmp_file
=
open
(
tmp_pred_fname
,
"r"
)
tmp_file
=
open
(
tmp_pred_fname
,
"r"
)
lines
=
tmp_file
.
readlines
()
lines
=
tmp_file
.
readlines
()
tmp_file
.
close
()
tmp_file
.
close
()
nrow
=
len
(
lines
)
nrow
=
len
(
lines
)
...
@@ -267,15 +270,19 @@ class Predictor(object):
...
@@ -267,15 +270,19 @@ class Predictor(object):
preds
=
np
.
array
(
preds
,
copy
=
False
)
preds
=
np
.
array
(
preds
,
copy
=
False
)
os
.
remove
(
tmp_pred_fname
)
os
.
remove
(
tmp_pred_fname
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
predict_type
)
elif
isinstance
(
data
,
np
.
ndarray
):
elif
isinstance
(
data
,
np
.
ndarray
):
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
predict_type
)
else
:
else
:
try
:
try
:
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
,
num_iteration
,
predict_type
)
except
:
except
:
raise
TypeError
(
'can not predict data for type {}'
.
format
(
type
(
data
).
__name__
))
raise
TypeError
(
'can not predict data for type {}'
.
format
(
type
(
data
).
__name__
))
if
pred_leaf
:
if
pred_leaf
:
preds
=
preds
.
astype
(
np
.
int32
)
preds
=
preds
.
astype
(
np
.
int32
)
if
preds
.
size
!=
nrow
and
is_reshape
:
if
preds
.
size
!=
nrow
and
is_reshape
:
...
@@ -283,7 +290,8 @@ class Predictor(object):
...
@@ -283,7 +290,8 @@ class Predictor(object):
ncol
=
int
(
preds
.
size
/
nrow
)
ncol
=
int
(
preds
.
size
/
nrow
)
preds
=
preds
.
reshape
(
nrow
,
ncol
)
preds
=
preds
.
reshape
(
nrow
,
ncol
)
else
:
else
:
raise
ValueError
(
'len of predict result(%d) cannot be divide nrow(%d)'
%
(
preds
.
size
,
nrow
)
)
raise
ValueError
(
'len of predict result(%d) cannot be divide nrow (%d)'
%
(
preds
.
size
,
nrow
))
return
preds
return
preds
def
__get_num_preds
(
self
,
num_iteration
,
nrow
,
predict_type
):
def
__get_num_preds
(
self
,
num_iteration
,
nrow
,
predict_type
):
...
@@ -308,12 +316,13 @@ class Predictor(object):
...
@@ -308,12 +316,13 @@ class Predictor(object):
"""change non-float data to float data, need to copy"""
"""change non-float data to float data, need to copy"""
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float32
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float32
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
self
.
handle
,
self
.
handle
,
ptr_data
,
ptr_data
,
type_ptr_data
,
type_ptr_data
,
mat
.
shape
[
0
],
mat
.
shape
[
0
],
mat
.
shape
[
1
],
mat
.
shape
[
1
],
...
@@ -341,12 +350,12 @@ class Predictor(object):
...
@@ -341,12 +350,12 @@ class Predictor(object):
_safe_call
(
_LIB
.
LGBM_BoosterPredictForCSR
(
_safe_call
(
_LIB
.
LGBM_BoosterPredictForCSR
(
self
.
handle
,
self
.
handle
,
ptr_indptr
,
ptr_indptr
,
type_ptr_indptr
,
type_ptr_indptr
,
csr
.
indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
csr
.
indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ptr_data
,
type_ptr_data
,
type_ptr_data
,
len
(
csr
.
indptr
),
len
(
csr
.
indptr
),
len
(
csr
.
data
),
len
(
csr
.
data
),
csr
.
shape
[
1
],
csr
.
shape
[
1
],
predict_type
,
predict_type
,
...
@@ -365,10 +374,10 @@ except ImportError:
...
@@ -365,10 +374,10 @@ except ImportError:
class
DataFrame
(
object
):
class
DataFrame
(
object
):
pass
pass
PANDAS_DTYPE_MAPPER
=
{
'int8'
:
'int'
,
'int16'
:
'int'
,
'int32'
:
'int'
,
'int64'
:
'int'
,
PANDAS_DTYPE_MAPPER
=
{
'int8'
:
'int'
,
'int16'
:
'int'
,
'int32'
:
'int'
,
'
u
int
8'
:
'int'
,
'uint1
6'
:
'int'
,
'uint
32
'
:
'int'
,
'uint6
4
'
:
'int'
,
'int6
4
'
:
'int'
,
'uint
8
'
:
'int'
,
'uint
1
6'
:
'int'
,
'
float16'
:
'float'
,
'float32'
:
'floa
t'
,
'float6
4
'
:
'float'
,
'
uint32'
:
'int'
,
'uint64'
:
'in
t'
,
'float
1
6'
:
'float'
,
'bool'
:
'i'
}
'float32'
:
'float'
,
'float64'
:
'float'
,
'bool'
:
'i'
}
def
_data_from_pandas
(
data
):
def
_data_from_pandas
(
data
):
if
isinstance
(
data
,
DataFrame
):
if
isinstance
(
data
,
DataFrame
):
...
@@ -399,8 +408,8 @@ class Dataset(object):
...
@@ -399,8 +408,8 @@ class Dataset(object):
"""
"""
def
__init__
(
self
,
data
,
label
=
None
,
max_bin
=
255
,
reference
=
None
,
def
__init__
(
self
,
data
,
label
=
None
,
max_bin
=
255
,
reference
=
None
,
weight
=
None
,
group
=
None
,
predictor
=
None
,
weight
=
None
,
group
=
None
,
predictor
=
None
,
silent
=
False
,
params
=
None
):
silent
=
False
,
params
=
None
):
"""
"""
Dataset used in LightGBM.
Dataset used in LightGBM.
...
@@ -412,7 +421,7 @@ class Dataset(object):
...
@@ -412,7 +421,7 @@ class Dataset(object):
label : list or numpy 1-D array, optional
label : list or numpy 1-D array, optional
Label of the data
Label of the data
max_bin : int, required
max_bin : int, required
max number of discrete bin for features
max number of discrete bin for features
reference : Other Dataset, optional
reference : Other Dataset, optional
If this dataset validation, need to use training data as reference
If this dataset validation, need to use training data as reference
weight : list or numpy 1-D array , optional
weight : list or numpy 1-D array , optional
...
@@ -482,10 +491,10 @@ class Dataset(object):
...
@@ -482,10 +491,10 @@ class Dataset(object):
self
.
set_group
(
group
)
self
.
set_group
(
group
)
# load init score
# load init score
if
self
.
predictor
is
not
None
and
isinstance
(
self
.
predictor
,
Predictor
):
if
self
.
predictor
is
not
None
and
isinstance
(
self
.
predictor
,
Predictor
):
init_score
=
self
.
predictor
.
predict
(
data
,
init_score
=
self
.
predictor
.
predict
(
data
,
raw_score
=
True
,
raw_score
=
True
,
data_has_header
=
self
.
data_has_header
,
data_has_header
=
self
.
data_has_header
,
is_reshape
=
False
)
is_reshape
=
False
)
if
self
.
predictor
.
num_class
>
1
:
if
self
.
predictor
.
num_class
>
1
:
# need re group init score
# need re group init score
new_init_score
=
np
.
zeros
(
init_score
.
size
(),
dtype
=
np
.
float32
)
new_init_score
=
np
.
zeros
(
init_score
.
size
(),
dtype
=
np
.
float32
)
...
@@ -496,8 +505,8 @@ class Dataset(object):
...
@@ -496,8 +505,8 @@ class Dataset(object):
init_score
=
new_init_score
init_score
=
new_init_score
self
.
set_init_score
(
init_score
)
self
.
set_init_score
(
init_score
)
def
create_valid
(
self
,
data
,
label
=
None
,
weight
=
None
,
group
=
None
,
def
create_valid
(
self
,
data
,
label
=
None
,
weight
=
None
,
group
=
None
,
silent
=
False
,
params
=
None
):
silent
=
False
,
params
=
None
):
"""
"""
Create validation data align with current dataset
Create validation data align with current dataset
...
@@ -518,8 +527,8 @@ class Dataset(object):
...
@@ -518,8 +527,8 @@ class Dataset(object):
other parameters
other parameters
"""
"""
return
Dataset
(
data
,
label
=
label
,
max_bin
=
self
.
max_bin
,
reference
=
self
,
return
Dataset
(
data
,
label
=
label
,
max_bin
=
self
.
max_bin
,
reference
=
self
,
weight
=
weight
,
group
=
group
,
predictor
=
self
.
predictor
,
weight
=
weight
,
group
=
group
,
predictor
=
self
.
predictor
,
silent
=
silent
,
params
=
params
)
silent
=
silent
,
params
=
params
)
def
subset
(
self
,
used_indices
,
params
=
None
):
def
subset
(
self
,
used_indices
,
params
=
None
):
"""
"""
...
@@ -530,10 +539,10 @@ class Dataset(object):
...
@@ -530,10 +539,10 @@ class Dataset(object):
ret
.
handle
=
ctypes
.
c_void_p
()
ret
.
handle
=
ctypes
.
c_void_p
()
params_str
=
param_dict_to_str
(
params
)
params_str
=
param_dict_to_str
(
params
)
_safe_call
(
_LIB
.
LGBM_DatasetGetSubset
(
_safe_call
(
_LIB
.
LGBM_DatasetGetSubset
(
ctypes
.
byref
(
self
.
handle
),
ctypes
.
byref
(
self
.
handle
),
used_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
used_indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
used_indices
.
shape
[
0
],
used_indices
.
shape
[
0
],
c_str
(
params_str
),
c_str
(
params_str
),
ctypes
.
byref
(
ret
.
handle
)))
ctypes
.
byref
(
ret
.
handle
)))
ret
.
max_bin
=
self
.
max_bin
ret
.
max_bin
=
self
.
max_bin
ret
.
predictor
=
self
.
predictor
ret
.
predictor
=
self
.
predictor
...
@@ -557,13 +566,13 @@ class Dataset(object):
...
@@ -557,13 +566,13 @@ class Dataset(object):
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
_safe_call
(
_LIB
.
LGBM_DatasetCreateFromMat
(
_safe_call
(
_LIB
.
LGBM_DatasetCreateFromMat
(
ptr_data
,
ptr_data
,
type_ptr_data
,
type_ptr_data
,
mat
.
shape
[
0
],
mat
.
shape
[
0
],
mat
.
shape
[
1
],
mat
.
shape
[
1
],
C_API_IS_ROW_MAJOR
,
C_API_IS_ROW_MAJOR
,
c_str
(
params_str
),
c_str
(
params_str
),
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
def
__init_from_csr
(
self
,
csr
,
params_str
,
ref_dataset
):
def
__init_from_csr
(
self
,
csr
,
params_str
,
ref_dataset
):
...
@@ -578,16 +587,16 @@ class Dataset(object):
...
@@ -578,16 +587,16 @@ class Dataset(object):
ptr_data
,
type_ptr_data
=
c_float_array
(
csr
.
data
)
ptr_data
,
type_ptr_data
=
c_float_array
(
csr
.
data
)
_safe_call
(
_LIB
.
LGBM_DatasetCreateFromCSR
(
_safe_call
(
_LIB
.
LGBM_DatasetCreateFromCSR
(
ptr_indptr
,
ptr_indptr
,
type_ptr_indptr
,
type_ptr_indptr
,
csr
.
indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
csr
.
indices
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
ptr_data
,
ptr_data
,
type_ptr_data
,
type_ptr_data
,
len
(
csr
.
indptr
),
len
(
csr
.
indptr
),
len
(
csr
.
data
),
len
(
csr
.
data
),
csr
.
shape
[
1
],
csr
.
shape
[
1
],
c_str
(
params_str
),
c_str
(
params_str
),
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
def
__del__
(
self
):
def
__del__
(
self
):
...
@@ -784,7 +793,7 @@ class Dataset(object):
...
@@ -784,7 +793,7 @@ class Dataset(object):
"""
"""
ret
=
ctypes
.
c_int64
()
ret
=
ctypes
.
c_int64
()
_safe_call
(
_LIB
.
LGBM_DatasetGetNumData
(
self
.
handle
,
_safe_call
(
_LIB
.
LGBM_DatasetGetNumData
(
self
.
handle
,
ctypes
.
byref
(
ret
)))
ctypes
.
byref
(
ret
)))
return
ret
.
value
return
ret
.
value
def
num_feature
(
self
):
def
num_feature
(
self
):
...
@@ -796,7 +805,7 @@ class Dataset(object):
...
@@ -796,7 +805,7 @@ class Dataset(object):
"""
"""
ret
=
ctypes
.
c_int64
()
ret
=
ctypes
.
c_int64
()
_safe_call
(
_LIB
.
LGBM_DatasetGetNumFeature
(
self
.
handle
,
_safe_call
(
_LIB
.
LGBM_DatasetGetNumFeature
(
self
.
handle
,
ctypes
.
byref
(
ret
)))
ctypes
.
byref
(
ret
)))
return
ret
.
value
return
ret
.
value
class
Booster
(
object
):
class
Booster
(
object
):
...
@@ -812,7 +821,7 @@ class Booster(object):
...
@@ -812,7 +821,7 @@ class Booster(object):
train_set : Dataset
train_set : Dataset
training dataset
training dataset
model_file : string
model_file : string
Path to the model file.
Path to the model file.
silent : boolean, optional
silent : boolean, optional
Whether print messages during construction
Whether print messages during construction
"""
"""
...
@@ -833,7 +842,7 @@ class Booster(object):
...
@@ -833,7 +842,7 @@ class Booster(object):
params_str
=
param_dict_to_str
(
params
)
params_str
=
param_dict_to_str
(
params
)
"""construct booster object"""
"""construct booster object"""
_safe_call
(
_LIB
.
LGBM_BoosterCreate
(
_safe_call
(
_LIB
.
LGBM_BoosterCreate
(
train_set
.
handle
,
train_set
.
handle
,
c_str
(
params_str
),
c_str
(
params_str
),
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
"""save reference to data"""
"""save reference to data"""
...
@@ -859,7 +868,7 @@ class Booster(object):
...
@@ -859,7 +868,7 @@ class Booster(object):
"""Prediction task"""
"""Prediction task"""
out_num_iterations
=
ctypes
.
c_int64
(
0
)
out_num_iterations
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterCreateFromModelfile
(
_safe_call
(
_LIB
.
LGBM_BoosterCreateFromModelfile
(
c_str
(
model_file
),
c_str
(
model_file
),
ctypes
.
byref
(
out_num_iterations
),
ctypes
.
byref
(
out_num_iterations
),
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
out_num_class
=
ctypes
.
c_int64
(
0
)
out_num_class
=
ctypes
.
c_int64
(
0
)
...
@@ -939,13 +948,13 @@ class Booster(object):
...
@@ -939,13 +948,13 @@ class Booster(object):
raise
Exception
(
"Replace training data failed, you should use same predictor for these data"
)
raise
Exception
(
"Replace training data failed, you should use same predictor for these data"
)
self
.
train_set
=
train_set
self
.
train_set
=
train_set
_safe_call
(
_LIB
.
LGBM_BoosterResetTrainingData
(
_safe_call
(
_LIB
.
LGBM_BoosterResetTrainingData
(
self
.
handle
,
self
.
handle
,
self
.
train_set
.
handle
))
self
.
train_set
.
handle
))
self
.
__inner_predict_buffer
[
0
]
=
None
self
.
__inner_predict_buffer
[
0
]
=
None
is_finished
=
ctypes
.
c_int
(
0
)
is_finished
=
ctypes
.
c_int
(
0
)
if
fobj
is
None
:
if
fobj
is
None
:
_safe_call
(
_LIB
.
LGBM_BoosterUpdateOneIter
(
_safe_call
(
_LIB
.
LGBM_BoosterUpdateOneIter
(
self
.
handle
,
self
.
handle
,
ctypes
.
byref
(
is_finished
)))
ctypes
.
byref
(
is_finished
)))
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range
(
self
.
__num_dataset
)]
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range
(
self
.
__num_dataset
)]
return
is_finished
.
value
==
1
return
is_finished
.
value
==
1
...
@@ -1080,7 +1089,7 @@ class Booster(object):
...
@@ -1080,7 +1089,7 @@ class Booster(object):
Parameters
Parameters
----------
----------
filename : str
filename : str
filename to save
filename to save
num_iteration: int
num_iteration: int
number of iteration that want to save. < 0 means save all
number of iteration that want to save. < 0 means save all
"""
"""
...
@@ -1098,16 +1107,16 @@ class Booster(object):
...
@@ -1098,16 +1107,16 @@ class Booster(object):
data : string/numpy array/scipy.sparse
data : string/numpy array/scipy.sparse
Data source for prediction
Data source for prediction
When data is string type, it represents the path of txt file,
When data is string type, it represents the path of txt file,
num_iteration :
num_iteration :
int
used iteration for prediction
used iteration for prediction
raw_score : bool
raw_score : bool
True for predict raw score
True for predict raw score
pred_leaf : bool
pred_leaf : bool
True for predict leaf index
True for predict leaf index
data_has_header : bool
data_has_header : bool
Used for txt data
Used for txt data
is_reshape : bool
is_reshape : bool
True for reshape to [nrow, ...]
True for reshape to [nrow, ...]
Returns
Returns
-------
-------
...
@@ -1136,8 +1145,8 @@ class Booster(object):
...
@@ -1136,8 +1145,8 @@ class Booster(object):
result
=
np
.
array
([
0.0
for
_
in
range
(
self
.
__num_inner_eval
)],
dtype
=
np
.
float32
)
result
=
np
.
array
([
0.0
for
_
in
range
(
self
.
__num_inner_eval
)],
dtype
=
np
.
float32
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetEval
(
_safe_call
(
_LIB
.
LGBM_BoosterGetEval
(
self
.
handle
,
self
.
handle
,
data_idx
,
data_idx
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
tmp_out_len
),
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))))
result
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))))
if
tmp_out_len
.
value
!=
self
.
__num_inner_eval
:
if
tmp_out_len
.
value
!=
self
.
__num_inner_eval
:
...
@@ -1176,12 +1185,12 @@ class Booster(object):
...
@@ -1176,12 +1185,12 @@ class Booster(object):
tmp_out_len
=
ctypes
.
c_int64
(
0
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
_safe_call
(
_LIB
.
LGBM_BoosterGetPredict
(
_safe_call
(
_LIB
.
LGBM_BoosterGetPredict
(
self
.
handle
,
self
.
handle
,
data_idx
,
data_idx
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
tmp_out_len
),
data_ptr
))
data_ptr
))
if
tmp_out_len
.
value
!=
len
(
self
.
__inner_predict_buffer
[
data_idx
]):
if
tmp_out_len
.
value
!=
len
(
self
.
__inner_predict_buffer
[
data_idx
]):
raise
ValueError
(
"incorrect number of predict results for data %d"
%
(
data_idx
)
)
raise
ValueError
(
"incorrect number of predict results for data %d"
%
(
data_idx
)
)
self
.
__is_predicted_cur_iter
[
data_idx
]
=
True
self
.
__is_predicted_cur_iter
[
data_idx
]
=
True
return
self
.
__inner_predict_buffer
[
data_idx
]
return
self
.
__inner_predict_buffer
[
data_idx
]
...
...
python-package/lightgbm/callback.py
View file @
5b539788
...
@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True):
...
@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True):
callback : function
callback : function
The requested callback function.
The requested callback function.
"""
"""
state
=
{}
factor_to_bigger_better
=
{}
factor_to_bigger_better
=
{}
best_score
=
{}
best_score
=
{}
best_iter
=
{}
best_iter
=
{}
...
@@ -172,23 +171,21 @@ def early_stop(stopping_rounds, verbose=True):
...
@@ -172,23 +171,21 @@ def early_stop(stopping_rounds, verbose=True):
factor_to_bigger_better
[
i
]
=
-
1.0
factor_to_bigger_better
[
i
]
=
-
1.0
if
env
.
evaluation_result_list
[
i
][
3
]:
if
env
.
evaluation_result_list
[
i
][
3
]:
factor_to_bigger_better
[
i
]
=
1.0
factor_to_bigger_better
[
i
]
=
1.0
state
[
'best_iter'
]
=
0
def
callback
(
env
):
def
callback
(
env
):
"""internal function"""
"""internal function"""
if
len
(
best_score
)
==
0
:
if
len
(
best_score
)
==
0
:
init
(
env
)
init
(
env
)
for
i
in
range
(
len
(
env
.
evaluation_result_list
)):
for
i
in
range
(
len
(
env
.
evaluation_result_list
)):
score
=
env
.
evaluation_result_list
[
i
][
2
]
*
factor_to_bigger_better
[
i
]
score
=
env
.
evaluation_result_list
[
i
][
2
]
*
factor_to_bigger_better
[
i
]
if
score
>
best_score
[
i
]:
if
score
>
best_score
[
i
]:
best_score
[
i
]
=
score
best_score
[
i
]
=
score
best_iter
[
i
]
=
env
.
iteration
best_iter
[
i
]
=
env
.
iteration
if
verbose
:
if
verbose
:
best_msg
[
i
]
=
'[%d]
\t
%s'
%
(
env
.
iteration
,
best_msg
[
i
]
=
'[%d]
\t
%s'
%
(
env
.
iteration
,
'
\t
'
.
join
([
_format_eval_result
(
x
)
for
x
in
env
.
evaluation_result_list
]))
'
\t
'
.
join
([
_format_eval_result
(
x
)
for
x
in
env
.
evaluation_result_list
]))
else
:
else
:
if
env
.
iteration
-
best_iter
[
i
]
>=
stopping_rounds
:
if
env
.
iteration
-
best_iter
[
i
]
>=
stopping_rounds
:
state
[
'best_iter'
]
=
best_iter
[
i
]
if
env
.
model
is
not
None
:
if
env
.
model
is
not
None
:
env
.
model
.
set_attr
(
best_iteration
=
str
(
best_iter
[
i
]))
env
.
model
.
set_attr
(
best_iteration
=
str
(
best_iter
[
i
]))
if
verbose
:
if
verbose
:
...
...
python-package/lightgbm/engine.py
View file @
5b539788
"""Training Library containing training routines of LightGBM."""
"""Training Library containing training routines of LightGBM."""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
import
collections
import
numpy
as
np
import
numpy
as
np
from
.basic
import
LightGBMError
,
Predictor
,
Dataset
,
Booster
,
is_str
from
.basic
import
LightGBMError
,
Predictor
,
Dataset
,
Booster
,
is_str
from
.
import
callback
from
.
import
callback
def
_construct_dataset
(
X_y
,
reference
=
None
,
def
_construct_dataset
(
X_y
,
reference
=
None
,
params
=
None
,
other_fields
=
None
,
predictor
=
None
):
params
=
None
,
other_fields
=
None
,
predictor
=
None
):
if
'max_bin'
in
params
:
if
'max_bin'
in
params
:
max_bin
=
int
(
params
[
'max_bin'
])
max_bin
=
int
(
params
[
'max_bin'
])
else
:
else
:
...
@@ -31,20 +31,22 @@ def _construct_dataset(X_y, reference=None,
...
@@ -31,20 +31,22 @@ def _construct_dataset(X_y, reference=None,
label
=
X_y
[
1
]
label
=
X_y
[
1
]
if
reference
is
None
:
if
reference
is
None
:
ret
=
Dataset
(
data
,
label
=
label
,
max_bin
=
max_bin
,
ret
=
Dataset
(
data
,
label
=
label
,
max_bin
=
max_bin
,
weight
=
weight
,
group
=
group
,
predictor
=
predictor
,
params
=
params
)
weight
=
weight
,
group
=
group
,
predictor
=
predictor
,
params
=
params
)
else
:
else
:
ret
=
reference
.
create_valid
(
data
,
label
=
label
,
weight
=
weight
,
group
=
group
,
params
=
params
)
ret
=
reference
.
create_valid
(
data
,
label
=
label
,
weight
=
weight
,
group
=
group
,
params
=
params
)
if
init_score
is
not
None
:
if
init_score
is
not
None
:
ret
.
set_init_score
(
init_score
)
ret
.
set_init_score
(
init_score
)
return
ret
return
ret
def
train
(
params
,
train_data
,
num_boost_round
=
100
,
def
train
(
params
,
train_data
,
num_boost_round
=
100
,
valid_datas
=
None
,
valid_names
=
None
,
valid_datas
=
None
,
valid_names
=
None
,
fobj
=
None
,
feval
=
None
,
init_model
=
None
,
fobj
=
None
,
feval
=
None
,
init_model
=
None
,
train_fields
=
None
,
valid_fields
=
None
,
train_fields
=
None
,
valid_fields
=
None
,
early_stopping_rounds
=
None
,
evals_result
=
None
,
early_stopping_rounds
=
None
,
evals_result
=
None
,
verbose_eval
=
True
,
learning_rates
=
None
,
callbacks
=
None
):
verbose_eval
=
True
,
learning_rates
=
None
,
callbacks
=
None
):
"""Train with given parameters.
"""Train with given parameters.
Parameters
Parameters
...
@@ -134,9 +136,9 @@ def train(params, train_data, num_boost_round=100,
...
@@ -134,9 +136,9 @@ def train(params, train_data, num_boost_round=100,
continue
continue
valid_set
=
_construct_dataset
(
valid_set
=
_construct_dataset
(
valid_datas
[
i
],
valid_datas
[
i
],
train_set
,
train_set
,
params
,
params
,
other_fields
,
other_fields
,
predictor
)
predictor
)
valid_sets
.
append
(
valid_set
)
valid_sets
.
append
(
valid_set
)
if
valid_names
is
not
None
:
if
valid_names
is
not
None
:
...
@@ -182,11 +184,11 @@ def train(params, train_data, num_boost_round=100,
...
@@ -182,11 +184,11 @@ def train(params, train_data, num_boost_round=100,
for
i
in
range
(
num_boost_round
):
for
i
in
range
(
num_boost_round
):
for
cb
in
callbacks_before_iter
:
for
cb
in
callbacks_before_iter
:
cb
(
callback
.
CallbackEnv
(
model
=
booster
,
cb
(
callback
.
CallbackEnv
(
model
=
booster
,
cvfolds
=
None
,
cvfolds
=
None
,
iteration
=
i
,
iteration
=
i
,
begin_iteration
=
0
,
begin_iteration
=
0
,
end_iteration
=
num_boost_round
,
end_iteration
=
num_boost_round
,
evaluation_result_list
=
None
))
evaluation_result_list
=
None
))
booster
.
update
(
fobj
=
fobj
)
booster
.
update
(
fobj
=
fobj
)
...
@@ -199,11 +201,11 @@ def train(params, train_data, num_boost_round=100,
...
@@ -199,11 +201,11 @@ def train(params, train_data, num_boost_round=100,
try
:
try
:
for
cb
in
callbacks_after_iter
:
for
cb
in
callbacks_after_iter
:
cb
(
callback
.
CallbackEnv
(
model
=
booster
,
cb
(
callback
.
CallbackEnv
(
model
=
booster
,
cvfolds
=
None
,
cvfolds
=
None
,
iteration
=
i
,
iteration
=
i
,
begin_iteration
=
0
,
begin_iteration
=
0
,
end_iteration
=
num_boost_round
,
end_iteration
=
num_boost_round
,
evaluation_result_list
=
evaluation_result_list
))
evaluation_result_list
=
evaluation_result_list
))
except
callback
.
EarlyStopException
:
except
callback
.
EarlyStopException
:
break
break
if
booster
.
attr
(
'best_iteration'
)
is
not
None
:
if
booster
.
attr
(
'best_iteration'
)
is
not
None
:
...
@@ -384,11 +386,11 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...
@@ -384,11 +386,11 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
for
i
in
range
(
num_boost_round
):
for
i
in
range
(
num_boost_round
):
for
cb
in
callbacks_before_iter
:
for
cb
in
callbacks_before_iter
:
cb
(
callback
.
CallbackEnv
(
model
=
None
,
cb
(
callback
.
CallbackEnv
(
model
=
None
,
cvfolds
=
cvfolds
,
cvfolds
=
cvfolds
,
iteration
=
i
,
iteration
=
i
,
begin_iteration
=
0
,
begin_iteration
=
0
,
end_iteration
=
num_boost_round
,
end_iteration
=
num_boost_round
,
evaluation_result_list
=
None
))
evaluation_result_list
=
None
))
for
fold
in
cvfolds
:
for
fold
in
cvfolds
:
fold
.
update
(
fobj
)
fold
.
update
(
fobj
)
res
=
_agg_cv_result
([
f
.
eval
(
feval
)
for
f
in
cvfolds
])
res
=
_agg_cv_result
([
f
.
eval
(
feval
)
for
f
in
cvfolds
])
...
@@ -402,13 +404,13 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...
@@ -402,13 +404,13 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
try
:
try
:
for
cb
in
callbacks_after_iter
:
for
cb
in
callbacks_after_iter
:
cb
(
callback
.
CallbackEnv
(
model
=
None
,
cb
(
callback
.
CallbackEnv
(
model
=
None
,
cvfolds
=
cvfolds
,
cvfolds
=
cvfolds
,
iteration
=
i
,
iteration
=
i
,
begin_iteration
=
0
,
begin_iteration
=
0
,
end_iteration
=
num_boost_round
,
end_iteration
=
num_boost_round
,
evaluation_result_list
=
res
))
evaluation_result_list
=
res
))
except
callback
.
EarlyStopException
as
e
:
except
callback
.
EarlyStopException
as
e
:
for
k
in
results
.
keys
():
for
k
in
results
.
keys
():
results
[
k
]
=
results
[
k
][:(
e
.
state
[
'
best_iter
'
]
+
1
)]
results
[
k
]
=
results
[
k
][:(
e
.
best_iter
ation
+
1
)]
break
break
return
results
return
results
python-package/lightgbm/libpath.py
View file @
5b539788
...
@@ -12,9 +12,9 @@ def find_lib_path():
...
@@ -12,9 +12,9 @@ def find_lib_path():
"""
"""
curr_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
os
.
path
.
expanduser
(
__file__
)))
curr_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
os
.
path
.
expanduser
(
__file__
)))
dll_path
=
[
curr_path
,
os
.
path
.
join
(
curr_path
,
'../../lib/'
),
dll_path
=
[
curr_path
,
os
.
path
.
join
(
curr_path
,
'../../lib/'
),
os
.
path
.
join
(
curr_path
,
'../../'
),
os
.
path
.
join
(
curr_path
,
'../../'
),
os
.
path
.
join
(
curr_path
,
'./lib/'
),
os
.
path
.
join
(
curr_path
,
'./lib/'
),
os
.
path
.
join
(
sys
.
prefix
,
'lightgbm'
)]
os
.
path
.
join
(
sys
.
prefix
,
'lightgbm'
)]
if
os
.
name
==
'nt'
:
if
os
.
name
==
'nt'
:
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'../../windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'../../windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'./windows/x64/Dll/'
))
dll_path
.
append
(
os
.
path
.
join
(
curr_path
,
'./windows/x64/Dll/'
))
...
...
python-package/lightgbm/sklearn.py
View file @
5b539788
...
@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase):
...
@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase):
return
params
return
params
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
"""
"""
Fit the gradient boosting model
Fit the gradient boosting model
...
@@ -308,7 +309,7 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
...
@@ -308,7 +309,7 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
self
.
classes_
=
np
.
unique
(
y
)
self
.
classes_
=
np
.
unique
(
y
)
...
@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
...
@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
if
eval_set
is
not
None
:
if
eval_set
is
not
None
:
eval_set
=
list
(
(
x
[
0
],
self
.
_le
.
transform
(
x
[
1
]))
for
x
in
eval_set
)
eval_set
=
list
(
(
x
[
0
],
self
.
_le
.
transform
(
x
[
1
]))
for
x
in
eval_set
)
super
(
LGBMClassifier
,
self
).
fit
(
X
,
training_labels
,
eval_set
,
eval_metric
,
super
(
LGBMClassifier
,
self
).
fit
(
X
,
training_labels
,
eval_set
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
return
self
return
self
def
predict
(
self
,
data
,
raw_score
=
False
,
num_iteration
=
0
):
def
predict
(
self
,
data
,
raw_score
=
False
,
num_iteration
=
0
):
...
@@ -405,7 +408,7 @@ class LGBMRanker(LGBMModel):
...
@@ -405,7 +408,7 @@ class LGBMRanker(LGBMModel):
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
"""
+
'
\n
'
.
join
(
LGBMModel
.
__doc__
.
split
(
'
\n
'
)[
2
:])
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
"""check group data"""
"""check group data"""
...
@@ -428,6 +431,8 @@ class LGBMRanker(LGBMModel):
...
@@ -428,6 +431,8 @@ class LGBMRanker(LGBMModel):
self
.
objective
=
"lambdarank"
self
.
objective
=
"lambdarank"
self
.
fobj
=
None
self
.
fobj
=
None
super
(
LGBMRanker
,
self
).
fit
(
X
,
y
,
eval_set
,
eval_metric
,
super
(
LGBMRanker
,
self
).
fit
(
X
,
y
,
eval_set
,
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
return
self
return
self
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment