Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
244db078
Commit
244db078
authored
Aug 25, 2018
by
Nikita Titov
Committed by
Qiwei Ye
Aug 25, 2018
Browse files
return self (#1602)
parent
dcf9ad2e
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
226 additions
and
117 deletions
+226
-117
examples/python-guide/advanced_example.py
examples/python-guide/advanced_example.py
+1
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+193
-74
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+4
-13
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+28
-29
No files found.
examples/python-guide/advanced_example.py
View file @
244db078
...
@@ -64,7 +64,7 @@ print('7th feature name is:', repr(lgb_train.feature_name[6]))
...
@@ -64,7 +64,7 @@ print('7th feature name is:', repr(lgb_train.feature_name[6]))
# save model to file
# save model to file
gbm
.
save_model
(
'model.txt'
)
gbm
.
save_model
(
'model.txt'
)
# dump model to
json
(and save to file)
# dump model to
JSON
(and save to file)
print
(
'Dump model to JSON...'
)
print
(
'Dump model to JSON...'
)
model_json
=
gbm
.
dump_model
()
model_json
=
gbm
.
dump_model
()
...
...
python-package/lightgbm/basic.py
View file @
244db078
...
@@ -184,7 +184,7 @@ def convert_from_sliced_object(data):
...
@@ -184,7 +184,7 @@ def convert_from_sliced_object(data):
"""fix the memory of multi-dimensional sliced object"""
"""fix the memory of multi-dimensional sliced object"""
if
data
.
base
is
not
None
and
isinstance
(
data
,
np
.
ndarray
)
and
isinstance
(
data
.
base
,
np
.
ndarray
):
if
data
.
base
is
not
None
and
isinstance
(
data
,
np
.
ndarray
)
and
isinstance
(
data
.
base
,
np
.
ndarray
):
if
not
data
.
flags
.
c_contiguous
:
if
not
data
.
flags
.
c_contiguous
:
warnings
.
warn
(
"Usage subset(sliced data)
of np.ndarray
is not recommended due to it will double the peak memory cost in LightGBM."
)
warnings
.
warn
(
"Usage
of np.ndarray
subset
(sliced data) is not recommended due to it will double the peak memory cost in LightGBM."
)
return
np
.
copy
(
data
)
return
np
.
copy
(
data
)
return
data
return
data
...
@@ -607,9 +607,9 @@ class Dataset(object):
...
@@ -607,9 +607,9 @@ class Dataset(object):
If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647).
All values in categorical features should be less than int32 max value (2147483647).
All negative values in categorical features will be treated as missing values.
All negative values in categorical features will be treated as missing values.
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Other parameters.
Other parameters.
free_raw_data: bool, optional (default=True)
free_raw_data
: bool, optional (default=True)
If True, raw data is freed after constructing inner Dataset.
If True, raw data is freed after constructing inner Dataset.
"""
"""
self
.
handle
=
None
self
.
handle
=
None
...
@@ -639,6 +639,7 @@ class Dataset(object):
...
@@ -639,6 +639,7 @@ class Dataset(object):
if
self
.
handle
is
not
None
:
if
self
.
handle
is
not
None
:
_safe_call
(
_LIB
.
LGBM_DatasetFree
(
self
.
handle
))
_safe_call
(
_LIB
.
LGBM_DatasetFree
(
self
.
handle
))
self
.
handle
=
None
self
.
handle
=
None
return
self
def
_lazy_init
(
self
,
data
,
label
=
None
,
reference
=
None
,
def
_lazy_init
(
self
,
data
,
label
=
None
,
reference
=
None
,
weight
=
None
,
group
=
None
,
init_score
=
None
,
predictor
=
None
,
weight
=
None
,
group
=
None
,
init_score
=
None
,
predictor
=
None
,
...
@@ -646,7 +647,7 @@ class Dataset(object):
...
@@ -646,7 +647,7 @@ class Dataset(object):
categorical_feature
=
'auto'
,
params
=
None
):
categorical_feature
=
'auto'
,
params
=
None
):
if
data
is
None
:
if
data
is
None
:
self
.
handle
=
None
self
.
handle
=
None
return
return
self
if
reference
is
not
None
:
if
reference
is
not
None
:
self
.
pandas_categorical
=
reference
.
pandas_categorical
self
.
pandas_categorical
=
reference
.
pandas_categorical
categorical_feature
=
reference
.
categorical_feature
categorical_feature
=
reference
.
categorical_feature
...
@@ -747,7 +748,7 @@ class Dataset(object):
...
@@ -747,7 +748,7 @@ class Dataset(object):
elif
self
.
predictor
is
not
None
:
elif
self
.
predictor
is
not
None
:
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
# set feature names
# set feature names
self
.
set_feature_name
(
feature_name
)
return
self
.
set_feature_name
(
feature_name
)
def
__init_from_np2d
(
self
,
mat
,
params_str
,
ref_dataset
):
def
__init_from_np2d
(
self
,
mat
,
params_str
,
ref_dataset
):
"""
"""
...
@@ -773,6 +774,7 @@ class Dataset(object):
...
@@ -773,6 +774,7 @@ class Dataset(object):
c_str
(
params_str
),
c_str
(
params_str
),
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
return
self
def
__init_from_list_np2d
(
self
,
mats
,
params_str
,
ref_dataset
):
def
__init_from_list_np2d
(
self
,
mats
,
params_str
,
ref_dataset
):
"""
"""
...
@@ -821,6 +823,7 @@ class Dataset(object):
...
@@ -821,6 +823,7 @@ class Dataset(object):
c_str
(
params_str
),
c_str
(
params_str
),
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
return
self
def
__init_from_csr
(
self
,
csr
,
params_str
,
ref_dataset
):
def
__init_from_csr
(
self
,
csr
,
params_str
,
ref_dataset
):
"""
"""
...
@@ -845,6 +848,7 @@ class Dataset(object):
...
@@ -845,6 +848,7 @@ class Dataset(object):
c_str
(
params_str
),
c_str
(
params_str
),
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
return
self
def
__init_from_csc
(
self
,
csc
,
params_str
,
ref_dataset
):
def
__init_from_csc
(
self
,
csc
,
params_str
,
ref_dataset
):
"""
"""
...
@@ -869,6 +873,7 @@ class Dataset(object):
...
@@ -869,6 +873,7 @@ class Dataset(object):
c_str
(
params_str
),
c_str
(
params_str
),
ref_dataset
,
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
ctypes
.
byref
(
self
.
handle
)))
return
self
def
construct
(
self
):
def
construct
(
self
):
"""Lazy init.
"""Lazy init.
...
@@ -876,7 +881,7 @@ class Dataset(object):
...
@@ -876,7 +881,7 @@ class Dataset(object):
Returns
Returns
-------
-------
self : Dataset
self : Dataset
Returns self
.
Constructed Dataset object
.
"""
"""
if
self
.
handle
is
None
:
if
self
.
handle
is
None
:
if
self
.
reference
is
not
None
:
if
self
.
reference
is
not
None
:
...
@@ -928,13 +933,13 @@ class Dataset(object):
...
@@ -928,13 +933,13 @@ class Dataset(object):
Init score for Dataset.
Init score for Dataset.
silent : bool, optional (default=False)
silent : bool, optional (default=False)
Whether to print messages during construction.
Whether to print messages during construction.
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Other parameters.
Other parameters.
Returns
Returns
-------
-------
self
: Dataset
valid
: Dataset
Returns
self.
Validation Dataset with reference to
self.
"""
"""
ret
=
Dataset
(
data
,
label
=
label
,
reference
=
self
,
ret
=
Dataset
(
data
,
label
=
label
,
reference
=
self
,
weight
=
weight
,
group
=
group
,
init_score
=
init_score
,
weight
=
weight
,
group
=
group
,
init_score
=
init_score
,
...
@@ -950,7 +955,7 @@ class Dataset(object):
...
@@ -950,7 +955,7 @@ class Dataset(object):
----------
----------
used_indices : list of int
used_indices : list of int
Indices used to create the subset.
Indices used to create the subset.
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Other parameters.
Other parameters.
Returns
Returns
...
@@ -974,10 +979,16 @@ class Dataset(object):
...
@@ -974,10 +979,16 @@ class Dataset(object):
----------
----------
filename : string
filename : string
Name of the output file.
Name of the output file.
Returns
-------
self : Dataset
Returns self.
"""
"""
_safe_call
(
_LIB
.
LGBM_DatasetSaveBinary
(
_safe_call
(
_LIB
.
LGBM_DatasetSaveBinary
(
self
.
construct
().
handle
,
self
.
construct
().
handle
,
c_str
(
filename
)))
c_str
(
filename
)))
return
self
def
_update_params
(
self
,
params
):
def
_update_params
(
self
,
params
):
if
not
self
.
params
:
if
not
self
.
params
:
...
@@ -985,20 +996,27 @@ class Dataset(object):
...
@@ -985,20 +996,27 @@ class Dataset(object):
else
:
else
:
self
.
params_back_up
=
copy
.
deepcopy
(
self
.
params
)
self
.
params_back_up
=
copy
.
deepcopy
(
self
.
params
)
self
.
params
.
update
(
params
)
self
.
params
.
update
(
params
)
return
self
def
_reverse_update_params
(
self
):
def
_reverse_update_params
(
self
):
self
.
params
=
copy
.
deepcopy
(
self
.
params_back_up
)
self
.
params
=
copy
.
deepcopy
(
self
.
params_back_up
)
self
.
params_back_up
=
None
self
.
params_back_up
=
None
return
self
def
set_field
(
self
,
field_name
,
data
):
def
set_field
(
self
,
field_name
,
data
):
"""Set property into the Dataset.
"""Set property into the Dataset.
Parameters
Parameters
----------
----------
field_name: string
field_name
: string
The field name of the information.
The field name of the information.
data: list, numpy array or None
data
: list, numpy array or None
The array of data to be set.
The array of data to be set.
Returns
-------
self : Dataset
Dataset with set property.
"""
"""
if
self
.
handle
is
None
:
if
self
.
handle
is
None
:
raise
Exception
(
"Cannot set %s before construct dataset"
%
field_name
)
raise
Exception
(
"Cannot set %s before construct dataset"
%
field_name
)
...
@@ -1010,7 +1028,7 @@ class Dataset(object):
...
@@ -1010,7 +1028,7 @@ class Dataset(object):
None
,
None
,
ctypes
.
c_int
(
0
),
ctypes
.
c_int
(
0
),
ctypes
.
c_int
(
FIELD_TYPE_MAPPER
[
field_name
])))
ctypes
.
c_int
(
FIELD_TYPE_MAPPER
[
field_name
])))
return
return
self
dtype
=
np
.
float32
dtype
=
np
.
float32
if
field_name
==
'group'
:
if
field_name
==
'group'
:
dtype
=
np
.
int32
dtype
=
np
.
int32
...
@@ -1031,13 +1049,14 @@ class Dataset(object):
...
@@ -1031,13 +1049,14 @@ class Dataset(object):
ptr_data
,
ptr_data
,
ctypes
.
c_int
(
len
(
data
)),
ctypes
.
c_int
(
len
(
data
)),
ctypes
.
c_int
(
type_data
)))
ctypes
.
c_int
(
type_data
)))
return
self
def
get_field
(
self
,
field_name
):
def
get_field
(
self
,
field_name
):
"""Get property from the Dataset.
"""Get property from the Dataset.
Parameters
Parameters
----------
----------
field_name: string
field_name
: string
The field name of the information.
The field name of the information.
Returns
Returns
...
@@ -1076,19 +1095,25 @@ class Dataset(object):
...
@@ -1076,19 +1095,25 @@ class Dataset(object):
----------
----------
categorical_feature : list of int or strings
categorical_feature : list of int or strings
Names or indices of categorical features.
Names or indices of categorical features.
Returns
-------
self : Dataset
Dataset with set categorical features.
"""
"""
if
self
.
categorical_feature
==
categorical_feature
:
if
self
.
categorical_feature
==
categorical_feature
:
return
return
self
if
self
.
data
is
not
None
:
if
self
.
data
is
not
None
:
if
self
.
categorical_feature
is
None
:
if
self
.
categorical_feature
is
None
:
self
.
categorical_feature
=
categorical_feature
self
.
categorical_feature
=
categorical_feature
self
.
_free_handle
()
return
self
.
_free_handle
()
elif
categorical_feature
==
'auto'
:
elif
categorical_feature
==
'auto'
:
warnings
.
warn
(
'Using categorical_feature in Dataset.'
)
warnings
.
warn
(
'Using categorical_feature in Dataset.'
)
return
self
else
:
else
:
warnings
.
warn
(
'categorical_feature in Dataset is overridden. New categorical_feature is {}'
.
format
(
sorted
(
list
(
categorical_feature
))))
warnings
.
warn
(
'categorical_feature in Dataset is overridden. New categorical_feature is {}'
.
format
(
sorted
(
list
(
categorical_feature
))))
self
.
categorical_feature
=
categorical_feature
self
.
categorical_feature
=
categorical_feature
self
.
_free_handle
()
return
self
.
_free_handle
()
else
:
else
:
raise
LightGBMError
(
"Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
raise
LightGBMError
(
"Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
...
@@ -1098,10 +1123,10 @@ class Dataset(object):
...
@@ -1098,10 +1123,10 @@ class Dataset(object):
Please set init_model in engine.train or engine.cv
Please set init_model in engine.train or engine.cv
"""
"""
if
predictor
is
self
.
_predictor
:
if
predictor
is
self
.
_predictor
:
return
return
self
if
self
.
data
is
not
None
:
if
self
.
data
is
not
None
:
self
.
_predictor
=
predictor
self
.
_predictor
=
predictor
self
.
_free_handle
()
return
self
.
_free_handle
()
else
:
else
:
raise
LightGBMError
(
"Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
raise
LightGBMError
(
"Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
...
@@ -1112,16 +1137,19 @@ class Dataset(object):
...
@@ -1112,16 +1137,19 @@ class Dataset(object):
----------
----------
reference : Dataset
reference : Dataset
Reference that is used as a template to construct the current Dataset.
Reference that is used as a template to construct the current Dataset.
Returns
-------
self : Dataset
Dataset with set reference.
"""
"""
self
.
set_categorical_feature
(
reference
.
categorical_feature
)
self
.
set_categorical_feature
(
reference
.
categorical_feature
).
set_feature_name
(
reference
.
feature_name
).
_set_predictor
(
reference
.
_predictor
)
self
.
set_feature_name
(
reference
.
feature_name
)
self
.
_set_predictor
(
reference
.
_predictor
)
# we're done if self and reference share a common upstream reference
# we're done if self and reference share a common upstream reference
if
self
.
get_ref_chain
().
intersection
(
reference
.
get_ref_chain
()):
if
self
.
get_ref_chain
().
intersection
(
reference
.
get_ref_chain
()):
return
return
self
if
self
.
data
is
not
None
:
if
self
.
data
is
not
None
:
self
.
reference
=
reference
self
.
reference
=
reference
self
.
_free_handle
()
return
self
.
_free_handle
()
else
:
else
:
raise
LightGBMError
(
"Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
raise
LightGBMError
(
"Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
...
@@ -1132,6 +1160,11 @@ class Dataset(object):
...
@@ -1132,6 +1160,11 @@ class Dataset(object):
----------
----------
feature_name : list of strings
feature_name : list of strings
Feature names.
Feature names.
Returns
-------
self : Dataset
Dataset with set feature name.
"""
"""
if
feature_name
!=
'auto'
:
if
feature_name
!=
'auto'
:
self
.
feature_name
=
feature_name
self
.
feature_name
=
feature_name
...
@@ -1143,19 +1176,26 @@ class Dataset(object):
...
@@ -1143,19 +1176,26 @@ class Dataset(object):
self
.
handle
,
self
.
handle
,
c_array
(
ctypes
.
c_char_p
,
c_feature_name
),
c_array
(
ctypes
.
c_char_p
,
c_feature_name
),
ctypes
.
c_int
(
len
(
feature_name
))))
ctypes
.
c_int
(
len
(
feature_name
))))
return
self
def
set_label
(
self
,
label
):
def
set_label
(
self
,
label
):
"""Set label of Dataset
"""Set label of Dataset
Parameters
Parameters
----------
----------
label: list, numpy array or None
label
: list, numpy array or None
The label information to be set into Dataset.
The label information to be set into Dataset.
Returns
-------
self : Dataset
Dataset with set label.
"""
"""
self
.
label
=
label
self
.
label
=
label
if
self
.
handle
is
not
None
:
if
self
.
handle
is
not
None
:
label
=
list_to_1d_numpy
(
label
,
name
=
'label'
)
label
=
list_to_1d_numpy
(
label
,
name
=
'label'
)
self
.
set_field
(
'label'
,
label
)
self
.
set_field
(
'label'
,
label
)
return
self
def
set_weight
(
self
,
weight
):
def
set_weight
(
self
,
weight
):
"""Set weight of each instance.
"""Set weight of each instance.
...
@@ -1164,6 +1204,11 @@ class Dataset(object):
...
@@ -1164,6 +1204,11 @@ class Dataset(object):
----------
----------
weight : list, numpy array or None
weight : list, numpy array or None
Weight to be set for each data point.
Weight to be set for each data point.
Returns
-------
self : Dataset
Dataset with set weight.
"""
"""
if
weight
is
not
None
and
np
.
all
(
weight
==
1
):
if
weight
is
not
None
and
np
.
all
(
weight
==
1
):
weight
=
None
weight
=
None
...
@@ -1171,6 +1216,7 @@ class Dataset(object):
...
@@ -1171,6 +1216,7 @@ class Dataset(object):
if
self
.
handle
is
not
None
and
weight
is
not
None
:
if
self
.
handle
is
not
None
and
weight
is
not
None
:
weight
=
list_to_1d_numpy
(
weight
,
name
=
'weight'
)
weight
=
list_to_1d_numpy
(
weight
,
name
=
'weight'
)
self
.
set_field
(
'weight'
,
weight
)
self
.
set_field
(
'weight'
,
weight
)
return
self
def
set_init_score
(
self
,
init_score
):
def
set_init_score
(
self
,
init_score
):
"""Set init score of Booster to start from.
"""Set init score of Booster to start from.
...
@@ -1179,11 +1225,17 @@ class Dataset(object):
...
@@ -1179,11 +1225,17 @@ class Dataset(object):
----------
----------
init_score : list, numpy array or None
init_score : list, numpy array or None
Init score for Booster.
Init score for Booster.
Returns
-------
self : Dataset
Dataset with set init score.
"""
"""
self
.
init_score
=
init_score
self
.
init_score
=
init_score
if
self
.
handle
is
not
None
and
init_score
is
not
None
:
if
self
.
handle
is
not
None
and
init_score
is
not
None
:
init_score
=
list_to_1d_numpy
(
init_score
,
np
.
float64
,
name
=
'init_score'
)
init_score
=
list_to_1d_numpy
(
init_score
,
np
.
float64
,
name
=
'init_score'
)
self
.
set_field
(
'init_score'
,
init_score
)
self
.
set_field
(
'init_score'
,
init_score
)
return
self
def
set_group
(
self
,
group
):
def
set_group
(
self
,
group
):
"""Set group size of Dataset (used for ranking).
"""Set group size of Dataset (used for ranking).
...
@@ -1192,18 +1244,24 @@ class Dataset(object):
...
@@ -1192,18 +1244,24 @@ class Dataset(object):
----------
----------
group : list, numpy array or None
group : list, numpy array or None
Group size of each group.
Group size of each group.
Returns
-------
self : Dataset
Dataset with set group.
"""
"""
self
.
group
=
group
self
.
group
=
group
if
self
.
handle
is
not
None
and
group
is
not
None
:
if
self
.
handle
is
not
None
and
group
is
not
None
:
group
=
list_to_1d_numpy
(
group
,
np
.
int32
,
name
=
'group'
)
group
=
list_to_1d_numpy
(
group
,
np
.
int32
,
name
=
'group'
)
self
.
set_field
(
'group'
,
group
)
self
.
set_field
(
'group'
,
group
)
return
self
def
get_label
(
self
):
def
get_label
(
self
):
"""Get the label of the Dataset.
"""Get the label of the Dataset.
Returns
Returns
-------
-------
label : numpy array
label : numpy array
or None
The label information from the Dataset.
The label information from the Dataset.
"""
"""
if
self
.
label
is
None
:
if
self
.
label
is
None
:
...
@@ -1215,7 +1273,7 @@ class Dataset(object):
...
@@ -1215,7 +1273,7 @@ class Dataset(object):
Returns
Returns
-------
-------
weight : numpy array
weight : numpy array
or None
Weight for each data point from the Dataset.
Weight for each data point from the Dataset.
"""
"""
if
self
.
weight
is
None
:
if
self
.
weight
is
None
:
...
@@ -1227,7 +1285,7 @@ class Dataset(object):
...
@@ -1227,7 +1285,7 @@ class Dataset(object):
Returns
Returns
-------
-------
init_score : numpy array
init_score : numpy array
or None
Init score of Booster.
Init score of Booster.
"""
"""
if
self
.
init_score
is
None
:
if
self
.
init_score
is
None
:
...
@@ -1239,17 +1297,14 @@ class Dataset(object):
...
@@ -1239,17 +1297,14 @@ class Dataset(object):
Returns
Returns
-------
-------
group : numpy array
group : numpy array
or None
Group size of each group.
Group size of each group.
"""
"""
if
self
.
group
is
None
:
if
self
.
group
is
None
:
self
.
group
=
self
.
get_field
(
'group'
)
self
.
group
=
self
.
get_field
(
'group'
)
if
self
.
group
is
not
None
:
if
self
.
group
is
not
None
:
# group data from LightGBM is boundaries data, need to convert to group size
# group data from LightGBM is boundaries data, need to convert to group size
new_group
=
[]
self
.
group
=
np
.
diff
(
self
.
group
)
for
i
in
range_
(
len
(
self
.
group
)
-
1
):
new_group
.
append
(
self
.
group
[
i
+
1
]
-
self
.
group
[
i
])
self
.
group
=
new_group
return
self
.
group
return
self
.
group
def
num_data
(
self
):
def
num_data
(
self
):
...
@@ -1309,7 +1364,7 @@ class Dataset(object):
...
@@ -1309,7 +1364,7 @@ class Dataset(object):
break
break
else
:
else
:
break
break
return
(
ref_chain
)
return
ref_chain
class
Booster
(
object
):
class
Booster
(
object
):
...
@@ -1319,7 +1374,7 @@ class Booster(object):
...
@@ -1319,7 +1374,7 @@ class Booster(object):
Parameters
Parameters
----------
----------
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Parameters for Booster.
Parameters for Booster.
train_set : Dataset or None, optional (default=None)
train_set : Dataset or None, optional (default=None)
Training dataset.
Training dataset.
...
@@ -1448,14 +1503,22 @@ class Booster(object):
...
@@ -1448,14 +1503,22 @@ class Booster(object):
self
.
__dict__
.
update
(
state
)
self
.
__dict__
.
update
(
state
)
def
free_dataset
(
self
):
def
free_dataset
(
self
):
"""Free Booster's Datasets."""
"""Free Booster's Datasets.
Returns
-------
self : Booster
Booster without Datasets.
"""
self
.
__dict__
.
pop
(
'train_set'
,
None
)
self
.
__dict__
.
pop
(
'train_set'
,
None
)
self
.
__dict__
.
pop
(
'valid_sets'
,
None
)
self
.
__dict__
.
pop
(
'valid_sets'
,
None
)
self
.
__num_dataset
=
0
self
.
__num_dataset
=
0
return
self
def
_free_buffer
(
self
):
def
_free_buffer
(
self
):
self
.
__inner_predict_buffer
=
[]
self
.
__inner_predict_buffer
=
[]
self
.
__is_predicted_cur_iter
=
[]
self
.
__is_predicted_cur_iter
=
[]
return
self
def
set_network
(
self
,
machines
,
local_listen_port
=
12400
,
def
set_network
(
self
,
machines
,
local_listen_port
=
12400
,
listen_time_out
=
120
,
num_machines
=
1
):
listen_time_out
=
120
,
num_machines
=
1
):
...
@@ -1463,35 +1526,54 @@ class Booster(object):
...
@@ -1463,35 +1526,54 @@ class Booster(object):
Parameters
Parameters
----------
----------
machines: list, set or string
machines
: list, set or string
Names of machines.
Names of machines.
local_listen_port: int, optional (default=12400)
local_listen_port
: int, optional (default=12400)
TCP listen port for local machines.
TCP listen port for local machines.
listen_time_out: int, optional (default=120)
listen_time_out
: int, optional (default=120)
Socket time-out in minutes.
Socket time-out in minutes.
num_machines: int, optional (default=1)
num_machines
: int, optional (default=1)
The number of machines for parallel learning application.
The number of machines for parallel learning application.
Returns
-------
self : Booster
Booster with set network.
"""
"""
_safe_call
(
_LIB
.
LGBM_NetworkInit
(
c_str
(
machines
),
_safe_call
(
_LIB
.
LGBM_NetworkInit
(
c_str
(
machines
),
ctypes
.
c_int
(
local_listen_port
),
ctypes
.
c_int
(
local_listen_port
),
ctypes
.
c_int
(
listen_time_out
),
ctypes
.
c_int
(
listen_time_out
),
ctypes
.
c_int
(
num_machines
)))
ctypes
.
c_int
(
num_machines
)))
self
.
network
=
True
self
.
network
=
True
return
self
def
free_network
(
self
):
def
free_network
(
self
):
"""Free network."""
"""Free Booster's network.
Returns
-------
self : Booster
Booster with freed network.
"""
_safe_call
(
_LIB
.
LGBM_NetworkFree
())
_safe_call
(
_LIB
.
LGBM_NetworkFree
())
self
.
network
=
False
self
.
network
=
False
return
self
def
set_train_data_name
(
self
,
name
):
def
set_train_data_name
(
self
,
name
):
"""Set the name to the training Dataset.
"""Set the name to the training Dataset.
Parameters
Parameters
----------
----------
name: string
name : string
Name for training Dataset.
Name for the training Dataset.
Returns
-------
self : Booster
Booster with set training Dataset name.
"""
"""
self
.
__train_data_name
=
name
self
.
__train_data_name
=
name
return
self
def
add_valid
(
self
,
data
,
name
):
def
add_valid
(
self
,
data
,
name
):
"""Add validation data.
"""Add validation data.
...
@@ -1502,6 +1584,11 @@ class Booster(object):
...
@@ -1502,6 +1584,11 @@ class Booster(object):
Validation data.
Validation data.
name : string
name : string
Name of validation data.
Name of validation data.
Returns
-------
self : Booster
Booster with set validation data.
"""
"""
if
not
isinstance
(
data
,
Dataset
):
if
not
isinstance
(
data
,
Dataset
):
raise
TypeError
(
'Validation data should be Dataset instance, met {}'
.
format
(
type
(
data
).
__name__
))
raise
TypeError
(
'Validation data should be Dataset instance, met {}'
.
format
(
type
(
data
).
__name__
))
...
@@ -1515,6 +1602,7 @@ class Booster(object):
...
@@ -1515,6 +1602,7 @@ class Booster(object):
self
.
__num_dataset
+=
1
self
.
__num_dataset
+=
1
self
.
__inner_predict_buffer
.
append
(
None
)
self
.
__inner_predict_buffer
.
append
(
None
)
self
.
__is_predicted_cur_iter
.
append
(
False
)
self
.
__is_predicted_cur_iter
.
append
(
False
)
return
self
def
reset_parameter
(
self
,
params
):
def
reset_parameter
(
self
,
params
):
"""Reset parameters of Booster.
"""Reset parameters of Booster.
...
@@ -1523,6 +1611,11 @@ class Booster(object):
...
@@ -1523,6 +1611,11 @@ class Booster(object):
----------
----------
params : dict
params : dict
New parameters for Booster.
New parameters for Booster.
Returns
-------
self : Booster
Booster with new parameters.
"""
"""
if
any
(
metric_alias
in
params
for
metric_alias
in
(
'metric'
,
'metrics'
,
'metric_types'
)):
if
any
(
metric_alias
in
params
for
metric_alias
in
(
'metric'
,
'metrics'
,
'metric_types'
)):
self
.
__need_reload_eval_info
=
True
self
.
__need_reload_eval_info
=
True
...
@@ -1531,9 +1624,10 @@ class Booster(object):
...
@@ -1531,9 +1624,10 @@ class Booster(object):
_safe_call
(
_LIB
.
LGBM_BoosterResetParameter
(
_safe_call
(
_LIB
.
LGBM_BoosterResetParameter
(
self
.
handle
,
self
.
handle
,
c_str
(
params_str
)))
c_str
(
params_str
)))
return
self
def
update
(
self
,
train_set
=
None
,
fobj
=
None
):
def
update
(
self
,
train_set
=
None
,
fobj
=
None
):
"""Update for one iteration.
"""Update
Booster
for one iteration.
Parameters
Parameters
----------
----------
...
@@ -1575,28 +1669,29 @@ class Booster(object):
...
@@ -1575,28 +1669,29 @@ class Booster(object):
return
is_finished
.
value
==
1
return
is_finished
.
value
==
1
else
:
else
:
if
not
self
.
__set_objective_to_none
:
if
not
self
.
__set_objective_to_none
:
self
.
reset_parameter
({
"objective"
:
"none"
})
self
.
reset_parameter
({
"objective"
:
"none"
}).
__set_objective_to_none
=
True
self
.
__set_objective_to_none
=
True
grad
,
hess
=
fobj
(
self
.
__inner_predict
(
0
),
self
.
train_set
)
grad
,
hess
=
fobj
(
self
.
__inner_predict
(
0
),
self
.
train_set
)
return
self
.
__boost
(
grad
,
hess
)
return
self
.
__boost
(
grad
,
hess
)
def
__boost
(
self
,
grad
,
hess
):
def
__boost
(
self
,
grad
,
hess
):
"""
"""
Boost the booster for one iteration, with customized gradient statistics.
Boost Booster for one iteration with customized gradient statistics.
Note: for multi-class task, the score is group by class_id first, then group by row_id
if you want to get i-th row score in j-th class, the access way is score[j*num_data+i]
Note: For multi-class task, the score is group by class_id first, then group by row_id.
and you should group grad and hess in this way as well
If you want to get i-th row score in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well.
Parameters
Parameters
----------
----------
grad : 1d numpy
or 1d
list
grad : 1d numpy
array or
list
The first order
of
gradient.
The first order
derivative (
gradient
)
.
hess : 1d numpy or 1d list
hess : 1d numpy or 1d list
The second order
of gradient
.
The second order
derivative (Hessian)
.
Returns
Returns
-------
-------
is_finished, bool
is_finished : bool
Whether the boost was successfully finished.
"""
"""
grad
=
list_to_1d_numpy
(
grad
,
name
=
'gradient'
)
grad
=
list_to_1d_numpy
(
grad
,
name
=
'gradient'
)
hess
=
list_to_1d_numpy
(
hess
,
name
=
'hessian'
)
hess
=
list_to_1d_numpy
(
hess
,
name
=
'hessian'
)
...
@@ -1614,10 +1709,17 @@ class Booster(object):
...
@@ -1614,10 +1709,17 @@ class Booster(object):
return
is_finished
.
value
==
1
return
is_finished
.
value
==
1
def
rollback_one_iter
(
self
):
def
rollback_one_iter
(
self
):
"""Rollback one iteration."""
"""Rollback one iteration.
Returns
-------
self : Booster
Booster with rolled back one iteration.
"""
_safe_call
(
_LIB
.
LGBM_BoosterRollbackOneIter
(
_safe_call
(
_LIB
.
LGBM_BoosterRollbackOneIter
(
self
.
handle
))
self
.
handle
))
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range_
(
self
.
__num_dataset
)]
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range_
(
self
.
__num_dataset
)]
return
self
def
current_iteration
(
self
):
def
current_iteration
(
self
):
"""Get the index of the current iteration.
"""Get the index of the current iteration.
...
@@ -1651,7 +1753,7 @@ class Booster(object):
...
@@ -1651,7 +1753,7 @@ class Booster(object):
Returns
Returns
-------
-------
result: list
result
: list
List with evaluation results.
List with evaluation results.
"""
"""
if
not
isinstance
(
data
,
Dataset
):
if
not
isinstance
(
data
,
Dataset
):
...
@@ -1685,7 +1787,7 @@ class Booster(object):
...
@@ -1685,7 +1787,7 @@ class Booster(object):
Returns
Returns
-------
-------
result: list
result
: list
List with evaluation results.
List with evaluation results.
"""
"""
return
self
.
__inner_eval
(
self
.
__train_data_name
,
0
,
feval
)
return
self
.
__inner_eval
(
self
.
__train_data_name
,
0
,
feval
)
...
@@ -1704,7 +1806,7 @@ class Booster(object):
...
@@ -1704,7 +1806,7 @@ class Booster(object):
Returns
Returns
-------
-------
result: list
result
: list
List with evaluation results.
List with evaluation results.
"""
"""
return
[
item
for
i
in
range_
(
1
,
self
.
__num_dataset
)
return
[
item
for
i
in
range_
(
1
,
self
.
__num_dataset
)
...
@@ -1721,8 +1823,13 @@ class Booster(object):
...
@@ -1721,8 +1823,13 @@ class Booster(object):
Index of the iteration that should be saved.
Index of the iteration that should be saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If <= 0, all iterations are saved.
If <= 0, all iterations are saved.
start_iteration: int, optional (default=0)
start_iteration
: int, optional (default=0)
Start index of the iteration that should be saved.
Start index of the iteration that should be saved.
Returns
-------
self : Booster
Returns self.
"""
"""
if
num_iteration
is
None
:
if
num_iteration
is
None
:
num_iteration
=
self
.
best_iteration
num_iteration
=
self
.
best_iteration
...
@@ -1732,25 +1839,32 @@ class Booster(object):
...
@@ -1732,25 +1839,32 @@ class Booster(object):
ctypes
.
c_int
(
num_iteration
),
ctypes
.
c_int
(
num_iteration
),
c_str
(
filename
)))
c_str
(
filename
)))
_save_pandas_categorical
(
filename
,
self
.
pandas_categorical
)
_save_pandas_categorical
(
filename
,
self
.
pandas_categorical
)
return
self
def
shuffle_models
(
self
):
def
shuffle_models
(
self
):
"""Shuffle models.
"""Shuffle models.
Returns
-------
self : Booster
Booster with shuffled models.
"""
"""
_safe_call
(
_LIB
.
LGBM_BoosterShuffleModels
(
self
.
handle
))
_safe_call
(
_LIB
.
LGBM_BoosterShuffleModels
(
self
.
handle
))
return
self
def
model_from_string
(
self
,
model_str
,
verbose
=
True
):
def
model_from_string
(
self
,
model_str
,
verbose
=
True
):
"""Load Booster from a string.
"""Load Booster from a string.
Parameters
Parameters
----------
----------
model_str: string
model_str
: string
Model will be loaded from this string.
Model will be loaded from this string.
verbose: bool, optional (default=True)
verbose
: bool, optional (default=True)
Set to False to disable log when
loading model.
Whether to print messages while
loading model.
Returns
Returns
-------
-------
result
: Booster
self
: Booster
Loaded Booster object.
Loaded Booster object.
"""
"""
if
self
.
handle
is
not
None
:
if
self
.
handle
is
not
None
:
...
@@ -1767,7 +1881,7 @@ class Booster(object):
...
@@ -1767,7 +1881,7 @@ class Booster(object):
self
.
handle
,
self
.
handle
,
ctypes
.
byref
(
out_num_class
)))
ctypes
.
byref
(
out_num_class
)))
if
verbose
:
if
verbose
:
print
(
'Finished loading model, total used %d iterations'
%
(
int
(
out_num_iterations
.
value
))
)
print
(
'Finished loading model, total used %d iterations'
%
int
(
out_num_iterations
.
value
))
self
.
__num_class
=
out_num_class
.
value
self
.
__num_class
=
out_num_class
.
value
return
self
return
self
...
@@ -1780,12 +1894,12 @@ class Booster(object):
...
@@ -1780,12 +1894,12 @@ class Booster(object):
Index of the iteration that should be saved.
Index of the iteration that should be saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If <= 0, all iterations are saved.
If <= 0, all iterations are saved.
start_iteration: int, optional (default=0)
start_iteration
: int, optional (default=0)
Start index of the iteration that should be saved.
Start index of the iteration that should be saved.
Returns
Returns
-------
-------
result
: string
str_repr
: string
String representation of Booster.
String representation of Booster.
"""
"""
if
num_iteration
is
None
:
if
num_iteration
is
None
:
...
@@ -1816,7 +1930,7 @@ class Booster(object):
...
@@ -1816,7 +1930,7 @@ class Booster(object):
return
string_buffer
.
value
.
decode
()
return
string_buffer
.
value
.
decode
()
def
dump_model
(
self
,
num_iteration
=
None
,
start_iteration
=
0
):
def
dump_model
(
self
,
num_iteration
=
None
,
start_iteration
=
0
):
"""Dump Booster to
json
format.
"""Dump Booster to
JSON
format.
Parameters
Parameters
----------
----------
...
@@ -1824,13 +1938,13 @@ class Booster(object):
...
@@ -1824,13 +1938,13 @@ class Booster(object):
Index of the iteration that should be dumped.
Index of the iteration that should be dumped.
If None, if the best iteration exists, it is dumped; otherwise, all iterations are dumped.
If None, if the best iteration exists, it is dumped; otherwise, all iterations are dumped.
If <= 0, all iterations are dumped.
If <= 0, all iterations are dumped.
start_iteration: int, optional (default=0)
start_iteration
: int, optional (default=0)
Start index of the iteration that should be dumped.
Start index of the iteration that should be dumped.
Returns
Returns
-------
-------
json_repr : dict
json_repr : dict
J
son
format of Booster.
J
SON
format of Booster.
"""
"""
if
num_iteration
is
None
:
if
num_iteration
is
None
:
num_iteration
=
self
.
best_iteration
num_iteration
=
self
.
best_iteration
...
@@ -1990,8 +2104,7 @@ class Booster(object):
...
@@ -1990,8 +2104,7 @@ class Booster(object):
importance_type_int
=
1
importance_type_int
=
1
else
:
else
:
importance_type_int
=
-
1
importance_type_int
=
-
1
num_feature
=
self
.
num_feature
()
result
=
np
.
zeros
(
self
.
num_feature
(),
dtype
=
np
.
float64
)
result
=
np
.
array
([
0
for
_
in
range_
(
num_feature
)],
dtype
=
np
.
float64
)
_safe_call
(
_LIB
.
LGBM_BoosterFeatureImportance
(
_safe_call
(
_LIB
.
LGBM_BoosterFeatureImportance
(
self
.
handle
,
self
.
handle
,
ctypes
.
c_int
(
iteration
),
ctypes
.
c_int
(
iteration
),
...
@@ -2004,7 +2117,7 @@ class Booster(object):
...
@@ -2004,7 +2117,7 @@ class Booster(object):
def
__inner_eval
(
self
,
data_name
,
data_idx
,
feval
=
None
):
def
__inner_eval
(
self
,
data_name
,
data_idx
,
feval
=
None
):
"""
"""
Eva
u
late training or validation data
Eval
u
ate training or validation data
"""
"""
if
data_idx
>=
self
.
__num_dataset
:
if
data_idx
>=
self
.
__num_dataset
:
raise
ValueError
(
"Data_idx should be smaller than number of dataset"
)
raise
ValueError
(
"Data_idx should be smaller than number of dataset"
)
...
@@ -2102,7 +2215,7 @@ class Booster(object):
...
@@ -2102,7 +2215,7 @@ class Booster(object):
-------
-------
value : string or None
value : string or None
The attribute value.
The attribute value.
Returns None if attribute do not exist.
Returns None if attribute do
es
not exist.
"""
"""
return
self
.
__attr
.
get
(
key
,
None
)
return
self
.
__attr
.
get
(
key
,
None
)
...
@@ -2114,11 +2227,17 @@ class Booster(object):
...
@@ -2114,11 +2227,17 @@ class Booster(object):
**kwargs
**kwargs
The attributes to set.
The attributes to set.
Setting a value to None deletes an attribute.
Setting a value to None deletes an attribute.
Returns
-------
self : Booster
Booster with set attribute.
"""
"""
for
key
,
value
in
kwargs
.
items
():
for
key
,
value
in
kwargs
.
items
():
if
value
is
not
None
:
if
value
is
not
None
:
if
not
isinstance
(
value
,
string_type
):
if
not
isinstance
(
value
,
string_type
):
raise
ValueError
(
"
Set attr only accepts strings
"
)
raise
ValueError
(
"
Only string values are accepted
"
)
self
.
__attr
[
key
]
=
value
self
.
__attr
[
key
]
=
value
else
:
else
:
self
.
__attr
.
pop
(
key
,
None
)
self
.
__attr
.
pop
(
key
,
None
)
return
self
python-package/lightgbm/engine.py
View file @
244db078
...
@@ -125,10 +125,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -125,10 +125,7 @@ def train(params, train_set, num_boost_round=100,
if
not
isinstance
(
train_set
,
Dataset
):
if
not
isinstance
(
train_set
,
Dataset
):
raise
TypeError
(
"Training only accepts Dataset object"
)
raise
TypeError
(
"Training only accepts Dataset object"
)
train_set
.
_update_params
(
params
)
train_set
.
_update_params
(
params
).
_set_predictor
(
predictor
).
set_feature_name
(
feature_name
).
set_categorical_feature
(
categorical_feature
)
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_feature_name
(
feature_name
)
train_set
.
set_categorical_feature
(
categorical_feature
)
is_valid_contain_train
=
False
is_valid_contain_train
=
False
train_data_name
=
"training"
train_data_name
=
"training"
...
@@ -148,9 +145,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -148,9 +145,7 @@ def train(params, train_set, num_boost_round=100,
continue
continue
if
not
isinstance
(
valid_data
,
Dataset
):
if
not
isinstance
(
valid_data
,
Dataset
):
raise
TypeError
(
"Traninig only accepts Dataset object"
)
raise
TypeError
(
"Traninig only accepts Dataset object"
)
valid_data
.
_update_params
(
params
)
reduced_valid_sets
.
append
(
valid_data
.
_update_params
(
params
).
set_reference
(
train_set
))
valid_data
.
set_reference
(
train_set
)
reduced_valid_sets
.
append
(
valid_data
)
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
name_valid_sets
.
append
(
valid_names
[
i
])
name_valid_sets
.
append
(
valid_names
[
i
])
else
:
else
:
...
@@ -230,8 +225,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -230,8 +225,7 @@ def train(params, train_set, num_boost_round=100,
for
dataset_name
,
eval_name
,
score
,
_
in
evaluation_result_list
:
for
dataset_name
,
eval_name
,
score
,
_
in
evaluation_result_list
:
booster
.
best_score
[
dataset_name
][
eval_name
]
=
score
booster
.
best_score
[
dataset_name
][
eval_name
]
=
score
if
not
keep_training_booster
:
if
not
keep_training_booster
:
booster
.
model_from_string
(
booster
.
model_to_string
(),
False
)
booster
.
model_from_string
(
booster
.
model_to_string
(),
False
).
free_dataset
()
booster
.
free_dataset
()
return
booster
return
booster
...
@@ -421,10 +415,7 @@ def cv(params, train_set, num_boost_round=100,
...
@@ -421,10 +415,7 @@ def cv(params, train_set, num_boost_round=100,
predictor
=
init_model
.
_to_predictor
()
predictor
=
init_model
.
_to_predictor
()
else
:
else
:
predictor
=
None
predictor
=
None
train_set
.
_update_params
(
params
)
train_set
.
_update_params
(
params
).
_set_predictor
(
predictor
).
set_feature_name
(
feature_name
).
set_categorical_feature
(
categorical_feature
)
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_feature_name
(
feature_name
)
train_set
.
set_categorical_feature
(
categorical_feature
)
if
metrics
is
not
None
:
if
metrics
is
not
None
:
params
[
'metric'
]
=
metrics
params
[
'metric'
]
=
metrics
...
...
python-package/lightgbm/sklearn.py
View file @
244db078
...
@@ -23,24 +23,24 @@ def _objective_function_wrapper(func):
...
@@ -23,24 +23,24 @@ def _objective_function_wrapper(func):
Parameters
Parameters
----------
----------
func: callable
func
: callable
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``:
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``:
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values.
The predicted values.
group: array-like
group
: array-like
Group/query data, used for ranking task.
Group/query data, used for ranking task.
Returns
Returns
-------
-------
new_func: callable
new_func
: callable
The new objective function as expected by ``lightgbm.engine.train``.
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
The signature is ``new_func(preds, dataset)``:
preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
preds
: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
The predicted values.
The predicted values.
dataset: ``dataset``
dataset
: ``dataset``
The training set from which the labels will be extracted using
The training set from which the labels will be extracted using
``dataset.get_label()``.
``dataset.get_label()``.
"""
"""
...
@@ -82,31 +82,31 @@ def _eval_function_wrapper(func):
...
@@ -82,31 +82,31 @@ def _eval_function_wrapper(func):
Parameters
Parameters
----------
----------
func: callable
func
: callable
Expects a callable with one of the following signatures:
Expects a callable with one of the following signatures:
``func(y_true, y_pred)``,
``func(y_true, y_pred)``,
``func(y_true, y_pred, weight)``
``func(y_true, y_pred, weight)``
or ``func(y_true, y_pred, weight, group)``
or ``func(y_true, y_pred, weight, group)``
and return (eval_name->str, eval_result->float, is_bigger_better->Bool):
and return (eval_name->str, eval_result->float, is_bigger_better->Bool):
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values.
The predicted values.
weight: array_like of shape = [n_samples]
weight
: array_like of shape = [n_samples]
The weight of samples.
The weight of samples.
group: array-like
group
: array-like
Group/query data, used for ranking task.
Group/query data, used for ranking task.
Returns
Returns
-------
-------
new_func: callable
new_func
: callable
The new eval function as expected by ``lightgbm.engine.train``.
The new eval function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
The signature is ``new_func(preds, dataset)``:
preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
preds
: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
The predicted values.
The predicted values.
dataset: ``dataset``
dataset
: ``dataset``
The training set from which the labels will be extracted using
The training set from which the labels will be extracted using
``dataset.get_label()``.
``dataset.get_label()``.
"""
"""
...
@@ -232,15 +232,15 @@ class LGBMModel(_LGBMModelBase):
...
@@ -232,15 +232,15 @@ class LGBMModel(_LGBMModelBase):
``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred, group) -> grad, hess``:
``objective(y_true, y_pred, group) -> grad, hess``:
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values.
The predicted values.
group: array-like
group
: array-like
Group/query data, used for ranking task.
Group/query data, used for ranking task.
grad: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
grad
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The value of the gradient for each sample point.
The value of the gradient for each sample point.
hess: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
hess
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The value of the second derivative for each sample point.
The value of the second derivative for each sample point.
For multi-class task, the y_pred is grouped by class_id first, then grouped by row_id.
For multi-class task, the y_pred is grouped by class_id first, then grouped by row_id.
...
@@ -365,19 +365,19 @@ class LGBMModel(_LGBMModelBase):
...
@@ -365,19 +365,19 @@ class LGBMModel(_LGBMModelBase):
Returns (eval_name, eval_result, is_bigger_better) or
Returns (eval_name, eval_result, is_bigger_better) or
list of (eval_name, eval_result, is_bigger_better)
list of (eval_name, eval_result, is_bigger_better)
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values.
The predicted values.
weight: array-like of shape = [n_samples]
weight
: array-like of shape = [n_samples]
The weight of samples.
The weight of samples.
group: array-like
group
: array-like
Group/query data, used for ranking task.
Group/query data, used for ranking task.
eval_name: string
eval_name
: string
The name of evaluation.
The name of evaluation.
eval_result: float
eval_result
: float
The eval result.
The eval result.
is_bigger_better: bool
is_bigger_better
: bool
Whether a bigger eval result is better, e.g. AUC is bigger_better.
Whether a bigger eval result is better, e.g. AUC is bigger_better.
For multi-class task, the y_pred is grouped by class_id first, then grouped by row_id.
For multi-class task, the y_pred is grouped by class_id first, then grouped by row_id.
...
@@ -434,8 +434,7 @@ class LGBMModel(_LGBMModelBase):
...
@@ -434,8 +434,7 @@ class LGBMModel(_LGBMModelBase):
def
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
):
def
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
):
ret
=
Dataset
(
X
,
label
=
y
,
weight
=
sample_weight
,
group
=
group
,
params
=
params
)
ret
=
Dataset
(
X
,
label
=
y
,
weight
=
sample_weight
,
group
=
group
,
params
=
params
)
ret
.
set_init_score
(
init_score
)
return
ret
.
set_init_score
(
init_score
)
return
ret
train_set
=
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
)
train_set
=
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment