Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
244db078
Commit
244db078
authored
Aug 25, 2018
by
Nikita Titov
Committed by
Qiwei Ye
Aug 25, 2018
Browse files
return self (#1602)
parent
dcf9ad2e
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
226 additions
and
117 deletions
+226
-117
examples/python-guide/advanced_example.py
examples/python-guide/advanced_example.py
+1
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+193
-74
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+4
-13
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+28
-29
No files found.
examples/python-guide/advanced_example.py
View file @
244db078
...
...
@@ -64,7 +64,7 @@ print('7th feature name is:', repr(lgb_train.feature_name[6]))
# save model to file
gbm
.
save_model
(
'model.txt'
)
# dump model to
json
(and save to file)
# dump model to
JSON
(and save to file)
print
(
'Dump model to JSON...'
)
model_json
=
gbm
.
dump_model
()
...
...
python-package/lightgbm/basic.py
View file @
244db078
...
...
@@ -184,7 +184,7 @@ def convert_from_sliced_object(data):
"""fix the memory of multi-dimensional sliced object"""
if
data
.
base
is
not
None
and
isinstance
(
data
,
np
.
ndarray
)
and
isinstance
(
data
.
base
,
np
.
ndarray
):
if
not
data
.
flags
.
c_contiguous
:
warnings
.
warn
(
"Usage subset(sliced data)
of np.ndarray
is not recommended due to it will double the peak memory cost in LightGBM."
)
warnings
.
warn
(
"Usage
of np.ndarray
subset
(sliced data) is not recommended due to it will double the peak memory cost in LightGBM."
)
return
np
.
copy
(
data
)
return
data
...
...
@@ -607,9 +607,9 @@ class Dataset(object):
If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
All values in categorical features should be less than int32 max value (2147483647).
All negative values in categorical features will be treated as missing values.
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Other parameters.
free_raw_data: bool, optional (default=True)
free_raw_data
: bool, optional (default=True)
If True, raw data is freed after constructing inner Dataset.
"""
self
.
handle
=
None
...
...
@@ -639,6 +639,7 @@ class Dataset(object):
if
self
.
handle
is
not
None
:
_safe_call
(
_LIB
.
LGBM_DatasetFree
(
self
.
handle
))
self
.
handle
=
None
return
self
def
_lazy_init
(
self
,
data
,
label
=
None
,
reference
=
None
,
weight
=
None
,
group
=
None
,
init_score
=
None
,
predictor
=
None
,
...
...
@@ -646,7 +647,7 @@ class Dataset(object):
categorical_feature
=
'auto'
,
params
=
None
):
if
data
is
None
:
self
.
handle
=
None
return
return
self
if
reference
is
not
None
:
self
.
pandas_categorical
=
reference
.
pandas_categorical
categorical_feature
=
reference
.
categorical_feature
...
...
@@ -747,7 +748,7 @@ class Dataset(object):
elif
self
.
predictor
is
not
None
:
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
# set feature names
self
.
set_feature_name
(
feature_name
)
return
self
.
set_feature_name
(
feature_name
)
def
__init_from_np2d
(
self
,
mat
,
params_str
,
ref_dataset
):
"""
...
...
@@ -773,6 +774,7 @@ class Dataset(object):
c_str
(
params_str
),
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
return
self
def
__init_from_list_np2d
(
self
,
mats
,
params_str
,
ref_dataset
):
"""
...
...
@@ -821,6 +823,7 @@ class Dataset(object):
c_str
(
params_str
),
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
return
self
def
__init_from_csr
(
self
,
csr
,
params_str
,
ref_dataset
):
"""
...
...
@@ -845,6 +848,7 @@ class Dataset(object):
c_str
(
params_str
),
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
return
self
def
__init_from_csc
(
self
,
csc
,
params_str
,
ref_dataset
):
"""
...
...
@@ -869,6 +873,7 @@ class Dataset(object):
c_str
(
params_str
),
ref_dataset
,
ctypes
.
byref
(
self
.
handle
)))
return
self
def
construct
(
self
):
"""Lazy init.
...
...
@@ -876,7 +881,7 @@ class Dataset(object):
Returns
-------
self : Dataset
Returns self
.
Constructed Dataset object
.
"""
if
self
.
handle
is
None
:
if
self
.
reference
is
not
None
:
...
...
@@ -928,13 +933,13 @@ class Dataset(object):
Init score for Dataset.
silent : bool, optional (default=False)
Whether to print messages during construction.
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Other parameters.
Returns
-------
self
: Dataset
Returns
self.
valid
: Dataset
Validation Dataset with reference to
self.
"""
ret
=
Dataset
(
data
,
label
=
label
,
reference
=
self
,
weight
=
weight
,
group
=
group
,
init_score
=
init_score
,
...
...
@@ -950,7 +955,7 @@ class Dataset(object):
----------
used_indices : list of int
Indices used to create the subset.
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Other parameters.
Returns
...
...
@@ -974,10 +979,16 @@ class Dataset(object):
----------
filename : string
Name of the output file.
Returns
-------
self : Dataset
Returns self.
"""
_safe_call
(
_LIB
.
LGBM_DatasetSaveBinary
(
self
.
construct
().
handle
,
c_str
(
filename
)))
return
self
def
_update_params
(
self
,
params
):
if
not
self
.
params
:
...
...
@@ -985,20 +996,27 @@ class Dataset(object):
else
:
self
.
params_back_up
=
copy
.
deepcopy
(
self
.
params
)
self
.
params
.
update
(
params
)
return
self
def
_reverse_update_params
(
self
):
self
.
params
=
copy
.
deepcopy
(
self
.
params_back_up
)
self
.
params_back_up
=
None
return
self
def
set_field
(
self
,
field_name
,
data
):
"""Set property into the Dataset.
Parameters
----------
field_name: string
field_name
: string
The field name of the information.
data: list, numpy array or None
data
: list, numpy array or None
The array of data to be set.
Returns
-------
self : Dataset
Dataset with set property.
"""
if
self
.
handle
is
None
:
raise
Exception
(
"Cannot set %s before construct dataset"
%
field_name
)
...
...
@@ -1010,7 +1028,7 @@ class Dataset(object):
None
,
ctypes
.
c_int
(
0
),
ctypes
.
c_int
(
FIELD_TYPE_MAPPER
[
field_name
])))
return
return
self
dtype
=
np
.
float32
if
field_name
==
'group'
:
dtype
=
np
.
int32
...
...
@@ -1031,13 +1049,14 @@ class Dataset(object):
ptr_data
,
ctypes
.
c_int
(
len
(
data
)),
ctypes
.
c_int
(
type_data
)))
return
self
def
get_field
(
self
,
field_name
):
"""Get property from the Dataset.
Parameters
----------
field_name: string
field_name
: string
The field name of the information.
Returns
...
...
@@ -1076,19 +1095,25 @@ class Dataset(object):
----------
categorical_feature : list of int or strings
Names or indices of categorical features.
Returns
-------
self : Dataset
Dataset with set categorical features.
"""
if
self
.
categorical_feature
==
categorical_feature
:
return
return
self
if
self
.
data
is
not
None
:
if
self
.
categorical_feature
is
None
:
self
.
categorical_feature
=
categorical_feature
self
.
_free_handle
()
return
self
.
_free_handle
()
elif
categorical_feature
==
'auto'
:
warnings
.
warn
(
'Using categorical_feature in Dataset.'
)
return
self
else
:
warnings
.
warn
(
'categorical_feature in Dataset is overridden. New categorical_feature is {}'
.
format
(
sorted
(
list
(
categorical_feature
))))
self
.
categorical_feature
=
categorical_feature
self
.
_free_handle
()
return
self
.
_free_handle
()
else
:
raise
LightGBMError
(
"Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
...
...
@@ -1098,10 +1123,10 @@ class Dataset(object):
Please set init_model in engine.train or engine.cv
"""
if
predictor
is
self
.
_predictor
:
return
return
self
if
self
.
data
is
not
None
:
self
.
_predictor
=
predictor
self
.
_free_handle
()
return
self
.
_free_handle
()
else
:
raise
LightGBMError
(
"Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
...
...
@@ -1112,16 +1137,19 @@ class Dataset(object):
----------
reference : Dataset
Reference that is used as a template to construct the current Dataset.
Returns
-------
self : Dataset
Dataset with set reference.
"""
self
.
set_categorical_feature
(
reference
.
categorical_feature
)
self
.
set_feature_name
(
reference
.
feature_name
)
self
.
_set_predictor
(
reference
.
_predictor
)
self
.
set_categorical_feature
(
reference
.
categorical_feature
).
set_feature_name
(
reference
.
feature_name
).
_set_predictor
(
reference
.
_predictor
)
# we're done if self and reference share a common upstream reference
if
self
.
get_ref_chain
().
intersection
(
reference
.
get_ref_chain
()):
return
return
self
if
self
.
data
is
not
None
:
self
.
reference
=
reference
self
.
_free_handle
()
return
self
.
_free_handle
()
else
:
raise
LightGBMError
(
"Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this."
)
...
...
@@ -1132,6 +1160,11 @@ class Dataset(object):
----------
feature_name : list of strings
Feature names.
Returns
-------
self : Dataset
Dataset with set feature name.
"""
if
feature_name
!=
'auto'
:
self
.
feature_name
=
feature_name
...
...
@@ -1143,19 +1176,26 @@ class Dataset(object):
self
.
handle
,
c_array
(
ctypes
.
c_char_p
,
c_feature_name
),
ctypes
.
c_int
(
len
(
feature_name
))))
return
self
def
set_label
(
self
,
label
):
"""Set label of Dataset
Parameters
----------
label: list, numpy array or None
label
: list, numpy array or None
The label information to be set into Dataset.
Returns
-------
self : Dataset
Dataset with set label.
"""
self
.
label
=
label
if
self
.
handle
is
not
None
:
label
=
list_to_1d_numpy
(
label
,
name
=
'label'
)
self
.
set_field
(
'label'
,
label
)
return
self
def
set_weight
(
self
,
weight
):
"""Set weight of each instance.
...
...
@@ -1164,6 +1204,11 @@ class Dataset(object):
----------
weight : list, numpy array or None
Weight to be set for each data point.
Returns
-------
self : Dataset
Dataset with set weight.
"""
if
weight
is
not
None
and
np
.
all
(
weight
==
1
):
weight
=
None
...
...
@@ -1171,6 +1216,7 @@ class Dataset(object):
if
self
.
handle
is
not
None
and
weight
is
not
None
:
weight
=
list_to_1d_numpy
(
weight
,
name
=
'weight'
)
self
.
set_field
(
'weight'
,
weight
)
return
self
def
set_init_score
(
self
,
init_score
):
"""Set init score of Booster to start from.
...
...
@@ -1179,11 +1225,17 @@ class Dataset(object):
----------
init_score : list, numpy array or None
Init score for Booster.
Returns
-------
self : Dataset
Dataset with set init score.
"""
self
.
init_score
=
init_score
if
self
.
handle
is
not
None
and
init_score
is
not
None
:
init_score
=
list_to_1d_numpy
(
init_score
,
np
.
float64
,
name
=
'init_score'
)
self
.
set_field
(
'init_score'
,
init_score
)
return
self
def
set_group
(
self
,
group
):
"""Set group size of Dataset (used for ranking).
...
...
@@ -1192,18 +1244,24 @@ class Dataset(object):
----------
group : list, numpy array or None
Group size of each group.
Returns
-------
self : Dataset
Dataset with set group.
"""
self
.
group
=
group
if
self
.
handle
is
not
None
and
group
is
not
None
:
group
=
list_to_1d_numpy
(
group
,
np
.
int32
,
name
=
'group'
)
self
.
set_field
(
'group'
,
group
)
return
self
def
get_label
(
self
):
"""Get the label of the Dataset.
Returns
-------
label : numpy array
label : numpy array
or None
The label information from the Dataset.
"""
if
self
.
label
is
None
:
...
...
@@ -1215,7 +1273,7 @@ class Dataset(object):
Returns
-------
weight : numpy array
weight : numpy array
or None
Weight for each data point from the Dataset.
"""
if
self
.
weight
is
None
:
...
...
@@ -1227,7 +1285,7 @@ class Dataset(object):
Returns
-------
init_score : numpy array
init_score : numpy array
or None
Init score of Booster.
"""
if
self
.
init_score
is
None
:
...
...
@@ -1239,17 +1297,14 @@ class Dataset(object):
Returns
-------
group : numpy array
group : numpy array
or None
Group size of each group.
"""
if
self
.
group
is
None
:
self
.
group
=
self
.
get_field
(
'group'
)
if
self
.
group
is
not
None
:
# group data from LightGBM is boundaries data, need to convert to group size
new_group
=
[]
for
i
in
range_
(
len
(
self
.
group
)
-
1
):
new_group
.
append
(
self
.
group
[
i
+
1
]
-
self
.
group
[
i
])
self
.
group
=
new_group
self
.
group
=
np
.
diff
(
self
.
group
)
return
self
.
group
def
num_data
(
self
):
...
...
@@ -1309,7 +1364,7 @@ class Dataset(object):
break
else
:
break
return
(
ref_chain
)
return
ref_chain
class
Booster
(
object
):
...
...
@@ -1319,7 +1374,7 @@ class Booster(object):
Parameters
----------
params: dict or None, optional (default=None)
params
: dict or None, optional (default=None)
Parameters for Booster.
train_set : Dataset or None, optional (default=None)
Training dataset.
...
...
@@ -1448,14 +1503,22 @@ class Booster(object):
self
.
__dict__
.
update
(
state
)
def
free_dataset
(
self
):
"""Free Booster's Datasets."""
"""Free Booster's Datasets.
Returns
-------
self : Booster
Booster without Datasets.
"""
self
.
__dict__
.
pop
(
'train_set'
,
None
)
self
.
__dict__
.
pop
(
'valid_sets'
,
None
)
self
.
__num_dataset
=
0
return
self
def
_free_buffer
(
self
):
self
.
__inner_predict_buffer
=
[]
self
.
__is_predicted_cur_iter
=
[]
return
self
def
set_network
(
self
,
machines
,
local_listen_port
=
12400
,
listen_time_out
=
120
,
num_machines
=
1
):
...
...
@@ -1463,35 +1526,54 @@ class Booster(object):
Parameters
----------
machines: list, set or string
machines
: list, set or string
Names of machines.
local_listen_port: int, optional (default=12400)
local_listen_port
: int, optional (default=12400)
TCP listen port for local machines.
listen_time_out: int, optional (default=120)
listen_time_out
: int, optional (default=120)
Socket time-out in minutes.
num_machines: int, optional (default=1)
num_machines
: int, optional (default=1)
The number of machines for parallel learning application.
Returns
-------
self : Booster
Booster with set network.
"""
_safe_call
(
_LIB
.
LGBM_NetworkInit
(
c_str
(
machines
),
ctypes
.
c_int
(
local_listen_port
),
ctypes
.
c_int
(
listen_time_out
),
ctypes
.
c_int
(
num_machines
)))
self
.
network
=
True
return
self
def
free_network
(
self
):
"""Free network."""
"""Free Booster's network.
Returns
-------
self : Booster
Booster with freed network.
"""
_safe_call
(
_LIB
.
LGBM_NetworkFree
())
self
.
network
=
False
return
self
def
set_train_data_name
(
self
,
name
):
"""Set the name to the training Dataset.
Parameters
----------
name: string
Name for training Dataset.
name : string
Name for the training Dataset.
Returns
-------
self : Booster
Booster with set training Dataset name.
"""
self
.
__train_data_name
=
name
return
self
def
add_valid
(
self
,
data
,
name
):
"""Add validation data.
...
...
@@ -1502,6 +1584,11 @@ class Booster(object):
Validation data.
name : string
Name of validation data.
Returns
-------
self : Booster
Booster with set validation data.
"""
if
not
isinstance
(
data
,
Dataset
):
raise
TypeError
(
'Validation data should be Dataset instance, met {}'
.
format
(
type
(
data
).
__name__
))
...
...
@@ -1515,6 +1602,7 @@ class Booster(object):
self
.
__num_dataset
+=
1
self
.
__inner_predict_buffer
.
append
(
None
)
self
.
__is_predicted_cur_iter
.
append
(
False
)
return
self
def
reset_parameter
(
self
,
params
):
"""Reset parameters of Booster.
...
...
@@ -1523,6 +1611,11 @@ class Booster(object):
----------
params : dict
New parameters for Booster.
Returns
-------
self : Booster
Booster with new parameters.
"""
if
any
(
metric_alias
in
params
for
metric_alias
in
(
'metric'
,
'metrics'
,
'metric_types'
)):
self
.
__need_reload_eval_info
=
True
...
...
@@ -1531,9 +1624,10 @@ class Booster(object):
_safe_call
(
_LIB
.
LGBM_BoosterResetParameter
(
self
.
handle
,
c_str
(
params_str
)))
return
self
def
update
(
self
,
train_set
=
None
,
fobj
=
None
):
"""Update for one iteration.
"""Update
Booster
for one iteration.
Parameters
----------
...
...
@@ -1575,28 +1669,29 @@ class Booster(object):
return
is_finished
.
value
==
1
else
:
if
not
self
.
__set_objective_to_none
:
self
.
reset_parameter
({
"objective"
:
"none"
})
self
.
__set_objective_to_none
=
True
self
.
reset_parameter
({
"objective"
:
"none"
}).
__set_objective_to_none
=
True
grad
,
hess
=
fobj
(
self
.
__inner_predict
(
0
),
self
.
train_set
)
return
self
.
__boost
(
grad
,
hess
)
def
__boost
(
self
,
grad
,
hess
):
"""
Boost the booster for one iteration, with customized gradient statistics.
Note: for multi-class task, the score is group by class_id first, then group by row_id
if you want to get i-th row score in j-th class, the access way is score[j*num_data+i]
and you should group grad and hess in this way as well
Boost Booster for one iteration with customized gradient statistics.
Note: For multi-class task, the score is group by class_id first, then group by row_id.
If you want to get i-th row score in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well.
Parameters
----------
grad : 1d numpy
or 1d
list
The first order
of
gradient.
grad : 1d numpy
array or
list
The first order
derivative (
gradient
)
.
hess : 1d numpy or 1d list
The second order
of gradient
.
The second order
derivative (Hessian)
.
Returns
-------
is_finished, bool
is_finished : bool
Whether the boost was successfully finished.
"""
grad
=
list_to_1d_numpy
(
grad
,
name
=
'gradient'
)
hess
=
list_to_1d_numpy
(
hess
,
name
=
'hessian'
)
...
...
@@ -1614,10 +1709,17 @@ class Booster(object):
return
is_finished
.
value
==
1
def
rollback_one_iter
(
self
):
"""Rollback one iteration."""
"""Rollback one iteration.
Returns
-------
self : Booster
Booster with rolled back one iteration.
"""
_safe_call
(
_LIB
.
LGBM_BoosterRollbackOneIter
(
self
.
handle
))
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range_
(
self
.
__num_dataset
)]
return
self
def
current_iteration
(
self
):
"""Get the index of the current iteration.
...
...
@@ -1651,7 +1753,7 @@ class Booster(object):
Returns
-------
result: list
result
: list
List with evaluation results.
"""
if
not
isinstance
(
data
,
Dataset
):
...
...
@@ -1685,7 +1787,7 @@ class Booster(object):
Returns
-------
result: list
result
: list
List with evaluation results.
"""
return
self
.
__inner_eval
(
self
.
__train_data_name
,
0
,
feval
)
...
...
@@ -1704,7 +1806,7 @@ class Booster(object):
Returns
-------
result: list
result
: list
List with evaluation results.
"""
return
[
item
for
i
in
range_
(
1
,
self
.
__num_dataset
)
...
...
@@ -1721,8 +1823,13 @@ class Booster(object):
Index of the iteration that should be saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If <= 0, all iterations are saved.
start_iteration: int, optional (default=0)
start_iteration
: int, optional (default=0)
Start index of the iteration that should be saved.
Returns
-------
self : Booster
Returns self.
"""
if
num_iteration
is
None
:
num_iteration
=
self
.
best_iteration
...
...
@@ -1732,25 +1839,32 @@ class Booster(object):
ctypes
.
c_int
(
num_iteration
),
c_str
(
filename
)))
_save_pandas_categorical
(
filename
,
self
.
pandas_categorical
)
return
self
def
shuffle_models
(
self
):
"""Shuffle models.
Returns
-------
self : Booster
Booster with shuffled models.
"""
_safe_call
(
_LIB
.
LGBM_BoosterShuffleModels
(
self
.
handle
))
return
self
def
model_from_string
(
self
,
model_str
,
verbose
=
True
):
"""Load Booster from a string.
Parameters
----------
model_str: string
model_str
: string
Model will be loaded from this string.
verbose: bool, optional (default=True)
Set to False to disable log when
loading model.
verbose
: bool, optional (default=True)
Whether to print messages while
loading model.
Returns
-------
result
: Booster
self
: Booster
Loaded Booster object.
"""
if
self
.
handle
is
not
None
:
...
...
@@ -1767,7 +1881,7 @@ class Booster(object):
self
.
handle
,
ctypes
.
byref
(
out_num_class
)))
if
verbose
:
print
(
'Finished loading model, total used %d iterations'
%
(
int
(
out_num_iterations
.
value
))
)
print
(
'Finished loading model, total used %d iterations'
%
int
(
out_num_iterations
.
value
))
self
.
__num_class
=
out_num_class
.
value
return
self
...
...
@@ -1780,12 +1894,12 @@ class Booster(object):
Index of the iteration that should be saved.
If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
If <= 0, all iterations are saved.
start_iteration: int, optional (default=0)
start_iteration
: int, optional (default=0)
Start index of the iteration that should be saved.
Returns
-------
result
: string
str_repr
: string
String representation of Booster.
"""
if
num_iteration
is
None
:
...
...
@@ -1816,7 +1930,7 @@ class Booster(object):
return
string_buffer
.
value
.
decode
()
def
dump_model
(
self
,
num_iteration
=
None
,
start_iteration
=
0
):
"""Dump Booster to
json
format.
"""Dump Booster to
JSON
format.
Parameters
----------
...
...
@@ -1824,13 +1938,13 @@ class Booster(object):
Index of the iteration that should be dumped.
If None, if the best iteration exists, it is dumped; otherwise, all iterations are dumped.
If <= 0, all iterations are dumped.
start_iteration: int, optional (default=0)
start_iteration
: int, optional (default=0)
Start index of the iteration that should be dumped.
Returns
-------
json_repr : dict
J
son
format of Booster.
J
SON
format of Booster.
"""
if
num_iteration
is
None
:
num_iteration
=
self
.
best_iteration
...
...
@@ -1990,8 +2104,7 @@ class Booster(object):
importance_type_int
=
1
else
:
importance_type_int
=
-
1
num_feature
=
self
.
num_feature
()
result
=
np
.
array
([
0
for
_
in
range_
(
num_feature
)],
dtype
=
np
.
float64
)
result
=
np
.
zeros
(
self
.
num_feature
(),
dtype
=
np
.
float64
)
_safe_call
(
_LIB
.
LGBM_BoosterFeatureImportance
(
self
.
handle
,
ctypes
.
c_int
(
iteration
),
...
...
@@ -2004,7 +2117,7 @@ class Booster(object):
def
__inner_eval
(
self
,
data_name
,
data_idx
,
feval
=
None
):
"""
Eva
u
late training or validation data
Eval
u
ate training or validation data
"""
if
data_idx
>=
self
.
__num_dataset
:
raise
ValueError
(
"Data_idx should be smaller than number of dataset"
)
...
...
@@ -2102,7 +2215,7 @@ class Booster(object):
-------
value : string or None
The attribute value.
Returns None if attribute do not exist.
Returns None if attribute do
es
not exist.
"""
return
self
.
__attr
.
get
(
key
,
None
)
...
...
@@ -2114,11 +2227,17 @@ class Booster(object):
**kwargs
The attributes to set.
Setting a value to None deletes an attribute.
Returns
-------
self : Booster
Booster with set attribute.
"""
for
key
,
value
in
kwargs
.
items
():
if
value
is
not
None
:
if
not
isinstance
(
value
,
string_type
):
raise
ValueError
(
"
Set attr only accepts strings
"
)
raise
ValueError
(
"
Only string values are accepted
"
)
self
.
__attr
[
key
]
=
value
else
:
self
.
__attr
.
pop
(
key
,
None
)
return
self
python-package/lightgbm/engine.py
View file @
244db078
...
...
@@ -125,10 +125,7 @@ def train(params, train_set, num_boost_round=100,
if
not
isinstance
(
train_set
,
Dataset
):
raise
TypeError
(
"Training only accepts Dataset object"
)
train_set
.
_update_params
(
params
)
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_feature_name
(
feature_name
)
train_set
.
set_categorical_feature
(
categorical_feature
)
train_set
.
_update_params
(
params
).
_set_predictor
(
predictor
).
set_feature_name
(
feature_name
).
set_categorical_feature
(
categorical_feature
)
is_valid_contain_train
=
False
train_data_name
=
"training"
...
...
@@ -148,9 +145,7 @@ def train(params, train_set, num_boost_round=100,
continue
if
not
isinstance
(
valid_data
,
Dataset
):
raise
TypeError
(
"Traninig only accepts Dataset object"
)
valid_data
.
_update_params
(
params
)
valid_data
.
set_reference
(
train_set
)
reduced_valid_sets
.
append
(
valid_data
)
reduced_valid_sets
.
append
(
valid_data
.
_update_params
(
params
).
set_reference
(
train_set
))
if
valid_names
is
not
None
and
len
(
valid_names
)
>
i
:
name_valid_sets
.
append
(
valid_names
[
i
])
else
:
...
...
@@ -230,8 +225,7 @@ def train(params, train_set, num_boost_round=100,
for
dataset_name
,
eval_name
,
score
,
_
in
evaluation_result_list
:
booster
.
best_score
[
dataset_name
][
eval_name
]
=
score
if
not
keep_training_booster
:
booster
.
model_from_string
(
booster
.
model_to_string
(),
False
)
booster
.
free_dataset
()
booster
.
model_from_string
(
booster
.
model_to_string
(),
False
).
free_dataset
()
return
booster
...
...
@@ -421,10 +415,7 @@ def cv(params, train_set, num_boost_round=100,
predictor
=
init_model
.
_to_predictor
()
else
:
predictor
=
None
train_set
.
_update_params
(
params
)
train_set
.
_set_predictor
(
predictor
)
train_set
.
set_feature_name
(
feature_name
)
train_set
.
set_categorical_feature
(
categorical_feature
)
train_set
.
_update_params
(
params
).
_set_predictor
(
predictor
).
set_feature_name
(
feature_name
).
set_categorical_feature
(
categorical_feature
)
if
metrics
is
not
None
:
params
[
'metric'
]
=
metrics
...
...
python-package/lightgbm/sklearn.py
View file @
244db078
...
...
@@ -23,24 +23,24 @@ def _objective_function_wrapper(func):
Parameters
----------
func: callable
func
: callable
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group):
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values.
group: array-like
group
: array-like
Group/query data, used for ranking task.
Returns
-------
new_func: callable
new_func
: callable
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
preds
: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
The predicted values.
dataset: ``dataset``
dataset
: ``dataset``
The training set from which the labels will be extracted using
``dataset.get_label()``.
"""
...
...
@@ -82,31 +82,31 @@ def _eval_function_wrapper(func):
Parameters
----------
func: callable
func
: callable
Expects a callable with following functions:
``func(y_true, y_pred)``,
``func(y_true, y_pred, weight)``
or ``func(y_true, y_pred, weight, group)``
and return (eval_name->str, eval_result->float, is_bigger_better->Bool):
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values.
weight: array_like of shape = [n_samples]
weight
: array_like of shape = [n_samples]
The weight of samples.
group: array-like
group
: array-like
Group/query data, used for ranking task.
Returns
-------
new_func: callable
new_func
: callable
The new eval function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
preds
: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
The predicted values.
dataset: ``dataset``
dataset
: ``dataset``
The training set from which the labels will be extracted using
``dataset.get_label()``.
"""
...
...
@@ -232,15 +232,15 @@ class LGBMModel(_LGBMModelBase):
``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred, group) -> grad, hess``:
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values.
group: array-like
group
: array-like
Group/query data, used for ranking task.
grad: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
grad
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The value of the gradient for each sample point.
hess: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
hess
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The value of the second derivative for each sample point.
For multi-class task, the y_pred is grouped by class_id first, then grouped by row_id.
...
...
@@ -365,19 +365,19 @@ class LGBMModel(_LGBMModelBase):
Returns (eval_name, eval_result, is_bigger_better) or
list of (eval_name, eval_result, is_bigger_better)
y_true: array-like of shape = [n_samples]
y_true
: array-like of shape = [n_samples]
The target values.
y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
y_pred
: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
The predicted values.
weight: array-like of shape = [n_samples]
weight
: array-like of shape = [n_samples]
The weight of samples.
group: array-like
group
: array-like
Group/query data, used for ranking task.
eval_name: string
eval_name
: string
The name of evaluation.
eval_result: float
eval_result
: float
The eval result.
is_bigger_better: bool
is_bigger_better
: bool
Is eval result bigger better, e.g. AUC is bigger_better.
For multi-class task, the y_pred is grouped by class_id first, then grouped by row_id.
...
...
@@ -434,8 +434,7 @@ class LGBMModel(_LGBMModelBase):
def
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
):
ret
=
Dataset
(
X
,
label
=
y
,
weight
=
sample_weight
,
group
=
group
,
params
=
params
)
ret
.
set_init_score
(
init_score
)
return
ret
return
ret
.
set_init_score
(
init_score
)
train_set
=
_construct_dataset
(
X
,
y
,
sample_weight
,
init_score
,
group
,
params
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment