Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
d8ecdaf5
Commit
d8ecdaf5
authored
Nov 26, 2016
by
Guolin Ke
Browse files
change metric name. update some comments
parent
f059d0fe
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
136 additions
and
38 deletions
+136
-38
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+128
-30
src/metric/binary_metric.hpp
src/metric/binary_metric.hpp
+3
-3
src/metric/multiclass_metric.hpp
src/metric/multiclass_metric.hpp
+2
-2
src/metric/rank_metric.hpp
src/metric/rank_metric.hpp
+1
-1
src/metric/regression_metric.hpp
src/metric/regression_metric.hpp
+2
-2
No files found.
python-package/lightgbm/basic.py
View file @
d8ecdaf5
...
...
@@ -187,8 +187,7 @@ class Predictor(object):
""""A Predictor of LightGBM.
"""
def
__init__
(
self
,
model_file
=
None
,
params
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
# pylint: disable=invalid-name
"""Initialize the Booster.
"""Initialize the Predictor.
Parameters
----------
...
...
@@ -233,6 +232,29 @@ class Predictor(object):
def
predict
(
self
,
data
,
num_iteration
=-
1
,
raw_score
=
False
,
pred_leaf
=
False
,
data_has_header
=
False
,
is_reshape
=
True
):
"""
Predict logic
Parameters
----------
data : string/numpy array/scipy.sparse
Data source for prediction
When data is string type, it represents the path of txt file,
num_iteration :
used iteration for prediction
raw_score : bool
True for predict raw score
pred_leaf : bool
True for predict leaf index
data_has_header : bool
Used for txt data
is_reshape : bool
True for reshape to [nrow, ...]
Returns
-------
Prediction result
"""
if
isinstance
(
data
,
Dataset
):
raise
TypeError
(
"cannot use Dataset instance for prediction, please use raw data instead"
)
predict_type
=
C_API_PREDICT_NORMAL
...
...
@@ -400,7 +422,7 @@ class Dataset(object):
params
[
"max_bin"
]
=
max_bin
if
silent
:
params
[
"verbose"
]
=
0
els
e
:
el
if
"verbose"
not
in
param
s
:
params
[
"verbose"
]
=
1
params_str
=
dict_to_str
(
params
)
"""process for reference dataset"""
...
...
@@ -477,7 +499,7 @@ class Dataset(object):
group/query id for each instance. Note: if having group/query id, data should group by this id
silent : boolean, optional
Whether print messages during construction
other_
params: dict, optional
params: dict, optional
other parameters
"""
return
Dataset
(
data
,
label
=
label
,
max_bin
=
self
.
max_bin
,
reference
=
self
,
...
...
@@ -758,7 +780,6 @@ class Booster(object):
""""A Booster of of LightGBM.
"""
def
__init__
(
self
,
params
=
None
,
train_set
=
None
,
model_file
=
None
,
silent
=
False
):
# pylint: disable=invalid-name
"""Initialize the Booster.
Parameters
...
...
@@ -769,6 +790,8 @@ class Booster(object):
training dataset
model_file : string
Path to the model file.
silent : boolean, optional
Whether print messages during construction
"""
self
.
handle
=
ctypes
.
c_void_p
()
self
.
__need_reload_eval_info
=
True
...
...
@@ -777,7 +800,7 @@ class Booster(object):
params
=
{}
if
silent
:
params
[
"verbose"
]
=
0
els
e
:
el
if
"verbose"
not
in
param
s
:
params
[
"verbose"
]
=
1
if
train_set
is
not
None
:
"""Training task"""
...
...
@@ -806,6 +829,7 @@ class Booster(object):
self
.
__num_class
=
out_num_class
.
value
"""buffer for inner predict"""
self
.
__inner_predict_buffer
=
[
None
]
self
.
__is_predicted_cur_iter
=
[
False
]
self
.
__get_eval_info
()
elif
model_file
is
not
None
:
"""Prediction task"""
...
...
@@ -828,6 +852,15 @@ class Booster(object):
_safe_call
(
_LIB
.
LGBM_BoosterFree
(
self
.
handle
))
def
add_valid
(
self
,
data
,
name
):
"""Add an validation data
Parameters
----------
data : Dataset
validation data
name : String
name of validation data
"""
if
data
.
predictor
is
not
self
.
init_predictor
:
raise
Exception
(
"Add validation data failed, you should use same predictor for these data"
)
_safe_call
(
_LIB
.
LGBM_BoosterAddValidData
(
...
...
@@ -836,12 +869,23 @@ class Booster(object):
self
.
valid_sets
.
append
(
data
)
self
.
name_valid_sets
.
append
(
name
)
self
.
__num_dataset
+=
1
self
.
__inner_predict_buffer
.
append
(
None
)
self
.
__is_predicted_cur_iter
.
append
(
False
)
def
reset_parameter
(
self
,
params
,
silent
=
False
):
"""Reset parameters for booster
Parameters
----------
params : dict
params
silent : boolean, optional
Whether print messages during construction
"""
self
.
__need_reload_eval_info
=
True
if
silent
:
params
[
"verbose"
]
=
0
els
e
:
el
if
"verbose"
not
in
param
s
:
params
[
"verbose"
]
=
1
params_str
=
dict_to_str
(
params
)
_safe_call
(
_LIB
.
LGBM_BoosterResetParameter
(
...
...
@@ -864,6 +908,7 @@ class Booster(object):
-------
is_finished, bool
"""
"""need reset training data"""
if
train_set
is
not
None
and
train_set
is
not
self
.
train_set
:
if
train_set
.
predictor
is
not
self
.
init_predictor
:
...
...
@@ -878,6 +923,7 @@ class Booster(object):
_safe_call
(
_LIB
.
LGBM_BoosterUpdateOneIter
(
self
.
handle
,
ctypes
.
byref
(
is_finished
)))
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range
(
self
.
__num_dataset
)]
return
is_finished
.
value
==
1
else
:
grad
,
hess
=
fobj
(
self
.
__inner_predict
(
0
),
self
.
train_set
)
...
...
@@ -891,9 +937,9 @@ class Booster(object):
and you should group grad and hess in this way as well
Parameters
----------
grad : 1d numpy or list
grad : 1d numpy or
1d
list
The first order of gradient.
hess : 1d numpy or list
hess : 1d numpy or
1d
list
The second order of gradient.
Returns
...
...
@@ -922,11 +968,16 @@ class Booster(object):
grad
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
)),
hess
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
)),
ctypes
.
byref
(
is_finished
)))
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range
(
self
.
__num_dataset
)]
return
is_finished
.
value
==
1
def
rollback_one_iter
(
self
):
"""
Rollback one iteration
"""
_safe_call
(
_LIB
.
LGBM_BoosterRollbackOneIter
(
self
.
handle
))
self
.
__is_predicted_cur_iter
=
[
False
for
_
in
range
(
self
.
__num_dataset
)]
def
current_iteration
(
self
):
out_cur_iter
=
ctypes
.
c_int64
(
0
)
...
...
@@ -946,8 +997,8 @@ class Booster(object):
Custom evaluation function.
Returns
-------
result: st
r
Evaluation result st
ring
.
result:
li
st
Evaluation result
li
st.
"""
if
not
isinstance
(
data
,
Dataset
):
raise
TypeError
(
"Can only eval for Dataset instance"
)
...
...
@@ -977,7 +1028,7 @@ class Booster(object):
Returns
-------
result: str
Evaluation result st
ring
.
Evaluation result
li
st.
"""
return
self
.
__inner_eval
(
"training"
,
0
,
feval
)
...
...
@@ -992,29 +1043,67 @@ class Booster(object):
Returns
-------
result: str
Evaluation result st
ring
.
Evaluation result
li
st.
"""
ret
=
[]
for
i
in
range
(
1
,
self
.
__num_dataset
):
ret
.
app
end
(
self
.
__inner_eval
(
self
.
name_valid_sets
[
i
-
1
],
i
,
feval
))
return
'
\n
'
.
join
(
ret
)
ret
.
ext
end
(
self
.
__inner_eval
(
self
.
name_valid_sets
[
i
-
1
],
i
,
feval
))
return
ret
def
save_model
(
self
,
filename
,
num_iteration
=-
1
):
"""Save model of booster to file
Parameters
----------
filename : str
filename to save
num_iteration: int
number of iteration that want to save. < 0 means save all
"""
_safe_call
(
_LIB
.
LGBM_BoosterSaveModel
(
self
.
handle
,
num_iteration
,
c_str
(
filename
)))
def
predict
(
self
,
data
,
num_iteration
=-
1
,
raw_score
=
False
,
pred_leaf
=
False
,
data_has_header
=
False
,
is_reshape
=
True
):
"""
Predict logic
Parameters
----------
data : string/numpy array/scipy.sparse
Data source for prediction
When data is string type, it represents the path of txt file,
num_iteration :
used iteration for prediction
raw_score : bool
True for predict raw score
pred_leaf : bool
True for predict leaf index
data_has_header : bool
Used for txt data
is_reshape : bool
True for reshape to [nrow, ...]
Returns
-------
Prediction result
"""
predictor
=
Predictor
(
booster_handle
=
self
.
handle
,
is_manage_handle
=
False
)
return
predictor
.
predict
(
data
,
num_iteration
,
raw_score
,
pred_leaf
,
data_has_header
,
is_reshape
)
def
to_predictor
(
self
):
"""Convert to predictor
Note: Predictor will manage the handle after doing this
"""
predictor
=
Predictor
(
booster_handle
=
self
.
handle
,
is_manage_handle
=
True
)
self
.
__is_manage_handle
=
False
return
predictor
def
__inner_eval
(
self
,
data_name
,
data_idx
,
feval
=
None
):
"""
Evaulate traning or validation data
"""
if
data_idx
>=
self
.
__num_dataset
:
raise
ValueError
(
"data_idx should be smaller than number of dataset"
)
self
.
__get_eval_info
()
...
...
@@ -1030,7 +1119,7 @@ class Booster(object):
if
tmp_out_len
.
value
!=
self
.
__num_inner_eval
:
raise
ValueError
(
"incorrect number of eval results"
)
for
i
in
range
(
self
.
__num_inner_eval
):
ret
.
append
(
'%s %s : %f'
%
(
data_name
,
self
.
__name_inner_eval
[
i
],
result
[
i
]))
ret
.
append
((
data_name
,
self
.
__name_inner_eval
[
i
],
result
[
i
]))
if
feval
is
not
None
:
if
data_idx
==
0
:
cur_data
=
self
.
train_set
...
...
@@ -1038,14 +1127,17 @@ class Booster(object):
cur_data
=
self
.
valid_sets
[
data_idx
-
1
]
feval_ret
=
feval
(
self
.
__inner_predict
(
data_idx
),
cur_data
)
if
isinstance
(
feval_ret
,
list
):
for
name
,
val
in
feval_ret
:
ret
.
append
(
'%s %s : %f'
%
(
data_name
,
name
,
val
))
for
eval_
name
,
val
in
feval_ret
:
ret
.
append
((
data_name
,
eval_
name
,
val
))
else
:
name
,
val
=
feval_ret
ret
.
append
(
'%s %s : %f'
%
(
data_name
,
name
,
val
))
return
'
\t
'
.
join
(
ret
)
eval_
name
,
val
=
feval_ret
ret
.
append
((
data_name
,
eval_
name
,
val
))
return
ret
def
__inner_predict
(
self
,
data_idx
):
"""
Predict for training and validation dataset
"""
if
data_idx
>=
self
.
__num_dataset
:
raise
ValueError
(
"data_idx should be smaller than number of dataset"
)
if
self
.
__inner_predict_buffer
[
data_idx
]
is
None
:
...
...
@@ -1055,6 +1147,8 @@ class Booster(object):
num_data
=
self
.
valid_sets
[
data_idx
-
1
].
num_data
()
*
self
.
__num_class
self
.
__inner_predict_buffer
[
data_idx
]
=
\
np
.
array
([
0.0
for
_
in
range
(
num_data
)],
dtype
=
np
.
float32
,
copy
=
False
)
"""avoid to predict many time in one iteration"""
if
not
self
.
__is_predicted_cur_iter
[
data_idx
]:
tmp_out_len
=
ctypes
.
c_int64
(
0
)
data_ptr
=
self
.
__inner_predict_buffer
[
data_idx
].
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
_safe_call
(
_LIB
.
LGBM_BoosterGetPredict
(
...
...
@@ -1064,9 +1158,13 @@ class Booster(object):
data_ptr
))
if
tmp_out_len
.
value
!=
len
(
self
.
__inner_predict_buffer
[
data_idx
]):
raise
ValueError
(
"incorrect number of predict results for data %d"
%
(
data_idx
)
)
self
.
__is_predicted_cur_iter
[
data_idx
]
=
True
return
self
.
__inner_predict_buffer
[
data_idx
]
def
__get_eval_info
(
self
):
"""
Get inner evaluation count and names
"""
if
self
.
__need_reload_eval_info
:
self
.
__need_reload_eval_info
=
False
out_num_eval
=
ctypes
.
c_int64
(
0
)
...
...
src/metric/binary_metric.hpp
View file @
d8ecdaf5
...
...
@@ -116,7 +116,7 @@ public:
}
inline
static
const
char
*
Name
()
{
return
"log
loss"
;
return
"logloss"
;
}
};
/*!
...
...
@@ -135,7 +135,7 @@ public:
}
inline
static
const
char
*
Name
()
{
return
"error
rate
"
;
return
"error"
;
}
};
...
...
@@ -160,7 +160,7 @@ public:
}
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
name_
.
emplace_back
(
"
AUC
"
);
name_
.
emplace_back
(
"
auc
"
);
num_data_
=
num_data
;
// get label
...
...
src/metric/multiclass_metric.hpp
View file @
d8ecdaf5
...
...
@@ -109,7 +109,7 @@ public:
}
inline
static
const
char
*
Name
()
{
return
"multi
error"
;
return
"multi
_
error"
;
}
};
...
...
@@ -129,7 +129,7 @@ public:
}
inline
static
const
char
*
Name
()
{
return
"multi
logloss"
;
return
"multi
_
logloss"
;
}
};
...
...
src/metric/rank_metric.hpp
View file @
d8ecdaf5
...
...
@@ -35,7 +35,7 @@ public:
}
void
Init
(
const
Metadata
&
metadata
,
data_size_t
num_data
)
override
{
for
(
auto
k
:
eval_at_
)
{
name_
.
emplace_back
(
std
::
string
(
"
NDCG
@"
)
+
std
::
to_string
(
k
));
name_
.
emplace_back
(
std
::
string
(
"
ndcg
@"
)
+
std
::
to_string
(
k
));
}
num_data_
=
num_data
;
// get label
...
...
src/metric/regression_metric.hpp
View file @
d8ecdaf5
...
...
@@ -101,7 +101,7 @@ public:
}
inline
static
const
char
*
Name
()
{
return
"l2
loss
"
;
return
"l2"
;
}
};
...
...
@@ -114,7 +114,7 @@ public:
return
std
::
fabs
(
score
-
label
);
}
inline
static
const
char
*
Name
()
{
return
"l1
loss
"
;
return
"l1"
;
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment