Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
5b539788
Commit
5b539788
authored
Dec 01, 2016
by
Guolin Ke
Browse files
fix some pep8 check
parent
1a8c23ed
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
133 deletions
+146
-133
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+93
-84
python-package/lightgbm/callback.py
python-package/lightgbm/callback.py
+2
-5
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+36
-34
python-package/lightgbm/libpath.py
python-package/lightgbm/libpath.py
+3
-3
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+12
-7
No files found.
python-package/lightgbm/basic.py
View file @
5b539788
...
...
@@ -4,8 +4,6 @@ from __future__ import absolute_import
import
sys
import
os
import
ctypes
import
collections
import
re
import
tempfile
import
numpy
as
np
...
...
@@ -59,7 +57,7 @@ def is_1d_list(data):
if
not
isinstance
(
data
,
list
):
return
False
if
len
(
data
)
>
0
:
if
not
isinstance
(
data
[
0
],
(
int
,
float
,
bool
)
):
if
not
isinstance
(
data
[
0
],
(
int
,
float
,
bool
)):
return
False
return
True
...
...
@@ -108,29 +106,29 @@ def param_dict_to_str(data):
if
is_str
(
val
):
pairs
.
append
(
str
(
key
)
+
'='
+
str
(
val
))
elif
isinstance
(
val
,
(
list
,
tuple
)):
pairs
.
append
(
str
(
key
)
+
'='
+
','
.
join
(
map
(
str
,
val
)))
pairs
.
append
(
str
(
key
)
+
'='
+
','
.
join
(
map
(
str
,
val
)))
elif
isinstance
(
val
,
(
int
,
float
,
bool
)):
pairs
.
append
(
str
(
key
)
+
'='
+
str
(
val
))
else
:
raise
TypeError
(
'unknow type of parameter:%s , got:%s'
%
(
key
,
type
(
val
).
__name__
))
raise
TypeError
(
'unknow type of parameter:%s , got:%s'
%
(
key
,
type
(
val
).
__name__
))
return
' '
.
join
(
pairs
)
"""marco definition of data type in c_api of LightGBM"""
C_API_DTYPE_FLOAT32
=
0
C_API_DTYPE_FLOAT64
=
1
C_API_DTYPE_INT32
=
2
C_API_DTYPE_INT64
=
3
C_API_DTYPE_FLOAT32
=
0
C_API_DTYPE_FLOAT64
=
1
C_API_DTYPE_INT32
=
2
C_API_DTYPE_INT64
=
3
"""Matric is row major in python"""
C_API_IS_ROW_MAJOR
=
1
C_API_IS_ROW_MAJOR
=
1
C_API_PREDICT_NORMAL
=
0
C_API_PREDICT_RAW_SCORE
=
1
C_API_PREDICT_LEAF_INDEX
=
2
C_API_PREDICT_NORMAL
=
0
C_API_PREDICT_RAW_SCORE
=
1
C_API_PREDICT_LEAF_INDEX
=
2
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
"weight"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"group"
:
C_API_DTYPE_INT32
,
}
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
"weight"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT32
,
"group"
:
C_API_DTYPE_INT32
}
def
c_float_array
(
data
):
"""Convert numpy array / list to c float array."""
...
...
@@ -144,7 +142,8 @@ def c_float_array(data):
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
type_data
=
C_API_DTYPE_FLOAT64
else
:
raise
TypeError
(
"expected np.float32 or np.float64, met type({})"
.
format
(
data
.
dtype
))
raise
TypeError
(
"expected np.float32 or np.float64, met type({})"
.
format
(
data
.
dtype
))
else
:
raise
TypeError
(
"Unknow type({})"
.
format
(
type
(
data
).
__name__
))
return
(
ptr_data
,
type_data
)
...
...
@@ -161,7 +160,8 @@ def c_int_array(data):
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int64
))
type_data
=
C_API_DTYPE_INT64
else
:
raise
TypeError
(
"expected np.int32 or np.int64, met type({})"
.
format
(
data
.
dtype
))
raise
TypeError
(
"expected np.int32 or np.int64, met type({})"
.
format
(
data
.
dtype
))
else
:
raise
TypeError
(
"Unknow type({})"
.
format
(
type
(
data
).
__name__
))
return
(
ptr_data
,
type_data
)
...
...
@@ -169,7 +169,7 @@ def c_int_array(data):
class
Predictor
(
object
):
""""A Predictor of LightGBM.
"""
def
__init__
(
self
,
model_file
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
def
__init__
(
self
,
model_file
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
"""Initialize the Predictor.
Parameters
...
...
@@ -213,7 +213,9 @@ class Predictor(object):
_safe_call
(
_LIB
.
LGBM_BoosterFree
(
self
.
handle
))
def
predict
(
self
,
data
,
num_iteration
=-
1
,
raw_score
=
False
,
pred_leaf
=
False
,
data_has_header
=
False
,
is_reshape
=
True
):
def
predict
(
self
,
data
,
num_iteration
=-
1
,
raw_score
=
False
,
pred_leaf
=
False
,
data_has_header
=
False
,
is_reshape
=
True
):
"""
Predict logic
...
...
@@ -222,7 +224,7 @@ class Predictor(object):
data : string/numpy array/scipy.sparse
Data source for prediction
When data is string type, it represents the path of txt file,
num_iteration :
num_iteration :
int
used iteration for prediction
raw_score : bool
True for predict raw score
...
...
@@ -238,7 +240,8 @@ class Predictor(object):
Prediction result
"""
if
isinstance
(
data
,
Dataset
):
raise
TypeError
(
"cannot use Dataset instance for prediction, please use raw data instead"
)
raise
TypeError
(
"cannot use Dataset instance for prediction,
\
please use raw data instead"
)
predict_type
=
C_API_PREDICT_NORMAL
if
raw_score
:
predict_type
=
C_API_PREDICT_RAW_SCORE
...
...
@@ -256,7 +259,7 @@ class Predictor(object):
predict_type
,
num_iteration
,
c_str
(
tmp_pred_fname
)))
tmp_file
=
open
(
tmp_pred_fname
,
"r"
)
tmp_file
=
open
(
tmp_pred_fname
,
"r"
)
lines
=
tmp_file
.
readlines
()
tmp_file
.
close
()
nrow
=
len
(
lines
)
...
...
@@ -267,15 +270,19 @@ class Predictor(object):
preds
=
np
.
array
(
preds
,
copy
=
False
)
os
.
remove
(
tmp_pred_fname
)
elif
isinstance
(
data
,
scipy
.
sparse
.
csr_matrix
):
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csr
(
data
,
num_iteration
,
predict_type
)
elif
isinstance
(
data
,
np
.
ndarray
):
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_np2d
(
data
,
num_iteration
,
predict_type
)
else
:
try
:
csr
=
scipy
.
sparse
.
csr_matrix
(
data
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
,
num_iteration
,
predict_type
)
preds
,
nrow
=
self
.
__pred_for_csr
(
csr
,
num_iteration
,
predict_type
)
except
:
raise
TypeError
(
'can not predict data for type {}'
.
format
(
type
(
data
).
__name__
))
raise
TypeError
(
'can not predict data for type {}'
.
format
(
type
(
data
).
__name__
))
if
pred_leaf
:
preds
=
preds
.
astype
(
np
.
int32
)
if
preds
.
size
!=
nrow
and
is_reshape
:
...
...
@@ -283,7 +290,8 @@ class Predictor(object):
ncol
=
int
(
preds
.
size
/
nrow
)
preds
=
preds
.
reshape
(
nrow
,
ncol
)
else
:
raise
ValueError
(
'len of predict result(%d) cannot be divide nrow(%d)'
%
(
preds
.
size
,
nrow
)
)
raise
ValueError
(
'len of predict result(%d) cannot be divide nrow (%d)'
%
(
preds
.
size
,
nrow
))
return
preds
def
__get_num_preds
(
self
,
num_iteration
,
nrow
,
predict_type
):
...
...
@@ -308,7 +316,8 @@ class Predictor(object):
"""change non-float data to float data, need to copy"""
data
=
np
.
array
(
mat
.
reshape
(
mat
.
size
),
dtype
=
np
.
float32
)
ptr_data
,
type_ptr_data
=
c_float_array
(
data
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
n_preds
=
self
.
__get_num_preds
(
num_iteration
,
mat
.
shape
[
0
],
predict_type
)
preds
=
np
.
zeros
(
n_preds
,
dtype
=
np
.
float32
)
out_num_preds
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterPredictForMat
(
...
...
@@ -365,10 +374,10 @@ except ImportError:
class
DataFrame
(
object
):
pass
PANDAS_DTYPE_MAPPER
=
{
'int8'
:
'int'
,
'int16'
:
'int'
,
'int32'
:
'int'
,
'int64'
:
'int'
,
'
u
int
8'
:
'int'
,
'uint1
6'
:
'int'
,
'uint
32
'
:
'int'
,
'uint6
4
'
:
'int'
,
'
float16'
:
'float'
,
'float32'
:
'floa
t'
,
'float6
4
'
:
'float'
,
'bool'
:
'i'
}
PANDAS_DTYPE_MAPPER
=
{
'int8'
:
'int'
,
'int16'
:
'int'
,
'int32'
:
'int'
,
'int6
4
'
:
'int'
,
'uint
8
'
:
'int'
,
'uint
1
6'
:
'int'
,
'
uint32'
:
'int'
,
'uint64'
:
'in
t'
,
'float
1
6'
:
'float'
,
'float32'
:
'float'
,
'float64'
:
'float'
,
'bool'
:
'i'
}
def
_data_from_pandas
(
data
):
if
isinstance
(
data
,
DataFrame
):
...
...
@@ -1098,7 +1107,7 @@ class Booster(object):
data : string/numpy array/scipy.sparse
Data source for prediction
When data is string type, it represents the path of txt file,
num_iteration :
num_iteration :
int
used iteration for prediction
raw_score : bool
True for predict raw score
...
...
@@ -1181,7 +1190,7 @@ class Booster(object):
ctypes
.
byref
(
tmp_out_len
),
data_ptr
))
if
tmp_out_len
.
value
!=
len
(
self
.
__inner_predict_buffer
[
data_idx
]):
raise
ValueError
(
"incorrect number of predict results for data %d"
%
(
data_idx
)
)
raise
ValueError
(
"incorrect number of predict results for data %d"
%
(
data_idx
)
)
self
.
__is_predicted_cur_iter
[
data_idx
]
=
True
return
self
.
__inner_predict_buffer
[
data_idx
]
...
...
python-package/lightgbm/callback.py
View file @
5b539788
...
...
@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True):
callback : function
The requested callback function.
"""
state
=
{}
factor_to_bigger_better
=
{}
best_score
=
{}
best_iter
=
{}
...
...
@@ -172,7 +171,6 @@ def early_stop(stopping_rounds, verbose=True):
factor_to_bigger_better
[
i
]
=
-
1.0
if
env
.
evaluation_result_list
[
i
][
3
]:
factor_to_bigger_better
[
i
]
=
1.0
state
[
'best_iter'
]
=
0
def
callback
(
env
):
"""internal function"""
...
...
@@ -188,7 +186,6 @@ def early_stop(stopping_rounds, verbose=True):
'
\t
'
.
join
([
_format_eval_result
(
x
)
for
x
in
env
.
evaluation_result_list
]))
else
:
if
env
.
iteration
-
best_iter
[
i
]
>=
stopping_rounds
:
state
[
'best_iter'
]
=
best_iter
[
i
]
if
env
.
model
is
not
None
:
env
.
model
.
set_attr
(
best_iteration
=
str
(
best_iter
[
i
]))
if
verbose
:
...
...
python-package/lightgbm/engine.py
View file @
5b539788
"""Training Library containing training routines of LightGBM."""
from
__future__
import
absolute_import
import
collections
import
numpy
as
np
from
.basic
import
LightGBMError
,
Predictor
,
Dataset
,
Booster
,
is_str
from
.
import
callback
def
_construct_dataset
(
X_y
,
reference
=
None
,
params
=
None
,
other_fields
=
None
,
predictor
=
None
):
params
=
None
,
other_fields
=
None
,
predictor
=
None
):
if
'max_bin'
in
params
:
max_bin
=
int
(
params
[
'max_bin'
])
else
:
...
...
@@ -31,10 +31,12 @@ def _construct_dataset(X_y, reference=None,
label
=
X_y
[
1
]
if
reference
is
None
:
ret
=
Dataset
(
data
,
label
=
label
,
max_bin
=
max_bin
,
weight
=
weight
,
group
=
group
,
predictor
=
predictor
,
params
=
params
)
weight
=
weight
,
group
=
group
,
predictor
=
predictor
,
params
=
params
)
else
:
ret
=
reference
.
create_valid
(
data
,
label
=
label
,
weight
=
weight
,
group
=
group
,
params
=
params
)
ret
=
reference
.
create_valid
(
data
,
label
=
label
,
weight
=
weight
,
group
=
group
,
params
=
params
)
if
init_score
is
not
None
:
ret
.
set_init_score
(
init_score
)
return
ret
...
...
@@ -409,6 +411,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
evaluation_result_list
=
res
))
except
callback
.
EarlyStopException
as
e
:
for
k
in
results
.
keys
():
results
[
k
]
=
results
[
k
][:(
e
.
state
[
'
best_iter
'
]
+
1
)]
results
[
k
]
=
results
[
k
][:(
e
.
best_iter
ation
+
1
)]
break
return
results
python-package/lightgbm/libpath.py
View file @
5b539788
python-package/lightgbm/sklearn.py
View file @
5b539788
...
...
@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase):
return
params
def
fit
(
self
,
X
,
y
,
eval_set
=
None
,
eval_metric
=
None
,
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
early_stopping_rounds
=
None
,
verbose
=
True
,
train_fields
=
None
,
valid_fields
=
None
,
other_params
=
None
):
"""
Fit the gradient boosting model
...
...
@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
if
eval_set
is
not
None
:
eval_set
=
list
(
(
x
[
0
],
self
.
_le
.
transform
(
x
[
1
]))
for
x
in
eval_set
)
super
(
LGBMClassifier
,
self
).
fit
(
X
,
training_labels
,
eval_set
,
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
super
(
LGBMClassifier
,
self
).
fit
(
X
,
training_labels
,
eval_set
,
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
return
self
def
predict
(
self
,
data
,
raw_score
=
False
,
num_iteration
=
0
):
...
...
@@ -429,5 +432,7 @@ class LGBMRanker(LGBMModel):
self
.
fobj
=
None
super
(
LGBMRanker
,
self
).
fit
(
X
,
y
,
eval_set
,
eval_metric
,
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
early_stopping_rounds
,
verbose
,
train_fields
,
valid_fields
,
other_params
)
return
self
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment