Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
f8267a50
Commit
f8267a50
authored
Nov 30, 2016
by
Guolin Ke
Browse files
add min_data, fix test
parent
f65164f6
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
13 deletions
+16
-13
python-package/lightgbm/sklearn.py
python-package/lightgbm/sklearn.py
+7
-4
tests/python_package_test/test_sklearn.py
tests/python_package_test/test_sklearn.py
+9
-9
No files found.
python-package/lightgbm/sklearn.py
View file @
f8267a50
...
@@ -80,7 +80,9 @@ class LGBMModel(LGBMModelBase):
...
@@ -80,7 +80,9 @@ class LGBMModel(LGBMModelBase):
gamma : float
gamma : float
Minimum loss reduction required to make a further partition on a leaf node of the tree.
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int
min_child_weight : int
Minimum sum of instance weight(hessian) needed in a child.
Minimum sum of instance weight (hessian) needed in a child (leaf).
min_data : int
Minimum number of data points needed in a child (leaf).
subsample : float
subsample : float
Subsample ratio of the training instance.
Subsample ratio of the training instance.
subsample_freq : int
subsample_freq : int
...
@@ -121,10 +123,10 @@ class LGBMModel(LGBMModelBase):
...
@@ -121,10 +123,10 @@ class LGBMModel(LGBMModelBase):
and you should group grad and hess in this way as well
and you should group grad and hess in this way as well
"""
"""
def
__init__
(
self
,
num_leaves
=
6
3
,
max_depth
=-
1
,
def
__init__
(
self
,
num_leaves
=
3
1
,
max_depth
=-
1
,
learning_rate
=
0.1
,
n_estimators
=
10
0
,
max_bin
=
255
,
learning_rate
=
0.1
,
n_estimators
=
10
,
max_bin
=
255
,
silent
=
True
,
objective
=
"regression"
,
silent
=
True
,
objective
=
"regression"
,
nthread
=-
1
,
gamma
=
0
,
min_child_weight
=
1
,
nthread
=-
1
,
gamma
=
0
,
min_child_weight
=
5
,
min_data
=
10
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
colsample_byleaf
=
1
,
subsample
=
1
,
subsample_freq
=
1
,
colsample_bytree
=
1
,
colsample_byleaf
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
reg_alpha
=
0
,
reg_lambda
=
0
,
scale_pos_weight
=
1
,
is_unbalance
=
False
,
seed
=
0
):
is_unbalance
=
False
,
seed
=
0
):
...
@@ -141,6 +143,7 @@ class LGBMModel(LGBMModelBase):
...
@@ -141,6 +143,7 @@ class LGBMModel(LGBMModelBase):
self
.
nthread
=
nthread
self
.
nthread
=
nthread
self
.
gamma
=
gamma
self
.
gamma
=
gamma
self
.
min_child_weight
=
min_child_weight
self
.
min_child_weight
=
min_child_weight
self
.
min_data
=
min_data
self
.
subsample
=
subsample
self
.
subsample
=
subsample
self
.
subsample_freq
=
subsample_freq
self
.
subsample_freq
=
subsample_freq
self
.
colsample_bytree
=
colsample_bytree
self
.
colsample_bytree
=
colsample_bytree
...
...
tests/python_package_test/test_sklearn.py
View file @
f8267a50
...
@@ -10,13 +10,13 @@ def test_binary_classification():
...
@@ -10,13 +10,13 @@ def test_binary_classification():
from
sklearn
import
datasets
,
metrics
,
model_selection
from
sklearn
import
datasets
,
metrics
,
model_selection
X
,
y
=
datasets
.
make_classification
(
n_samples
=
10000
,
n_features
=
100
)
X
,
y
=
datasets
.
make_classification
(
n_samples
=
10000
,
n_features
=
100
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMClassifier
().
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMClassifier
().
fit
(
x_train
,
y_train
)
from
sklearn.datasets
import
load_digits
from
sklearn.datasets
import
load_digits
digits
=
load_digits
(
2
)
digits
=
load_digits
(
2
)
y
=
digits
[
'target'
]
y
=
digits
[
'target'
]
X
=
digits
[
'data'
]
X
=
digits
[
'data'
]
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.
2
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.
1
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMClassifier
().
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMClassifier
().
fit
(
x_train
,
y_train
)
preds
=
lgb_model
.
predict
(
x_test
)
preds
=
lgb_model
.
predict
(
x_test
)
err
=
sum
(
1
for
i
in
range
(
len
(
preds
))
err
=
sum
(
1
for
i
in
range
(
len
(
preds
))
...
@@ -35,7 +35,7 @@ def test_multiclass_classification():
...
@@ -35,7 +35,7 @@ def test_multiclass_classification():
X
,
y
=
datasets
.
make_classification
(
n_samples
=
10000
,
n_features
=
100
,
n_classes
=
4
,
n_informative
=
3
)
X
,
y
=
datasets
.
make_classification
(
n_samples
=
10000
,
n_features
=
100
,
n_classes
=
4
,
n_informative
=
3
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMClassifier
().
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMClassifier
().
fit
(
x_train
,
y_train
)
preds
=
lgb_model
.
predict
(
x_test
)
preds
=
lgb_model
.
predict
(
x_test
)
...
@@ -51,10 +51,10 @@ def test_regression():
...
@@ -51,10 +51,10 @@ def test_regression():
boston
=
load_boston
()
boston
=
load_boston
()
y
=
boston
[
'target'
]
y
=
boston
[
'target'
]
X
=
boston
[
'data'
]
X
=
boston
[
'data'
]
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMRegressor
().
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMRegressor
().
fit
(
x_train
,
y_train
)
preds
=
lgb_model
.
predict
(
x_test
)
preds
=
lgb_model
.
predict
(
x_test
)
assert
mean_squared_error
(
preds
,
y_test
)
<
4
0
assert
mean_squared_error
(
preds
,
y_test
)
<
10
0
def
test_regression_with_custom_objective
():
def
test_regression_with_custom_objective
():
from
sklearn.metrics
import
mean_squared_error
from
sklearn.metrics
import
mean_squared_error
...
@@ -68,10 +68,10 @@ def test_regression_with_custom_objective():
...
@@ -68,10 +68,10 @@ def test_regression_with_custom_objective():
boston
=
load_boston
()
boston
=
load_boston
()
y
=
boston
[
'target'
]
y
=
boston
[
'target'
]
X
=
boston
[
'data'
]
X
=
boston
[
'data'
]
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMRegressor
(
objective
=
objective_ls
).
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMRegressor
(
objective
=
objective_ls
).
fit
(
x_train
,
y_train
)
preds
=
lgb_model
.
predict
(
x_test
)
preds
=
lgb_model
.
predict
(
x_test
)
assert
mean_squared_error
(
preds
,
y_test
)
<
4
0
assert
mean_squared_error
(
preds
,
y_test
)
<
10
0
def
test_binary_classification_with_custom_objective
():
def
test_binary_classification_with_custom_objective
():
...
@@ -83,13 +83,13 @@ def test_binary_classification_with_custom_objective():
...
@@ -83,13 +83,13 @@ def test_binary_classification_with_custom_objective():
hess
=
y_pred
*
(
1.0
-
y_pred
)
hess
=
y_pred
*
(
1.0
-
y_pred
)
return
grad
,
hess
return
grad
,
hess
X
,
y
=
datasets
.
make_classification
(
n_samples
=
10000
,
n_features
=
100
)
X
,
y
=
datasets
.
make_classification
(
n_samples
=
10000
,
n_features
=
100
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.1
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMClassifier
(
objective
=
logregobj
).
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMClassifier
(
objective
=
logregobj
).
fit
(
x_train
,
y_train
)
from
sklearn.datasets
import
load_digits
from
sklearn.datasets
import
load_digits
digits
=
load_digits
(
2
)
digits
=
load_digits
(
2
)
y
=
digits
[
'target'
]
y
=
digits
[
'target'
]
X
=
digits
[
'data'
]
X
=
digits
[
'data'
]
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.2
)
x_train
,
x_test
,
y_train
,
y_test
=
model_selection
.
train_test_split
(
X
,
y
,
test_size
=
0.2
,
random_state
=
1
)
lgb_model
=
lgb
.
LGBMClassifier
(
objective
=
logregobj
).
fit
(
x_train
,
y_train
)
lgb_model
=
lgb
.
LGBMClassifier
(
objective
=
logregobj
).
fit
(
x_train
,
y_train
)
preds
=
lgb_model
.
predict
(
x_test
)
preds
=
lgb_model
.
predict
(
x_test
)
err
=
sum
(
1
for
i
in
range
(
len
(
preds
))
err
=
sum
(
1
for
i
in
range
(
len
(
preds
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment