Commit f8267a50 authored by Guolin Ke's avatar Guolin Ke
Browse files

add min_data, fix test

parent f65164f6
...@@ -80,7 +80,9 @@ class LGBMModel(LGBMModelBase): ...@@ -80,7 +80,9 @@ class LGBMModel(LGBMModelBase):
gamma : float gamma : float
Minimum loss reduction required to make a further partition on a leaf node of the tree. Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int min_child_weight : int
Minimum sum of instance weight(hessian) needed in a child. Minimum sum of instance weight(hessian) needed in a child(leaf)
min_data : int
Minimum number of data need in a child(leaf)
subsample : float subsample : float
Subsample ratio of the training instance. Subsample ratio of the training instance.
subsample_freq : int subsample_freq : int
...@@ -121,10 +123,10 @@ class LGBMModel(LGBMModelBase): ...@@ -121,10 +123,10 @@ class LGBMModel(LGBMModelBase):
and you should group grad and hess in this way as well and you should group grad and hess in this way as well
""" """
def __init__(self, num_leaves=63, max_depth=-1, def __init__(self, num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, max_bin=255, learning_rate=0.1, n_estimators=10, max_bin=255,
silent=True, objective="regression", silent=True, objective="regression",
nthread=-1, gamma=0, min_child_weight=1, nthread=-1, gamma=0, min_child_weight=5, min_data=10,
subsample=1, subsample_freq=1, colsample_bytree=1, colsample_byleaf=1, subsample=1, subsample_freq=1, colsample_bytree=1, colsample_byleaf=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1, reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0): is_unbalance=False, seed=0):
...@@ -141,6 +143,7 @@ class LGBMModel(LGBMModelBase): ...@@ -141,6 +143,7 @@ class LGBMModel(LGBMModelBase):
self.nthread = nthread self.nthread = nthread
self.gamma = gamma self.gamma = gamma
self.min_child_weight = min_child_weight self.min_child_weight = min_child_weight
self.min_data = min_data
self.subsample = subsample self.subsample = subsample
self.subsample_freq = subsample_freq self.subsample_freq = subsample_freq
self.colsample_bytree = colsample_bytree self.colsample_bytree = colsample_bytree
......
...@@ -10,13 +10,13 @@ def test_binary_classification(): ...@@ -10,13 +10,13 @@ def test_binary_classification():
from sklearn import datasets, metrics, model_selection from sklearn import datasets, metrics, model_selection
X, y = datasets.make_classification(n_samples=10000, n_features=100) X, y = datasets.make_classification(n_samples=10000, n_features=100)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier().fit(x_train, y_train) lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
from sklearn.datasets import load_digits from sklearn.datasets import load_digits
digits = load_digits(2) digits = load_digits(2)
y = digits['target'] y = digits['target']
X = digits['data'] X = digits['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier().fit(x_train, y_train) lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
preds = lgb_model.predict(x_test) preds = lgb_model.predict(x_test)
err = sum(1 for i in range(len(preds)) err = sum(1 for i in range(len(preds))
...@@ -35,7 +35,7 @@ def test_multiclass_classification(): ...@@ -35,7 +35,7 @@ def test_multiclass_classification():
X, y = datasets.make_classification(n_samples=10000, n_features=100, n_classes=4, n_informative=3) X, y = datasets.make_classification(n_samples=10000, n_features=100, n_classes=4, n_informative=3)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier().fit(x_train, y_train) lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
preds = lgb_model.predict(x_test) preds = lgb_model.predict(x_test)
...@@ -51,10 +51,10 @@ def test_regression(): ...@@ -51,10 +51,10 @@ def test_regression():
boston = load_boston() boston = load_boston()
y = boston['target'] y = boston['target']
X = boston['data'] X = boston['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMRegressor().fit(x_train, y_train) lgb_model = lgb.LGBMRegressor().fit(x_train, y_train)
preds = lgb_model.predict(x_test) preds = lgb_model.predict(x_test)
assert mean_squared_error(preds, y_test) < 40 assert mean_squared_error(preds, y_test) < 100
def test_regression_with_custom_objective(): def test_regression_with_custom_objective():
from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_squared_error
...@@ -68,10 +68,10 @@ def test_regression_with_custom_objective(): ...@@ -68,10 +68,10 @@ def test_regression_with_custom_objective():
boston = load_boston() boston = load_boston()
y = boston['target'] y = boston['target']
X = boston['data'] X = boston['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMRegressor(objective=objective_ls).fit(x_train, y_train) lgb_model = lgb.LGBMRegressor(objective=objective_ls).fit(x_train, y_train)
preds = lgb_model.predict(x_test) preds = lgb_model.predict(x_test)
assert mean_squared_error(preds, y_test) < 40 assert mean_squared_error(preds, y_test) < 100
def test_binary_classification_with_custom_objective(): def test_binary_classification_with_custom_objective():
...@@ -83,13 +83,13 @@ def test_binary_classification_with_custom_objective(): ...@@ -83,13 +83,13 @@ def test_binary_classification_with_custom_objective():
hess = y_pred * (1.0 - y_pred) hess = y_pred * (1.0 - y_pred)
return grad, hess return grad, hess
X, y = datasets.make_classification(n_samples=10000, n_features=100) X, y = datasets.make_classification(n_samples=10000, n_features=100)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train) lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)
from sklearn.datasets import load_digits from sklearn.datasets import load_digits
digits = load_digits(2) digits = load_digits(2)
y = digits['target'] y = digits['target']
X = digits['data'] X = digits['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2) x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2, random_state=1)
lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train) lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)
preds = lgb_model.predict(x_test) preds = lgb_model.predict(x_test)
err = sum(1 for i in range(len(preds)) err = sum(1 for i in range(len(preds))
......
Markdown is supported
Attach a file by drag & drop or click to upload (0%).
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.