Commit f8267a50 authored by Guolin Ke's avatar Guolin Ke
Browse files

add min_data, fix test

parent f65164f6
......@@ -80,7 +80,9 @@ class LGBMModel(LGBMModelBase):
gamma : float
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int
Minimum sum of instance weight(hessian) needed in a child.
Minimum sum of instance weight (hessian) needed in a child (leaf).
min_data : int
Minimum number of data needed in a child (leaf).
subsample : float
Subsample ratio of the training instance.
subsample_freq : int
......@@ -121,10 +123,10 @@ class LGBMModel(LGBMModelBase):
and you should group grad and hess in this way as well
"""
def __init__(self, num_leaves=63, max_depth=-1,
learning_rate=0.1, n_estimators=100, max_bin=255,
def __init__(self, num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=10, max_bin=255,
silent=True, objective="regression",
nthread=-1, gamma=0, min_child_weight=1,
nthread=-1, gamma=0, min_child_weight=5, min_data=10,
subsample=1, subsample_freq=1, colsample_bytree=1, colsample_byleaf=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0):
......@@ -141,6 +143,7 @@ class LGBMModel(LGBMModelBase):
self.nthread = nthread
self.gamma = gamma
self.min_child_weight = min_child_weight
self.min_data = min_data
self.subsample = subsample
self.subsample_freq = subsample_freq
self.colsample_bytree = colsample_bytree
......
......@@ -10,13 +10,13 @@ def test_binary_classification():
from sklearn import datasets, metrics, model_selection
X, y = datasets.make_classification(n_samples=10000, n_features=100)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
from sklearn.datasets import load_digits
digits = load_digits(2)
y = digits['target']
X = digits['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
preds = lgb_model.predict(x_test)
err = sum(1 for i in range(len(preds))
......@@ -35,7 +35,7 @@ def test_multiclass_classification():
X, y = datasets.make_classification(n_samples=10000, n_features=100, n_classes=4, n_informative=3)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier().fit(x_train, y_train)
preds = lgb_model.predict(x_test)
......@@ -51,10 +51,10 @@ def test_regression():
boston = load_boston()
y = boston['target']
X = boston['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMRegressor().fit(x_train, y_train)
preds = lgb_model.predict(x_test)
assert mean_squared_error(preds, y_test) < 40
assert mean_squared_error(preds, y_test) < 100
def test_regression_with_custom_objective():
from sklearn.metrics import mean_squared_error
......@@ -68,10 +68,10 @@ def test_regression_with_custom_objective():
boston = load_boston()
y = boston['target']
X = boston['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMRegressor(objective=objective_ls).fit(x_train, y_train)
preds = lgb_model.predict(x_test)
assert mean_squared_error(preds, y_test) < 40
assert mean_squared_error(preds, y_test) < 100
def test_binary_classification_with_custom_objective():
......@@ -83,13 +83,13 @@ def test_binary_classification_with_custom_objective():
hess = y_pred * (1.0 - y_pred)
return grad, hess
X, y = datasets.make_classification(n_samples=10000, n_features=100)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=1)
lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)
from sklearn.datasets import load_digits
digits = load_digits(2)
y = digits['target']
X = digits['data']
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2, random_state=1)
lgb_model = lgb.LGBMClassifier(objective=logregobj).fit(x_train, y_train)
preds = lgb_model.predict(x_test)
err = sum(1 for i in range(len(preds))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment