Unverified Commit 1b792e71 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[ci] [python-package] enable ruff-format on tests and examples (#6317)

parent b60068c8
This diff is collapsed.
......@@ -10,7 +10,7 @@ def reset_feature_fraction(boosting_round):
return 0.6 if boosting_round < 15 else 0.8
@pytest.mark.parametrize('serializer', SERIALIZERS)
@pytest.mark.parametrize("serializer", SERIALIZERS)
def test_early_stopping_callback_is_picklable(serializer):
rounds = 5
callback = lgb.early_stopping(stopping_rounds=rounds)
......@@ -32,7 +32,7 @@ def test_early_stopping_callback_rejects_invalid_stopping_rounds_with_informativ
lgb.early_stopping(stopping_rounds="neverrrr")
@pytest.mark.parametrize('serializer', SERIALIZERS)
@pytest.mark.parametrize("serializer", SERIALIZERS)
def test_log_evaluation_callback_is_picklable(serializer):
periods = 42
callback = lgb.log_evaluation(period=periods)
......@@ -43,7 +43,7 @@ def test_log_evaluation_callback_is_picklable(serializer):
assert callback.period == periods
@pytest.mark.parametrize('serializer', SERIALIZERS)
@pytest.mark.parametrize("serializer", SERIALIZERS)
def test_record_evaluation_callback_is_picklable(serializer):
results = {}
callback = lgb.record_evaluation(eval_result=results)
......@@ -54,12 +54,9 @@ def test_record_evaluation_callback_is_picklable(serializer):
assert callback.eval_result is results
@pytest.mark.parametrize('serializer', SERIALIZERS)
@pytest.mark.parametrize("serializer", SERIALIZERS)
def test_reset_parameter_callback_is_picklable(serializer):
params = {
'bagging_fraction': [0.7] * 5 + [0.6] * 5,
'feature_fraction': reset_feature_fraction
}
params = {"bagging_fraction": [0.7] * 5 + [0.6] * 5, "feature_fraction": reset_feature_fraction}
callback = lgb.reset_parameter(**params)
callback_from_disk = pickle_and_unpickle_object(obj=callback, serializer=serializer)
assert callback_from_disk.order == 10
......
......@@ -6,22 +6,21 @@ from sklearn.datasets import load_svmlight_file
import lightgbm as lgb
# Root of the repository's examples/ directory, resolved relative to this test file
# (two parent levels up from the tests directory).
EXAMPLES_DIR = Path(__file__).absolute().parents[2] / "examples"
class FileLoader:
def __init__(self, directory, prefix, config_file='train.conf'):
def __init__(self, directory, prefix, config_file="train.conf"):
self.directory = directory
self.prefix = prefix
self.params = {'gpu_use_dp': True}
with open(self.directory / config_file, 'r') as f:
self.params = {"gpu_use_dp": True}
with open(self.directory / config_file, "r") as f:
for line in f.readlines():
line = line.strip()
if line and not line.startswith('#'):
key, value = [token.strip() for token in line.split('=')]
if 'early_stopping' not in key: # disable early_stopping
self.params[key] = value if key not in {'num_trees', 'num_threads'} else int(value)
if line and not line.startswith("#"):
key, value = [token.strip() for token in line.split("=")]
if "early_stopping" not in key: # disable early_stopping
self.params[key] = value if key not in {"num_trees", "num_threads"} else int(value)
def load_dataset(self, suffix, is_sparse=False):
filename = str(self.path(suffix))
......@@ -33,14 +32,14 @@ class FileLoader:
return mat[:, 1:], mat[:, 0], filename
def load_field(self, suffix):
return np.loadtxt(str(self.directory / f'{self.prefix}{suffix}'))
return np.loadtxt(str(self.directory / f"{self.prefix}{suffix}"))
def load_cpp_result(self, result_file='LightGBM_predict_result.txt'):
def load_cpp_result(self, result_file="LightGBM_predict_result.txt"):
return np.loadtxt(str(self.directory / result_file))
def train_predict_check(self, lgb_train, X_test, X_test_fn, sk_pred):
params = dict(self.params)
params['force_row_wise'] = True
params["force_row_wise"] = True
gbm = lgb.train(params, lgb_train)
y_pred = gbm.predict(X_test)
cpp_pred = gbm.predict(X_test_fn)
......@@ -49,7 +48,7 @@ class FileLoader:
def file_load_check(self, lgb_train, name):
lgb_train_f = lgb.Dataset(self.path(name), params=self.params).construct()
for f in ('num_data', 'num_feature', 'get_label', 'get_weight', 'get_init_score', 'get_group'):
for f in ("num_data", "num_feature", "get_label", "get_weight", "get_init_score", "get_group"):
a = getattr(lgb_train, f)()
b = getattr(lgb_train_f, f)()
if a is None and b is None:
......@@ -62,83 +61,83 @@ class FileLoader:
assert a == b, f
def path(self, suffix):
return self.directory / f'{self.prefix}{suffix}'
return self.directory / f"{self.prefix}{suffix}"
def test_binary():
    """Check binary classification: train via both lgb.Dataset params and the
    sklearn wrapper on the binary_classification example, then compare Python
    predictions against the CLI result and verify file-based Dataset loading.
    """
    fd = FileLoader(EXAMPLES_DIR / "binary_classification", "binary")
    X_train, y_train, _ = fd.load_dataset(".train")
    X_test, _, X_test_fn = fd.load_dataset(".test")
    weight_train = fd.load_field(".train.weight")
    lgb_train = lgb.Dataset(X_train, y_train, params=fd.params, weight=weight_train)
    gbm = lgb.LGBMClassifier(**fd.params)
    gbm.fit(X_train, y_train, sample_weight=weight_train)
    # probability of the positive class, to match the CLI's prediction output
    sk_pred = gbm.predict_proba(X_test)[:, 1]
    fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    fd.file_load_check(lgb_train, ".train")
def test_binary_linear():
    """Same as test_binary but using the linear-tree configuration file
    (train_linear.conf) from the binary_classification example.
    """
    fd = FileLoader(EXAMPLES_DIR / "binary_classification", "binary", "train_linear.conf")
    X_train, y_train, _ = fd.load_dataset(".train")
    X_test, _, X_test_fn = fd.load_dataset(".test")
    weight_train = fd.load_field(".train.weight")
    lgb_train = lgb.Dataset(X_train, y_train, params=fd.params, weight=weight_train)
    gbm = lgb.LGBMClassifier(**fd.params)
    gbm.fit(X_train, y_train, sample_weight=weight_train)
    # probability of the positive class, to match the CLI's prediction output
    sk_pred = gbm.predict_proba(X_test)[:, 1]
    fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    fd.file_load_check(lgb_train, ".train")
def test_multiclass():
    """Check multiclass classification on the multiclass_classification example:
    compare sklearn-wrapper predictions against the CLI result and verify
    file-based Dataset loading.
    """
    fd = FileLoader(EXAMPLES_DIR / "multiclass_classification", "multiclass")
    X_train, y_train, _ = fd.load_dataset(".train")
    X_test, _, X_test_fn = fd.load_dataset(".test")
    lgb_train = lgb.Dataset(X_train, y_train)
    gbm = lgb.LGBMClassifier(**fd.params)
    gbm.fit(X_train, y_train)
    sk_pred = gbm.predict_proba(X_test)
    fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    fd.file_load_check(lgb_train, ".train")
def test_regression():
    """Check regression on the regression example, including per-row initial
    scores: compare sklearn-wrapper predictions against the CLI result and
    verify file-based Dataset loading.
    """
    fd = FileLoader(EXAMPLES_DIR / "regression", "regression")
    X_train, y_train, _ = fd.load_dataset(".train")
    X_test, _, X_test_fn = fd.load_dataset(".test")
    init_score_train = fd.load_field(".train.init")
    lgb_train = lgb.Dataset(X_train, y_train, init_score=init_score_train)
    gbm = lgb.LGBMRegressor(**fd.params)
    gbm.fit(X_train, y_train, init_score=init_score_train)
    sk_pred = gbm.predict(X_test)
    fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    fd.file_load_check(lgb_train, ".train")
def test_lambdarank():
    """Check LambdaRank learning-to-rank on the lambdarank example (sparse
    input plus query groups): compare ranker predictions against the CLI
    result and verify file-based Dataset loading.
    """
    fd = FileLoader(EXAMPLES_DIR / "lambdarank", "rank")
    X_train, y_train, _ = fd.load_dataset(".train", is_sparse=True)
    X_test, _, X_test_fn = fd.load_dataset(".test", is_sparse=True)
    group_train = fd.load_field(".train.query")
    lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
    params = dict(fd.params)
    params["force_col_wise"] = True
    gbm = lgb.LGBMRanker(**params)
    gbm.fit(X_train, y_train, group=group_train)
    sk_pred = gbm.predict(X_test)
    fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    fd.file_load_check(lgb_train, ".train")
def test_xendcg():
    """Check XE_NDCG learning-to-rank on the xendcg example (sparse input plus
    query groups): compare ranker predictions against the CLI result and
    verify file-based Dataset loading.
    """
    fd = FileLoader(EXAMPLES_DIR / "xendcg", "rank")
    X_train, y_train, _ = fd.load_dataset(".train", is_sparse=True)
    X_test, _, X_test_fn = fd.load_dataset(".test", is_sparse=True)
    group_train = fd.load_field(".train.query")
    lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
    gbm = lgb.LGBMRanker(**fd.params)
    gbm.fit(X_train, y_train, group=group_train)
    sk_pred = gbm.predict(X_test)
    fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
    fd.file_load_check(lgb_train, ".train")
This diff is collapsed.
......@@ -28,7 +28,7 @@ def test_cpu_and_gpu_work():
params_gpu = params_cpu.copy()
params_gpu["device"] = "gpu"
# Double-precision floats are only supported on x86_64 with PoCL
params_gpu["gpu_use_dp"] = (platform.machine() == "x86_64")
params_gpu["gpu_use_dp"] = platform.machine() == "x86_64"
gpu_bst = lgb.train(params_gpu, data, num_boost_round=10)
gpu_score = log_loss(y, gpu_bst.predict(X))
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -10,7 +10,7 @@ import lightgbm as lgb
def test_register_logger(tmp_path):
logger = logging.getLogger("LightGBM")
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(levelname)s | %(message)s')
formatter = logging.Formatter("%(levelname)s | %(message)s")
log_filename = tmp_path / "LightGBM_test_logger.log"
file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8")
file_handler.setLevel(logging.DEBUG)
......@@ -18,29 +18,27 @@ def test_register_logger(tmp_path):
logger.addHandler(file_handler)
def dummy_metric(_, __):
logger.debug('In dummy_metric')
return 'dummy_metric', 1, True
logger.debug("In dummy_metric")
return "dummy_metric", 1, True
lgb.register_logger(logger)
X = np.array([[1, 2, 3],
[1, 2, 4],
[1, 2, 4],
[1, 2, 3]],
dtype=np.float32)
X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
y = np.array([0, 1, 1, 0])
lgb_train = lgb.Dataset(X, y)
lgb_valid = lgb.Dataset(X, y) # different object for early-stopping
eval_records = {}
callbacks = [
lgb.record_evaluation(eval_records),
lgb.log_evaluation(2),
lgb.early_stopping(10)
]
lgb.train({'objective': 'binary', 'metric': ['auc', 'binary_error']},
lgb_train, num_boost_round=10, feval=dummy_metric,
valid_sets=[lgb_valid], categorical_feature=[1], callbacks=callbacks)
callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)]
lgb.train(
{"objective": "binary", "metric": ["auc", "binary_error"]},
lgb_train,
num_boost_round=10,
feval=dummy_metric,
valid_sets=[lgb_valid],
categorical_feature=[1],
callbacks=callbacks,
)
lgb.plot_metric(eval_records)
......@@ -89,7 +87,7 @@ WARNING | More than one metric available, picking one to plot.
"INFO | [LightGBM] [Warning] GPU acceleration is disabled because no non-trivial dense features can be found",
"INFO | [LightGBM] [Warning] Using sparse features with CUDA is currently not supported.",
"INFO | [LightGBM] [Warning] CUDA currently requires double precision calculations.",
"INFO | [LightGBM] [Info] LightGBM using CUDA trainer with DP float!!"
"INFO | [LightGBM] [Info] LightGBM using CUDA trainer with DP float!!",
]
cuda_lines = [
"INFO | [LightGBM] [Warning] Metric auc is not implemented in cuda version. Fall back to evaluation on CPU.",
......@@ -142,11 +140,7 @@ def test_register_custom_logger():
logged_messages.append(msg)
custom_logger = CustomLogger()
lgb.register_logger(
custom_logger,
info_method_name="custom_info",
warning_method_name="custom_warning"
)
lgb.register_logger(custom_logger, info_method_name="custom_info", warning_method_name="custom_warning")
lgb.basic._log_info("info message")
lgb.basic._log_warning("warning message")
......@@ -155,18 +149,14 @@ def test_register_custom_logger():
assert logged_messages == expected_log
logged_messages = []
X = np.array([[1, 2, 3],
[1, 2, 4],
[1, 2, 4],
[1, 2, 3]],
dtype=np.float32)
X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
y = np.array([0, 1, 1, 0])
lgb_data = lgb.Dataset(X, y)
lgb.train(
{'objective': 'binary', 'metric': 'auc'},
{"objective": "binary", "metric": "auc"},
lgb_data,
num_boost_round=10,
valid_sets=[lgb_data],
categorical_feature=[1]
categorical_feature=[1],
)
assert logged_messages, "custom logger was not called"
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment