"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "3ed0027bffe820ba6c81f32b82f02ef8302c6aad"
Unverified Commit 1b792e71, authored by James Lamb, committed by GitHub
Browse files

[ci] [python-package] enable ruff-format on tests and examples (#6317)

parent b60068c8
This diff is collapsed.
...@@ -10,7 +10,7 @@ def reset_feature_fraction(boosting_round): ...@@ -10,7 +10,7 @@ def reset_feature_fraction(boosting_round):
return 0.6 if boosting_round < 15 else 0.8 return 0.6 if boosting_round < 15 else 0.8
@pytest.mark.parametrize('serializer', SERIALIZERS) @pytest.mark.parametrize("serializer", SERIALIZERS)
def test_early_stopping_callback_is_picklable(serializer): def test_early_stopping_callback_is_picklable(serializer):
rounds = 5 rounds = 5
callback = lgb.early_stopping(stopping_rounds=rounds) callback = lgb.early_stopping(stopping_rounds=rounds)
...@@ -32,7 +32,7 @@ def test_early_stopping_callback_rejects_invalid_stopping_rounds_with_informativ ...@@ -32,7 +32,7 @@ def test_early_stopping_callback_rejects_invalid_stopping_rounds_with_informativ
lgb.early_stopping(stopping_rounds="neverrrr") lgb.early_stopping(stopping_rounds="neverrrr")
@pytest.mark.parametrize('serializer', SERIALIZERS) @pytest.mark.parametrize("serializer", SERIALIZERS)
def test_log_evaluation_callback_is_picklable(serializer): def test_log_evaluation_callback_is_picklable(serializer):
periods = 42 periods = 42
callback = lgb.log_evaluation(period=periods) callback = lgb.log_evaluation(period=periods)
...@@ -43,7 +43,7 @@ def test_log_evaluation_callback_is_picklable(serializer): ...@@ -43,7 +43,7 @@ def test_log_evaluation_callback_is_picklable(serializer):
assert callback.period == periods assert callback.period == periods
@pytest.mark.parametrize('serializer', SERIALIZERS) @pytest.mark.parametrize("serializer", SERIALIZERS)
def test_record_evaluation_callback_is_picklable(serializer): def test_record_evaluation_callback_is_picklable(serializer):
results = {} results = {}
callback = lgb.record_evaluation(eval_result=results) callback = lgb.record_evaluation(eval_result=results)
...@@ -54,12 +54,9 @@ def test_record_evaluation_callback_is_picklable(serializer): ...@@ -54,12 +54,9 @@ def test_record_evaluation_callback_is_picklable(serializer):
assert callback.eval_result is results assert callback.eval_result is results
@pytest.mark.parametrize('serializer', SERIALIZERS) @pytest.mark.parametrize("serializer", SERIALIZERS)
def test_reset_parameter_callback_is_picklable(serializer): def test_reset_parameter_callback_is_picklable(serializer):
params = { params = {"bagging_fraction": [0.7] * 5 + [0.6] * 5, "feature_fraction": reset_feature_fraction}
'bagging_fraction': [0.7] * 5 + [0.6] * 5,
'feature_fraction': reset_feature_fraction
}
callback = lgb.reset_parameter(**params) callback = lgb.reset_parameter(**params)
callback_from_disk = pickle_and_unpickle_object(obj=callback, serializer=serializer) callback_from_disk = pickle_and_unpickle_object(obj=callback, serializer=serializer)
assert callback_from_disk.order == 10 assert callback_from_disk.order == 10
......
...@@ -6,22 +6,21 @@ from sklearn.datasets import load_svmlight_file ...@@ -6,22 +6,21 @@ from sklearn.datasets import load_svmlight_file
import lightgbm as lgb import lightgbm as lgb
EXAMPLES_DIR = Path(__file__).absolute().parents[2] / 'examples' EXAMPLES_DIR = Path(__file__).absolute().parents[2] / "examples"
class FileLoader: class FileLoader:
def __init__(self, directory, prefix, config_file="train.conf"):
def __init__(self, directory, prefix, config_file='train.conf'):
self.directory = directory self.directory = directory
self.prefix = prefix self.prefix = prefix
self.params = {'gpu_use_dp': True} self.params = {"gpu_use_dp": True}
with open(self.directory / config_file, 'r') as f: with open(self.directory / config_file, "r") as f:
for line in f.readlines(): for line in f.readlines():
line = line.strip() line = line.strip()
if line and not line.startswith('#'): if line and not line.startswith("#"):
key, value = [token.strip() for token in line.split('=')] key, value = [token.strip() for token in line.split("=")]
if 'early_stopping' not in key: # disable early_stopping if "early_stopping" not in key: # disable early_stopping
self.params[key] = value if key not in {'num_trees', 'num_threads'} else int(value) self.params[key] = value if key not in {"num_trees", "num_threads"} else int(value)
def load_dataset(self, suffix, is_sparse=False): def load_dataset(self, suffix, is_sparse=False):
filename = str(self.path(suffix)) filename = str(self.path(suffix))
...@@ -33,14 +32,14 @@ class FileLoader: ...@@ -33,14 +32,14 @@ class FileLoader:
return mat[:, 1:], mat[:, 0], filename return mat[:, 1:], mat[:, 0], filename
def load_field(self, suffix): def load_field(self, suffix):
return np.loadtxt(str(self.directory / f'{self.prefix}{suffix}')) return np.loadtxt(str(self.directory / f"{self.prefix}{suffix}"))
def load_cpp_result(self, result_file='LightGBM_predict_result.txt'): def load_cpp_result(self, result_file="LightGBM_predict_result.txt"):
return np.loadtxt(str(self.directory / result_file)) return np.loadtxt(str(self.directory / result_file))
def train_predict_check(self, lgb_train, X_test, X_test_fn, sk_pred): def train_predict_check(self, lgb_train, X_test, X_test_fn, sk_pred):
params = dict(self.params) params = dict(self.params)
params['force_row_wise'] = True params["force_row_wise"] = True
gbm = lgb.train(params, lgb_train) gbm = lgb.train(params, lgb_train)
y_pred = gbm.predict(X_test) y_pred = gbm.predict(X_test)
cpp_pred = gbm.predict(X_test_fn) cpp_pred = gbm.predict(X_test_fn)
...@@ -49,7 +48,7 @@ class FileLoader: ...@@ -49,7 +48,7 @@ class FileLoader:
def file_load_check(self, lgb_train, name): def file_load_check(self, lgb_train, name):
lgb_train_f = lgb.Dataset(self.path(name), params=self.params).construct() lgb_train_f = lgb.Dataset(self.path(name), params=self.params).construct()
for f in ('num_data', 'num_feature', 'get_label', 'get_weight', 'get_init_score', 'get_group'): for f in ("num_data", "num_feature", "get_label", "get_weight", "get_init_score", "get_group"):
a = getattr(lgb_train, f)() a = getattr(lgb_train, f)()
b = getattr(lgb_train_f, f)() b = getattr(lgb_train_f, f)()
if a is None and b is None: if a is None and b is None:
...@@ -62,83 +61,83 @@ class FileLoader: ...@@ -62,83 +61,83 @@ class FileLoader:
assert a == b, f assert a == b, f
def path(self, suffix): def path(self, suffix):
return self.directory / f'{self.prefix}{suffix}' return self.directory / f"{self.prefix}{suffix}"
def test_binary(): def test_binary():
fd = FileLoader(EXAMPLES_DIR / 'binary_classification', 'binary') fd = FileLoader(EXAMPLES_DIR / "binary_classification", "binary")
X_train, y_train, _ = fd.load_dataset('.train') X_train, y_train, _ = fd.load_dataset(".train")
X_test, _, X_test_fn = fd.load_dataset('.test') X_test, _, X_test_fn = fd.load_dataset(".test")
weight_train = fd.load_field('.train.weight') weight_train = fd.load_field(".train.weight")
lgb_train = lgb.Dataset(X_train, y_train, params=fd.params, weight=weight_train) lgb_train = lgb.Dataset(X_train, y_train, params=fd.params, weight=weight_train)
gbm = lgb.LGBMClassifier(**fd.params) gbm = lgb.LGBMClassifier(**fd.params)
gbm.fit(X_train, y_train, sample_weight=weight_train) gbm.fit(X_train, y_train, sample_weight=weight_train)
sk_pred = gbm.predict_proba(X_test)[:, 1] sk_pred = gbm.predict_proba(X_test)[:, 1]
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred) fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
fd.file_load_check(lgb_train, '.train') fd.file_load_check(lgb_train, ".train")
def test_binary_linear(): def test_binary_linear():
fd = FileLoader(EXAMPLES_DIR / 'binary_classification', 'binary', 'train_linear.conf') fd = FileLoader(EXAMPLES_DIR / "binary_classification", "binary", "train_linear.conf")
X_train, y_train, _ = fd.load_dataset('.train') X_train, y_train, _ = fd.load_dataset(".train")
X_test, _, X_test_fn = fd.load_dataset('.test') X_test, _, X_test_fn = fd.load_dataset(".test")
weight_train = fd.load_field('.train.weight') weight_train = fd.load_field(".train.weight")
lgb_train = lgb.Dataset(X_train, y_train, params=fd.params, weight=weight_train) lgb_train = lgb.Dataset(X_train, y_train, params=fd.params, weight=weight_train)
gbm = lgb.LGBMClassifier(**fd.params) gbm = lgb.LGBMClassifier(**fd.params)
gbm.fit(X_train, y_train, sample_weight=weight_train) gbm.fit(X_train, y_train, sample_weight=weight_train)
sk_pred = gbm.predict_proba(X_test)[:, 1] sk_pred = gbm.predict_proba(X_test)[:, 1]
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred) fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
fd.file_load_check(lgb_train, '.train') fd.file_load_check(lgb_train, ".train")
def test_multiclass(): def test_multiclass():
fd = FileLoader(EXAMPLES_DIR / 'multiclass_classification', 'multiclass') fd = FileLoader(EXAMPLES_DIR / "multiclass_classification", "multiclass")
X_train, y_train, _ = fd.load_dataset('.train') X_train, y_train, _ = fd.load_dataset(".train")
X_test, _, X_test_fn = fd.load_dataset('.test') X_test, _, X_test_fn = fd.load_dataset(".test")
lgb_train = lgb.Dataset(X_train, y_train) lgb_train = lgb.Dataset(X_train, y_train)
gbm = lgb.LGBMClassifier(**fd.params) gbm = lgb.LGBMClassifier(**fd.params)
gbm.fit(X_train, y_train) gbm.fit(X_train, y_train)
sk_pred = gbm.predict_proba(X_test) sk_pred = gbm.predict_proba(X_test)
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred) fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
fd.file_load_check(lgb_train, '.train') fd.file_load_check(lgb_train, ".train")
def test_regression(): def test_regression():
fd = FileLoader(EXAMPLES_DIR / 'regression', 'regression') fd = FileLoader(EXAMPLES_DIR / "regression", "regression")
X_train, y_train, _ = fd.load_dataset('.train') X_train, y_train, _ = fd.load_dataset(".train")
X_test, _, X_test_fn = fd.load_dataset('.test') X_test, _, X_test_fn = fd.load_dataset(".test")
init_score_train = fd.load_field('.train.init') init_score_train = fd.load_field(".train.init")
lgb_train = lgb.Dataset(X_train, y_train, init_score=init_score_train) lgb_train = lgb.Dataset(X_train, y_train, init_score=init_score_train)
gbm = lgb.LGBMRegressor(**fd.params) gbm = lgb.LGBMRegressor(**fd.params)
gbm.fit(X_train, y_train, init_score=init_score_train) gbm.fit(X_train, y_train, init_score=init_score_train)
sk_pred = gbm.predict(X_test) sk_pred = gbm.predict(X_test)
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred) fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
fd.file_load_check(lgb_train, '.train') fd.file_load_check(lgb_train, ".train")
def test_lambdarank(): def test_lambdarank():
fd = FileLoader(EXAMPLES_DIR / 'lambdarank', 'rank') fd = FileLoader(EXAMPLES_DIR / "lambdarank", "rank")
X_train, y_train, _ = fd.load_dataset('.train', is_sparse=True) X_train, y_train, _ = fd.load_dataset(".train", is_sparse=True)
X_test, _, X_test_fn = fd.load_dataset('.test', is_sparse=True) X_test, _, X_test_fn = fd.load_dataset(".test", is_sparse=True)
group_train = fd.load_field('.train.query') group_train = fd.load_field(".train.query")
lgb_train = lgb.Dataset(X_train, y_train, group=group_train) lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
params = dict(fd.params) params = dict(fd.params)
params['force_col_wise'] = True params["force_col_wise"] = True
gbm = lgb.LGBMRanker(**params) gbm = lgb.LGBMRanker(**params)
gbm.fit(X_train, y_train, group=group_train) gbm.fit(X_train, y_train, group=group_train)
sk_pred = gbm.predict(X_test) sk_pred = gbm.predict(X_test)
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred) fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
fd.file_load_check(lgb_train, '.train') fd.file_load_check(lgb_train, ".train")
def test_xendcg(): def test_xendcg():
fd = FileLoader(EXAMPLES_DIR / 'xendcg', 'rank') fd = FileLoader(EXAMPLES_DIR / "xendcg", "rank")
X_train, y_train, _ = fd.load_dataset('.train', is_sparse=True) X_train, y_train, _ = fd.load_dataset(".train", is_sparse=True)
X_test, _, X_test_fn = fd.load_dataset('.test', is_sparse=True) X_test, _, X_test_fn = fd.load_dataset(".test", is_sparse=True)
group_train = fd.load_field('.train.query') group_train = fd.load_field(".train.query")
lgb_train = lgb.Dataset(X_train, y_train, group=group_train) lgb_train = lgb.Dataset(X_train, y_train, group=group_train)
gbm = lgb.LGBMRanker(**fd.params) gbm = lgb.LGBMRanker(**fd.params)
gbm.fit(X_train, y_train, group=group_train) gbm.fit(X_train, y_train, group=group_train)
sk_pred = gbm.predict(X_test) sk_pred = gbm.predict(X_test)
fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred) fd.train_predict_check(lgb_train, X_test, X_test_fn, sk_pred)
fd.file_load_check(lgb_train, '.train') fd.file_load_check(lgb_train, ".train")
This diff is collapsed.
...@@ -28,7 +28,7 @@ def test_cpu_and_gpu_work(): ...@@ -28,7 +28,7 @@ def test_cpu_and_gpu_work():
params_gpu = params_cpu.copy() params_gpu = params_cpu.copy()
params_gpu["device"] = "gpu" params_gpu["device"] = "gpu"
# Double-precision floats are only supported on x86_64 with PoCL # Double-precision floats are only supported on x86_64 with PoCL
params_gpu["gpu_use_dp"] = (platform.machine() == "x86_64") params_gpu["gpu_use_dp"] = platform.machine() == "x86_64"
gpu_bst = lgb.train(params_gpu, data, num_boost_round=10) gpu_bst = lgb.train(params_gpu, data, num_boost_round=10)
gpu_score = log_loss(y, gpu_bst.predict(X)) gpu_score = log_loss(y, gpu_bst.predict(X))
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -10,7 +10,7 @@ import lightgbm as lgb ...@@ -10,7 +10,7 @@ import lightgbm as lgb
def test_register_logger(tmp_path): def test_register_logger(tmp_path):
logger = logging.getLogger("LightGBM") logger = logging.getLogger("LightGBM")
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(levelname)s | %(message)s') formatter = logging.Formatter("%(levelname)s | %(message)s")
log_filename = tmp_path / "LightGBM_test_logger.log" log_filename = tmp_path / "LightGBM_test_logger.log"
file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8") file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8")
file_handler.setLevel(logging.DEBUG) file_handler.setLevel(logging.DEBUG)
...@@ -18,29 +18,27 @@ def test_register_logger(tmp_path): ...@@ -18,29 +18,27 @@ def test_register_logger(tmp_path):
logger.addHandler(file_handler) logger.addHandler(file_handler)
def dummy_metric(_, __): def dummy_metric(_, __):
logger.debug('In dummy_metric') logger.debug("In dummy_metric")
return 'dummy_metric', 1, True return "dummy_metric", 1, True
lgb.register_logger(logger) lgb.register_logger(logger)
X = np.array([[1, 2, 3], X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
[1, 2, 4],
[1, 2, 4],
[1, 2, 3]],
dtype=np.float32)
y = np.array([0, 1, 1, 0]) y = np.array([0, 1, 1, 0])
lgb_train = lgb.Dataset(X, y) lgb_train = lgb.Dataset(X, y)
lgb_valid = lgb.Dataset(X, y) # different object for early-stopping lgb_valid = lgb.Dataset(X, y) # different object for early-stopping
eval_records = {} eval_records = {}
callbacks = [ callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)]
lgb.record_evaluation(eval_records), lgb.train(
lgb.log_evaluation(2), {"objective": "binary", "metric": ["auc", "binary_error"]},
lgb.early_stopping(10) lgb_train,
] num_boost_round=10,
lgb.train({'objective': 'binary', 'metric': ['auc', 'binary_error']}, feval=dummy_metric,
lgb_train, num_boost_round=10, feval=dummy_metric, valid_sets=[lgb_valid],
valid_sets=[lgb_valid], categorical_feature=[1], callbacks=callbacks) categorical_feature=[1],
callbacks=callbacks,
)
lgb.plot_metric(eval_records) lgb.plot_metric(eval_records)
...@@ -89,7 +87,7 @@ WARNING | More than one metric available, picking one to plot. ...@@ -89,7 +87,7 @@ WARNING | More than one metric available, picking one to plot.
"INFO | [LightGBM] [Warning] GPU acceleration is disabled because no non-trivial dense features can be found", "INFO | [LightGBM] [Warning] GPU acceleration is disabled because no non-trivial dense features can be found",
"INFO | [LightGBM] [Warning] Using sparse features with CUDA is currently not supported.", "INFO | [LightGBM] [Warning] Using sparse features with CUDA is currently not supported.",
"INFO | [LightGBM] [Warning] CUDA currently requires double precision calculations.", "INFO | [LightGBM] [Warning] CUDA currently requires double precision calculations.",
"INFO | [LightGBM] [Info] LightGBM using CUDA trainer with DP float!!" "INFO | [LightGBM] [Info] LightGBM using CUDA trainer with DP float!!",
] ]
cuda_lines = [ cuda_lines = [
"INFO | [LightGBM] [Warning] Metric auc is not implemented in cuda version. Fall back to evaluation on CPU.", "INFO | [LightGBM] [Warning] Metric auc is not implemented in cuda version. Fall back to evaluation on CPU.",
...@@ -142,11 +140,7 @@ def test_register_custom_logger(): ...@@ -142,11 +140,7 @@ def test_register_custom_logger():
logged_messages.append(msg) logged_messages.append(msg)
custom_logger = CustomLogger() custom_logger = CustomLogger()
lgb.register_logger( lgb.register_logger(custom_logger, info_method_name="custom_info", warning_method_name="custom_warning")
custom_logger,
info_method_name="custom_info",
warning_method_name="custom_warning"
)
lgb.basic._log_info("info message") lgb.basic._log_info("info message")
lgb.basic._log_warning("warning message") lgb.basic._log_warning("warning message")
...@@ -155,18 +149,14 @@ def test_register_custom_logger(): ...@@ -155,18 +149,14 @@ def test_register_custom_logger():
assert logged_messages == expected_log assert logged_messages == expected_log
logged_messages = [] logged_messages = []
X = np.array([[1, 2, 3], X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32)
[1, 2, 4],
[1, 2, 4],
[1, 2, 3]],
dtype=np.float32)
y = np.array([0, 1, 1, 0]) y = np.array([0, 1, 1, 0])
lgb_data = lgb.Dataset(X, y) lgb_data = lgb.Dataset(X, y)
lgb.train( lgb.train(
{'objective': 'binary', 'metric': 'auc'}, {"objective": "binary", "metric": "auc"},
lgb_data, lgb_data,
num_boost_round=10, num_boost_round=10,
valid_sets=[lgb_data], valid_sets=[lgb_data],
categorical_feature=[1] categorical_feature=[1],
) )
assert logged_messages, "custom logger was not called" assert logged_messages, "custom logger was not called"
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment