"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "1bd3d7e3500480525fb3f4443b48edc4053305f8"
Unverified Commit 1b792e71 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[ci] [python-package] enable ruff-format on tests and examples (#6317)

parent b60068c8
...@@ -7,6 +7,12 @@ exclude: | ...@@ -7,6 +7,12 @@ exclude: |
)$ )$
repos: repos:
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--settings-path", "python-package/pyproject.toml"]
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version. # Ruff version.
rev: v0.2.1 rev: v0.2.1
...@@ -14,12 +20,8 @@ repos: ...@@ -14,12 +20,8 @@ repos:
# Run the linter. # Run the linter.
- id: ruff - id: ruff
args: ["--config", "python-package/pyproject.toml"] args: ["--config", "python-package/pyproject.toml"]
types_or: [python, jupyter]
# Run the formatter. # Run the formatter.
- id: ruff-format - id: ruff-format
args: ["--config", "python-package/pyproject.toml"] args: ["--config", "python-package/pyproject.toml"]
- repo: https://github.com/pycqa/isort types_or: [python, jupyter]
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
args: ["--settings-path", "python-package/pyproject.toml"]
...@@ -10,13 +10,13 @@ from sklearn.metrics import roc_auc_score ...@@ -10,13 +10,13 @@ from sklearn.metrics import roc_auc_score
import lightgbm as lgb import lightgbm as lgb
print('Loading data...') print("Loading data...")
# load or create your dataset # load or create your dataset
binary_example_dir = Path(__file__).absolute().parents[1] / 'binary_classification' binary_example_dir = Path(__file__).absolute().parents[1] / "binary_classification"
df_train = pd.read_csv(str(binary_example_dir / 'binary.train'), header=None, sep='\t') df_train = pd.read_csv(str(binary_example_dir / "binary.train"), header=None, sep="\t")
df_test = pd.read_csv(str(binary_example_dir / 'binary.test'), header=None, sep='\t') df_test = pd.read_csv(str(binary_example_dir / "binary.test"), header=None, sep="\t")
W_train = pd.read_csv(str(binary_example_dir / 'binary.train.weight'), header=None)[0] W_train = pd.read_csv(str(binary_example_dir / "binary.train.weight"), header=None)[0]
W_test = pd.read_csv(str(binary_example_dir / 'binary.test.weight'), header=None)[0] W_test = pd.read_csv(str(binary_example_dir / "binary.test.weight"), header=None)[0]
y_train = df_train[0] y_train = df_train[0]
y_test = df_test[0] y_test = df_test[0]
...@@ -27,72 +27,72 @@ num_train, num_feature = X_train.shape ...@@ -27,72 +27,72 @@ num_train, num_feature = X_train.shape
# create dataset for lightgbm # create dataset for lightgbm
# if you want to re-use data, remember to set free_raw_data=False # if you want to re-use data, remember to set free_raw_data=False
lgb_train = lgb.Dataset(X_train, y_train, lgb_train = lgb.Dataset(X_train, y_train, weight=W_train, free_raw_data=False)
weight=W_train, free_raw_data=False) lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train,
weight=W_test, free_raw_data=False)
# specify your configurations as a dict # specify your configurations as a dict
params = { params = {
'boosting_type': 'gbdt', "boosting_type": "gbdt",
'objective': 'binary', "objective": "binary",
'metric': 'binary_logloss', "metric": "binary_logloss",
'num_leaves': 31, "num_leaves": 31,
'learning_rate': 0.05, "learning_rate": 0.05,
'feature_fraction': 0.9, "feature_fraction": 0.9,
'bagging_fraction': 0.8, "bagging_fraction": 0.8,
'bagging_freq': 5, "bagging_freq": 5,
'verbose': 0 "verbose": 0,
} }
# generate feature names # generate feature names
feature_name = [f'feature_{col}' for col in range(num_feature)] feature_name = [f"feature_{col}" for col in range(num_feature)]
print('Starting training...') print("Starting training...")
# feature_name and categorical_feature # feature_name and categorical_feature
gbm = lgb.train(params, gbm = lgb.train(
lgb_train, params,
num_boost_round=10, lgb_train,
valid_sets=lgb_train, # eval training data num_boost_round=10,
feature_name=feature_name, valid_sets=lgb_train, # eval training data
categorical_feature=[21]) feature_name=feature_name,
categorical_feature=[21],
print('Finished first 10 rounds...') )
print("Finished first 10 rounds...")
# check feature name # check feature name
print(f'7th feature name is: {lgb_train.feature_name[6]}') print(f"7th feature name is: {lgb_train.feature_name[6]}")
print('Saving model...') print("Saving model...")
# save model to file # save model to file
gbm.save_model('model.txt') gbm.save_model("model.txt")
print('Dumping model to JSON...') print("Dumping model to JSON...")
# dump model to JSON (and save to file) # dump model to JSON (and save to file)
model_json = gbm.dump_model() model_json = gbm.dump_model()
with open('model.json', 'w+') as f: with open("model.json", "w+") as f:
json.dump(model_json, f, indent=4) json.dump(model_json, f, indent=4)
# feature names # feature names
print(f'Feature names: {gbm.feature_name()}') print(f"Feature names: {gbm.feature_name()}")
# feature importances # feature importances
print(f'Feature importances: {list(gbm.feature_importance())}') print(f"Feature importances: {list(gbm.feature_importance())}")
print('Loading model to predict...') print("Loading model to predict...")
# load model to predict # load model to predict
bst = lgb.Booster(model_file='model.txt') bst = lgb.Booster(model_file="model.txt")
# can only predict with the best iteration (or the saving iteration) # can only predict with the best iteration (or the saving iteration)
y_pred = bst.predict(X_test) y_pred = bst.predict(X_test)
# eval with loaded model # eval with loaded model
auc_loaded_model = roc_auc_score(y_test, y_pred) auc_loaded_model = roc_auc_score(y_test, y_pred)
print(f"The ROC AUC of loaded model's prediction is: {auc_loaded_model}") print(f"The ROC AUC of loaded model's prediction is: {auc_loaded_model}")
print('Dumping and loading model with pickle...') print("Dumping and loading model with pickle...")
# dump model with pickle # dump model with pickle
with open('model.pkl', 'wb') as fout: with open("model.pkl", "wb") as fout:
pickle.dump(gbm, fout) pickle.dump(gbm, fout)
# load model with pickle to predict # load model with pickle to predict
with open('model.pkl', 'rb') as fin: with open("model.pkl", "rb") as fin:
pkl_bst = pickle.load(fin) pkl_bst = pickle.load(fin)
# can predict with any iteration when loaded in pickle way # can predict with any iteration when loaded in pickle way
y_pred = pkl_bst.predict(X_test, num_iteration=7) y_pred = pkl_bst.predict(X_test, num_iteration=7)
...@@ -104,36 +104,36 @@ print(f"The ROC AUC of pickled model's prediction is: {auc_pickled_model}") ...@@ -104,36 +104,36 @@ print(f"The ROC AUC of pickled model's prediction is: {auc_pickled_model}")
# init_model accepts: # init_model accepts:
# 1. model file name # 1. model file name
# 2. Booster() # 2. Booster()
gbm = lgb.train(params, gbm = lgb.train(params, lgb_train, num_boost_round=10, init_model="model.txt", valid_sets=lgb_eval)
lgb_train,
num_boost_round=10,
init_model='model.txt',
valid_sets=lgb_eval)
print('Finished 10 - 20 rounds with model file...') print("Finished 10 - 20 rounds with model file...")
# decay learning rates # decay learning rates
# reset_parameter callback accepts: # reset_parameter callback accepts:
# 1. list with length = num_boost_round # 1. list with length = num_boost_round
# 2. function(curr_iter) # 2. function(curr_iter)
gbm = lgb.train(params, gbm = lgb.train(
lgb_train, params,
num_boost_round=10, lgb_train,
init_model=gbm, num_boost_round=10,
valid_sets=lgb_eval, init_model=gbm,
callbacks=[lgb.reset_parameter(learning_rate=lambda iter: 0.05 * (0.99 ** iter))]) valid_sets=lgb_eval,
callbacks=[lgb.reset_parameter(learning_rate=lambda iter: 0.05 * (0.99**iter))],
)
print('Finished 20 - 30 rounds with decay learning rates...') print("Finished 20 - 30 rounds with decay learning rates...")
# change other parameters during training # change other parameters during training
gbm = lgb.train(params, gbm = lgb.train(
lgb_train, params,
num_boost_round=10, lgb_train,
init_model=gbm, num_boost_round=10,
valid_sets=lgb_eval, init_model=gbm,
callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)]) valid_sets=lgb_eval,
callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)],
)
print('Finished 30 - 40 rounds with changing bagging_fraction...') print("Finished 30 - 40 rounds with changing bagging_fraction...")
# self-defined objective function # self-defined objective function
...@@ -141,9 +141,9 @@ print('Finished 30 - 40 rounds with changing bagging_fraction...') ...@@ -141,9 +141,9 @@ print('Finished 30 - 40 rounds with changing bagging_fraction...')
# log likelihood loss # log likelihood loss
def loglikelihood(preds, train_data): def loglikelihood(preds, train_data):
labels = train_data.get_label() labels = train_data.get_label()
preds = 1. / (1. + np.exp(-preds)) preds = 1.0 / (1.0 + np.exp(-preds))
grad = preds - labels grad = preds - labels
hess = preds * (1. - preds) hess = preds * (1.0 - preds)
return grad, hess return grad, hess
...@@ -156,22 +156,19 @@ def loglikelihood(preds, train_data): ...@@ -156,22 +156,19 @@ def loglikelihood(preds, train_data):
# Keep this in mind when you use the customization # Keep this in mind when you use the customization
def binary_error(preds, train_data): def binary_error(preds, train_data):
labels = train_data.get_label() labels = train_data.get_label()
preds = 1. / (1. + np.exp(-preds)) preds = 1.0 / (1.0 + np.exp(-preds))
return 'error', np.mean(labels != (preds > 0.5)), False return "error", np.mean(labels != (preds > 0.5)), False
# Pass custom objective function through params # Pass custom objective function through params
params_custom_obj = copy.deepcopy(params) params_custom_obj = copy.deepcopy(params)
params_custom_obj['objective'] = loglikelihood params_custom_obj["objective"] = loglikelihood
gbm = lgb.train(params_custom_obj, gbm = lgb.train(
lgb_train, params_custom_obj, lgb_train, num_boost_round=10, init_model=gbm, feval=binary_error, valid_sets=lgb_eval
num_boost_round=10, )
init_model=gbm,
feval=binary_error,
valid_sets=lgb_eval)
print('Finished 40 - 50 rounds with self-defined objective function and eval metric...') print("Finished 40 - 50 rounds with self-defined objective function and eval metric...")
# another self-defined eval metric # another self-defined eval metric
...@@ -183,24 +180,26 @@ print('Finished 40 - 50 rounds with self-defined objective function and eval met ...@@ -183,24 +180,26 @@ print('Finished 40 - 50 rounds with self-defined objective function and eval met
# Keep this in mind when you use the customization # Keep this in mind when you use the customization
def accuracy(preds, train_data): def accuracy(preds, train_data):
labels = train_data.get_label() labels = train_data.get_label()
preds = 1. / (1. + np.exp(-preds)) preds = 1.0 / (1.0 + np.exp(-preds))
return 'accuracy', np.mean(labels == (preds > 0.5)), True return "accuracy", np.mean(labels == (preds > 0.5)), True
# Pass custom objective function through params # Pass custom objective function through params
params_custom_obj = copy.deepcopy(params) params_custom_obj = copy.deepcopy(params)
params_custom_obj['objective'] = loglikelihood params_custom_obj["objective"] = loglikelihood
gbm = lgb.train(params_custom_obj, gbm = lgb.train(
lgb_train, params_custom_obj,
num_boost_round=10, lgb_train,
init_model=gbm, num_boost_round=10,
feval=[binary_error, accuracy], init_model=gbm,
valid_sets=lgb_eval) feval=[binary_error, accuracy],
valid_sets=lgb_eval,
)
print('Finished 50 - 60 rounds with self-defined objective function and multiple self-defined eval metrics...') print("Finished 50 - 60 rounds with self-defined objective function and multiple self-defined eval metrics...")
print('Starting a new training job...') print("Starting a new training job...")
# callback # callback
...@@ -208,17 +207,14 @@ def reset_metrics(): ...@@ -208,17 +207,14 @@ def reset_metrics():
def callback(env): def callback(env):
lgb_eval_new = lgb.Dataset(X_test, y_test, reference=lgb_train) lgb_eval_new = lgb.Dataset(X_test, y_test, reference=lgb_train)
if env.iteration - env.begin_iteration == 5: if env.iteration - env.begin_iteration == 5:
print('Add a new valid dataset at iteration 5...') print("Add a new valid dataset at iteration 5...")
env.model.add_valid(lgb_eval_new, 'new_valid') env.model.add_valid(lgb_eval_new, "new_valid")
callback.before_iteration = True callback.before_iteration = True
callback.order = 0 callback.order = 0
return callback return callback
gbm = lgb.train(params, gbm = lgb.train(params, lgb_train, num_boost_round=10, valid_sets=lgb_train, callbacks=[reset_metrics()])
lgb_train,
num_boost_round=10,
valid_sets=lgb_train,
callbacks=[reset_metrics()])
print('Finished first 10 rounds with callback function...') print("Finished first 10 rounds with callback function...")
...@@ -10,9 +10,9 @@ import lightgbm as lgb ...@@ -10,9 +10,9 @@ import lightgbm as lgb
if __name__ == "__main__": if __name__ == "__main__":
print("loading data") print("loading data")
rank_example_dir = Path(__file__).absolute().parents[2] / 'lambdarank' rank_example_dir = Path(__file__).absolute().parents[2] / "lambdarank"
X, y = load_svmlight_file(str(rank_example_dir / 'rank.train')) X, y = load_svmlight_file(str(rank_example_dir / "rank.train"))
group = np.loadtxt(str(rank_example_dir / 'rank.train.query')) group = np.loadtxt(str(rank_example_dir / "rank.train.query"))
print("initializing a Dask cluster") print("initializing a Dask cluster")
...@@ -32,25 +32,14 @@ if __name__ == "__main__": ...@@ -32,25 +32,14 @@ if __name__ == "__main__":
# a sparse boundary to partition the data # a sparse boundary to partition the data
X = X.toarray() X = X.toarray()
dX = da.from_array( dX = da.from_array(x=X, chunks=[(rows_in_part1, rows_in_part2), (num_features,)])
x=X,
chunks=[
(rows_in_part1, rows_in_part2),
(num_features,)
]
)
dy = da.from_array( dy = da.from_array(
x=y, x=y,
chunks=[ chunks=[
(rows_in_part1, rows_in_part2), (rows_in_part1, rows_in_part2),
] ],
)
dg = da.from_array(
x=group,
chunks=[
(100, group.size - 100)
]
) )
dg = da.from_array(x=group, chunks=[(100, group.size - 100)])
print("beginning training") print("beginning training")
......
...@@ -34,13 +34,13 @@ def create_dataset_from_multiple_hdf(input_flist, batch_size): ...@@ -34,13 +34,13 @@ def create_dataset_from_multiple_hdf(input_flist, batch_size):
data = [] data = []
ylist = [] ylist = []
for f in input_flist: for f in input_flist:
f = h5py.File(f, 'r') f = h5py.File(f, "r")
data.append(HDFSequence(f['X'], batch_size)) data.append(HDFSequence(f["X"], batch_size))
ylist.append(f['Y'][:]) ylist.append(f["Y"][:])
params = { params = {
'bin_construct_sample_cnt': 200000, "bin_construct_sample_cnt": 200000,
'max_bin': 255, "max_bin": 255,
} }
y = np.concatenate(ylist) y = np.concatenate(ylist)
dataset = lgb.Dataset(data, label=y, params=params) dataset = lgb.Dataset(data, label=y, params=params)
...@@ -51,7 +51,7 @@ def create_dataset_from_multiple_hdf(input_flist, batch_size): ...@@ -51,7 +51,7 @@ def create_dataset_from_multiple_hdf(input_flist, batch_size):
# The reason is that DataFrame column names will be used in Dataset. For a DataFrame with Int64Index # The reason is that DataFrame column names will be used in Dataset. For a DataFrame with Int64Index
# as columns, Dataset will use column names like ["0", "1", "2", ...]. While for numpy array, column names # as columns, Dataset will use column names like ["0", "1", "2", ...]. While for numpy array, column names
# are using the default one assigned in C++ code (dataset_loader.cpp), like ["Column_0", "Column_1", ...]. # are using the default one assigned in C++ code (dataset_loader.cpp), like ["Column_0", "Column_1", ...].
dataset.save_binary('regression.train.from_hdf.bin') dataset.save_binary("regression.train.from_hdf.bin")
def save2hdf(input_data, fname, batch_size): def save2hdf(input_data, fname, batch_size):
...@@ -59,7 +59,7 @@ def save2hdf(input_data, fname, batch_size): ...@@ -59,7 +59,7 @@ def save2hdf(input_data, fname, batch_size):
Please note chunk size settings in the implementation for I/O performance optimization. Please note chunk size settings in the implementation for I/O performance optimization.
""" """
with h5py.File(fname, 'w') as f: with h5py.File(fname, "w") as f:
for name, data in input_data.items(): for name, data in input_data.items():
nrow, ncol = data.shape nrow, ncol = data.shape
if ncol == 1: if ncol == 1:
...@@ -75,12 +75,12 @@ def save2hdf(input_data, fname, batch_size): ...@@ -75,12 +75,12 @@ def save2hdf(input_data, fname, batch_size):
# Also note that the data is stored in row major order to avoid extra copy when passing to # Also note that the data is stored in row major order to avoid extra copy when passing to
# lightgbm Dataset. # lightgbm Dataset.
chunk = (batch_size, ncol) chunk = (batch_size, ncol)
f.create_dataset(name, data=data, chunks=chunk, compression='lzf') f.create_dataset(name, data=data, chunks=chunk, compression="lzf")
def generate_hdf(input_fname, output_basename, batch_size): def generate_hdf(input_fname, output_basename, batch_size):
# Save to 2 HDF5 files for demonstration. # Save to 2 HDF5 files for demonstration.
df = pd.read_csv(input_fname, header=None, sep='\t') df = pd.read_csv(input_fname, header=None, sep="\t")
mid = len(df) // 2 mid = len(df) // 2
df1 = df.iloc[:mid] df1 = df.iloc[:mid]
...@@ -88,25 +88,23 @@ def generate_hdf(input_fname, output_basename, batch_size): ...@@ -88,25 +88,23 @@ def generate_hdf(input_fname, output_basename, batch_size):
# We can store multiple datasets inside a single HDF5 file. # We can store multiple datasets inside a single HDF5 file.
# Separating X and Y for choosing best chunk size for data loading. # Separating X and Y for choosing best chunk size for data loading.
fname1 = f'{output_basename}1.h5' fname1 = f"{output_basename}1.h5"
fname2 = f'{output_basename}2.h5' fname2 = f"{output_basename}2.h5"
save2hdf({'Y': df1.iloc[:, :1], 'X': df1.iloc[:, 1:]}, fname1, batch_size) save2hdf({"Y": df1.iloc[:, :1], "X": df1.iloc[:, 1:]}, fname1, batch_size)
save2hdf({'Y': df2.iloc[:, :1], 'X': df2.iloc[:, 1:]}, fname2, batch_size) save2hdf({"Y": df2.iloc[:, :1], "X": df2.iloc[:, 1:]}, fname2, batch_size)
return [fname1, fname2] return [fname1, fname2]
def main(): def main():
batch_size = 64 batch_size = 64
output_basename = 'regression' output_basename = "regression"
hdf_files = generate_hdf( hdf_files = generate_hdf(
str(Path(__file__).absolute().parents[1] / 'regression' / 'regression.train'), str(Path(__file__).absolute().parents[1] / "regression" / "regression.train"), output_basename, batch_size
output_basename,
batch_size
) )
create_dataset_from_multiple_hdf(hdf_files, batch_size=batch_size) create_dataset_from_multiple_hdf(hdf_files, batch_size=batch_size)
if __name__ == '__main__': if __name__ == "__main__":
main() main()
...@@ -24,23 +24,19 @@ import lightgbm as lgb ...@@ -24,23 +24,19 @@ import lightgbm as lgb
# single continuous predictor # single continuous predictor
np.random.seed(0) np.random.seed(0)
N = 1000 N = 1000
X = pd.DataFrame({ X = pd.DataFrame({"continuous": range(N), "categorical": np.repeat([0, 1, 2, 3, 4], N / 5)})
'continuous': range(N),
'categorical': np.repeat([0, 1, 2, 3, 4], N / 5)
})
CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2] CATEGORICAL_EFFECTS = [-1, -1, -2, -2, 2]
LINEAR_TERM = np.array([ LINEAR_TERM = np.array(
-0.5 + 0.01 * X['continuous'][k] [-0.5 + 0.01 * X["continuous"][k] + CATEGORICAL_EFFECTS[X["categorical"][k]] for k in range(X.shape[0])]
+ CATEGORICAL_EFFECTS[X['categorical'][k]] for k in range(X.shape[0]) ) + np.random.normal(0, 1, X.shape[0])
]) + np.random.normal(0, 1, X.shape[0])
TRUE_PROB = expit(LINEAR_TERM) TRUE_PROB = expit(LINEAR_TERM)
Y = np.random.binomial(1, TRUE_PROB, size=N) Y = np.random.binomial(1, TRUE_PROB, size=N)
DATA = { DATA = {
'X': X, "X": X,
'probability_labels': TRUE_PROB, "probability_labels": TRUE_PROB,
'binary_labels': Y, "binary_labels": Y,
'lgb_with_binary_labels': lgb.Dataset(X, Y), "lgb_with_binary_labels": lgb.Dataset(X, Y),
'lgb_with_probability_labels': lgb.Dataset(X, TRUE_PROB), "lgb_with_probability_labels": lgb.Dataset(X, TRUE_PROB),
} }
...@@ -72,34 +68,25 @@ def experiment(objective, label_type, data): ...@@ -72,34 +68,25 @@ def experiment(objective, label_type, data):
np.random.seed(0) np.random.seed(0)
nrounds = 5 nrounds = 5
lgb_data = data[f"lgb_with_{label_type}_labels"] lgb_data = data[f"lgb_with_{label_type}_labels"]
params = { params = {"objective": objective, "feature_fraction": 1, "bagging_fraction": 1, "verbose": -1}
'objective': objective,
'feature_fraction': 1,
'bagging_fraction': 1,
'verbose': -1
}
time_zero = time.time() time_zero = time.time()
gbm = lgb.train(params, lgb_data, num_boost_round=nrounds) gbm = lgb.train(params, lgb_data, num_boost_round=nrounds)
y_fitted = gbm.predict(data['X']) y_fitted = gbm.predict(data["X"])
y_true = data[f"{label_type}_labels"] y_true = data[f"{label_type}_labels"]
duration = time.time() - time_zero duration = time.time() - time_zero
return { return {"time": duration, "correlation": np.corrcoef(y_fitted, y_true)[0, 1], "logloss": log_loss(y_fitted, y_true)}
'time': duration,
'correlation': np.corrcoef(y_fitted, y_true)[0, 1],
'logloss': log_loss(y_fitted, y_true)
}
################# #################
# Observe the behavior of `binary` and `xentropy` objectives # Observe the behavior of `binary` and `xentropy` objectives
print('Performance of `binary` objective with binary labels:') print("Performance of `binary` objective with binary labels:")
print(experiment('binary', label_type='binary', data=DATA)) print(experiment("binary", label_type="binary", data=DATA))
print('Performance of `xentropy` objective with binary labels:') print("Performance of `xentropy` objective with binary labels:")
print(experiment('xentropy', label_type='binary', data=DATA)) print(experiment("xentropy", label_type="binary", data=DATA))
print('Performance of `xentropy` objective with probability labels:') print("Performance of `xentropy` objective with probability labels:")
print(experiment('xentropy', label_type='probability', data=DATA)) print(experiment("xentropy", label_type="probability", data=DATA))
# Trying this throws an error on non-binary values of y: # Trying this throws an error on non-binary values of y:
# experiment('binary', label_type='probability', DATA) # experiment('binary', label_type='probability', DATA)
...@@ -109,9 +96,7 @@ print(experiment('xentropy', label_type='probability', data=DATA)) ...@@ -109,9 +96,7 @@ print(experiment('xentropy', label_type='probability', data=DATA))
# there are reasons to suspect that `binary` should run faster when the # there are reasons to suspect that `binary` should run faster when the
# label is an integer instead of a float # label is an integer instead of a float
K = 10 K = 10
A = [experiment('binary', label_type='binary', data=DATA)['time'] A = [experiment("binary", label_type="binary", data=DATA)["time"] for k in range(K)]
for k in range(K)] B = [experiment("xentropy", label_type="binary", data=DATA)["time"] for k in range(K)]
B = [experiment('xentropy', label_type='binary', data=DATA)['time']
for k in range(K)]
print(f"Best `binary` time: {min(A)}") print(f"Best `binary` time: {min(A)}")
print(f"Best `xentropy` time: {min(B)}") print(f"Best `xentropy` time: {min(B)}")
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -8,13 +8,13 @@ import lightgbm as lgb ...@@ -8,13 +8,13 @@ import lightgbm as lgb
if lgb.compat.MATPLOTLIB_INSTALLED: if lgb.compat.MATPLOTLIB_INSTALLED:
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
else: else:
raise ImportError('You need to install matplotlib and restart your session for plot_example.py.') raise ImportError("You need to install matplotlib and restart your session for plot_example.py.")
print('Loading data...') print("Loading data...")
# load or create your dataset # load or create your dataset
regression_example_dir = Path(__file__).absolute().parents[1] / 'regression' regression_example_dir = Path(__file__).absolute().parents[1] / "regression"
df_train = pd.read_csv(str(regression_example_dir / 'regression.train'), header=None, sep='\t') df_train = pd.read_csv(str(regression_example_dir / "regression.train"), header=None, sep="\t")
df_test = pd.read_csv(str(regression_example_dir / 'regression.test'), header=None, sep='\t') df_test = pd.read_csv(str(regression_example_dir / "regression.test"), header=None, sep="\t")
y_train = df_train[0] y_train = df_train[0]
y_test = df_test[0] y_test = df_test[0]
...@@ -26,45 +26,38 @@ lgb_train = lgb.Dataset(X_train, y_train) ...@@ -26,45 +26,38 @@ lgb_train = lgb.Dataset(X_train, y_train)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train) lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict # specify your configurations as a dict
params = { params = {"num_leaves": 5, "metric": ("l1", "l2"), "verbose": 0}
'num_leaves': 5,
'metric': ('l1', 'l2'),
'verbose': 0
}
evals_result = {} # to record eval results for plotting evals_result = {} # to record eval results for plotting
print('Starting training...') print("Starting training...")
# train # train
gbm = lgb.train( gbm = lgb.train(
params, params,
lgb_train, lgb_train,
num_boost_round=100, num_boost_round=100,
valid_sets=[lgb_train, lgb_test], valid_sets=[lgb_train, lgb_test],
feature_name=[f'f{i + 1}' for i in range(X_train.shape[-1])], feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
categorical_feature=[21], categorical_feature=[21],
callbacks=[ callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],
lgb.log_evaluation(10),
lgb.record_evaluation(evals_result)
]
) )
print('Plotting metrics recorded during training...') print("Plotting metrics recorded during training...")
ax = lgb.plot_metric(evals_result, metric='l1') ax = lgb.plot_metric(evals_result, metric="l1")
plt.show() plt.show()
print('Plotting feature importances...') print("Plotting feature importances...")
ax = lgb.plot_importance(gbm, max_num_features=10) ax = lgb.plot_importance(gbm, max_num_features=10)
plt.show() plt.show()
print('Plotting split value histogram...') print("Plotting split value histogram...")
ax = lgb.plot_split_value_histogram(gbm, feature='f26', bins='auto') ax = lgb.plot_split_value_histogram(gbm, feature="f26", bins="auto")
plt.show() plt.show()
print('Plotting 54th tree...') # one tree use categorical feature to split print("Plotting 54th tree...") # one tree use categorical feature to split
ax = lgb.plot_tree(gbm, tree_index=53, figsize=(15, 15), show_info=['split_gain']) ax = lgb.plot_tree(gbm, tree_index=53, figsize=(15, 15), show_info=["split_gain"])
plt.show() plt.show()
print('Plotting 54th tree with graphviz...') print("Plotting 54th tree with graphviz...")
graph = lgb.create_tree_digraph(gbm, tree_index=53, name='Tree54') graph = lgb.create_tree_digraph(gbm, tree_index=53, name="Tree54")
graph.render(view=True) graph.render(view=True)
...@@ -6,11 +6,11 @@ from sklearn.metrics import mean_squared_error ...@@ -6,11 +6,11 @@ from sklearn.metrics import mean_squared_error
import lightgbm as lgb import lightgbm as lgb
print('Loading data...') print("Loading data...")
# load or create your dataset # load or create your dataset
regression_example_dir = Path(__file__).absolute().parents[1] / 'regression' regression_example_dir = Path(__file__).absolute().parents[1] / "regression"
df_train = pd.read_csv(str(regression_example_dir / 'regression.train'), header=None, sep='\t') df_train = pd.read_csv(str(regression_example_dir / "regression.train"), header=None, sep="\t")
df_test = pd.read_csv(str(regression_example_dir / 'regression.test'), header=None, sep='\t') df_test = pd.read_csv(str(regression_example_dir / "regression.test"), header=None, sep="\t")
y_train = df_train[0] y_train = df_train[0]
y_test = df_test[0] y_test = df_test[0]
...@@ -23,32 +23,30 @@ lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) ...@@ -23,32 +23,30 @@ lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict # specify your configurations as a dict
params = { params = {
'boosting_type': 'gbdt', "boosting_type": "gbdt",
'objective': 'regression', "objective": "regression",
'metric': {'l2', 'l1'}, "metric": {"l2", "l1"},
'num_leaves': 31, "num_leaves": 31,
'learning_rate': 0.05, "learning_rate": 0.05,
'feature_fraction': 0.9, "feature_fraction": 0.9,
'bagging_fraction': 0.8, "bagging_fraction": 0.8,
'bagging_freq': 5, "bagging_freq": 5,
'verbose': 0 "verbose": 0,
} }
print('Starting training...') print("Starting training...")
# train # train
gbm = lgb.train(params, gbm = lgb.train(
lgb_train, params, lgb_train, num_boost_round=20, valid_sets=lgb_eval, callbacks=[lgb.early_stopping(stopping_rounds=5)]
num_boost_round=20, )
valid_sets=lgb_eval,
callbacks=[lgb.early_stopping(stopping_rounds=5)])
print('Saving model...') print("Saving model...")
# save model to file # save model to file
gbm.save_model('model.txt') gbm.save_model("model.txt")
print('Starting predicting...') print("Starting predicting...")
# predict # predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration) y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# eval # eval
rmse_test = mean_squared_error(y_test, y_pred) ** 0.5 rmse_test = mean_squared_error(y_test, y_pred) ** 0.5
print(f'The RMSE of prediction is: {rmse_test}') print(f"The RMSE of prediction is: {rmse_test}")
...@@ -8,85 +8,71 @@ from sklearn.model_selection import GridSearchCV ...@@ -8,85 +8,71 @@ from sklearn.model_selection import GridSearchCV
import lightgbm as lgb import lightgbm as lgb
print('Loading data...') print("Loading data...")
# load or create your dataset # load or create your dataset
regression_example_dir = Path(__file__).absolute().parents[1] / 'regression' regression_example_dir = Path(__file__).absolute().parents[1] / "regression"
df_train = pd.read_csv(str(regression_example_dir / 'regression.train'), header=None, sep='\t') df_train = pd.read_csv(str(regression_example_dir / "regression.train"), header=None, sep="\t")
df_test = pd.read_csv(str(regression_example_dir / 'regression.test'), header=None, sep='\t') df_test = pd.read_csv(str(regression_example_dir / "regression.test"), header=None, sep="\t")
y_train = df_train[0] y_train = df_train[0]
y_test = df_test[0] y_test = df_test[0]
X_train = df_train.drop(0, axis=1) X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1) X_test = df_test.drop(0, axis=1)
print('Starting training...') print("Starting training...")
# train # train
gbm = lgb.LGBMRegressor(num_leaves=31, gbm = lgb.LGBMRegressor(num_leaves=31, learning_rate=0.05, n_estimators=20)
learning_rate=0.05, gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric="l1", callbacks=[lgb.early_stopping(5)])
n_estimators=20)
gbm.fit(X_train, y_train, print("Starting predicting...")
eval_set=[(X_test, y_test)],
eval_metric='l1',
callbacks=[lgb.early_stopping(5)])
print('Starting predicting...')
# predict # predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_) y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# eval # eval
rmse_test = mean_squared_error(y_test, y_pred) ** 0.5 rmse_test = mean_squared_error(y_test, y_pred) ** 0.5
print(f'The RMSE of prediction is: {rmse_test}') print(f"The RMSE of prediction is: {rmse_test}")
# feature importances # feature importances
print(f'Feature importances: {list(gbm.feature_importances_)}') print(f"Feature importances: {list(gbm.feature_importances_)}")
# self-defined eval metric # self-defined eval metric
# f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool # f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool
# Root Mean Squared Logarithmic Error (RMSLE) # Root Mean Squared Logarithmic Error (RMSLE)
def rmsle(y_true, y_pred): def rmsle(y_true, y_pred):
return 'RMSLE', np.sqrt(np.mean(np.power(np.log1p(y_pred) - np.log1p(y_true), 2))), False return "RMSLE", np.sqrt(np.mean(np.power(np.log1p(y_pred) - np.log1p(y_true), 2))), False
print('Starting training with custom eval function...') print("Starting training with custom eval function...")
# train # train
gbm.fit(X_train, y_train, gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric=rmsle, callbacks=[lgb.early_stopping(5)])
eval_set=[(X_test, y_test)],
eval_metric=rmsle,
callbacks=[lgb.early_stopping(5)])
# another self-defined eval metric # another self-defined eval metric
# f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool # f(y_true: array, y_pred: array) -> name: str, eval_result: float, is_higher_better: bool
# Relative Absolute Error (RAE) # Relative Absolute Error (RAE)
def rae(y_true, y_pred): def rae(y_true, y_pred):
return 'RAE', np.sum(np.abs(y_pred - y_true)) / np.sum(np.abs(np.mean(y_true) - y_true)), False return "RAE", np.sum(np.abs(y_pred - y_true)) / np.sum(np.abs(np.mean(y_true) - y_true)), False
print('Starting training with multiple custom eval functions...') print("Starting training with multiple custom eval functions...")
# train # train
gbm.fit(X_train, y_train, gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], eval_metric=[rmsle, rae], callbacks=[lgb.early_stopping(5)])
eval_set=[(X_test, y_test)],
eval_metric=[rmsle, rae],
callbacks=[lgb.early_stopping(5)])
print('Starting predicting...') print("Starting predicting...")
# predict # predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_) y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
# eval # eval
rmsle_test = rmsle(y_test, y_pred)[1] rmsle_test = rmsle(y_test, y_pred)[1]
rae_test = rae(y_test, y_pred)[1] rae_test = rae(y_test, y_pred)[1]
print(f'The RMSLE of prediction is: {rmsle_test}') print(f"The RMSLE of prediction is: {rmsle_test}")
print(f'The RAE of prediction is: {rae_test}') print(f"The RAE of prediction is: {rae_test}")
# other scikit-learn modules # other scikit-learn modules
estimator = lgb.LGBMRegressor(num_leaves=31) estimator = lgb.LGBMRegressor(num_leaves=31)
param_grid = { param_grid = {"learning_rate": [0.01, 0.1, 1], "n_estimators": [20, 40]}
'learning_rate': [0.01, 0.1, 1],
'n_estimators': [20, 40]
}
gbm = GridSearchCV(estimator, param_grid, cv=3) gbm = GridSearchCV(estimator, param_grid, cv=3)
gbm.fit(X_train, y_train) gbm.fit(X_train, y_train)
print(f'Best parameters found by grid search are: {gbm.best_params_}') print(f"Best parameters found by grid search are: {gbm.best_params_}")
...@@ -18,9 +18,23 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Optional, ...@@ -18,9 +18,23 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Optional,
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
from .compat import (PANDAS_INSTALLED, PYARROW_INSTALLED, arrow_cffi, arrow_is_floating, arrow_is_integer, concat, from .compat import (
dt_DataTable, pa_Array, pa_chunked_array, pa_ChunkedArray, pa_compute, pa_Table, PANDAS_INSTALLED,
pd_CategoricalDtype, pd_DataFrame, pd_Series) PYARROW_INSTALLED,
arrow_cffi,
arrow_is_floating,
arrow_is_integer,
concat,
dt_DataTable,
pa_Array,
pa_chunked_array,
pa_ChunkedArray,
pa_compute,
pa_Table,
pd_CategoricalDtype,
pd_DataFrame,
pd_Series,
)
from .libpath import find_lib_path from .libpath import find_lib_path
if TYPE_CHECKING: if TYPE_CHECKING:
......
...@@ -5,8 +5,14 @@ from dataclasses import dataclass ...@@ -5,8 +5,14 @@ from dataclasses import dataclass
from functools import partial from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
from .basic import (Booster, _ConfigAliases, _LGBM_BoosterEvalMethodResultType, from .basic import (
_LGBM_BoosterEvalMethodResultWithStandardDeviationType, _log_info, _log_warning) Booster,
_ConfigAliases,
_LGBM_BoosterEvalMethodResultType,
_LGBM_BoosterEvalMethodResultWithStandardDeviationType,
_log_info,
_log_warning,
)
if TYPE_CHECKING: if TYPE_CHECKING:
from .engine import CVBooster from .engine import CVBooster
......
...@@ -19,12 +19,36 @@ import numpy as np ...@@ -19,12 +19,36 @@ import numpy as np
import scipy.sparse as ss import scipy.sparse as ss
from .basic import LightGBMError, _choose_param_value, _ConfigAliases, _log_info, _log_warning from .basic import LightGBMError, _choose_param_value, _ConfigAliases, _log_info, _log_warning
from .compat import (DASK_INSTALLED, PANDAS_INSTALLED, SKLEARN_INSTALLED, Client, Future, LGBMNotFittedError, concat, from .compat import (
dask_Array, dask_array_from_delayed, dask_bag_from_delayed, dask_DataFrame, dask_Series, DASK_INSTALLED,
default_client, delayed, pd_DataFrame, pd_Series, wait) PANDAS_INSTALLED,
from .sklearn import (LGBMClassifier, LGBMModel, LGBMRanker, LGBMRegressor, _LGBM_ScikitCustomObjectiveFunction, SKLEARN_INSTALLED,
_LGBM_ScikitEvalMetricType, _lgbmmodel_doc_custom_eval_note, _lgbmmodel_doc_fit, Client,
_lgbmmodel_doc_predict) Future,
LGBMNotFittedError,
concat,
dask_Array,
dask_array_from_delayed,
dask_bag_from_delayed,
dask_DataFrame,
dask_Series,
default_client,
delayed,
pd_DataFrame,
pd_Series,
wait,
)
from .sklearn import (
LGBMClassifier,
LGBMModel,
LGBMRanker,
LGBMRegressor,
_LGBM_ScikitCustomObjectiveFunction,
_LGBM_ScikitEvalMetricType,
_lgbmmodel_doc_custom_eval_note,
_lgbmmodel_doc_fit,
_lgbmmodel_doc_predict,
)
__all__ = [ __all__ = [
'DaskLGBMClassifier', 'DaskLGBMClassifier',
......
...@@ -10,10 +10,21 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union ...@@ -10,10 +10,21 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
import numpy as np import numpy as np
from . import callback from . import callback
from .basic import (Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, from .basic import (
_LGBM_BoosterEvalMethodResultType, _LGBM_BoosterEvalMethodResultWithStandardDeviationType, Booster,
_LGBM_CategoricalFeatureConfiguration, _LGBM_CustomObjectiveFunction, _LGBM_EvalFunctionResultType, Dataset,
_LGBM_FeatureNameConfiguration, _log_warning) LightGBMError,
_choose_param_value,
_ConfigAliases,
_InnerPredictor,
_LGBM_BoosterEvalMethodResultType,
_LGBM_BoosterEvalMethodResultWithStandardDeviationType,
_LGBM_CategoricalFeatureConfiguration,
_LGBM_CustomObjectiveFunction,
_LGBM_EvalFunctionResultType,
_LGBM_FeatureNameConfiguration,
_log_warning,
)
from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold
__all__ = [ __all__ = [
......
...@@ -8,14 +8,41 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union ...@@ -8,14 +8,41 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
from .basic import (Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _LGBM_BoosterBestScoreType, from .basic import (
_LGBM_CategoricalFeatureConfiguration, _LGBM_EvalFunctionResultType, _LGBM_FeatureNameConfiguration, Booster,
_LGBM_GroupType, _LGBM_InitScoreType, _LGBM_LabelType, _LGBM_WeightType, _log_warning) Dataset,
LightGBMError,
_choose_param_value,
_ConfigAliases,
_LGBM_BoosterBestScoreType,
_LGBM_CategoricalFeatureConfiguration,
_LGBM_EvalFunctionResultType,
_LGBM_FeatureNameConfiguration,
_LGBM_GroupType,
_LGBM_InitScoreType,
_LGBM_LabelType,
_LGBM_WeightType,
_log_warning,
)
from .callback import _EvalResultDict, record_evaluation from .callback import _EvalResultDict, record_evaluation
from .compat import (SKLEARN_INSTALLED, LGBMNotFittedError, _LGBMAssertAllFinite, _LGBMCheckArray, from .compat import (
_LGBMCheckClassificationTargets, _LGBMCheckSampleWeight, _LGBMCheckXY, _LGBMClassifierBase, SKLEARN_INSTALLED,
_LGBMComputeSampleWeight, _LGBMCpuCount, _LGBMLabelEncoder, _LGBMModelBase, _LGBMRegressorBase, LGBMNotFittedError,
dt_DataTable, np_random_Generator, pd_DataFrame) _LGBMAssertAllFinite,
_LGBMCheckArray,
_LGBMCheckClassificationTargets,
_LGBMCheckSampleWeight,
_LGBMCheckXY,
_LGBMClassifierBase,
_LGBMComputeSampleWeight,
_LGBMCpuCount,
_LGBMLabelEncoder,
_LGBMModelBase,
_LGBMRegressorBase,
dt_DataTable,
np_random_Generator,
pd_DataFrame,
)
from .engine import train from .engine import train
__all__ = [ __all__ = [
......
...@@ -81,10 +81,14 @@ minimum-version = "0.4.4" ...@@ -81,10 +81,14 @@ minimum-version = "0.4.4"
# end:build-system # end:build-system
[tool.isort] [tool.isort]
include_trailing_comma = true
line_length = 120 line_length = 120
# "vertical hanging indent", to match what ruff-format does
# ref: https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html#3-vertical-hanging-indent
multi_line_output = 3
skip_glob = [ skip_glob = [
"*/external_libs/*", "*/external_libs/*",
"*/lightgbm-python/*" "*/lightgbm-python/*",
] ]
[tool.mypy] [tool.mypy]
...@@ -108,14 +112,13 @@ docstring-code-format = false ...@@ -108,14 +112,13 @@ docstring-code-format = false
exclude = [ exclude = [
"build/*.py", "build/*.py",
"compile/*.py", "compile/*.py",
"examples/*.py",
"external_libs/*.py", "external_libs/*.py",
"lightgbm-python/*.py", "lightgbm-python/*.py",
"python-package/*.py", "python-package/*.py",
"tests/*.py"
] ]
indent-style = "space" indent-style = "space"
quote-style = "double" quote-style = "double"
skip-magic-trailing-comma = false
[tool.ruff.lint] [tool.ruff.lint]
ignore = [ ignore = [
......
...@@ -10,7 +10,7 @@ try: ...@@ -10,7 +10,7 @@ try:
from lightgbm.basic import _LIB as LIB from lightgbm.basic import _LIB as LIB
except ModuleNotFoundError: except ModuleNotFoundError:
print("Could not import lightgbm Python package, looking for lib_lightgbm at the repo root") print("Could not import lightgbm Python package, looking for lib_lightgbm at the repo root")
if system() in ('Windows', 'Microsoft'): if system() in ("Windows", "Microsoft"):
lib_file = Path(__file__).absolute().parents[2] / "Release" / "lib_lightgbm.dll" lib_file = Path(__file__).absolute().parents[2] / "Release" / "lib_lightgbm.dll"
else: else:
lib_file = Path(__file__).absolute().parents[2] / "lib_lightgbm.so" lib_file = Path(__file__).absolute().parents[2] / "lib_lightgbm.so"
...@@ -25,7 +25,7 @@ dtype_int64 = 3 ...@@ -25,7 +25,7 @@ dtype_int64 = 3
def c_str(string): def c_str(string):
return ctypes.c_char_p(string.encode('utf-8')) return ctypes.c_char_p(string.encode("utf-8"))
def load_from_file(filename, reference): def load_from_file(filename, reference):
...@@ -33,17 +33,13 @@ def load_from_file(filename, reference): ...@@ -33,17 +33,13 @@ def load_from_file(filename, reference):
if reference is not None: if reference is not None:
ref = reference ref = reference
handle = ctypes.c_void_p() handle = ctypes.c_void_p()
LIB.LGBM_DatasetCreateFromFile( LIB.LGBM_DatasetCreateFromFile(c_str(str(filename)), c_str("max_bin=15"), ref, ctypes.byref(handle))
c_str(str(filename)),
c_str('max_bin=15'),
ref,
ctypes.byref(handle))
print(LIB.LGBM_GetLastError()) print(LIB.LGBM_GetLastError())
num_data = ctypes.c_int(0) num_data = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data)) LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_int(0) num_feature = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature)) LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
print(f'#data: {num_data.value} #feature: {num_feature.value}') print(f"#data: {num_data.value} #feature: {num_feature.value}")
return handle return handle
...@@ -69,20 +65,22 @@ def load_from_csr(filename, reference): ...@@ -69,20 +65,22 @@ def load_from_csr(filename, reference):
ctypes.c_int64(len(csr.indptr)), ctypes.c_int64(len(csr.indptr)),
ctypes.c_int64(len(csr.data)), ctypes.c_int64(len(csr.data)),
ctypes.c_int64(csr.shape[1]), ctypes.c_int64(csr.shape[1]),
c_str('max_bin=15'), c_str("max_bin=15"),
ref, ref,
ctypes.byref(handle)) ctypes.byref(handle),
)
num_data = ctypes.c_int(0) num_data = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data)) LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_int(0) num_feature = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature)) LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
LIB.LGBM_DatasetSetField( LIB.LGBM_DatasetSetField(
handle, handle,
c_str('label'), c_str("label"),
label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
ctypes.c_int(len(label)), ctypes.c_int(len(label)),
ctypes.c_int(dtype_float32)) ctypes.c_int(dtype_float32),
print(f'#data: {num_data.value} #feature: {num_feature.value}') )
print(f"#data: {num_data.value} #feature: {num_feature.value}")
return handle return handle
...@@ -104,20 +102,22 @@ def load_from_csc(filename, reference): ...@@ -104,20 +102,22 @@ def load_from_csc(filename, reference):
ctypes.c_int64(len(csc.indptr)), ctypes.c_int64(len(csc.indptr)),
ctypes.c_int64(len(csc.data)), ctypes.c_int64(len(csc.data)),
ctypes.c_int64(csc.shape[0]), ctypes.c_int64(csc.shape[0]),
c_str('max_bin=15'), c_str("max_bin=15"),
ref, ref,
ctypes.byref(handle)) ctypes.byref(handle),
)
num_data = ctypes.c_int(0) num_data = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data)) LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_int(0) num_feature = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature)) LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
LIB.LGBM_DatasetSetField( LIB.LGBM_DatasetSetField(
handle, handle,
c_str('label'), c_str("label"),
label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
ctypes.c_int(len(label)), ctypes.c_int(len(label)),
ctypes.c_int(dtype_float32)) ctypes.c_int(dtype_float32),
print(f'#data: {num_data.value} #feature: {num_feature.value}') )
print(f"#data: {num_data.value} #feature: {num_feature.value}")
return handle return handle
...@@ -137,20 +137,22 @@ def load_from_mat(filename, reference): ...@@ -137,20 +137,22 @@ def load_from_mat(filename, reference):
ctypes.c_int32(mat.shape[0]), ctypes.c_int32(mat.shape[0]),
ctypes.c_int32(mat.shape[1]), ctypes.c_int32(mat.shape[1]),
ctypes.c_int(1), ctypes.c_int(1),
c_str('max_bin=15'), c_str("max_bin=15"),
ref, ref,
ctypes.byref(handle)) ctypes.byref(handle),
)
num_data = ctypes.c_int(0) num_data = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data)) LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_int(0) num_feature = ctypes.c_int(0)
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature)) LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
LIB.LGBM_DatasetSetField( LIB.LGBM_DatasetSetField(
handle, handle,
c_str('label'), c_str("label"),
label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), label.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
ctypes.c_int(len(label)), ctypes.c_int(len(label)),
ctypes.c_int(dtype_float32)) ctypes.c_int(dtype_float32),
print(f'#data: {num_data.value} #feature: {num_feature.value}') )
print(f"#data: {num_data.value} #feature: {num_feature.value}")
return handle return handle
...@@ -159,29 +161,26 @@ def free_dataset(handle): ...@@ -159,29 +161,26 @@ def free_dataset(handle):
def test_dataset(): def test_dataset():
binary_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'binary_classification' binary_example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
train = load_from_file(binary_example_dir / 'binary.train', None) train = load_from_file(binary_example_dir / "binary.train", None)
test = load_from_mat(binary_example_dir / 'binary.test', train) test = load_from_mat(binary_example_dir / "binary.test", train)
free_dataset(test) free_dataset(test)
test = load_from_csr(binary_example_dir / 'binary.test', train) test = load_from_csr(binary_example_dir / "binary.test", train)
free_dataset(test) free_dataset(test)
test = load_from_csc(binary_example_dir / 'binary.test', train) test = load_from_csc(binary_example_dir / "binary.test", train)
free_dataset(test) free_dataset(test)
save_to_binary(train, 'train.binary.bin') save_to_binary(train, "train.binary.bin")
free_dataset(train) free_dataset(train)
train = load_from_file('train.binary.bin', None) train = load_from_file("train.binary.bin", None)
free_dataset(train) free_dataset(train)
def test_booster(): def test_booster():
binary_example_dir = Path(__file__).absolute().parents[2] / 'examples' / 'binary_classification' binary_example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
train = load_from_mat(binary_example_dir / 'binary.train', None) train = load_from_mat(binary_example_dir / "binary.train", None)
test = load_from_mat(binary_example_dir / 'binary.test', train) test = load_from_mat(binary_example_dir / "binary.test", train)
booster = ctypes.c_void_p() booster = ctypes.c_void_p()
LIB.LGBM_BoosterCreate( LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
train,
c_str("app=binary metric=auc num_leaves=31 verbose=0"),
ctypes.byref(booster))
LIB.LGBM_BoosterAddValidData(booster, test) LIB.LGBM_BoosterAddValidData(booster, test)
is_finished = ctypes.c_int(0) is_finished = ctypes.c_int(0)
for i in range(1, 51): for i in range(1, 51):
...@@ -189,28 +188,18 @@ def test_booster(): ...@@ -189,28 +188,18 @@ def test_booster():
result = np.array([0.0], dtype=np.float64) result = np.array([0.0], dtype=np.float64)
out_len = ctypes.c_int(0) out_len = ctypes.c_int(0)
LIB.LGBM_BoosterGetEval( LIB.LGBM_BoosterGetEval(
booster, booster, ctypes.c_int(0), ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
ctypes.c_int(0), )
ctypes.byref(out_len),
result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
if i % 10 == 0: if i % 10 == 0:
print(f'{i} iteration test AUC {result[0]:.6f}') print(f"{i} iteration test AUC {result[0]:.6f}")
LIB.LGBM_BoosterSaveModel( LIB.LGBM_BoosterSaveModel(booster, ctypes.c_int(0), ctypes.c_int(-1), ctypes.c_int(0), c_str("model.txt"))
booster,
ctypes.c_int(0),
ctypes.c_int(-1),
ctypes.c_int(0),
c_str('model.txt'))
LIB.LGBM_BoosterFree(booster) LIB.LGBM_BoosterFree(booster)
free_dataset(train) free_dataset(train)
free_dataset(test) free_dataset(test)
booster2 = ctypes.c_void_p() booster2 = ctypes.c_void_p()
num_total_model = ctypes.c_int(0) num_total_model = ctypes.c_int(0)
LIB.LGBM_BoosterCreateFromModelfile( LIB.LGBM_BoosterCreateFromModelfile(c_str("model.txt"), ctypes.byref(num_total_model), ctypes.byref(booster2))
c_str('model.txt'), data = np.loadtxt(str(binary_example_dir / "binary.test"), dtype=np.float64)
ctypes.byref(num_total_model),
ctypes.byref(booster2))
data = np.loadtxt(str(binary_example_dir / 'binary.test'), dtype=np.float64)
mat = data[:, 1:] mat = data[:, 1:]
preb = np.empty(mat.shape[0], dtype=np.float64) preb = np.empty(mat.shape[0], dtype=np.float64)
num_preb = ctypes.c_int64(0) num_preb = ctypes.c_int64(0)
...@@ -225,58 +214,51 @@ def test_booster(): ...@@ -225,58 +214,51 @@ def test_booster():
ctypes.c_int(1), ctypes.c_int(1),
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(25), ctypes.c_int(25),
c_str(''), c_str(""),
ctypes.byref(num_preb), ctypes.byref(num_preb),
preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double))) preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
)
LIB.LGBM_BoosterPredictForFile( LIB.LGBM_BoosterPredictForFile(
booster2, booster2,
c_str(str(binary_example_dir / 'binary.test')), c_str(str(binary_example_dir / "binary.test")),
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(25), ctypes.c_int(25),
c_str(''), c_str(""),
c_str('preb.txt')) c_str("preb.txt"),
)
LIB.LGBM_BoosterPredictForFile( LIB.LGBM_BoosterPredictForFile(
booster2, booster2,
c_str(str(binary_example_dir / 'binary.test')), c_str(str(binary_example_dir / "binary.test")),
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(0), ctypes.c_int(0),
ctypes.c_int(10), ctypes.c_int(10),
ctypes.c_int(25), ctypes.c_int(25),
c_str(''), c_str(""),
c_str('preb.txt')) c_str("preb.txt"),
)
LIB.LGBM_BoosterFree(booster2) LIB.LGBM_BoosterFree(booster2)
def test_max_thread_control(): def test_max_thread_control():
# at initialization, should be -1 # at initialization, should be -1
num_threads = ctypes.c_int(0) num_threads = ctypes.c_int(0)
ret = LIB.LGBM_GetMaxThreads( ret = LIB.LGBM_GetMaxThreads(ctypes.byref(num_threads))
ctypes.byref(num_threads)
)
assert ret == 0 assert ret == 0
assert num_threads.value == -1 assert num_threads.value == -1
# updating that value through the C API should work # updating that value through the C API should work
ret = LIB.LGBM_SetMaxThreads( ret = LIB.LGBM_SetMaxThreads(ctypes.c_int(6))
ctypes.c_int(6)
)
assert ret == 0 assert ret == 0
ret = LIB.LGBM_GetMaxThreads( ret = LIB.LGBM_GetMaxThreads(ctypes.byref(num_threads))
ctypes.byref(num_threads)
)
assert ret == 0 assert ret == 0
assert num_threads.value == 6 assert num_threads.value == 6
# resetting to any negative number should set it to -1 # resetting to any negative number should set it to -1
ret = LIB.LGBM_SetMaxThreads( ret = LIB.LGBM_SetMaxThreads(ctypes.c_int(-123))
ctypes.c_int(-123)
)
assert ret == 0 assert ret == 0
ret = LIB.LGBM_GetMaxThreads( ret = LIB.LGBM_GetMaxThreads(ctypes.byref(num_threads))
ctypes.byref(num_threads)
)
assert ret == 0 assert ret == 0
assert num_threads.value == -1 assert num_threads.value == -1
...@@ -3,5 +3,5 @@ from pathlib import Path ...@@ -3,5 +3,5 @@ from pathlib import Path
import numpy as np import numpy as np
preds = [np.loadtxt(str(name)) for name in Path(__file__).absolute().parent.glob('*.pred')] preds = [np.loadtxt(str(name)) for name in Path(__file__).absolute().parent.glob("*.pred")]
np.testing.assert_allclose(preds[0], preds[1]) np.testing.assert_allclose(preds[0], preds[1])
...@@ -14,16 +14,16 @@ from sklearn.metrics import accuracy_score ...@@ -14,16 +14,16 @@ from sklearn.metrics import accuracy_score
TESTS_DIR = Path(__file__).absolute().parent TESTS_DIR = Path(__file__).absolute().parent
@pytest.fixture(scope='module') @pytest.fixture(scope="module")
def executable(pytestconfig) -> str: def executable(pytestconfig) -> str:
"""Returns the path to the lightgbm executable.""" """Returns the path to the lightgbm executable."""
return pytestconfig.getoption('execfile') return pytestconfig.getoption("execfile")
def _find_random_open_port() -> int: def _find_random_open_port() -> int:
"""Find a random open port on localhost.""" """Find a random open port on localhost."""
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('', 0)) s.bind(("", 0))
port = s.getsockname()[1] port = s.getsockname()[1]
return port # noqa: RET504 return port # noqa: RET504
...@@ -34,7 +34,7 @@ def _generate_n_ports(n: int) -> Generator[int, None, None]: ...@@ -34,7 +34,7 @@ def _generate_n_ports(n: int) -> Generator[int, None, None]:
def _write_dict(d: Dict, file: io.TextIOWrapper) -> None: def _write_dict(d: Dict, file: io.TextIOWrapper) -> None:
for k, v in d.items(): for k, v in d.items():
file.write(f'{k} = {v}\n') file.write(f"{k} = {v}\n")
def create_data(task: str, n_samples: int = 1_000) -> np.ndarray: def create_data(task: str, n_samples: int = 1_000) -> np.ndarray:
...@@ -42,10 +42,10 @@ def create_data(task: str, n_samples: int = 1_000) -> np.ndarray: ...@@ -42,10 +42,10 @@ def create_data(task: str, n_samples: int = 1_000) -> np.ndarray:
The data is returned as a numpy array with the label as the first column. The data is returned as a numpy array with the label as the first column.
""" """
if task == 'binary-classification': if task == "binary-classification":
centers = [[-4, -4], [4, 4]] centers = [[-4, -4], [4, 4]]
X, y = make_blobs(n_samples, centers=centers, random_state=42) X, y = make_blobs(n_samples, centers=centers, random_state=42)
elif task == 'regression': elif task == "regression":
X, y = make_regression(n_samples, n_features=4, n_informative=2, random_state=42) X, y = make_regression(n_samples, n_features=4, n_informative=2, random_state=42)
return np.hstack([y.reshape(-1, 1), X]) return np.hstack([y.reshape(-1, 1), X])
...@@ -54,22 +54,22 @@ class DistributedMockup: ...@@ -54,22 +54,22 @@ class DistributedMockup:
"""Simulate distributed training.""" """Simulate distributed training."""
default_train_config = { default_train_config = {
'task': 'train', "task": "train",
'pre_partition': True, "pre_partition": True,
'machine_list_file': TESTS_DIR / 'mlist.txt', "machine_list_file": TESTS_DIR / "mlist.txt",
'tree_learner': 'data', "tree_learner": "data",
'force_row_wise': True, "force_row_wise": True,
'verbose': 0, "verbose": 0,
'num_boost_round': 20, "num_boost_round": 20,
'num_leaves': 15, "num_leaves": 15,
'num_threads': 2, "num_threads": 2,
} }
default_predict_config = { default_predict_config = {
'task': 'predict', "task": "predict",
'data': TESTS_DIR / 'train.txt', "data": TESTS_DIR / "train.txt",
'input_model': TESTS_DIR / 'model0.txt', "input_model": TESTS_DIR / "model0.txt",
'output_result': TESTS_DIR / 'predictions.txt', "output_result": TESTS_DIR / "predictions.txt",
} }
def __init__(self, executable: str): def __init__(self, executable: str):
...@@ -77,8 +77,8 @@ class DistributedMockup: ...@@ -77,8 +77,8 @@ class DistributedMockup:
def worker_train(self, i: int) -> subprocess.CompletedProcess: def worker_train(self, i: int) -> subprocess.CompletedProcess:
"""Start the training process on the `i`-th worker.""" """Start the training process on the `i`-th worker."""
config_path = TESTS_DIR / f'train{i}.conf' config_path = TESTS_DIR / f"train{i}.conf"
cmd = [self.executable, f'config={config_path}'] cmd = [self.executable, f"config={config_path}"]
return subprocess.run(cmd) return subprocess.run(cmd)
def _set_ports(self) -> None: def _set_ports(self) -> None:
...@@ -92,18 +92,18 @@ class DistributedMockup: ...@@ -92,18 +92,18 @@ class DistributedMockup:
ports.update(candidates) ports.update(candidates)
i += 1 i += 1
if i == max_tries: if i == max_tries:
raise RuntimeError('Unable to find non-colliding ports.') raise RuntimeError("Unable to find non-colliding ports.")
self.listen_ports = list(ports) self.listen_ports = list(ports)
with open(TESTS_DIR / 'mlist.txt', 'wt') as f: with open(TESTS_DIR / "mlist.txt", "wt") as f:
for port in self.listen_ports: for port in self.listen_ports:
f.write(f'127.0.0.1 {port}\n') f.write(f"127.0.0.1 {port}\n")
def _write_data(self, partitions: List[np.ndarray]) -> None: def _write_data(self, partitions: List[np.ndarray]) -> None:
"""Write all training data as train.txt and each training partition as train{i}.txt.""" """Write all training data as train.txt and each training partition as train{i}.txt."""
all_data = np.vstack(partitions) all_data = np.vstack(partitions)
np.savetxt(str(TESTS_DIR / 'train.txt'), all_data, delimiter=',') np.savetxt(str(TESTS_DIR / "train.txt"), all_data, delimiter=",")
for i, partition in enumerate(partitions): for i, partition in enumerate(partitions):
np.savetxt(str(TESTS_DIR / f'train{i}.txt'), partition, delimiter=',') np.savetxt(str(TESTS_DIR / f"train{i}.txt"), partition, delimiter=",")
def fit(self, partitions: List[np.ndarray], train_config: Dict) -> None: def fit(self, partitions: List[np.ndarray], train_config: Dict) -> None:
"""Run the distributed training process on a single machine. """Run the distributed training process on a single machine.
...@@ -118,7 +118,7 @@ class DistributedMockup: ...@@ -118,7 +118,7 @@ class DistributedMockup:
""" """
self.train_config = copy.deepcopy(self.default_train_config) self.train_config = copy.deepcopy(self.default_train_config)
self.train_config.update(train_config) self.train_config.update(train_config)
self.n_workers = self.train_config['num_machines'] self.n_workers = self.train_config["num_machines"]
self._set_ports() self._set_ports()
self._write_data(partitions) self._write_data(partitions)
self.label_ = np.hstack([partition[:, 0] for partition in partitions]) self.label_ = np.hstack([partition[:, 0] for partition in partitions])
...@@ -131,7 +131,7 @@ class DistributedMockup: ...@@ -131,7 +131,7 @@ class DistributedMockup:
results = [f.result() for f in futures] results = [f.result() for f in futures]
for result in results: for result in results:
if result.returncode != 0: if result.returncode != 0:
raise RuntimeError('Error in training') raise RuntimeError("Error in training")
def predict(self, predict_config: Dict[str, Any]) -> np.ndarray: def predict(self, predict_config: Dict[str, Any]) -> np.ndarray:
"""Compute the predictions using the model created in the fit step. """Compute the predictions using the model created in the fit step.
...@@ -141,14 +141,14 @@ class DistributedMockup: ...@@ -141,14 +141,14 @@ class DistributedMockup:
""" """
self.predict_config = copy.deepcopy(self.default_predict_config) self.predict_config = copy.deepcopy(self.default_predict_config)
self.predict_config.update(predict_config) self.predict_config.update(predict_config)
config_path = TESTS_DIR / 'predict.conf' config_path = TESTS_DIR / "predict.conf"
with open(config_path, 'wt') as file: with open(config_path, "wt") as file:
_write_dict(self.predict_config, file) _write_dict(self.predict_config, file)
cmd = [self.executable, f'config={config_path}'] cmd = [self.executable, f"config={config_path}"]
result = subprocess.run(cmd) result = subprocess.run(cmd)
if result.returncode != 0: if result.returncode != 0:
raise RuntimeError('Error in prediction') raise RuntimeError("Error in prediction")
return np.loadtxt(str(TESTS_DIR / 'predictions.txt')) return np.loadtxt(str(TESTS_DIR / "predictions.txt"))
def write_train_config(self, i: int) -> None: def write_train_config(self, i: int) -> None:
"""Create a file train{i}.conf with the required configuration to train. """Create a file train{i}.conf with the required configuration to train.
...@@ -156,41 +156,41 @@ class DistributedMockup: ...@@ -156,41 +156,41 @@ class DistributedMockup:
Each worker gets a different port and piece of the data, the rest are the Each worker gets a different port and piece of the data, the rest are the
model parameters contained in `self.config`. model parameters contained in `self.config`.
""" """
with open(TESTS_DIR / f'train{i}.conf', 'wt') as file: with open(TESTS_DIR / f"train{i}.conf", "wt") as file:
output_model = TESTS_DIR / f'model{i}.txt' output_model = TESTS_DIR / f"model{i}.txt"
data = TESTS_DIR / f'train{i}.txt' data = TESTS_DIR / f"train{i}.txt"
file.write(f'output_model = {output_model}\n') file.write(f"output_model = {output_model}\n")
file.write(f'local_listen_port = {self.listen_ports[i]}\n') file.write(f"local_listen_port = {self.listen_ports[i]}\n")
file.write(f'data = {data}\n') file.write(f"data = {data}\n")
_write_dict(self.train_config, file) _write_dict(self.train_config, file)
def test_classifier(executable): def test_classifier(executable):
"""Test the classification task.""" """Test the classification task."""
num_machines = 2 num_machines = 2
data = create_data(task='binary-classification') data = create_data(task="binary-classification")
partitions = np.array_split(data, num_machines) partitions = np.array_split(data, num_machines)
train_params = { train_params = {
'objective': 'binary', "objective": "binary",
'num_machines': num_machines, "num_machines": num_machines,
} }
clf = DistributedMockup(executable) clf = DistributedMockup(executable)
clf.fit(partitions, train_params) clf.fit(partitions, train_params)
y_probas = clf.predict(predict_config={}) y_probas = clf.predict(predict_config={})
y_pred = y_probas > 0.5 y_pred = y_probas > 0.5
assert accuracy_score(clf.label_, y_pred) == 1. assert accuracy_score(clf.label_, y_pred) == 1.0
def test_regressor(executable):
    """Test the regression task."""
    n_workers = 2
    dataset = create_data(task="regression")
    shards = np.array_split(dataset, n_workers)
    params = {"objective": "regression", "num_machines": n_workers}
    reg = DistributedMockup(executable)
    reg.fit(shards, params)
    predictions = reg.predict(predict_config={})
    # Regression is noisier than classification, so allow generous tolerances.
    np.testing.assert_allclose(predictions, reg.label_, rtol=0.2, atol=50.0)
from pathlib import Path

# Default location of the compiled LightGBM binary: two directories above
# this conftest file.
default_exec_file = Path(__file__).absolute().parents[2] / "lightgbm"


def pytest_addoption(parser):
    """Let tests override the LightGBM executable path via ``--execfile``."""
    binary = str(default_exec_file)
    parser.addoption("--execfile", action="store", default=binary)
...@@ -71,9 +71,7 @@ def generate_random_arrow_table( ...@@ -71,9 +71,7 @@ def generate_random_arrow_table(
values: Optional[np.ndarray] = None, values: Optional[np.ndarray] = None,
) -> pa.Table: ) -> pa.Table:
columns = [ columns = [
generate_random_arrow_array( generate_random_arrow_array(num_datapoints, seed + i, generate_nulls=generate_nulls, values=values)
num_datapoints, seed + i, generate_nulls=generate_nulls, values=values
)
for i in range(num_columns) for i in range(num_columns)
] ]
names = [f"col_{i}" for i in range(num_columns)] names = [f"col_{i}" for i in range(num_columns)]
...@@ -156,9 +154,7 @@ def test_dataset_construct_fields_fuzzy(): ...@@ -156,9 +154,7 @@ def test_dataset_construct_fields_fuzzy():
arrow_weights = generate_random_arrow_array(1000, 42, generate_nulls=False) arrow_weights = generate_random_arrow_array(1000, 42, generate_nulls=False)
arrow_groups = pa.chunked_array([[300, 400, 50], [250]], type=pa.int32()) arrow_groups = pa.chunked_array([[300, 400, 50], [250]], type=pa.int32())
arrow_dataset = lgb.Dataset( arrow_dataset = lgb.Dataset(arrow_table, label=arrow_labels, weight=arrow_weights, group=arrow_groups)
arrow_table, label=arrow_labels, weight=arrow_weights, group=arrow_groups
)
arrow_dataset.construct() arrow_dataset.construct()
pandas_dataset = lgb.Dataset( pandas_dataset = lgb.Dataset(
...@@ -171,9 +167,7 @@ def test_dataset_construct_fields_fuzzy(): ...@@ -171,9 +167,7 @@ def test_dataset_construct_fields_fuzzy():
# Check for equality # Check for equality
for field in ("label", "weight", "group"): for field in ("label", "weight", "group"):
np_assert_array_equal( np_assert_array_equal(arrow_dataset.get_field(field), pandas_dataset.get_field(field), strict=True)
arrow_dataset.get_field(field), pandas_dataset.get_field(field), strict=True
)
np_assert_array_equal(arrow_dataset.get_label(), pandas_dataset.get_label(), strict=True) np_assert_array_equal(arrow_dataset.get_label(), pandas_dataset.get_label(), strict=True)
np_assert_array_equal(arrow_dataset.get_weight(), pandas_dataset.get_weight(), strict=True) np_assert_array_equal(arrow_dataset.get_weight(), pandas_dataset.get_weight(), strict=True)
...@@ -269,9 +263,7 @@ def test_dataset_construct_groups(array_type, group_data, arrow_type): ...@@ -269,9 +263,7 @@ def test_dataset_construct_groups(array_type, group_data, arrow_type):
], ],
) )
@pytest.mark.parametrize("arrow_type", _INTEGER_TYPES + _FLOAT_TYPES) @pytest.mark.parametrize("arrow_type", _INTEGER_TYPES + _FLOAT_TYPES)
def test_dataset_construct_init_scores_array( def test_dataset_construct_init_scores_array(array_type: Any, init_score_data: Any, arrow_type: Any):
array_type: Any, init_score_data: Any, arrow_type: Any
):
data = generate_dummy_arrow_table() data = generate_dummy_arrow_table()
init_scores = array_type(init_score_data, type=arrow_type) init_scores = array_type(init_score_data, type=arrow_type)
dataset = lgb.Dataset(data, init_score=init_scores, params=dummy_dataset_params()) dataset = lgb.Dataset(data, init_score=init_scores, params=dummy_dataset_params())
...@@ -320,9 +312,7 @@ def assert_equal_predict_arrow_pandas(booster: lgb.Booster, data: pa.Table): ...@@ -320,9 +312,7 @@ def assert_equal_predict_arrow_pandas(booster: lgb.Booster, data: pa.Table):
np_assert_array_equal(p_pred_contrib_arrow, p_pred_contrib_pandas, strict=True) np_assert_array_equal(p_pred_contrib_arrow, p_pred_contrib_pandas, strict=True)
p_first_iter_arrow = booster.predict(data, start_iteration=0, num_iteration=1, raw_score=True) p_first_iter_arrow = booster.predict(data, start_iteration=0, num_iteration=1, raw_score=True)
p_first_iter_pandas = booster.predict( p_first_iter_pandas = booster.predict(data.to_pandas(), start_iteration=0, num_iteration=1, raw_score=True)
data.to_pandas(), start_iteration=0, num_iteration=1, raw_score=True
)
np_assert_array_equal(p_first_iter_arrow, p_first_iter_pandas, strict=True) np_assert_array_equal(p_first_iter_arrow, p_first_iter_pandas, strict=True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment