Unverified commit f2a32f9d, authored by Nikita Titov, committed by GitHub

[python-package][tests] enhance `test_set_field_none_removes_field` test (#7044)



* Update test_basic.py

* dev

* dev

* dev

* dev

* dev

* dev

* Update test_basic.py

* Update test_basic.py

* Update test_basic.py

* Update test_basic.py

* Update test_basic.py

* dev

* dev

* dev

* dev

* dev

* dev

* dev

* dev

* dev

---------
Co-authored-by: James Lamb <jaylamb20@gmail.com>
parent fd1a0f4a
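The substance of this change is twofold: `test_set_field_none_removes_field` is parametrized over all four settable `Dataset` fields, and most exact-equality assertions move from `np.testing.assert_array_equal` to an `np_assert_array_equal` helper imported from the tests' `.utils` module, called with `strict=True` so that shape and dtype are compared, not just values. For orientation, here is a minimal sketch of what such a helper presumably looks like (the real implementation in `.utils` may differ; NumPy added the `strict` keyword to `assert_array_equal` in version 1.24):

```python
import inspect

import numpy as np


def np_assert_array_equal(*args, **kwargs):
    """Forward to np.testing.assert_array_equal(), tolerating older NumPy.

    strict=True (NumPy >= 1.24) additionally fails the assertion when the
    shapes or dtypes of the two arrays differ; on older NumPy the keyword
    does not exist, so it is dropped here before forwarding.
    """
    if "strict" not in inspect.signature(np.testing.assert_array_equal).parameters:
        kwargs.pop("strict", None)
    np.testing.assert_array_equal(*args, **kwargs)
```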
@@ -69,13 +69,13 @@ def test_basic(tmp_path):
     assert bst.feature_name() == feature_names
     pred_from_model_file = bst.predict(X_test)
     # we need to check the consistency of model file here, so test for exact equal
-    np.testing.assert_array_equal(pred_from_matr, pred_from_model_file)
+    np_assert_array_equal(pred_from_matr, pred_from_model_file, strict=True)
     # check early stopping is working. Make it stop very early, so the scores should be very close to zero
     pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5}
     pred_early_stopping = bst.predict(X_test, **pred_parameter)
     # scores likely to be different, but prediction should still be the same
-    np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping))
+    np_assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping), strict=True)
     # test that shape is checked during prediction
     bad_X_test = X_test[:, 1:]
@@ -213,7 +213,7 @@ def test_sequence_get_data(num_seq, rng):
     used_indices = rng.choice(a=np.arange(nrow), size=nrow // 3, replace=False)
     subset_data = seq_ds.subset(used_indices).construct()
-    np.testing.assert_array_equal(subset_data.get_data(), X[sorted(used_indices)])
+    np_assert_array_equal(subset_data.get_data(), X[sorted(used_indices)], strict=True)


 def test_chunked_dataset():
@@ -1019,14 +1019,39 @@ def test_equal_datasets_from_one_and_several_matrices_w_different_layouts(rng, t
     assert filecmp.cmp(one_path, several_path)


-def test_set_field_none_removes_field(rng):
-    X1 = rng.uniform(size=(10, 1))
-    d1 = lgb.Dataset(X1).construct()
-    weight = rng.uniform(size=10)
-    out = d1.set_field("weight", weight)
-    assert out is d1
-    np.testing.assert_allclose(d1.get_field("weight"), weight)
-    d1.set_field("weight", None)
-    assert d1.get_field("weight") is None
+@pytest.mark.parametrize(
+    "field_name",
+    [
+        "group",
+        "init_score",
+        pytest.param(
+            "position",
+            marks=pytest.mark.skipif(
+                getenv("TASK", "") == "cuda",
+                reason="Positions in learning to rank is not supported in CUDA version yet",
+            ),
+        ),
+        "weight",
+    ],
+)
+def test_set_field_none_removes_field(rng, field_name):
+    X = rng.uniform(size=(10, 1))
+    d = lgb.Dataset(X).construct()
+    if field_name == "group":
+        field = [5, 5]
+        expected = np.array([0, 5, 10], dtype=np.int32)
+    elif field_name == "position":
+        field = [100, 20, 100, 10, 30, 10, 30, 10, 30, 30]
+        expected = np.array([0, 1, 0, 2, 3, 2, 3, 2, 3, 3], dtype=np.int32)
+    else:
+        field = rng.uniform(size=10)
+        expected = field.astype(np.float64 if field_name == "init_score" else np.float32)
+    out = d.set_field(field_name, field)
+    assert out is d
+    np_assert_array_equal(d.get_field(field_name), expected, strict=True)
+    d.set_field(field_name, None)
+    assert d.get_field(field_name) is None
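The expected arrays in the parametrized test above encode how LightGBM hands these fields back from `get_field` once set: `group` sizes come back as cumulative query boundaries, and raw `position` values come back as dense int32 IDs assigned in order of first appearance. The following sketch reproduces the expected arrays; the position mapping is inferred from the test's expected output, not from LightGBM's C++ internals:

```python
import numpy as np

# group sizes [5, 5] are stored as cumulative query boundaries [0, 5, 10]
group = [5, 5]
boundaries = np.concatenate([[0], np.cumsum(group)]).astype(np.int32)
assert boundaries.tolist() == [0, 5, 10]

# raw positions are re-coded as dense IDs in order of first appearance:
# 100 -> 0, 20 -> 1, 10 -> 2, 30 -> 3
positions = [100, 20, 100, 10, 30, 10, 30, 10, 30, 30]
first_seen = {}
dense = np.array([first_seen.setdefault(p, len(first_seen)) for p in positions], dtype=np.int32)
assert dense.tolist() == [0, 1, 0, 2, 3, 2, 3, 2, 3, 3]
```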
@@ -14,7 +14,7 @@ from sklearn.metrics import accuracy_score, r2_score

 import lightgbm as lgb

-from .utils import sklearn_multiclass_custom_objective
+from .utils import np_assert_array_equal, sklearn_multiclass_custom_objective

 if platform in {"cygwin", "win32"}:
     pytest.skip("lightgbm.dask is not currently supported on Windows", allow_module_level=True)
@@ -370,7 +370,7 @@ def test_classifier_pred_contrib(output, task, cluster):
            # raw scores will probably be different, but at least check that all predicted classes are the same
            pred_classes = np.argmax(computed_preds.toarray(), axis=1)
            local_pred_classes = np.argmax(local_preds_with_contrib[i].toarray(), axis=1)
-           np.testing.assert_array_equal(pred_classes, local_pred_classes)
+           np_assert_array_equal(pred_classes, local_pred_classes, strict=True)
        return

    preds_with_contrib = preds_with_contrib.compute()
@@ -40,6 +40,7 @@ from .utils import (
     logistic_sigmoid,
     make_synthetic_regression,
     mse_obj,
+    np_assert_array_equal,
     pickle_and_unpickle_object,
     sklearn_multiclass_custom_objective,
     softmax,
@@ -852,7 +853,7 @@ def test_ranking_with_position_information_with_dataset_constructor(tmp_path):
     # test get_position works
     positions_from_get = lgb_train.get_position()
-    np.testing.assert_array_equal(positions_from_get, positions)
+    np_assert_array_equal(positions_from_get, positions, strict=True)


 def test_early_stopping():
@@ -1398,7 +1399,7 @@ def test_cvbooster_save_load(tmp_path):
     cvbooster_from_string = lgb.CVBooster().model_from_string(model_string)
     for cvbooster_loaded in [cvbooster_from_txt_file, cvbooster_from_string]:
         assert best_iteration == cvbooster_loaded.best_iteration
-        np.testing.assert_array_equal(preds, cvbooster_loaded.predict(X_test))
+        np_assert_array_equal(preds, cvbooster_loaded.predict(X_test), strict=True)


 @pytest.mark.parametrize("serializer", SERIALIZERS)
@@ -1431,7 +1432,7 @@ def test_cvbooster_picklable(serializer):
     assert best_iteration == cvbooster_from_disk.best_iteration

     preds_from_disk = cvbooster_from_disk.predict(X_test)
-    np.testing.assert_array_equal(preds, preds_from_disk)
+    np_assert_array_equal(preds, preds_from_disk, strict=True)


 def test_feature_name():
@@ -2311,7 +2312,7 @@ def test_monotone_penalty_max():
     constrained_model = lgb.train(params_constrained_model, trainset_constrained_model, 10)

     # Check that a very high penalization is the same as not using the features at all
-    np.testing.assert_array_equal(constrained_model.predict(x), unconstrained_model_predictions)
+    np_assert_array_equal(constrained_model.predict(x), unconstrained_model_predictions, strict=True)


 def test_max_bin_by_feature():
@@ -3186,22 +3187,24 @@ def test_get_split_value_histogram(rng_fixed_seed):
     assert len(bins) == 8
     hist_idx, bins_idx = gbm.get_split_value_histogram(0)
     hist_name, bins_name = gbm.get_split_value_histogram(gbm.feature_name()[0])
-    np.testing.assert_array_equal(hist_idx, hist_name)
+    np_assert_array_equal(hist_idx, hist_name, strict=True)
     np.testing.assert_allclose(bins_idx, bins_name)
     hist_idx, bins_idx = gbm.get_split_value_histogram(X.shape[-1] - 1)
     hist_name, bins_name = gbm.get_split_value_histogram(gbm.feature_name()[X.shape[-1] - 1])
-    np.testing.assert_array_equal(hist_idx, hist_name)
+    np_assert_array_equal(hist_idx, hist_name, strict=True)
     np.testing.assert_allclose(bins_idx, bins_name)
     # test bins string type
     hist_vals, bin_edges = gbm.get_split_value_histogram(0, bins="auto")
     hist = gbm.get_split_value_histogram(0, bins="auto", xgboost_style=True)
     if lgb.compat.PANDAS_INSTALLED:
         mask = hist_vals > 0
-        np.testing.assert_array_equal(hist_vals[mask], hist["Count"].values)
+        # strict=False due to dtype mismatch: 'int64' and 'float64'
+        np_assert_array_equal(hist_vals[mask], hist["Count"].values, strict=False)
         np.testing.assert_allclose(bin_edges[1:][mask], hist["SplitValue"].values)
     else:
         mask = hist_vals > 0
-        np.testing.assert_array_equal(hist_vals[mask], hist[:, 1])
+        # strict=False due to dtype mismatch: 'int64' and 'float64'
+        np_assert_array_equal(hist_vals[mask], hist[:, 1], strict=False)
         np.testing.assert_allclose(bin_edges[1:][mask], hist[:, 0])
     # test histogram is disabled for categorical features
     with pytest.raises(
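The two `strict=False` call sites above are the exception that clarifies the rule: the histogram counts are `int64`, while the `xgboost_style=True` output stores them as `float64`, so a strict comparison would fail on dtype alone even though the values match. A standalone illustration (requires NumPy >= 1.24 for the `strict` keyword):

```python
import numpy as np

counts = np.array([1, 2, 3], dtype=np.int64)
as_float = counts.astype(np.float64)

# value-only comparison passes: 1 == 1.0, etc.
np.testing.assert_array_equal(counts, as_float)

# strict comparison also checks dtype, so int64 vs. float64 fails
try:
    np.testing.assert_array_equal(counts, as_float, strict=True)
except AssertionError:
    print("dtypes differ: int64 vs. float64")
```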
@@ -42,6 +42,7 @@ from .utils import (
     load_linnerud,
     make_ranking,
     make_synthetic_regression,
+    np_assert_array_equal,
     sklearn_multiclass_custom_objective,
     softmax,
 )
@@ -423,7 +424,7 @@ def test_multioutput_classifier():
     score = clf.score(X_test, y_test)
     assert score >= 0.2
     assert score <= 1.0
-    np.testing.assert_array_equal(np.tile(np.unique(y_train), n_outputs), np.concatenate(clf.classes_))
+    np_assert_array_equal(np.tile(np.unique(y_train), n_outputs), np.concatenate(clf.classes_), strict=True)
     for classifier in clf.estimators_:
         assert isinstance(classifier, lgb.LGBMClassifier)
         assert isinstance(classifier.booster_, lgb.Booster)
@@ -454,7 +455,7 @@ def test_classifier_chain():
     score = clf.score(X_test, y_test)
     assert score >= 0.2
     assert score <= 1.0
-    np.testing.assert_array_equal(np.tile(np.unique(y_train), n_outputs), np.concatenate(clf.classes_))
+    np_assert_array_equal(np.tile(np.unique(y_train), n_outputs), np.concatenate(clf.classes_), strict=True)
     assert order == clf.order_
     for classifier in clf.estimators_:
         assert isinstance(classifier, lgb.LGBMClassifier)
@@ -709,7 +710,7 @@ def test_joblib(tmp_path):
     gbm_pickle = joblib.load(model_path_pkl)
     assert isinstance(gbm_pickle.booster_, lgb.Booster)
     assert gbm.get_params() == gbm_pickle.get_params()
-    np.testing.assert_array_equal(gbm.feature_importances_, gbm_pickle.feature_importances_)
+    np_assert_array_equal(gbm.feature_importances_, gbm_pickle.feature_importances_, strict=True)

     assert gbm_pickle.learning_rate == pytest.approx(0.1)
     assert callable(gbm_pickle.objective)
@@ -750,7 +751,7 @@ def test_random_state_object(rng_constructor):
     y_pred1 = clf1.predict(X_test, raw_score=True)
     y_pred2 = clf2.predict(X_test, raw_score=True)
     np.testing.assert_allclose(y_pred1, y_pred2)
-    np.testing.assert_array_equal(clf1.feature_importances_, clf2.feature_importances_)
+    np_assert_array_equal(clf1.feature_importances_, clf2.feature_importances_, strict=True)
     df1 = clf1.booster_.model_to_string(num_iteration=0)
     df2 = clf2.booster_.model_to_string(num_iteration=0)
     assert df1 == df2
@@ -1514,13 +1515,13 @@ def test_continue_training_with_model():

 def test_actual_number_of_trees():
     X = [[1, 2, 3], [1, 2, 3]]
-    y = [1, 1]
+    y = [1.0, 1.0]
     n_estimators = 5
     gbm = lgb.LGBMRegressor(n_estimators=n_estimators).fit(X, y)
     assert gbm.n_estimators == n_estimators
     assert gbm.n_estimators_ == 1
     assert gbm.n_iter_ == 1
-    np.testing.assert_array_equal(gbm.predict(np.array(X) * 10), y)
+    np_assert_array_equal(gbm.predict(np.array(X) * 10), y, strict=True)


 def test_check_is_fitted():
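The `y = [1, 1]` to `y = [1.0, 1.0]` edit above is a consequence of the stricter assertion: the expected list is coerced to an array before comparison, and a list of Python ints becomes an integer-typed array, which can no longer match the `float64` output of `predict()`. A sketch of the failure mode (dtypes assume a typical 64-bit platform):

```python
import numpy as np

preds = np.ones(2, dtype=np.float64)  # LGBMRegressor.predict() returns float64

np.testing.assert_array_equal(preds, [1, 1])                   # ok: values match
np.testing.assert_array_equal(preds, [1.0, 1.0], strict=True)  # ok: float64 == float64
try:
    np.testing.assert_array_equal(preds, [1, 1], strict=True)  # int64 vs. float64
except AssertionError:
    print("strict=True rejects the integer-typed expected values")
```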
@@ -1638,7 +1639,7 @@ def test_getting_feature_names_in_np_input(estimator_class):
         model.fit(X, y, group=[X.shape[0]])
     else:
         model.fit(X, y)
-    np.testing.assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
+    np_assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]), strict=True)


 @pytest.mark.parametrize("estimator_class", estimator_classes)
@@ -1661,7 +1662,8 @@ def test_getting_feature_names_in_pd_input(estimator_class):
         model.fit(X, y, group=[X.shape[0]])
     else:
         model.fit(X, y)
-    np.testing.assert_array_equal(model.feature_names_in_, X.columns)
+    # strict=False due to dtype mismatch: '<U9' and 'object'
+    np_assert_array_equal(model.feature_names_in_, X.columns, strict=False)

     # Starting with scikit-learn 1.6 (https://github.com/scikit-learn/scikit-learn/pull/30149),
@@ -1741,7 +1743,7 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
     preds_1d = model_1d.predict(X)
     preds_2d = model_2d.predict(X)
-    np.testing.assert_array_equal(preds_1d, preds_2d)
+    np_assert_array_equal(preds_1d, preds_2d, strict=True)


 @pytest.mark.parametrize("use_weight", [True, False])