Merge branch 'master' into rocm3

548cec82 · Jeff Daily · 2f7bd8ef · 5dbfcdc4 · 548cec82 · 548cec82
Commit 548cec82 authored Oct 21, 2025 by Jeff Daily
15 changed files
--- a/src/treelearner/serial_tree_learner.h
+++ b/src/treelearner/serial_tree_learner.h
@@ -2,8 +2,8 @@
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifndef LIGHTGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
-#define LIGHTGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
+#ifndef LIGHTGBM_SRC_TREELEARNER_SERIAL_TREE_LEARNER_H_
+#define LIGHTGBM_SRC_TREELEARNER_SERIAL_TREE_LEARNER_H_

 #include <LightGBM/dataset.h>
 #include <LightGBM/tree.h>
@@ -247,4 +247,4 @@ inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leaf_idx) con
 }

 }  // namespace LightGBM
-#endif   // LightGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
+#endif   // LIGHTGBM_SRC_TREELEARNER_SERIAL_TREE_LEARNER_H_
--- a/src/treelearner/split_info.hpp
+++ b/src/treelearner/split_info.hpp
@@ -2,8 +2,8 @@
 * Copyright (c) 2016 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifndef LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_
-#define LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_
+#ifndef LIGHTGBM_SRC_TREELEARNER_SPLIT_INFO_HPP_
+#define LIGHTGBM_SRC_TREELEARNER_SPLIT_INFO_HPP_

 #include <LightGBM/meta.h>

@@ -291,4 +291,4 @@ struct LightSplitInfo {
 };

 }  // namespace LightGBM
-#endif   // LightGBM_TREELEARNER_SPLIT_INFO_HPP_
+#endif   // LIGHTGBM_SRC_TREELEARNER_SPLIT_INFO_HPP_
--- a/src/treelearner/tree_learner.cpp
+++ b/src/treelearner/tree_learner.cpp
@@ -4,6 +4,8 @@
 */
 #include <LightGBM/tree_learner.h>

+#include <string>
+
 #include "gpu_tree_learner.h"
 #include "linear_tree_learner.h"
 #include "parallel_tree_learner.h"

--- a/src/treelearner/voting_parallel_tree_learner.cpp
+++ b/src/treelearner/voting_parallel_tree_learner.cpp
@@ -4,7 +4,9 @@
 */
 #include <LightGBM/utils/common.h>

+#include <algorithm>
 #include <cstring>
+#include <functional>
 #include <tuple>
 #include <vector>

@@ -13,8 +15,7 @@
 namespace LightGBM {

 template <typename TREELEARNER_T>
-VotingParallelTreeLearner<TREELEARNER_T>::VotingParallelTreeLearner(const Config* config)
-  :TREELEARNER_T(config) {
+VotingParallelTreeLearner<TREELEARNER_T>::VotingParallelTreeLearner(const Config* config):TREELEARNER_T(config) {
  top_k_ = this->config_->top_k;
 }


--- a/swig/StringArray.hpp
+++ b/swig/StringArray.hpp
@@ -4,8 +4,8 @@
 *
 * Author: Alberto Ferreira
 */
-#ifndef LIGHTGBM_SWIG_STRING_ARRAY_H_
-#define LIGHTGBM_SWIG_STRING_ARRAY_H_
+#ifndef LIGHTGBM_SWIG_STRINGARRAY_HPP_
+#define LIGHTGBM_SWIG_STRINGARRAY_HPP_

 #include <algorithm>
 #include <new>
@@ -137,4 +137,4 @@ class StringArray {
    std::vector<char*> _array;
 };

-#endif  // LIGHTGBM_SWIG_STRING_ARRAY_H_
+#endif  // LIGHTGBM_SWIG_STRINGARRAY_HPP_
--- a/tests/cpp_tests/test_array_args.cpp
+++ b/tests/cpp_tests/test_array_args.cpp
@@ -8,6 +8,7 @@
 #include <LightGBM/utils/array_args.h>

 #include <random>
+#include <vector>

 using LightGBM::data_size_t;
 using LightGBM::score_t;

--- a/tests/cpp_tests/test_arrow.cpp
+++ b/tests/cpp_tests/test_arrow.cpp
@@ -10,6 +10,7 @@

 #include <cmath>
 #include <cstdlib>
+#include <vector>

 using LightGBM::ArrowChunkedArray;
 using LightGBM::ArrowTable;

--- a/tests/cpp_tests/test_byte_buffer.cpp
+++ b/tests/cpp_tests/test_byte_buffer.cpp
@@ -6,6 +6,7 @@
 #include <gtest/gtest.h>
 #include <LightGBM/utils/byte_buffer.h>

+#include <memory>
 #include <random>

 using LightGBM::ByteBuffer;

--- a/tests/cpp_tests/test_chunked_array.cpp
+++ b/tests/cpp_tests/test_chunked_array.cpp
@@ -5,6 +5,9 @@
 * Author: Alberto Ferreira
 */
 #include <gtest/gtest.h>
+
+#include <vector>
+
 #include "../include/LightGBM/utils/chunked_array.hpp"

 using LightGBM::ChunkedArray;

--- a/tests/cpp_tests/test_serialize.cpp
+++ b/tests/cpp_tests/test_serialize.cpp
@@ -11,6 +11,7 @@
 #include <LightGBM/dataset.h>

 #include <iostream>
+#include <string>

 using LightGBM::ByteBuffer;
 using LightGBM::Dataset;

--- a/tests/cpp_tests/test_single_row.cpp
+++ b/tests/cpp_tests/test_single_row.cpp
@@ -7,8 +7,10 @@
 #include <testutils.h>
 #include <LightGBM/c_api.h>

-#include <iostream>
+#include <algorithm>
 #include <fstream>
+#include <iostream>
+#include <vector>

 using LightGBM::TestUtils;


--- a/tests/cpp_tests/test_stream.cpp
+++ b/tests/cpp_tests/test_stream.cpp
@@ -10,6 +10,8 @@
 #include <LightGBM/dataset.h>

 #include <iostream>
+#include <string>
+#include <vector>

 using LightGBM::Dataset;
 using LightGBM::Log;

--- a/tests/cpp_tests/testutils.cpp
+++ b/tests/cpp_tests/testutils.cpp
@@ -3,437 +3,438 @@
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */

+#include <gtest/gtest.h>
 #include <testutils.h>
 #include <LightGBM/c_api.h>
 #include <LightGBM/utils/random.h>

-#include <gtest/gtest.h>
 #include <string>
 #include <thread>
 #include <utility>
+#include <vector>

 using LightGBM::Log;
 using LightGBM::Random;

 namespace LightGBM {

-  /*!
-  * Creates a Dataset from the internal repository examples.
-  */
-  int TestUtils::LoadDatasetFromExamples(const char* filename, const char* config, DatasetHandle* out) {
-    std::string fullPath("examples/");
-    fullPath += filename;
-    Log::Info("Debug sample data path: %s", fullPath.c_str());
-    return LGBM_DatasetCreateFromFile(
-      fullPath.c_str(),
-      config,
-      nullptr,
-      out);
-  }
+/*!
+* Creates a Dataset from the internal repository examples.
+*/
+int TestUtils::LoadDatasetFromExamples(const char* filename, const char* config, DatasetHandle* out) {
+  std::string fullPath("examples/");
+  fullPath += filename;
+  Log::Info("Debug sample data path: %s", fullPath.c_str());
+  return LGBM_DatasetCreateFromFile(
+    fullPath.c_str(),
+    config,
+    nullptr,
+    out);
+}

-  /*!
-  * Creates fake data in the passed vectors.
-  */
-  void TestUtils::CreateRandomDenseData(
-    int32_t nrows,
-    int32_t ncols,
-    int32_t nclasses,
-    std::vector<double>* features,
-    std::vector<float>* labels,
-    std::vector<float>* weights,
-    std::vector<double>* init_scores,
-    std::vector<int32_t>* groups) {
-    Random rand(42);
-    features->reserve(nrows * ncols);
-
-    for (int32_t row = 0; row < nrows; row++) {
-      for (int32_t col = 0; col < ncols; col++) {
-        features->push_back(rand.NextFloat());
-      }
+/*!
+* Creates fake dense feature data (nrows * ncols values) plus metadata in the passed vectors.
+*/
+void TestUtils::CreateRandomDenseData(
+  int32_t nrows,
+  int32_t ncols,
+  int32_t nclasses,
+  std::vector<double>* features,
+  std::vector<float>* labels,
+  std::vector<float>* weights,
+  std::vector<double>* init_scores,
+  std::vector<int32_t>* groups) {
+  Random rand(42);
+  features->reserve(nrows * ncols);
+
+  for (int32_t row = 0; row < nrows; row++) {
+    for (int32_t col = 0; col < ncols; col++) {
+      features->push_back(rand.NextFloat());
    }
-
-    CreateRandomMetadata(nrows, nclasses, labels, weights, init_scores, groups);
  }

-  /*!
-  * Creates fake data in the passed vectors.
-  */
-  void TestUtils::CreateRandomSparseData(
-    int32_t nrows,
-    int32_t ncols,
-    int32_t nclasses,
-    float sparse_percent,
-    std::vector<int32_t>* indptr,
-    std::vector<int32_t>* indices,
-    std::vector<double>* values,
-    std::vector<float>* labels,
-    std::vector<float>* weights,
-    std::vector<double>* init_scores,
-    std::vector<int32_t>* groups) {
-    Random rand(42);
-    indptr->reserve(static_cast<int32_t>(nrows + 1));
-    indices->reserve(static_cast<int32_t>(sparse_percent * nrows * ncols));
-    values->reserve(static_cast<int32_t>(sparse_percent * nrows * ncols));
-
-    indptr->push_back(0);
-    for (int32_t row = 0; row < nrows; row++) {
-      for (int32_t col = 0; col < ncols; col++) {
-        float rnd = rand.NextFloat();
-        if (rnd < sparse_percent) {
-          indices->push_back(col);
-          values->push_back(rand.NextFloat());
-        }
+  CreateRandomMetadata(nrows, nclasses, labels, weights, init_scores, groups);
+}
+
+/*!
+* Creates fake sparse data (indptr/indices/values) plus metadata in the passed vectors.
+*/
+void TestUtils::CreateRandomSparseData(
+  int32_t nrows,
+  int32_t ncols,
+  int32_t nclasses,
+  float sparse_percent,
+  std::vector<int32_t>* indptr,
+  std::vector<int32_t>* indices,
+  std::vector<double>* values,
+  std::vector<float>* labels,
+  std::vector<float>* weights,
+  std::vector<double>* init_scores,
+  std::vector<int32_t>* groups) {
+  Random rand(42);
+  indptr->reserve(static_cast<int32_t>(nrows + 1));
+  indices->reserve(static_cast<int32_t>(sparse_percent * nrows * ncols));
+  values->reserve(static_cast<int32_t>(sparse_percent * nrows * ncols));
+
+  indptr->push_back(0);
+  for (int32_t row = 0; row < nrows; row++) {
+    for (int32_t col = 0; col < ncols; col++) {
+      float rnd = rand.NextFloat();
+      if (rnd < sparse_percent) {
+        indices->push_back(col);
+        values->push_back(rand.NextFloat());
      }
-      indptr->push_back(static_cast<int32_t>(indices->size() - 1));
    }
+    indptr->push_back(static_cast<int32_t>(indices->size() - 1));
+  }
+
+  CreateRandomMetadata(nrows, nclasses, labels, weights, init_scores, groups);
+}

-    CreateRandomMetadata(nrows, nclasses, labels, weights, init_scores, groups);
+/*!
+* Creates fake metadata (labels and, when non-null, weights/init_scores/groups) in the passed vectors.
+*/
+void TestUtils::CreateRandomMetadata(int32_t nrows,
+  int32_t nclasses,
+  std::vector<float>* labels,
+  std::vector<float>* weights,
+  std::vector<double>* init_scores,
+  std::vector<int32_t>* groups) {
+  Random rand(42);
+  labels->reserve(nrows);
+  if (weights) {
+    weights->reserve(nrows);
+  }
+  if (init_scores) {
+    init_scores->reserve(nrows * nclasses);
+  }
+  if (groups) {
+    groups->reserve(nrows);
  }

-  /*!
-  * Creates fake data in the passed vectors.
-  */
-  void TestUtils::CreateRandomMetadata(int32_t nrows,
-    int32_t nclasses,
-    std::vector<float>* labels,
-    std::vector<float>* weights,
-    std::vector<double>* init_scores,
-    std::vector<int32_t>* groups) {
-    Random rand(42);
-    labels->reserve(nrows);
+  int32_t group = 0;
+
+  for (int32_t row = 0; row < nrows; row++) {
+    labels->push_back(rand.NextFloat());
    if (weights) {
-      weights->reserve(nrows);
+      weights->push_back(rand.NextFloat());
    }
    if (init_scores) {
-      init_scores->reserve(nrows * nclasses);
+      for (int32_t i = 0; i < nclasses; i++) {
+        init_scores->push_back(rand.NextFloat());
+      }
    }
    if (groups) {
-      groups->reserve(nrows);
+      if (rand.NextFloat() > 0.95) {
+        group++;
+      }
+      groups->push_back(group);
    }
+  }
+}
+
+void TestUtils::StreamDenseDataset(DatasetHandle dataset_handle,
+  int32_t nrows,
+  int32_t ncols,
+  int32_t nclasses,
+  int32_t batch_count,
+  const std::vector<double>* features,
+  const std::vector<float>* labels,
+  const std::vector<float>* weights,
+  const std::vector<double>* init_scores,
+  const std::vector<int32_t>* groups) {
+  int result = LGBM_DatasetSetWaitForManualFinish(dataset_handle, 1);
+  EXPECT_EQ(0, result) << "LGBM_DatasetSetWaitForManualFinish result code: " << result;
+
+  Log::Info("     Begin StreamDenseDataset");
+  if ((nrows % batch_count) != 0) {
+    Log::Fatal("This utility method only handles nrows that are a multiple of batch_count");
+  }

-    int32_t group = 0;
+  const double* features_ptr = features->data();
+  const float* labels_ptr = labels->data();
+  const float* weights_ptr = nullptr;
+  if (weights) {
+    weights_ptr = weights->data();
+  }

-    for (int32_t row = 0; row < nrows; row++) {
-      labels->push_back(rand.NextFloat());
-      if (weights) {
-        weights->push_back(rand.NextFloat());
-      }
-      if (init_scores) {
-        for (int32_t i = 0; i < nclasses; i++) {
-          init_scores->push_back(rand.NextFloat());
-        }
-      }
-      if (groups) {
-        if (rand.NextFloat() > 0.95) {
-          group++;
-        }
-        groups->push_back(group);
-      }
-    }
+  // Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
+  std::vector<double> init_score_batch;
+  const double* init_scores_ptr = nullptr;
+  if (init_scores) {
+    init_score_batch.reserve(nclasses * batch_count);
+    init_scores_ptr = init_score_batch.data();
  }

-  void TestUtils::StreamDenseDataset(DatasetHandle dataset_handle,
-    int32_t nrows,
-    int32_t ncols,
-    int32_t nclasses,
-    int32_t batch_count,
-    const std::vector<double>* features,
-    const std::vector<float>* labels,
-    const std::vector<float>* weights,
-    const std::vector<double>* init_scores,
-    const std::vector<int32_t>* groups) {
-    int result = LGBM_DatasetSetWaitForManualFinish(dataset_handle, 1);
-    EXPECT_EQ(0, result) << "LGBM_DatasetSetWaitForManualFinish result code: " << result;
-
-    Log::Info("     Begin StreamDenseDataset");
-    if ((nrows % batch_count) != 0) {
-      Log::Fatal("This utility method only handles nrows that are a multiple of batch_count");
-    }
+  const int32_t* groups_ptr = nullptr;
+  if (groups) {
+    groups_ptr = groups->data();
+  }

-    const double* features_ptr = features->data();
-    const float* labels_ptr = labels->data();
-    const float* weights_ptr = nullptr;
-    if (weights) {
-      weights_ptr = weights->data();
-    }
+  auto start_time = std::chrono::steady_clock::now();

-    // Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
-    std::vector<double> init_score_batch;
-    const double* init_scores_ptr = nullptr;
+  for (int32_t i = 0; i < nrows; i += batch_count) {
    if (init_scores) {
-      init_score_batch.reserve(nclasses * batch_count);
-      init_scores_ptr = init_score_batch.data();
+      init_scores_ptr = CreateInitScoreBatch(&init_score_batch, i, nrows, nclasses, batch_count, init_scores);
    }

-    const int32_t* groups_ptr = nullptr;
-    if (groups) {
-      groups_ptr = groups->data();
+    result = LGBM_DatasetPushRowsWithMetadata(dataset_handle,
+                                              features_ptr,
+                                              1,
+                                              batch_count,
+                                              ncols,
+                                              i,
+                                              labels_ptr,
+                                              weights_ptr,
+                                              init_scores_ptr,
+                                              groups_ptr,
+                                              0);
+    EXPECT_EQ(0, result) << "LGBM_DatasetPushRowsWithMetadata result code: " << result;
+    if (result != 0) {
+      FAIL() << "LGBM_DatasetPushRowsWithMetadata failed";  // This forces an immediate failure, which EXPECT_EQ does not
    }

-    auto start_time = std::chrono::steady_clock::now();
-
-    for (int32_t i = 0; i < nrows; i += batch_count) {
-      if (init_scores) {
-        init_scores_ptr = CreateInitScoreBatch(&init_score_batch, i, nrows, nclasses, batch_count, init_scores);
-      }
-
-      result = LGBM_DatasetPushRowsWithMetadata(dataset_handle,
-                                                features_ptr,
-                                                1,
-                                                batch_count,
-                                                ncols,
-                                                i,
-                                                labels_ptr,
-                                                weights_ptr,
-                                                init_scores_ptr,
-                                                groups_ptr,
-                                                0);
-      EXPECT_EQ(0, result) << "LGBM_DatasetPushRowsWithMetadata result code: " << result;
-      if (result != 0) {
-        FAIL() << "LGBM_DatasetPushRowsWithMetadata failed";  // This forces an immediate failure, which EXPECT_EQ does not
-      }
-
-      features_ptr += batch_count * ncols;
-      labels_ptr += batch_count;
-      if (weights_ptr) {
-        weights_ptr += batch_count;
-      }
-      if (groups_ptr) {
-        groups_ptr += batch_count;
-      }
+    features_ptr += batch_count * ncols;
+    labels_ptr += batch_count;
+    if (weights_ptr) {
+      weights_ptr += batch_count;
+    }
+    if (groups_ptr) {
+      groups_ptr += batch_count;
    }
+  }

-    auto cur_time = std::chrono::steady_clock::now();
-    Log::Info(" Time: %d", cur_time - start_time);
+  auto cur_time = std::chrono::steady_clock::now();
+  Log::Info(" Time: %d", cur_time - start_time);
+}
+
+void TestUtils::StreamSparseDataset(DatasetHandle dataset_handle,
+                                    int32_t nrows,
+                                    int32_t nclasses,
+                                    int32_t batch_count,
+                                    const std::vector<int32_t>* indptr,
+                                    const std::vector<int32_t>* indices,
+                                    const std::vector<double>* values,
+                                    const std::vector<float>* labels,
+                                    const std::vector<float>* weights,
+                                    const std::vector<double>* init_scores,
+                                    const std::vector<int32_t>* groups) {
+  int result = LGBM_DatasetSetWaitForManualFinish(dataset_handle, 1);
+  EXPECT_EQ(0, result) << "LGBM_DatasetSetWaitForManualFinish result code: " << result;
+
+  Log::Info("     Begin StreamSparseDataset");
+  if ((nrows % batch_count) != 0) {
+    Log::Fatal("This utility method only handles nrows that are a multiple of batch_count");
  }

-  void TestUtils::StreamSparseDataset(DatasetHandle dataset_handle,
-                                      int32_t nrows,
-                                      int32_t nclasses,
-                                      int32_t batch_count,
-                                      const std::vector<int32_t>* indptr,
-                                      const std::vector<int32_t>* indices,
-                                      const std::vector<double>* values,
-                                      const std::vector<float>* labels,
-                                      const std::vector<float>* weights,
-                                      const std::vector<double>* init_scores,
-                                      const std::vector<int32_t>* groups) {
-    int result = LGBM_DatasetSetWaitForManualFinish(dataset_handle, 1);
-    EXPECT_EQ(0, result) << "LGBM_DatasetSetWaitForManualFinish result code: " << result;
-
-    Log::Info("     Begin StreamSparseDataset");
-    if ((nrows % batch_count) != 0) {
-      Log::Fatal("This utility method only handles nrows that are a multiple of batch_count");
-    }
+  const int32_t* indptr_ptr = indptr->data();
+  const int32_t* indices_ptr = indices->data();
+  const double* values_ptr = values->data();
+  const float* labels_ptr = labels->data();
+  const float* weights_ptr = nullptr;
+  if (weights) {
+    weights_ptr = weights->data();
+  }

-    const int32_t* indptr_ptr = indptr->data();
-    const int32_t* indices_ptr = indices->data();
-    const double* values_ptr = values->data();
-    const float* labels_ptr = labels->data();
-    const float* weights_ptr = nullptr;
-    if (weights) {
-      weights_ptr = weights->data();
-    }
+  const int32_t* groups_ptr = nullptr;
+  if (groups) {
+    groups_ptr = groups->data();
+  }

-    const int32_t* groups_ptr = nullptr;
-    if (groups) {
-      groups_ptr = groups->data();
-    }
+  auto start_time = std::chrono::steady_clock::now();

-    auto start_time = std::chrono::steady_clock::now();
+  // Use multiple threads to test concurrency
+  int thread_count = 2;
+  if (nrows == batch_count) {
+    thread_count = 1;  // If pushing all rows in 1 batch, we cannot have multiple threads
+  }
+  std::vector<std::thread> threads;
+  threads.reserve(thread_count);
+  for (int32_t t = 0; t < thread_count; ++t) {
+    std::thread th(TestUtils::PushSparseBatch,
+                    dataset_handle,
+                    nrows,
+                    nclasses,
+                    batch_count,
+                    indptr,
+                    indptr_ptr,
+                    indices_ptr,
+                    values_ptr,
+                    labels_ptr,
+                    weights_ptr,
+                    init_scores,
+                    groups_ptr,
+                    thread_count,
+                    t);
+    threads.push_back(std::move(th));
+  }

-    // Use multiple threads to test concurrency
-    int thread_count = 2;
-    if (nrows == batch_count) {
-      thread_count = 1;  // If pushing all rows in 1 batch, we cannot have multiple threads
-    }
-    std::vector<std::thread> threads;
-    threads.reserve(thread_count);
-    for (int32_t t = 0; t < thread_count; ++t) {
-      std::thread th(TestUtils::PushSparseBatch,
-                     dataset_handle,
-                     nrows,
-                     nclasses,
-                     batch_count,
-                     indptr,
-                     indptr_ptr,
-                     indices_ptr,
-                     values_ptr,
-                     labels_ptr,
-                     weights_ptr,
-                     init_scores,
-                     groups_ptr,
-                     thread_count,
-                     t);
-      threads.push_back(std::move(th));
-    }
+  for (auto& t : threads) t.join();

-    for (auto& t : threads) t.join();
+  auto cur_time = std::chrono::steady_clock::now();
+  Log::Info(" Time: %d", cur_time - start_time);
+}

-    auto cur_time = std::chrono::steady_clock::now();
-    Log::Info(" Time: %d", cur_time - start_time);
+/*!
+  * Pushes data from 1 thread into a Dataset based on thread_id and nrows.
+  * e.g. with 100 rows and 2 threads, thread 0 will push rows 0-49, and thread 1 will push rows 50-99.
+  * Note that rows are still pushed in microbatches within their range.
+  */
+void TestUtils::PushSparseBatch(DatasetHandle dataset_handle,
+                                int32_t nrows,
+                                int32_t nclasses,
+                                int32_t batch_count,
+                                const std::vector<int32_t>* indptr,
+                                const int32_t* indptr_ptr,
+                                const int32_t* indices_ptr,
+                                const double* values_ptr,
+                                const float* labels_ptr,
+                                const float* weights_ptr,
+                                const std::vector<double>* init_scores,
+                                const int32_t* groups_ptr,
+                                int32_t thread_count,
+                                int32_t thread_id) {
+  int32_t threadChunkSize = nrows / thread_count;
+  int32_t startIndex = threadChunkSize * thread_id;
+  int32_t stopIndex = startIndex + threadChunkSize;
+
+  indptr_ptr += threadChunkSize * thread_id;
+  labels_ptr += threadChunkSize * thread_id;
+  if (weights_ptr) {
+    weights_ptr += threadChunkSize * thread_id;
+  }
+  if (groups_ptr) {
+    groups_ptr += threadChunkSize * thread_id;
  }

-  /*!
-   * Pushes data from 1 thread into a Dataset based on thread_id and nrows.
-   * e.g. with 100 rows, thread 0 will push rows 0-49, and thread 2 will push rows 50-99.
-   * Note that rows are still pushed in microbatches within their range.
-   */
-  void TestUtils::PushSparseBatch(DatasetHandle dataset_handle,
-                                  int32_t nrows,
-                                  int32_t nclasses,
-                                  int32_t batch_count,
-                                  const std::vector<int32_t>* indptr,
-                                  const int32_t* indptr_ptr,
-                                  const int32_t* indices_ptr,
-                                  const double* values_ptr,
-                                  const float* labels_ptr,
-                                  const float* weights_ptr,
-                                  const std::vector<double>* init_scores,
-                                  const int32_t* groups_ptr,
-                                  int32_t thread_count,
-                                  int32_t thread_id) {
-    int32_t threadChunkSize = nrows / thread_count;
-    int32_t startIndex = threadChunkSize * thread_id;
-    int32_t stopIndex = startIndex + threadChunkSize;
-
-    indptr_ptr += threadChunkSize * thread_id;
-    labels_ptr += threadChunkSize * thread_id;
+  for (int32_t i = startIndex; i < stopIndex; i += batch_count) {
+    // Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
+    std::vector<double> init_score_batch;
+    const double* init_scores_ptr = nullptr;
+    if (init_scores) {
+      init_score_batch.reserve(nclasses * batch_count);
+      init_scores_ptr = CreateInitScoreBatch(&init_score_batch, i, nrows, nclasses, batch_count, init_scores);
+    }
+
+    int32_t nelem = indptr->at(i + batch_count - 1) - indptr->at(i);
+
+    int result = LGBM_DatasetPushRowsByCSRWithMetadata(dataset_handle,
+                                                        indptr_ptr,
+                                                        2,
+                                                        indices_ptr,
+                                                        values_ptr,
+                                                        1,
+                                                        batch_count + 1,
+                                                        nelem,
+                                                        i,
+                                                        labels_ptr,
+                                                        weights_ptr,
+                                                        init_scores_ptr,
+                                                        groups_ptr,
+                                                        thread_id);
+    EXPECT_EQ(0, result) << "LGBM_DatasetPushRowsByCSRWithMetadata result code: " << result;
+    if (result != 0) {
+      FAIL() << "LGBM_DatasetPushRowsByCSRWithMetadata failed";  // This forces an immediate failure, which EXPECT_EQ does not
+    }
+
+    indptr_ptr += batch_count;
+    labels_ptr += batch_count;
    if (weights_ptr) {
-      weights_ptr += threadChunkSize * thread_id;
+      weights_ptr += batch_count;
    }
    if (groups_ptr) {
-      groups_ptr += threadChunkSize * thread_id;
+      groups_ptr += batch_count;
    }
-
-    for (int32_t i = startIndex; i < stopIndex; i += batch_count) {
-      // Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
-      std::vector<double> init_score_batch;
-      const double* init_scores_ptr = nullptr;
-      if (init_scores) {
-        init_score_batch.reserve(nclasses * batch_count);
-        init_scores_ptr = CreateInitScoreBatch(&init_score_batch, i, nrows, nclasses, batch_count, init_scores);
-      }
-
-      int32_t nelem = indptr->at(i + batch_count - 1) - indptr->at(i);
-
-      int result = LGBM_DatasetPushRowsByCSRWithMetadata(dataset_handle,
-                                                         indptr_ptr,
-                                                         2,
-                                                         indices_ptr,
-                                                         values_ptr,
-                                                         1,
-                                                         batch_count + 1,
-                                                         nelem,
-                                                         i,
-                                                         labels_ptr,
-                                                         weights_ptr,
-                                                         init_scores_ptr,
-                                                         groups_ptr,
-                                                         thread_id);
-      EXPECT_EQ(0, result) << "LGBM_DatasetPushRowsByCSRWithMetadata result code: " << result;
-      if (result != 0) {
-        FAIL() << "LGBM_DatasetPushRowsByCSRWithMetadata failed";  // This forces an immediate failure, which EXPECT_EQ does not
-      }
-
-      indptr_ptr += batch_count;
-      labels_ptr += batch_count;
-      if (weights_ptr) {
-        weights_ptr += batch_count;
-      }
-      if (groups_ptr) {
-        groups_ptr += batch_count;
-      }
+  }
+}
+
+
+void TestUtils::AssertMetadata(const Metadata* metadata,
+  const std::vector<float>* ref_labels,
+  const std::vector<float>* ref_weights,
+  const std::vector<double>* ref_init_scores,
+  const std::vector<int32_t>* ref_groups) {
+  const float* labels = metadata->label();
+  auto nTotal = static_cast<int32_t>(ref_labels->size());
+  for (auto i = 0; i < nTotal; i++) {
+    EXPECT_EQ(ref_labels->at(i), labels[i]) << "Inserted data: " << ref_labels->at(i) << " at " << i;
+    if (ref_labels->at(i) != labels[i]) {
+      FAIL() << "Mismatched labels";  // This forces an immediate failure, which EXPECT_EQ does not
    }
  }

-
-  void TestUtils::AssertMetadata(const Metadata* metadata,
-    const std::vector<float>* ref_labels,
-    const std::vector<float>* ref_weights,
-    const std::vector<double>* ref_init_scores,
-    const std::vector<int32_t>* ref_groups) {
-    const float* labels = metadata->label();
-    auto nTotal = static_cast<int32_t>(ref_labels->size());
+  const float* weights = metadata->weights();
+  if (weights) {
+    if (!ref_weights) {
+      FAIL() << "Expected null weights";
+    }
    for (auto i = 0; i < nTotal; i++) {
-      EXPECT_EQ(ref_labels->at(i), labels[i]) << "Inserted data: " << ref_labels->at(i) << " at " << i;
-      if (ref_labels->at(i) != labels[i]) {
-        FAIL() << "Mismatched labels";  // This forces an immediate failure, which EXPECT_EQ does not
+      EXPECT_EQ(ref_weights->at(i), weights[i]) << "Inserted data: " << ref_weights->at(i);
+      if (ref_weights->at(i) != weights[i]) {
+        FAIL() << "Mismatched weights";  // This forces an immediate failure, which EXPECT_EQ does not
      }
    }
+  } else if (ref_weights) {
+    FAIL() << "Expected non-null weights";
+  }

-    const float* weights = metadata->weights();
-    if (weights) {
-      if (!ref_weights) {
-        FAIL() << "Expected null weights";
-      }
-      for (auto i = 0; i < nTotal; i++) {
-        EXPECT_EQ(ref_weights->at(i), weights[i]) << "Inserted data: " << ref_weights->at(i);
-        if (ref_weights->at(i) != weights[i]) {
-          FAIL() << "Mismatched weights";  // This forces an immediate failure, which EXPECT_EQ does not
-        }
-      }
-    } else if (ref_weights) {
-      FAIL() << "Expected non-null weights";
+  const double* init_scores = metadata->init_score();
+  if (init_scores) {
+    if (!ref_init_scores) {
+      FAIL() << "Expected null init_scores";
    }
-
-    const double* init_scores = metadata->init_score();
-    if (init_scores) {
-      if (!ref_init_scores) {
-        FAIL() << "Expected null init_scores";
-      }
-      for (size_t i = 0; i < ref_init_scores->size(); i++) {
-        EXPECT_EQ(ref_init_scores->at(i), init_scores[i]) << "Inserted data: " << ref_init_scores->at(i) << " Index: " << i;
-        if (ref_init_scores->at(i) != init_scores[i]) {
-          FAIL() << "Mismatched init_scores";  // This forces an immediate failure, which EXPECT_EQ does not
-        }
+    for (size_t i = 0; i < ref_init_scores->size(); i++) {
+      EXPECT_EQ(ref_init_scores->at(i), init_scores[i]) << "Inserted data: " << ref_init_scores->at(i) << " Index: " << i;
+      if (ref_init_scores->at(i) != init_scores[i]) {
+        FAIL() << "Mismatched init_scores";  // This forces an immediate failure, which EXPECT_EQ does not
      }
-    } else if (ref_init_scores) {
-      FAIL() << "Expected non-null init_scores";
    }
+  } else if (ref_init_scores) {
+    FAIL() << "Expected non-null init_scores";
+  }

-    const int32_t* query_boundaries = metadata->query_boundaries();
-    if (query_boundaries) {
-      if (!ref_groups) {
-        FAIL() << "Expected null query_boundaries";
-      }
-      // Calculate expected boundaries
-      std::vector<int32_t> ref_query_boundaries;
-      ref_query_boundaries.push_back(0);
-      int group_val = ref_groups->at(0);
-      for (auto i = 1; i < nTotal; i++) {
-        if (ref_groups->at(i) != group_val) {
-          ref_query_boundaries.push_back(i);
-          group_val = ref_groups->at(i);
-        }
+  const int32_t* query_boundaries = metadata->query_boundaries();
+  if (query_boundaries) {
+    if (!ref_groups) {
+      FAIL() << "Expected null query_boundaries";
+    }
+    // Calculate expected boundaries
+    std::vector<int32_t> ref_query_boundaries;
+    ref_query_boundaries.push_back(0);
+    int group_val = ref_groups->at(0);
+    for (auto i = 1; i < nTotal; i++) {
+      if (ref_groups->at(i) != group_val) {
+        ref_query_boundaries.push_back(i);
+        group_val = ref_groups->at(i);
      }
-      ref_query_boundaries.push_back(nTotal);
+    }
+    ref_query_boundaries.push_back(nTotal);

-      for (size_t i = 0; i < ref_query_boundaries.size(); i++) {
-        EXPECT_EQ(ref_query_boundaries[i], query_boundaries[i]) << "Inserted data: " << ref_query_boundaries[i];
-        if (ref_query_boundaries[i] != query_boundaries[i]) {
-          FAIL() << "Mismatched query_boundaries";  // This forces an immediate failure, which EXPECT_EQ does not
-        }
+    for (size_t i = 0; i < ref_query_boundaries.size(); i++) {
+      EXPECT_EQ(ref_query_boundaries[i], query_boundaries[i]) << "Inserted data: " << ref_query_boundaries[i];
+      if (ref_query_boundaries[i] != query_boundaries[i]) {
+        FAIL() << "Mismatched query_boundaries";  // This forces an immediate failure, which EXPECT_EQ does not
      }
-    } else if (ref_groups) {
-      FAIL() << "Expected non-null query_boundaries";
    }
+  } else if (ref_groups) {
+    FAIL() << "Expected non-null query_boundaries";
  }
-
-  const double* TestUtils::CreateInitScoreBatch(std::vector<double>* init_score_batch,
-    int32_t index,
-    int32_t nrows,
-    int32_t nclasses,
-    int32_t batch_count,
-    const std::vector<double>* original_init_scores) {
-    // Extract a set of rows from the column-based format (still maintaining column based format)
-    init_score_batch->clear();
-    for (int32_t c = 0; c < nclasses; c++) {
-      for (int32_t row = index; row < index + batch_count; row++) {
-        init_score_batch->push_back(original_init_scores->at(row + nrows * c));
-      }
+}
+
+const double* TestUtils::CreateInitScoreBatch(std::vector<double>* init_score_batch,
+  int32_t index,
+  int32_t nrows,
+  int32_t nclasses,
+  int32_t batch_count,
+  const std::vector<double>* original_init_scores) {
+  // Extract a set of rows from the column-based format (still maintaining column based format)
+  init_score_batch->clear();
+  for (int32_t c = 0; c < nclasses; c++) {
+    for (int32_t row = index; row < index + batch_count; row++) {
+      init_score_batch->push_back(original_init_scores->at(row + nrows * c));
    }
-    return init_score_batch->data();
  }
+  return init_score_batch->data();
+}

 }  // namespace LightGBM
--- a/tests/cpp_tests/testutils.h
+++ b/tests/cpp_tests/testutils.h
@@ -2,8 +2,8 @@
 * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
-#ifndef LIGHTGBM_TESTUTILS_H_
-#define LIGHTGBM_TESTUTILS_H_
+#ifndef LIGHTGBM_TESTS_CPP_TESTS_TESTUTILS_H_
+#define LIGHTGBM_TESTS_CPP_TESTS_TESTUTILS_H_

 #include <LightGBM/c_api.h>
 #include <LightGBM/dataset.h>
@@ -121,4 +121,4 @@ class TestUtils {
    int32_t thread_id);
 };
 }  // namespace LightGBM
-#endif  // LIGHTGBM_TESTUTILS_H_
+#endif  // LIGHTGBM_TESTS_CPP_TESTS_TESTUTILS_H_
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -16,7 +16,14 @@ import psutil
 import pytest
 from scipy.sparse import csr_matrix, isspmatrix_csc, isspmatrix_csr
 from sklearn.datasets import load_svmlight_file, make_blobs, make_classification, make_multilabel_classification
-from sklearn.metrics import average_precision_score, log_loss, mean_absolute_error, mean_squared_error, roc_auc_score
+from sklearn.metrics import (
+    average_precision_score,
+    log_loss,
+    mean_absolute_error,
+    mean_squared_error,
+    r2_score,
+    roc_auc_score,
+)
 from sklearn.model_selection import GroupKFold, TimeSeriesSplit, train_test_split

 import lightgbm as lgb
@@ -4049,6 +4056,29 @@ def test_average_precision_metric():
    assert res["training"]["average_precision"][-1] == pytest.approx(1)


+def test_r2_metric():
+    # LightGBM's built-in "r2" metric must agree with sklearn's r2_score on the same predictions
+    X, y = make_synthetic_regression()
+    params = {"objective": "regression", "metric": "r2", "verbose": -1}
+    res = {}  # filled in by the record_evaluation callback
+    train_data = lgb.Dataset(X, label=y)
+    est = lgb.train(
+        params, train_data, num_boost_round=1, valid_sets=[train_data], callbacks=[lgb.record_evaluation(res)]
+    )
+    r2 = res["training"]["r2"][-1]  # last value recorded for the training set
+    pred = est.predict(X)
+    sklearn_r2 = r2_score(y, pred)
+    assert r2 == pytest.approx(sklearn_r2)
+    assert r2 != 0  # NOTE(review): presumably rules out a degenerate score; same for the != 1 check below
+    assert r2 != 1
+    # test that R2 is 1 when y has no variance and the model predicts perfectly
+    y = y.copy()  # work on a copy so the original y is not overwritten by the assignment below
+    y[:] = 1
+    lgb_X = lgb.Dataset(X, label=y)  # same features, constant label
+    lgb.train(params, lgb_X, num_boost_round=1, valid_sets=[lgb_X], callbacks=[lgb.record_evaluation(res)])
+    assert res["training"]["r2"][-1] == pytest.approx(1)
+
+
 def test_reset_params_works_with_metric_num_class_and_boosting():
    X, y = load_breast_cancer(return_X_y=True)
    dataset_params = {"max_bin": 150}