Description: Tree based algorithms can be improved by introducing boosting frameworks.
'LightGBM' is one such framework, based on Ke, Guolin et al. (2017) <https://proceedings.neurips.cc/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html>.
This package offers an R interface to work with it.
It is designed to be distributed and efficient with the following advantages:
...
To change the compiler used when installing the CRAN package, you can create a file `~/.R/Makevars` which overrides `CC` (`C` compiler) and `CXX` (`C++` compiler).
For example, to use `gcc-14` instead of `clang` on macOS, you could use something like the following:
```make
# ~/.R/Makevars
CC=gcc-14
CC17=gcc-14
CXX=g++-14
CXX17=g++-14
```
To check the values R is using, run the following:
```shell
R CMD config --all
```
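You can also query a single variable, for example:
```shell
# show just the C++17 compiler R will use
R CMD config CXX17
```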
### Installing from Source with CMake <a id="install"></a>
You need to install git and [CMake](https://cmake.org/) first.
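With those installed, a sketch of the typical flow is to clone the repository (with its submodules) and run the build script from the repository root:
```shell
# clone LightGBM with its submodules, then build and install the R package
git clone --recursive https://github.com/microsoft/LightGBM
cd LightGBM
Rscript build_r.R
```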
...
@@ -215,7 +221,7 @@ These packages do not require compilation, so they will be faster and easier to
...
@@ -215,7 +221,7 @@ These packages do not require compilation, so they will be faster and easier to
CRAN does not prepare precompiled binaries for Linux, and as of this writing neither does this project.
### Installing from a Pre-compiled lib_lightgbm <a id="lib_lightgbm"></a>
Previous versions of LightGBM offered the ability to first compile the C++ library (`lib_lightgbm.{dll,dylib,so}`) and then build an R-package that wraps it.
...
3. Edit `src/Makevars.in`.
Alternatively, GitHub Actions can re-generate this file for you:
1. Navigate to https://github.com/microsoft/LightGBM/actions/workflows/r_configure.yml
2. Click "Run workflow" (drop-down)
3. Enter the branch from the pull request for the `pr-branch` input
4. Click "Run workflow" (button)
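If you prefer the command line, the same workflow dispatch can be done with the GitHub CLI (a sketch, assuming `gh` is installed and authenticated; the branch name is illustrative):
```shell
# dispatch the r_configure workflow for a pull request branch
gh workflow run r_configure.yml \
    --repo microsoft/LightGBM \
    -f pr-branch=my-feature-branch
```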
**Configuring for Windows**
...
| cat
```
These tests can also be triggered on a pull request branch, using GitHub Actions:
1. Navigate to https://github.com/microsoft/LightGBM/actions/workflows/r_valgrind.yml
2. Click "Run workflow" (drop-down)
3. Enter the branch from the pull request for the `pr-branch` input
4. Enter the pull request ID for the `pr-number` input
5. Click "Run workflow" (button)
...
The paper citation has been adjusted as requested. We were using 'glmnet' as a guide on how to include the URL but maybe they are no longer in compliance with CRAN policies: https://github.com/cran/glmnet/blob/b1a4b50de01e0cd24343959d7cf86452bac17b26/DESCRIPTION
All authors from the original LightGBM paper have been added to Authors@R as `"aut"`. We have also added Microsoft and DropBox, Inc. as `"cph"` (copyright holders). These roles were chosen based on the guidance in https://journal.r-project.org/archive/2012/RJ-2012-009/index.html.
lightgbm's code does use `<<-`, but it does not modify the global environment. The uses of `<<-` in R/lgb.interprete.R and R/callback.R are in functions which are called in an environment created by the lightgbm functions that call them, and this operator is used to reach one level up into the calling function's environment.
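As a standalone sketch of that mechanism (not lightgbm's actual code), `<<-` walks up the enclosing environments and assigns to the first match it finds, leaving the global environment untouched:
```shell
Rscript -e '
outer <- function() {
  total <- 0
  bump <- function() {
    total <<- total + 1  # assigns to the total defined in outer(), not a global
  }
  bump()
  bump()
  total
}
print(outer())                                                 # 2
print(exists("total", envir = globalenv(), inherits = FALSE))  # FALSE
'
```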
We chose to wrap our examples in `\donttest{}` because we found, through testing on https://r-hub.github.io/rhub/ and in our own continuous integration environments, that their run time varies a lot between platforms, and we cannot guarantee that all examples will run in under 5 seconds. We intentionally chose `\donttest{}` over `\dontrun{}` because this item in the R 4.0.0 changelog (https://cran.r-project.org/doc/manuals/r-devel/NEWS.html) seems to indicate that \donttest will no longer be ignored by CRAN's automated checks:
> "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value."
> "`R CMD check --as-cran` now runs \donttest examples (which are run by example()) instead of instructing the tester to do so. This can be temporarily circumvented during development by setting environment variable `_R_CHECK_DONTTEST_EXAMPLES_` to a false value."
...
COPYRIGHT HOLDER: Microsoft Corporation
```
Added a citation and link for [the main paper](https://proceedings.neurips.cc/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html) in `DESCRIPTION`.
...
Reference Papers
----------------
Yu Shi, Guolin Ke, Zhuoming Chen, Shuxin Zheng, Tie-Yan Liu. "Quantized Training of Gradient Boosting Decision Trees" ([link](https://proceedings.neurips.cc/paper/2022/hash/77911ed9e6e864ca1a3d165b2c3cb258-Abstract.html)). Advances in Neural Information Processing Systems 35 (NeurIPS 2022), pp. 18822-18833.
Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, Tie-Yan Liu. "[LightGBM: A Highly Efficient Gradient Boosting Decision Tree](https://proceedings.neurips.cc/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html)". Advances in Neural Information Processing Systems 30 (NIPS 2017), pp. 3149-3157.
Qi Meng, Guolin Ke, Taifeng Wang, Wei Chen, Qiwei Ye, Zhi-Ming Ma, Tie-Yan Liu. "[A Communication-Efficient Parallel Algorithm for Decision Tree](https://proceedings.neurips.cc/paper/2016/hash/10a5ab2db37feedfdeaab192ead4ac0e-Abstract.html)". Advances in Neural Information Processing Systems 29 (NIPS 2016), pp. 1279-1287.
`described here <./Features.rst#optimal-split-for-categorical-features>`_. This often performs better than one-hot encoding.
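As a hedged illustration (the data file and column indices are arbitrary), categorical columns can be declared directly when training from the command line:

.. code:: shell

    # treat columns 0, 1, and 2 of train.csv as categorical features
    ./lightgbm task=train data=train.csv objective=binary categorical_feature=0,1,2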
...
Cost Efficient Gradient Boosting
--------------------------------
`Cost Efficient Gradient Boosting <https://proceedings.neurips.cc/paper/2017/hash/4fac9ba115140ac4f1c22da82aa0bc7f-Abstract.html>`_ (CEGB) makes it possible to penalise boosting based on the cost of obtaining feature values.
CEGB penalises learning in the following ways:
- Each time a tree is split, a penalty of ``cegb_penalty_split`` is applied.
...
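As a hedged sketch of how the ``cegb_penalty_split`` penalty above is supplied (the value and data file are illustrative), CEGB penalties are passed as ordinary training parameters:

.. code:: shell

    # apply a fixed penalty each time a tree is split
    ./lightgbm task=train data=train.csv objective=binary cegb_penalty_split=0.1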
During the training, the compound scoring function ``s(x, pos)`` is fit with a standard ranking algorithm (e.g., LambdaMART) which boils down to jointly learning the relevance component ``f(x)`` (it is later returned as an unbiased model) and the position factors ``g(pos)`` that help better explain the observed (biased) labels.
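Spelled out, this is the GAM-style additive decomposition (a sketch consistent with the description above):

.. math::

    s(x, pos) = f(x) + g(pos)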
Similar score decomposition ideas have previously been applied for classification & pointwise ranking tasks with assumptions of binary labels and binary relevance (a.k.a. "two-tower" models, refer to the papers: `Towards Disentangling Relevance and Bias in Unbiased Learning to Rank <https://arxiv.org/abs/2212.13937>`_, `PAL: a position-bias aware learning framework for CTR prediction in live recommender systems <https://dl.acm.org/doi/10.1145/3298689.3347033>`_, `A General Framework for Debiasing in CTR Prediction <https://arxiv.org/abs/2112.02767>`_).
In LightGBM, we adapt this idea to general pairwise Learning-to-Rank with arbitrary ordinal relevance labels.
In addition, GAMs have been used in the context of explainable ML (`Accurate Intelligible Models with Pairwise Interactions <https://www.cs.cornell.edu/~yinlou/projects/gam/>`_) to linearly decompose the contribution of each feature (and possibly their pairwise interactions) to the overall score, for subsequent analysis and interpretation of their effects in the trained models.
| Higgs | Binary classification | `link <https://archive.ics.uci.edu/dataset/280/higgs>`__ | 10,500,000 | 28 | last 500,000 samples were used as test set |
| MS LTR | Learning to rank | `link <https://www.microsoft.com/en-us/research/project/mslr/>`__ | 2,270,296 | 137 | {S1,S2,S3} as train set, {S5} as test set |
[11] Huan Zhang, Si Si and Cho-Jui Hsieh. "`GPU Acceleration for Large-scale Tree Boosting`_." SysML Conference, 2018.
.. _LightGBM\: A Highly Efficient Gradient Boosting Decision Tree: https://proceedings.neurips.cc/paper/2017/hash/6449f44a102fde848669bdd9eb6b76fa-Abstract.html
.. _On Grouping for Maximum Homogeneity: https://www.jstor.org/stable/2281952
.. _Optimization of collective communication operations in MPICH: https://www.mpich.org/2012/10/24/optimization-of-collective-communication-operations-in-mpich/
.. _A Communication-Efficient Parallel Algorithm for Decision Tree: https://proceedings.neurips.cc/paper/2016/hash/10a5ab2db37feedfdeaab192ead4ac0e-Abstract.html