Unverified Commit 1e7ebc51 authored by James Lamb's avatar James Lamb Committed by GitHub
Browse files

[python-package] Add support for NumPy 2.0, test against nightly versions of dependencies (fixes #6454) (#6467)

[python-package] Add support for NumPy 2.0, test against nightly versions of dependencies (fixes #6454) (#6467)
parent 63926827
#!/bin/bash

# Install the latest versions of lightgbm's dependencies --
# including pre-releases and nightly builds -- then run the
# C API and Python package test suites against them.
#
# ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml

# fail fast: exit on any error (including in pipelines), treat unset
# variables as errors, and let ERR traps fire inside functions/subshells
set -e -E -u -o pipefail

echo "installing testing dependencies"
python -m pip install \
    cloudpickle \
    psutil \
    pytest
echo "done installing testing dependencies"

echo "installing lightgbm's dependencies"
# nightly wheels of the scientific-python stack; --prefer-binary avoids
# building from sdists, --pre allows dev/rc versions to satisfy the pins
python -m pip install \
    --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
    --prefer-binary \
    --pre \
    --upgrade \
    'numpy>=2.0.0.dev0' \
    'matplotlib>=3.10.0.dev0' \
    'pandas>=3.0.0.dev0' \
    'scikit-learn>=1.6.dev0' \
    'scipy>=1.15.0.dev0'

# pyarrow nightlies are published to a separate index
python -m pip install \
    --extra-index-url https://pypi.fury.io/arrow-nightlies/ \
    --prefer-binary \
    --pre \
    --upgrade \
    'pyarrow>=17.0.0.dev0'

python -m pip install \
    'cffi>=1.15.1'
echo "done installing lightgbm's dependencies"

echo "installing lightgbm"
# --no-deps: dependencies were pinned to nightlies above; a plain install
# would downgrade them to the latest stable releases.
# NOTE: use 'python -m pip' (not bare 'pip') so the wheel is guaranteed to
# land in the same environment the dependencies were installed into.
python -m pip install --no-deps dist/*.whl
echo "done installing lightgbm"

echo "installed package versions:"
python -m pip freeze

echo ""
echo "running tests"
pytest tests/c_api_test/
pytest tests/python_package_test/
......@@ -75,6 +75,33 @@ jobs:
export PATH=${CONDA}/bin:${PATH}
$GITHUB_WORKSPACE/.ci/setup.sh || exit 1
$GITHUB_WORKSPACE/.ci/test.sh || exit 1
test-latest-versions:
name: Python - latest versions (ubuntu-latest)
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 5
submodules: true
- name: Create wheel
run: |
docker run \
--rm \
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
/bin/bash -c 'PATH=/opt/miniforge/bin:$PATH sh ./build-python.sh bdist_wheel --nomp'
- name: Test compatibility
run: |
docker run \
--rm \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
python:3.11 \
/bin/bash ./.ci/test-python-latest.sh
test-oldest-versions:
name: Python - oldest supported versions (ubuntu-latest)
runs-on: ubuntu-latest
......@@ -89,6 +116,7 @@ jobs:
run: |
docker run \
--rm \
--env CMAKE_BUILD_PARALLEL_LEVEL=${{ env.CMAKE_BUILD_PARALLEL_LEVEL }} \
-v $(pwd):/opt/lgb-build \
-w /opt/lgb-build \
lightgbm/vsts-agent:manylinux_2_28_x86_64 \
......@@ -104,7 +132,7 @@ jobs:
all-python-package-jobs-successful:
if: always()
runs-on: ubuntu-latest
needs: [test, test-oldest-versions]
needs: [test, test-latest-versions, test-oldest-versions]
steps:
- name: Note that all tests succeeded
uses: re-actors/alls-green@v1.2.2
......
......@@ -356,10 +356,10 @@ def _list_to_1d_numpy(
array = data.ravel()
return _cast_numpy_array_to_dtype(array, dtype)
elif _is_1d_list(data):
return np.array(data, dtype=dtype, copy=False)
return np.asarray(data, dtype=dtype)
elif isinstance(data, pd_Series):
_check_for_bad_pandas_dtypes(data.to_frame().dtypes)
return np.array(data, dtype=dtype, copy=False) # SparseArray should be supported as well
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
else:
raise TypeError(
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
......@@ -728,7 +728,7 @@ def _convert_from_sliced_object(data: np.ndarray) -> np.ndarray:
def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray]:
"""Get pointer of float numpy array / list."""
if _is_1d_list(data):
data = np.array(data, copy=False)
data = np.asarray(data)
if _is_numpy_1d_array(data):
data = _convert_from_sliced_object(data)
assert data.flags.c_contiguous
......@@ -749,7 +749,7 @@ def _c_float_array(data: np.ndarray) -> Tuple[_ctypes_float_ptr, int, np.ndarray
def _c_int_array(data: np.ndarray) -> Tuple[_ctypes_int_ptr, int, np.ndarray]:
"""Get pointer of int numpy array / list."""
if _is_1d_list(data):
data = np.array(data, copy=False)
data = np.asarray(data)
if _is_numpy_1d_array(data):
data = _convert_from_sliced_object(data)
assert data.flags.c_contiguous
......@@ -1270,7 +1270,7 @@ class _InnerPredictor:
preds: Optional[np.ndarray],
) -> Tuple[np.ndarray, int]:
if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
else: # change non-float data to float data, need to copy
data = np.array(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data, _ = _c_float_array(data)
......@@ -2285,9 +2285,9 @@ class Dataset:
self._handle = ctypes.c_void_p()
if mat.dtype == np.float32 or mat.dtype == np.float64:
data = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
else: # change non-float data to float data, need to copy
data = np.array(mat.reshape(mat.size), dtype=np.float32)
data = np.asarray(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data, _ = _c_float_array(data)
_safe_call(
......@@ -2332,7 +2332,7 @@ class Dataset:
nrow[i] = mat.shape[0]
if mat.dtype == np.float32 or mat.dtype == np.float64:
mats[i] = np.array(mat.reshape(mat.size), dtype=mat.dtype, copy=False)
mats[i] = np.asarray(mat.reshape(mat.size), dtype=mat.dtype)
else: # change non-float data to float data, need to copy
mats[i] = np.array(mat.reshape(mat.size), dtype=np.float32)
......
......@@ -512,7 +512,7 @@ def _make_n_folds(
if hasattr(folds, "split"):
group_info = full_data.get_group()
if group_info is not None:
group_info = np.array(group_info, dtype=np.int32, copy=False)
group_info = np.asarray(group_info, dtype=np.int32)
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
else:
flatted_group = np.zeros(num_data, dtype=np.int32)
......@@ -526,7 +526,7 @@ def _make_n_folds(
if not SKLEARN_INSTALLED:
raise LightGBMError("scikit-learn is required for ranking cv")
# ranking task, split according to groups
group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
group_info = np.asarray(full_data.get_group(), dtype=np.int32)
flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
group_kfold = _LGBMGroupKFold(n_splits=nfold)
folds = group_kfold.split(X=np.empty(num_data), groups=flatted_group)
......
......@@ -125,7 +125,7 @@ def load_from_mat(filename, reference):
mat = np.loadtxt(str(filename), dtype=np.float64)
label = mat[:, 0].astype(np.float32)
mat = mat[:, 1:]
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
handle = ctypes.c_void_p()
ref = None
if reference is not None:
......@@ -203,7 +203,7 @@ def test_booster():
mat = data[:, 1:]
preb = np.empty(mat.shape[0], dtype=np.float64)
num_preb = ctypes.c_int64(0)
data = np.array(mat.reshape(mat.size), dtype=np.float64, copy=False)
data = np.asarray(mat.reshape(mat.size), dtype=np.float64)
LIB.LGBM_BoosterPredictForMat(
booster2,
data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
......
......@@ -20,6 +20,10 @@ if os.getenv("ALLOW_SKIP_ARROW_TESTS") == "1":
else:
import pyarrow as pa # type: ignore
assert (
lgb.compat.PYARROW_INSTALLED is True
), "'pyarrow' and its dependencies must be installed to run the arrow tests"
# ----------------------------------------------------------------------------------------------- #
# UTILITIES #
# ----------------------------------------------------------------------------------------------- #
......
......@@ -777,7 +777,10 @@ def test_custom_objective_safety(rng):
def test_no_copy_when_single_float_dtype_dataframe(dtype, feature_name, rng):
pd = pytest.importorskip("pandas")
X = rng.uniform(size=(10, 2)).astype(dtype)
df = pd.DataFrame(X)
# copy=False is necessary because starting with pandas 3.0, pd.DataFrame() creates
# a copy of the input numpy array by default
# ref: https://github.com/pandas-dev/pandas/issues/58913
df = pd.DataFrame(X, copy=False)
built_data = lgb.basic._data_from_pandas(
data=df, feature_name=feature_name, categorical_feature="auto", pandas_categorical=None
)[0]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment