Unverified Commit 44a6fb7f authored by Nikita Titov's avatar Nikita Titov Committed by GitHub
Browse files

[python] Drop Python 2 support (#3581)

* Update setup.py

* Update .appveyor.yml

* Update .travis.yml

* Update .vsts-ci.yml

* Update __init__.py

* Update test.sh

* Update test_windows.ps1

* Update advanced_example.py

* Update requirements_base.txt

* Update conf.py

* Update conf.py

* Update test_engine.py

* Update utils.py

* Update dockerfile-r

* Update README.md

* Update dockerfile.gpu

* Update test_consistency.py

* Update basic.py

* Update compat.py

* Update engine.py

* Update sklearn.py

* Update sklearn.py

* Update callback.py

* Update setup.py

* Update __init__.py

* Update plotting.py

* Update sklearn.py

* Update engine.py

* Update compat.py

* Update callback.py

* Update basic.py

* Update compat.py

* Update basic.py

* Update basic.py

* Update compat.py

* Update compat.py

* Update plotting.py

* Update engine.py

* Update basic.py

* Update sklearn.py

* Update compat.py

* Update engine.py

* Update engine.py

* Update callback.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update sklearn.py

* Update sklearn.py

* Update plotting.py

* Update sklearn.py

* Update compat.py

* Update compat.py

* Update engine.py

* Update plotting.py

* Update sklearn.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update compat.py

* Update compat.py

* Update compat.py

* Update engine.py

* Update basic.py

* Update compat.py

* Update basic.py

* Update basic.py

* Update basic.py

* Update compat.py

* Update compat.py

* Update basic.py

* Update basic.py

* Update .vsts-ci.yml

* Update .vsts-ci.yml

* Update conf.py

* Revert "Update dockerfile-r"

This reverts commit 4ff6ffc7e3eeda24cc6a59a3bb0c973f02d9d71c.
parent 1fb82e27
...@@ -28,7 +28,6 @@ install: ...@@ -28,7 +28,6 @@ install:
- set CONDA_ENV="test-env" - set CONDA_ENV="test-env"
- ps: >- - ps: >-
switch ($env:PYTHON_VERSION) { switch ($env:PYTHON_VERSION) {
"2.7" {$env:MINICONDA = "C:\Miniconda-x64"}
"3.6" {$env:MINICONDA = "C:\Miniconda36-x64"} "3.6" {$env:MINICONDA = "C:\Miniconda36-x64"}
"3.7" {$env:MINICONDA = "C:\Miniconda37-x64"} "3.7" {$env:MINICONDA = "C:\Miniconda37-x64"}
default {$env:MINICONDA = "C:\Miniconda37-x64"} default {$env:MINICONDA = "C:\Miniconda37-x64"}
......
...@@ -100,15 +100,15 @@ if [[ $TASK == "sdist" ]]; then ...@@ -100,15 +100,15 @@ if [[ $TASK == "sdist" ]]; then
exit 0 exit 0
elif [[ $TASK == "bdist" ]]; then elif [[ $TASK == "bdist" ]]; then
if [[ $OS_NAME == "macos" ]]; then if [[ $OS_NAME == "macos" ]]; then
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=macosx --universal || exit -1 cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=macosx --python-tag py3 || exit -1
mv dist/lightgbm-$LGB_VER-py2.py3-none-macosx.whl dist/lightgbm-$LGB_VER-py2.py3-none-macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64.whl mv dist/lightgbm-$LGB_VER-py3-none-macosx.whl dist/lightgbm-$LGB_VER-py3-none-macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64.whl
if [[ $AZURE == "true" ]]; then if [[ $AZURE == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py2.py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY
fi fi
else else
cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=manylinux1_x86_64 --universal || exit -1 cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=manylinux1_x86_64 --python-tag py3 || exit -1
if [[ $AZURE == "true" ]]; then if [[ $AZURE == "true" ]]; then
cp dist/lightgbm-$LGB_VER-py2.py3-none-manylinux1_x86_64.whl $BUILD_ARTIFACTSTAGINGDIRECTORY cp dist/lightgbm-$LGB_VER-py3-none-manylinux1_x86_64.whl $BUILD_ARTIFACTSTAGINGDIRECTORY
fi fi
fi fi
pip install --user $BUILD_DIRECTORY/python-package/dist/*.whl || exit -1 pip install --user $BUILD_DIRECTORY/python-package/dist/*.whl || exit -1
......
...@@ -49,7 +49,7 @@ elseif ($env:TASK -eq "sdist") { ...@@ -49,7 +49,7 @@ elseif ($env:TASK -eq "sdist") {
} }
elseif ($env:TASK -eq "bdist") { elseif ($env:TASK -eq "bdist") {
cd $env:BUILD_SOURCESDIRECTORY/python-package cd $env:BUILD_SOURCESDIRECTORY/python-package
python setup.py bdist_wheel --plat-name=win-amd64 --universal ; Check-Output $? python setup.py bdist_wheel --plat-name=win-amd64 --python-tag py3 ; Check-Output $?
cd dist; pip install @(Get-ChildItem *.whl) ; Check-Output $? cd dist; pip install @(Get-ChildItem *.whl) ; Check-Output $?
cp @(Get-ChildItem *.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY cp @(Get-ChildItem *.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY
} elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) {
......
...@@ -17,7 +17,7 @@ env: ...@@ -17,7 +17,7 @@ env:
- PYTHON_VERSION=3.8 - PYTHON_VERSION=3.8
matrix: matrix:
- TASK=regular PYTHON_VERSION=3.6 - TASK=regular PYTHON_VERSION=3.6
- TASK=sdist PYTHON_VERSION=2.7 - TASK=sdist
- TASK=bdist - TASK=bdist
- TASK=if-else - TASK=if-else
- TASK=lint - TASK=lint
......
...@@ -117,7 +117,6 @@ jobs: ...@@ -117,7 +117,6 @@ jobs:
PYTHON_VERSION: 3.6 PYTHON_VERSION: 3.6
sdist: sdist:
TASK: sdist TASK: sdist
PYTHON_VERSION: 2.7
bdist: bdist:
TASK: bdist TASK: bdist
steps: steps:
......
...@@ -13,13 +13,12 @@ ...@@ -13,13 +13,12 @@
# Dockerfile for LightGBM GPU Version with Python # Dockerfile for LightGBM GPU Version with Python
`dockerfile.gpu` - A docker file with LightGBM utilizing nvidia-docker. The file is based on the `nvidia/cuda:8.0-cudnn5-devel` image. `dockerfile.gpu` - A docker file with LightGBM utilizing nvidia-docker. The file is based on the `nvidia/cuda:8.0-cudnn5-devel` image.
LightGBM can be utilized in GPU and CPU modes and via Python (2.7 & 3.6). LightGBM can be utilized in GPU and CPU modes and via Python.
## Contents ## Contents
- LightGBM (cpu + gpu) - LightGBM (cpu + gpu)
- Python 2.7 (conda) + scikit-learn, notebooks, pandas, matplotlib - Python 3.8 (conda) + scikit-learn, notebooks, pandas, matplotlib
- Python 3.6 (conda) + scikit-learn, notebooks, pandas, matplotlib
Running the container starts a Jupyter Notebook at `localhost:8888`. Running the container starts a Jupyter Notebook at `localhost:8888`.
......
...@@ -75,8 +75,7 @@ RUN echo "export PATH=$CONDA_DIR/bin:"'$PATH' > /etc/profile.d/conda.sh && \ ...@@ -75,8 +75,7 @@ RUN echo "export PATH=$CONDA_DIR/bin:"'$PATH' > /etc/profile.d/conda.sh && \
rm ~/miniconda.sh rm ~/miniconda.sh
RUN conda config --set always_yes yes --set changeps1 no && \ RUN conda config --set always_yes yes --set changeps1 no && \
conda create -y -q -n py2 python=2.7 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib && \ conda create -y -q -n py3 python=3.8 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib
conda create -y -q -n py3 python=3.6 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib
################################################################################################################# #################################################################################################################
# LightGBM # LightGBM
...@@ -90,7 +89,6 @@ RUN cd /usr/local/src && mkdir lightgbm && cd lightgbm && \ ...@@ -90,7 +89,6 @@ RUN cd /usr/local/src && mkdir lightgbm && cd lightgbm && \
ENV PATH /usr/local/src/lightgbm/LightGBM:${PATH} ENV PATH /usr/local/src/lightgbm/LightGBM:${PATH}
RUN /bin/bash -c "source activate py2 && cd /usr/local/src/lightgbm/LightGBM/python-package && python setup.py install --precompile && source deactivate"
RUN /bin/bash -c "source activate py3 && cd /usr/local/src/lightgbm/LightGBM/python-package && python setup.py install --precompile && source deactivate" RUN /bin/bash -c "source activate py3 && cd /usr/local/src/lightgbm/LightGBM/python-package && python setup.py install --precompile && source deactivate"
################################################################################################################# #################################################################################################################
......
...@@ -26,17 +26,13 @@ from distutils.dir_util import copy_tree ...@@ -26,17 +26,13 @@ from distutils.dir_util import copy_tree
from docutils.parsers.rst import Directive from docutils.parsers.rst import Directive
from sphinx.errors import VersionRequirementError from sphinx.errors import VersionRequirementError
from subprocess import PIPE, Popen from subprocess import PIPE, Popen
from unittest.mock import Mock
CURR_PATH = os.path.abspath(os.path.dirname(__file__)) CURR_PATH = os.path.abspath(os.path.dirname(__file__))
LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package') LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package')
sys.path.insert(0, LIB_PATH) sys.path.insert(0, LIB_PATH)
# -- mock out modules # -- mock out modules
try:
from unittest.mock import Mock # Python 3.x
except ImportError:
from mock import Mock # Python 2.x
MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse', MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse',
'sklearn', 'matplotlib', 'pandas', 'graphviz'] 'sklearn', 'matplotlib', 'pandas', 'graphviz']
for mod_name in MOCK_MODULES: for mod_name in MOCK_MODULES:
...@@ -208,9 +204,7 @@ def generate_doxygen_xml(app): ...@@ -208,9 +204,7 @@ def generate_doxygen_xml(app):
"WARN_AS_ERROR=YES", "WARN_AS_ERROR=YES",
] ]
doxygen_input = '\n'.join(doxygen_args) doxygen_input = '\n'.join(doxygen_args)
is_py3 = sys.version[0] == "3" doxygen_input = bytes(doxygen_input, "utf-8")
if is_py3:
doxygen_input = bytes(doxygen_input, "utf-8")
if not os.path.exists(os.path.join(CURR_PATH, 'doxyoutput')): if not os.path.exists(os.path.join(CURR_PATH, 'doxyoutput')):
os.makedirs(os.path.join(CURR_PATH, 'doxyoutput')) os.makedirs(os.path.join(CURR_PATH, 'doxyoutput'))
try: try:
...@@ -221,8 +215,7 @@ def generate_doxygen_xml(app): ...@@ -221,8 +215,7 @@ def generate_doxygen_xml(app):
process = Popen(["doxygen", "-"], process = Popen(["doxygen", "-"],
stdin=PIPE, stdout=PIPE, stderr=PIPE) stdin=PIPE, stdout=PIPE, stderr=PIPE)
stdout, stderr = process.communicate(doxygen_input) stdout, stderr = process.communicate(doxygen_input)
output = '\n'.join([i.decode('utf-8') if is_py3 else i output = '\n'.join([i.decode('utf-8') for i in (stdout, stderr) if i is not None])
for i in (stdout, stderr) if i is not None])
if process.returncode != 0: if process.returncode != 0:
raise RuntimeError(output) raise RuntimeError(output)
else: else:
......
sphinx sphinx
sphinx_rtd_theme >= 0.3 sphinx_rtd_theme >= 0.3
mock; python_version < '3'
# coding: utf-8 # coding: utf-8
import json import json
import pickle
import lightgbm as lgb import lightgbm as lgb
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_squared_error
try:
import cPickle as pickle
except BaseException:
import pickle
print('Loading data...') print('Loading data...')
# load or create your dataset # load or create your dataset
......
...@@ -3,16 +3,12 @@ ...@@ -3,16 +3,12 @@
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
""" """
from __future__ import absolute_import
from .basic import Booster, Dataset from .basic import Booster, Dataset
from .callback import (early_stopping, print_evaluation, record_evaluation, from .callback import (early_stopping, print_evaluation, record_evaluation,
reset_parameter) reset_parameter)
from .engine import cv, train, CVBooster from .engine import cv, train, CVBooster
import os import os
import sys
import warnings
try: try:
from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker
...@@ -36,8 +32,3 @@ __all__ = ['Dataset', 'Booster', 'CVBooster', ...@@ -36,8 +32,3 @@ __all__ = ['Dataset', 'Booster', 'CVBooster',
'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker', 'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker',
'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping', 'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping',
'plot_importance', 'plot_split_value_histogram', 'plot_metric', 'plot_tree', 'create_tree_digraph'] 'plot_importance', 'plot_split_value_histogram', 'plot_metric', 'plot_tree', 'create_tree_digraph']
# REMOVEME: remove warning after 3.1.0 version release
if sys.version_info[0] == 2:
warnings.warn("LightGBM 3.1 version is the last version that supports Python 2.\n"
"Next release will drop the support.", UserWarning)
# coding: utf-8 # coding: utf-8
"""Wrapper for C API of LightGBM.""" """Wrapper for C API of LightGBM."""
from __future__ import absolute_import, print_function
import copy import copy
import ctypes import ctypes
import json
import os import os
import warnings import warnings
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
...@@ -12,18 +11,13 @@ from collections import OrderedDict ...@@ -12,18 +11,13 @@ from collections import OrderedDict
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
from .compat import (PANDAS_INSTALLED, DataFrame, Series, is_dtype_sparse, from .compat import PANDAS_INSTALLED, DataFrame, Series, is_dtype_sparse, DataTable
DataTable,
decode_string, string_type,
integer_types, numeric_types,
json, json_default_with_numpy,
range_, zip_)
from .libpath import find_lib_path from .libpath import find_lib_path
def _log_callback(msg): def _log_callback(msg):
"""Redirect logs from native library into Python console.""" """Redirect logs from native library into Python console."""
print("{0:s}".format(decode_string(msg)), end='') print("{0:s}".format(msg.decode('utf-8')), end='')
def _load_lib(): def _load_lib():
...@@ -36,13 +30,16 @@ def _load_lib(): ...@@ -36,13 +30,16 @@ def _load_lib():
callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p) callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p)
lib.callback = callback(_log_callback) lib.callback = callback(_log_callback)
if lib.LGBM_RegisterLogCallback(lib.callback) != 0: if lib.LGBM_RegisterLogCallback(lib.callback) != 0:
raise LightGBMError(decode_string(lib.LGBM_GetLastError())) raise LightGBMError(lib.LGBM_GetLastError().decode('utf-8'))
return lib return lib
_LIB = _load_lib() _LIB = _load_lib()
NUMERIC_TYPES = (int, float, bool)
def _safe_call(ret): def _safe_call(ret):
"""Check the return value from C API call. """Check the return value from C API call.
...@@ -52,7 +49,7 @@ def _safe_call(ret): ...@@ -52,7 +49,7 @@ def _safe_call(ret):
The return value from C API calls. The return value from C API calls.
""" """
if ret != 0: if ret != 0:
raise LightGBMError(decode_string(_LIB.LGBM_GetLastError())) raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8'))
def is_numeric(obj): def is_numeric(obj):
...@@ -136,6 +133,16 @@ def c_array(ctype, values): ...@@ -136,6 +133,16 @@ def c_array(ctype, values):
return (ctype * len(values))(*values) return (ctype * len(values))(*values)
def json_default_with_numpy(obj):
"""Convert numpy classes to JSON serializable objects."""
if isinstance(obj, (np.integer, np.floating, np.bool_)):
return obj.item()
elif isinstance(obj, np.ndarray):
return obj.tolist()
else:
return obj
def param_dict_to_str(data): def param_dict_to_str(data):
"""Convert Python dictionary to string, which is passed to C API.""" """Convert Python dictionary to string, which is passed to C API."""
if data is None or not data: if data is None or not data:
...@@ -149,7 +156,7 @@ def param_dict_to_str(data): ...@@ -149,7 +156,7 @@ def param_dict_to_str(data):
else: else:
return str(x) return str(x)
pairs.append(str(key) + '=' + ','.join(map(to_string, val))) pairs.append(str(key) + '=' + ','.join(map(to_string, val)))
elif isinstance(val, string_type) or isinstance(val, numeric_types) or is_numeric(val): elif isinstance(val, (str, NUMERIC_TYPES)) or is_numeric(val):
pairs.append(str(key) + '=' + str(val)) pairs.append(str(key) + '=' + str(val))
elif val is not None: elif val is not None:
raise TypeError('Unknown type of parameter:%s, got:%s' raise TypeError('Unknown type of parameter:%s, got:%s'
...@@ -157,7 +164,7 @@ def param_dict_to_str(data): ...@@ -157,7 +164,7 @@ def param_dict_to_str(data):
return ' '.join(pairs) return ' '.join(pairs)
class _TempFile(object): class _TempFile:
def __enter__(self): def __enter__(self):
with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f: with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f:
self.name = f.name self.name = f.name
...@@ -183,7 +190,14 @@ class LightGBMError(Exception): ...@@ -183,7 +190,14 @@ class LightGBMError(Exception):
pass pass
class _ConfigAliases(object): # DeprecationWarning is not shown by default, so let's create our own with higher level
class LGBMDeprecationWarning(UserWarning):
"""Custom deprecation warning."""
pass
class _ConfigAliases:
aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt", aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt",
"subsample_for_bin"}, "subsample_for_bin"},
"boosting": {"boosting", "boosting": {"boosting",
...@@ -375,7 +389,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica ...@@ -375,7 +389,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
else: else:
if len(cat_cols) != len(pandas_categorical): if len(cat_cols) != len(pandas_categorical):
raise ValueError('train and valid dataset categorical_feature do not match.') raise ValueError('train and valid dataset categorical_feature do not match.')
for col, category in zip_(cat_cols, pandas_categorical): for col, category in zip(cat_cols, pandas_categorical):
if list(data[col].cat.categories) != list(category): if list(data[col].cat.categories) != list(category):
data[col] = data[col].cat.set_categories(category) data[col] = data[col].cat.set_categories(category)
if len(cat_cols): # cat_cols is list if len(cat_cols): # cat_cols is list
...@@ -440,9 +454,9 @@ def _load_pandas_categorical(file_name=None, model_str=None): ...@@ -440,9 +454,9 @@ def _load_pandas_categorical(file_name=None, model_str=None):
if len(lines) >= 2: if len(lines) >= 2:
break break
offset *= 2 offset *= 2
last_line = decode_string(lines[-1]).strip() last_line = lines[-1].decode('utf-8').strip()
if not last_line.startswith(pandas_key): if not last_line.startswith(pandas_key):
last_line = decode_string(lines[-2]).strip() last_line = lines[-2].decode('utf-8').strip()
elif model_str is not None: elif model_str is not None:
idx = model_str.rfind('\n', 0, offset) idx = model_str.rfind('\n', 0, offset)
last_line = model_str[idx:].strip() last_line = model_str[idx:].strip()
...@@ -452,7 +466,7 @@ def _load_pandas_categorical(file_name=None, model_str=None): ...@@ -452,7 +466,7 @@ def _load_pandas_categorical(file_name=None, model_str=None):
return None return None
class _InnerPredictor(object): class _InnerPredictor:
"""_InnerPredictor of LightGBM. """_InnerPredictor of LightGBM.
Not exposed to user. Not exposed to user.
...@@ -563,7 +577,7 @@ class _InnerPredictor(object): ...@@ -563,7 +577,7 @@ class _InnerPredictor(object):
predict_type = C_API_PREDICT_CONTRIB predict_type = C_API_PREDICT_CONTRIB
int_data_has_header = 1 if data_has_header else 0 int_data_has_header = 1 if data_has_header else 0
if isinstance(data, string_type): if isinstance(data, str):
with _TempFile() as f: with _TempFile() as f:
_safe_call(_LIB.LGBM_BoosterPredictForFile( _safe_call(_LIB.LGBM_BoosterPredictForFile(
self.handle, self.handle,
...@@ -668,8 +682,8 @@ class _InnerPredictor(object): ...@@ -668,8 +682,8 @@ class _InnerPredictor(object):
n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])] n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])]
n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum() n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
preds = np.zeros(sum(n_preds), dtype=np.float64) preds = np.zeros(sum(n_preds), dtype=np.float64)
for chunk, (start_idx_pred, end_idx_pred) in zip_(np.array_split(mat, sections), for chunk, (start_idx_pred, end_idx_pred) in zip(np.array_split(mat, sections),
zip_(n_preds_sections, n_preds_sections[1:])): zip(n_preds_sections, n_preds_sections[1:])):
# avoid memory consumption by arrays concatenation operations # avoid memory consumption by arrays concatenation operations
inner_predict(chunk, start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred]) inner_predict(chunk, start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred])
return preds, nrow return preds, nrow
...@@ -807,8 +821,8 @@ class _InnerPredictor(object): ...@@ -807,8 +821,8 @@ class _InnerPredictor(object):
n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff(sections)] n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff(sections)]
n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum() n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
preds = np.zeros(sum(n_preds), dtype=np.float64) preds = np.zeros(sum(n_preds), dtype=np.float64)
for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip_(zip_(sections, sections[1:]), for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip(zip(sections, sections[1:]),
zip_(n_preds_sections, n_preds_sections[1:])): zip(n_preds_sections, n_preds_sections[1:])):
# avoid memory consumption by arrays concatenation operations # avoid memory consumption by arrays concatenation operations
inner_predict(csr[start_idx:end_idx], start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred]) inner_predict(csr[start_idx:end_idx], start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred])
return preds, nrow return preds, nrow
...@@ -906,7 +920,7 @@ class _InnerPredictor(object): ...@@ -906,7 +920,7 @@ class _InnerPredictor(object):
return out_cur_iter.value return out_cur_iter.value
class Dataset(object): class Dataset:
"""Dataset in LightGBM.""" """Dataset in LightGBM."""
def __init__(self, data, label=None, reference=None, def __init__(self, data, label=None, reference=None,
...@@ -1018,7 +1032,7 @@ class Dataset(object): ...@@ -1018,7 +1032,7 @@ class Dataset(object):
def _set_init_score_by_predictor(self, predictor, data, used_indices=None): def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
data_has_header = False data_has_header = False
if isinstance(data, string_type): if isinstance(data, str):
# check data has header or not # check data has header or not
data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header")) data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
num_data = self.num_data() num_data = self.num_data()
...@@ -1029,18 +1043,18 @@ class Dataset(object): ...@@ -1029,18 +1043,18 @@ class Dataset(object):
is_reshape=False) is_reshape=False)
if used_indices is not None: if used_indices is not None:
assert not self.need_slice assert not self.need_slice
if isinstance(data, string_type): if isinstance(data, str):
sub_init_score = np.zeros(num_data * predictor.num_class, dtype=np.float32) sub_init_score = np.zeros(num_data * predictor.num_class, dtype=np.float32)
assert num_data == len(used_indices) assert num_data == len(used_indices)
for i in range_(len(used_indices)): for i in range(len(used_indices)):
for j in range_(predictor.num_class): for j in range(predictor.num_class):
sub_init_score[i * predictor.num_class + j] = init_score[used_indices[i] * predictor.num_class + j] sub_init_score[i * predictor.num_class + j] = init_score[used_indices[i] * predictor.num_class + j]
init_score = sub_init_score init_score = sub_init_score
if predictor.num_class > 1: if predictor.num_class > 1:
# need to regroup init_score # need to regroup init_score
new_init_score = np.zeros(init_score.size, dtype=np.float32) new_init_score = np.zeros(init_score.size, dtype=np.float32)
for i in range_(num_data): for i in range(num_data):
for j in range_(predictor.num_class): for j in range(predictor.num_class):
new_init_score[j * num_data + i] = init_score[i * predictor.num_class + j] new_init_score[j * num_data + i] = init_score[i * predictor.num_class + j]
init_score = new_init_score init_score = new_init_score
elif self.init_score is not None: elif self.init_score is not None:
...@@ -1085,9 +1099,9 @@ class Dataset(object): ...@@ -1085,9 +1099,9 @@ class Dataset(object):
if feature_name is not None: if feature_name is not None:
feature_dict = {name: i for i, name in enumerate(feature_name)} feature_dict = {name: i for i, name in enumerate(feature_name)}
for name in categorical_feature: for name in categorical_feature:
if isinstance(name, string_type) and name in feature_dict: if isinstance(name, str) and name in feature_dict:
categorical_indices.add(feature_dict[name]) categorical_indices.add(feature_dict[name])
elif isinstance(name, integer_types): elif isinstance(name, int):
categorical_indices.add(name) categorical_indices.add(name)
else: else:
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature" raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
...@@ -1108,7 +1122,7 @@ class Dataset(object): ...@@ -1108,7 +1122,7 @@ class Dataset(object):
elif reference is not None: elif reference is not None:
raise TypeError('Reference dataset should be None or dataset instance') raise TypeError('Reference dataset should be None or dataset instance')
# start construct data # start construct data
if isinstance(data, string_type): if isinstance(data, str):
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromFile( _safe_call(_LIB.LGBM_DatasetCreateFromFile(
c_str(data), c_str(data),
...@@ -1297,7 +1311,7 @@ class Dataset(object): ...@@ -1297,7 +1311,7 @@ class Dataset(object):
assert used_indices.flags.c_contiguous assert used_indices.flags.c_contiguous
if self.reference.group is not None: if self.reference.group is not None:
group_info = np.array(self.reference.group).astype(np.int32, copy=False) group_info = np.array(self.reference.group).astype(np.int32, copy=False)
_, self.group = np.unique(np.repeat(range_(len(group_info)), repeats=group_info)[self.used_indices], _, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices],
return_counts=True) return_counts=True)
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
params_str = param_dict_to_str(self.params) params_str = param_dict_to_str(self.params)
...@@ -1433,7 +1447,7 @@ class Dataset(object): ...@@ -1433,7 +1447,7 @@ class Dataset(object):
update() update()
self._free_handle() self._free_handle()
else: else:
raise LightGBMError(decode_string(_LIB.LGBM_GetLastError())) raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8'))
return self return self
def _reverse_update_params(self): def _reverse_update_params(self):
...@@ -1727,7 +1741,7 @@ class Dataset(object): ...@@ -1727,7 +1741,7 @@ class Dataset(object):
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
reserved_string_buffer_size = 255 reserved_string_buffer_size = 255
required_string_buffer_size = ctypes.c_size_t(0) required_string_buffer_size = ctypes.c_size_t(0)
string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(num_feature)] string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range(num_feature)]
ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_DatasetGetFeatureNames( _safe_call(_LIB.LGBM_DatasetGetFeatureNames(
self.handle, self.handle,
...@@ -1743,7 +1757,7 @@ class Dataset(object): ...@@ -1743,7 +1757,7 @@ class Dataset(object):
"Allocated feature name buffer size ({}) was inferior to the needed size ({})." "Allocated feature name buffer size ({}) was inferior to the needed size ({})."
.format(reserved_string_buffer_size, required_string_buffer_size.value) .format(reserved_string_buffer_size, required_string_buffer_size.value)
) )
return [string_buffers[i].value.decode('utf-8') for i in range_(num_feature)] return [string_buffers[i].value.decode('utf-8') for i in range(num_feature)]
def get_label(self): def get_label(self):
"""Get the label of the Dataset. """Get the label of the Dataset.
...@@ -1997,7 +2011,7 @@ class Dataset(object): ...@@ -1997,7 +2011,7 @@ class Dataset(object):
return self return self
class Booster(object): class Booster:
"""Booster in LightGBM.""" """Booster in LightGBM."""
def __init__(self, params=None, train_set=None, model_file=None, model_str=None, silent=False): def __init__(self, params=None, train_set=None, model_file=None, model_str=None, silent=False):
...@@ -2037,7 +2051,7 @@ class Booster(object): ...@@ -2037,7 +2051,7 @@ class Booster(object):
for alias in _ConfigAliases.get("machines"): for alias in _ConfigAliases.get("machines"):
if alias in params: if alias in params:
machines = params[alias] machines = params[alias]
if isinstance(machines, string_type): if isinstance(machines, str):
num_machines = len(machines.split(',')) num_machines = len(machines.split(','))
elif isinstance(machines, (list, set)): elif isinstance(machines, (list, set)):
num_machines = len(machines) num_machines = len(machines)
...@@ -2458,7 +2472,7 @@ class Booster(object): ...@@ -2458,7 +2472,7 @@ class Booster(object):
_safe_call(_LIB.LGBM_BoosterUpdateOneIter( _safe_call(_LIB.LGBM_BoosterUpdateOneIter(
self.handle, self.handle,
ctypes.byref(is_finished))) ctypes.byref(is_finished)))
self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)]
return is_finished.value == 1 return is_finished.value == 1
else: else:
if not self.__set_objective_to_none: if not self.__set_objective_to_none:
...@@ -2501,7 +2515,7 @@ class Booster(object): ...@@ -2501,7 +2515,7 @@ class Booster(object):
grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
ctypes.byref(is_finished))) ctypes.byref(is_finished)))
self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)]
return is_finished.value == 1 return is_finished.value == 1
def rollback_one_iter(self): def rollback_one_iter(self):
...@@ -2514,7 +2528,7 @@ class Booster(object): ...@@ -2514,7 +2528,7 @@ class Booster(object):
""" """
_safe_call(_LIB.LGBM_BoosterRollbackOneIter( _safe_call(_LIB.LGBM_BoosterRollbackOneIter(
self.handle)) self.handle))
self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)]
return self return self
def current_iteration(self): def current_iteration(self):
...@@ -2627,7 +2641,7 @@ class Booster(object): ...@@ -2627,7 +2641,7 @@ class Booster(object):
if data is self.train_set: if data is self.train_set:
data_idx = 0 data_idx = 0
else: else:
for i in range_(len(self.valid_sets)): for i in range(len(self.valid_sets)):
if data is self.valid_sets[i]: if data is self.valid_sets[i]:
data_idx = i + 1 data_idx = i + 1
break break
...@@ -2700,7 +2714,7 @@ class Booster(object): ...@@ -2700,7 +2714,7 @@ class Booster(object):
result : list result : list
List with evaluation results. List with evaluation results.
""" """
return [item for i in range_(1, self.__num_dataset) return [item for i in range(1, self.__num_dataset)
for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)] for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)]
def save_model(self, filename, num_iteration=None, start_iteration=0, importance_type='split'): def save_model(self, filename, num_iteration=None, start_iteration=0, importance_type='split'):
...@@ -3060,7 +3074,7 @@ class Booster(object): ...@@ -3060,7 +3074,7 @@ class Booster(object):
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
reserved_string_buffer_size = 255 reserved_string_buffer_size = 255
required_string_buffer_size = ctypes.c_size_t(0) required_string_buffer_size = ctypes.c_size_t(0)
string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(num_feature)] string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range(num_feature)]
ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetFeatureNames( _safe_call(_LIB.LGBM_BoosterGetFeatureNames(
self.handle, self.handle,
...@@ -3076,7 +3090,7 @@ class Booster(object): ...@@ -3076,7 +3090,7 @@ class Booster(object):
"Allocated feature name buffer size ({}) was inferior to the needed size ({})." "Allocated feature name buffer size ({}) was inferior to the needed size ({})."
.format(reserved_string_buffer_size, required_string_buffer_size.value) .format(reserved_string_buffer_size, required_string_buffer_size.value)
) )
return [string_buffers[i].value.decode('utf-8') for i in range_(num_feature)] return [string_buffers[i].value.decode('utf-8') for i in range(num_feature)]
def feature_importance(self, importance_type='split', iteration=None): def feature_importance(self, importance_type='split', iteration=None):
"""Get feature importances. """Get feature importances.
...@@ -3147,12 +3161,12 @@ class Booster(object): ...@@ -3147,12 +3161,12 @@ class Booster(object):
def add(root): def add(root):
"""Recursively add thresholds.""" """Recursively add thresholds."""
if 'split_index' in root: # non-leaf if 'split_index' in root: # non-leaf
if feature_names is not None and isinstance(feature, string_type): if feature_names is not None and isinstance(feature, str):
split_feature = feature_names[root['split_feature']] split_feature = feature_names[root['split_feature']]
else: else:
split_feature = root['split_feature'] split_feature = root['split_feature']
if split_feature == feature: if split_feature == feature:
if isinstance(root['threshold'], string_type): if isinstance(root['threshold'], str):
raise LightGBMError('Cannot compute split value histogram for the categorical feature') raise LightGBMError('Cannot compute split value histogram for the categorical feature')
else: else:
values.append(root['threshold']) values.append(root['threshold'])
...@@ -3166,7 +3180,7 @@ class Booster(object): ...@@ -3166,7 +3180,7 @@ class Booster(object):
for tree_info in tree_infos: for tree_info in tree_infos:
add(tree_info['tree_structure']) add(tree_info['tree_structure'])
if bins is None or isinstance(bins, integer_types) and xgboost_style: if bins is None or isinstance(bins, int) and xgboost_style:
n_unique = len(np.unique(values)) n_unique = len(np.unique(values))
bins = max(min(n_unique, bins) if bins is not None else n_unique, 1) bins = max(min(n_unique, bins) if bins is not None else n_unique, 1)
hist, bin_edges = np.histogram(values, bins=bins) hist, bin_edges = np.histogram(values, bins=bins)
...@@ -3196,7 +3210,7 @@ class Booster(object): ...@@ -3196,7 +3210,7 @@ class Booster(object):
result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if tmp_out_len.value != self.__num_inner_eval: if tmp_out_len.value != self.__num_inner_eval:
raise ValueError("Wrong length of eval results") raise ValueError("Wrong length of eval results")
for i in range_(self.__num_inner_eval): for i in range(self.__num_inner_eval):
ret.append((data_name, self.__name_inner_eval[i], ret.append((data_name, self.__name_inner_eval[i],
result[i], self.__higher_better_inner_eval[i])) result[i], self.__higher_better_inner_eval[i]))
if callable(feval): if callable(feval):
...@@ -3258,7 +3272,7 @@ class Booster(object): ...@@ -3258,7 +3272,7 @@ class Booster(object):
reserved_string_buffer_size = 255 reserved_string_buffer_size = 255
required_string_buffer_size = ctypes.c_size_t(0) required_string_buffer_size = ctypes.c_size_t(0)
string_buffers = [ string_buffers = [
ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(self.__num_inner_eval) ctypes.create_string_buffer(reserved_string_buffer_size) for i in range(self.__num_inner_eval)
] ]
ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetEvalNames( _safe_call(_LIB.LGBM_BoosterGetEvalNames(
...@@ -3276,7 +3290,7 @@ class Booster(object): ...@@ -3276,7 +3290,7 @@ class Booster(object):
.format(reserved_string_buffer_size, required_string_buffer_size.value) .format(reserved_string_buffer_size, required_string_buffer_size.value)
) )
self.__name_inner_eval = \ self.__name_inner_eval = \
[string_buffers[i].value.decode('utf-8') for i in range_(self.__num_inner_eval)] [string_buffers[i].value.decode('utf-8') for i in range(self.__num_inner_eval)]
self.__higher_better_inner_eval = \ self.__higher_better_inner_eval = \
[name.startswith(('auc', 'ndcg@', 'map@')) for name in self.__name_inner_eval] [name.startswith(('auc', 'ndcg@', 'map@')) for name in self.__name_inner_eval]
...@@ -3312,7 +3326,7 @@ class Booster(object): ...@@ -3312,7 +3326,7 @@ class Booster(object):
""" """
for key, value in kwargs.items(): for key, value in kwargs.items():
if value is not None: if value is not None:
if not isinstance(value, string_type): if not isinstance(value, str):
raise ValueError("Only string values are accepted") raise ValueError("Only string values are accepted")
self.__attr[key] = value self.__attr[key] = value
else: else:
......
# coding: utf-8 # coding: utf-8
"""Callbacks library.""" """Callbacks library."""
from __future__ import absolute_import
import collections import collections
import warnings import warnings
from operator import gt, lt from operator import gt, lt
from .basic import _ConfigAliases from .basic import _ConfigAliases
from .compat import range_
class EarlyStopException(Exception): class EarlyStopException(Exception):
...@@ -23,7 +20,7 @@ class EarlyStopException(Exception): ...@@ -23,7 +20,7 @@ class EarlyStopException(Exception):
best_score : float best_score : float
The score of the best iteration. The score of the best iteration.
""" """
super(EarlyStopException, self).__init__() super().__init__()
self.best_iteration = best_iteration self.best_iteration = best_iteration
self.best_score = best_score self.best_score = best_score
...@@ -219,7 +216,7 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True): ...@@ -219,7 +216,7 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
_init(env) _init(env)
if not enabled[0]: if not enabled[0]:
return return
for i in range_(len(env.evaluation_result_list)): for i in range(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2] score = env.evaluation_result_list[i][2]
if best_score_list[i] is None or cmp_op[i](score, best_score[i]): if best_score_list[i] is None or cmp_op[i](score, best_score[i]):
best_score[i] = score best_score[i] = score
......
# coding: utf-8 # coding: utf-8
"""Compatibility library.""" """Compatibility library."""
from __future__ import absolute_import
import inspect
import sys
import numpy as np
is_py3 = (sys.version_info[0] == 3)
"""Compatibility between Python2 and Python3"""
if is_py3:
zip_ = zip
string_type = str
numeric_types = (int, float, bool)
integer_types = (int, )
range_ = range
def argc_(func):
    """Return how many parameters ``func`` accepts (Python 3 branch)."""
    signature = inspect.signature(func)
    return len(signature.parameters)
def decode_string(bytestring):
    """Turn a UTF-8 C bytestring into a native ``str`` (Python 3 branch)."""
    return str(bytestring, 'utf-8')
else:
from itertools import izip as zip_
string_type = basestring
numeric_types = (int, long, float, bool)
integer_types = (int, long)
range_ = xrange
def argc_(func):
    """Count the number of arguments of a function.

    Python 2 branch of the compat shim: ``inspect.getargspec`` was the
    only introspection API available on Python 2.  NOTE(review): it was
    deprecated in Python 3 and removed in 3.11, so this branch must never
    run on a modern interpreter.
    """
    return len(inspect.getargspec(func).args)
def decode_string(bytestring):
    """Pass the C bytestring through unchanged (Python 2 ``str`` is bytes)."""
    return bytestring
"""json"""
try:
import simplejson as json
except (ImportError, SyntaxError):
# simplejson does not support Python 3.2, it throws a SyntaxError
# because of u'...' Unicode literals.
import json
def json_default_with_numpy(obj):
    """Convert numpy classes to JSON serializable objects.

    Intended as the ``default=`` hook for ``json.dumps``: numpy arrays
    become lists, numpy scalars become native Python scalars, and any
    other object is returned untouched.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, (np.integer, np.floating, np.bool_)):
        return obj.item()
    return obj
"""pandas""" """pandas"""
try: try:
...@@ -66,12 +9,12 @@ try: ...@@ -66,12 +9,12 @@ try:
except ImportError: except ImportError:
PANDAS_INSTALLED = False PANDAS_INSTALLED = False
class Series(object): class Series:
"""Dummy class for pandas.Series.""" """Dummy class for pandas.Series."""
pass pass
class DataFrame(object): class DataFrame:
"""Dummy class for pandas.DataFrame.""" """Dummy class for pandas.DataFrame."""
pass pass
...@@ -103,7 +46,7 @@ try: ...@@ -103,7 +46,7 @@ try:
except ImportError: except ImportError:
DATATABLE_INSTALLED = False DATATABLE_INSTALLED = False
class DataTable(object): class DataTable:
"""Dummy class for DataTable.""" """Dummy class for DataTable."""
pass pass
...@@ -162,10 +105,3 @@ except ImportError: ...@@ -162,10 +105,3 @@ except ImportError:
_LGBMAssertAllFinite = None _LGBMAssertAllFinite = None
_LGBMCheckClassificationTargets = None _LGBMCheckClassificationTargets = None
_LGBMComputeSampleWeight = None _LGBMComputeSampleWeight = None
# DeprecationWarning is not shown by default, so let's create our own with higher level
class LGBMDeprecationWarning(UserWarning):
    """Custom deprecation warning.

    Derives from ``UserWarning`` rather than ``DeprecationWarning`` so
    that it is shown to users by default.
    """
# coding: utf-8 # coding: utf-8
"""Library with training routines of LightGBM.""" """Library with training routines of LightGBM."""
from __future__ import absolute_import
import collections import collections
import copy import copy
import warnings import warnings
...@@ -11,8 +9,7 @@ import numpy as np ...@@ -11,8 +9,7 @@ import numpy as np
from . import callback from . import callback
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold, from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
string_type, integer_types, range_, zip_)
def train(params, train_set, num_boost_round=100, def train(params, train_set, num_boost_round=100,
...@@ -159,7 +156,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -159,7 +156,7 @@ def train(params, train_set, num_boost_round=100,
if num_boost_round <= 0: if num_boost_round <= 0:
raise ValueError("num_boost_round should be greater than zero.") raise ValueError("num_boost_round should be greater than zero.")
if isinstance(init_model, string_type): if isinstance(init_model, str):
predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
elif isinstance(init_model, Booster): elif isinstance(init_model, Booster):
predictor = init_model._to_predictor(dict(init_model.params, **params)) predictor = init_model._to_predictor(dict(init_model.params, **params))
...@@ -182,7 +179,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -182,7 +179,7 @@ def train(params, train_set, num_boost_round=100,
if valid_sets is not None: if valid_sets is not None:
if isinstance(valid_sets, Dataset): if isinstance(valid_sets, Dataset):
valid_sets = [valid_sets] valid_sets = [valid_sets]
if isinstance(valid_names, string_type): if isinstance(valid_names, str):
valid_names = [valid_names] valid_names = [valid_names]
for i, valid_data in enumerate(valid_sets): for i, valid_data in enumerate(valid_sets):
# reduce cost for prediction training data # reduce cost for prediction training data
...@@ -209,7 +206,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -209,7 +206,7 @@ def train(params, train_set, num_boost_round=100,
# Most of legacy advanced options becomes callbacks # Most of legacy advanced options becomes callbacks
if verbose_eval is True: if verbose_eval is True:
callbacks.add(callback.print_evaluation()) callbacks.add(callback.print_evaluation())
elif isinstance(verbose_eval, integer_types): elif isinstance(verbose_eval, int):
callbacks.add(callback.print_evaluation(verbose_eval)) callbacks.add(callback.print_evaluation(verbose_eval))
if early_stopping_rounds is not None and early_stopping_rounds > 0: if early_stopping_rounds is not None and early_stopping_rounds > 0:
...@@ -231,7 +228,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -231,7 +228,7 @@ def train(params, train_set, num_boost_round=100,
booster = Booster(params=params, train_set=train_set) booster = Booster(params=params, train_set=train_set)
if is_valid_contain_train: if is_valid_contain_train:
booster.set_train_data_name(train_data_name) booster.set_train_data_name(train_data_name)
for valid_set, name_valid_set in zip_(reduced_valid_sets, name_valid_sets): for valid_set, name_valid_set in zip(reduced_valid_sets, name_valid_sets):
booster.add_valid(valid_set, name_valid_set) booster.add_valid(valid_set, name_valid_set)
finally: finally:
train_set._reverse_update_params() train_set._reverse_update_params()
...@@ -240,7 +237,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -240,7 +237,7 @@ def train(params, train_set, num_boost_round=100,
booster.best_iteration = 0 booster.best_iteration = 0
# start training # start training
for i in range_(init_iteration, init_iteration + num_boost_round): for i in range(init_iteration, init_iteration + num_boost_round):
for cb in callbacks_before_iter: for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=booster, cb(callback.CallbackEnv(model=booster,
params=params, params=params,
...@@ -277,7 +274,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -277,7 +274,7 @@ def train(params, train_set, num_boost_round=100,
return booster return booster
class CVBooster(object): class CVBooster:
"""CVBooster in LightGBM. """CVBooster in LightGBM.
Auxiliary data structure to hold and redirect all boosters of ``cv`` function. Auxiliary data structure to hold and redirect all boosters of ``cv`` function.
...@@ -328,7 +325,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -328,7 +325,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
group_info = full_data.get_group() group_info = full_data.get_group()
if group_info is not None: if group_info is not None:
group_info = np.array(group_info, dtype=np.int32, copy=False) group_info = np.array(group_info, dtype=np.int32, copy=False)
flatted_group = np.repeat(range_(len(group_info)), repeats=group_info) flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
else: else:
flatted_group = np.zeros(num_data, dtype=np.int32) flatted_group = np.zeros(num_data, dtype=np.int32)
folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group) folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group)
...@@ -340,7 +337,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -340,7 +337,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
raise LightGBMError('Scikit-learn is required for ranking cv.') raise LightGBMError('Scikit-learn is required for ranking cv.')
# ranking task, split according to groups # ranking task, split according to groups
group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False) group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False)
flatted_group = np.repeat(range_(len(group_info)), repeats=group_info) flatted_group = np.repeat(range(len(group_info)), repeats=group_info)
group_kfold = _LGBMGroupKFold(n_splits=nfold) group_kfold = _LGBMGroupKFold(n_splits=nfold)
folds = group_kfold.split(X=np.zeros(num_data), groups=flatted_group) folds = group_kfold.split(X=np.zeros(num_data), groups=flatted_group)
elif stratified: elif stratified:
...@@ -354,9 +351,9 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -354,9 +351,9 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
else: else:
randidx = np.arange(num_data) randidx = np.arange(num_data)
kstep = int(num_data / nfold) kstep = int(num_data / nfold)
test_id = [randidx[i: i + kstep] for i in range_(0, num_data, kstep)] test_id = [randidx[i: i + kstep] for i in range(0, num_data, kstep)]
train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)] train_id = [np.concatenate([test_id[i] for i in range(nfold) if k != i]) for k in range(nfold)]
folds = zip_(train_id, test_id) folds = zip(train_id, test_id)
ret = CVBooster() ret = CVBooster()
for train_idx, test_idx in folds: for train_idx, test_idx in folds:
...@@ -539,7 +536,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -539,7 +536,7 @@ def cv(params, train_set, num_boost_round=100,
if num_boost_round <= 0: if num_boost_round <= 0:
raise ValueError("num_boost_round should be greater than zero.") raise ValueError("num_boost_round should be greater than zero.")
if isinstance(init_model, string_type): if isinstance(init_model, str):
predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
elif isinstance(init_model, Booster): elif isinstance(init_model, Booster):
predictor = init_model._to_predictor(dict(init_model.params, **params)) predictor = init_model._to_predictor(dict(init_model.params, **params))
...@@ -573,7 +570,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -573,7 +570,7 @@ def cv(params, train_set, num_boost_round=100,
callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=False)) callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=False))
if verbose_eval is True: if verbose_eval is True:
callbacks.add(callback.print_evaluation(show_stdv=show_stdv)) callbacks.add(callback.print_evaluation(show_stdv=show_stdv))
elif isinstance(verbose_eval, integer_types): elif isinstance(verbose_eval, int):
callbacks.add(callback.print_evaluation(verbose_eval, show_stdv=show_stdv)) callbacks.add(callback.print_evaluation(verbose_eval, show_stdv=show_stdv))
callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)} callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)}
...@@ -581,7 +578,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -581,7 +578,7 @@ def cv(params, train_set, num_boost_round=100,
callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order')) callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order')) callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))
for i in range_(num_boost_round): for i in range(num_boost_round):
for cb in callbacks_before_iter: for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=cvfolds, cb(callback.CallbackEnv(model=cvfolds,
params=params, params=params,
......
# coding: utf-8 # coding: utf-8
"""Plotting library.""" """Plotting library."""
from __future__ import absolute_import, division
import warnings import warnings
from copy import deepcopy from copy import deepcopy
from io import BytesIO from io import BytesIO
...@@ -9,8 +7,7 @@ from io import BytesIO ...@@ -9,8 +7,7 @@ from io import BytesIO
import numpy as np import numpy as np
from .basic import Booster from .basic import Booster
from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, from .compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED
range_, zip_, string_type)
from .sklearn import LGBMModel from .sklearn import LGBMModel
...@@ -22,7 +19,7 @@ def _check_not_tuple_of_2_elements(obj, obj_name='obj'): ...@@ -22,7 +19,7 @@ def _check_not_tuple_of_2_elements(obj, obj_name='obj'):
def _float2str(value, precision=None): def _float2str(value, precision=None):
return ("{0:.{1}f}".format(value, precision) return ("{0:.{1}f}".format(value, precision)
if precision is not None and not isinstance(value, string_type) if precision is not None and not isinstance(value, str)
else str(value)) else str(value))
...@@ -97,12 +94,12 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -97,12 +94,12 @@ def plot_importance(booster, ax=None, height=0.2,
if not len(importance): if not len(importance):
raise ValueError("Booster's feature_importance is empty.") raise ValueError("Booster's feature_importance is empty.")
tuples = sorted(zip_(feature_name, importance), key=lambda x: x[1]) tuples = sorted(zip(feature_name, importance), key=lambda x: x[1])
if ignore_zero: if ignore_zero:
tuples = [x for x in tuples if x[1] > 0] tuples = [x for x in tuples if x[1] > 0]
if max_num_features is not None and max_num_features > 0: if max_num_features is not None and max_num_features > 0:
tuples = tuples[-max_num_features:] tuples = tuples[-max_num_features:]
labels, values = zip_(*tuples) labels, values = zip(*tuples)
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
...@@ -112,7 +109,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -112,7 +109,7 @@ def plot_importance(booster, ax=None, height=0.2,
ylocs = np.arange(len(values)) ylocs = np.arange(len(values))
ax.barh(ylocs, values, align='center', height=height, **kwargs) ax.barh(ylocs, values, align='center', height=height, **kwargs)
for x, y in zip_(values, ylocs): for x, y in zip(values, ylocs):
ax.text(x + 1, y, ax.text(x + 1, y,
_float2str(x, precision) if importance_type == 'gain' else x, _float2str(x, precision) if importance_type == 'gain' else x,
va='center') va='center')
...@@ -238,7 +235,7 @@ def plot_split_value_histogram(booster, feature, bins=None, ax=None, width_coef= ...@@ -238,7 +235,7 @@ def plot_split_value_histogram(booster, feature, bins=None, ax=None, width_coef=
if title is not None: if title is not None:
title = title.replace('@feature@', str(feature)) title = title.replace('@feature@', str(feature))
title = title.replace('@index/name@', ('name' if isinstance(feature, string_type) else 'index')) title = title.replace('@index/name@', ('name' if isinstance(feature, str) else 'index'))
ax.set_title(title) ax.set_title(title)
if xlabel is not None: if xlabel is not None:
ax.set_xlabel(xlabel) ax.set_xlabel(xlabel)
...@@ -337,7 +334,7 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -337,7 +334,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
raise KeyError('No given metric in eval results.') raise KeyError('No given metric in eval results.')
results = metrics_for_one[metric] results = metrics_for_one[metric]
num_iteration, max_result, min_result = len(results), max(results), min(results) num_iteration, max_result, min_result = len(results), max(results), min(results)
x_ = range_(num_iteration) x_ = range(num_iteration)
ax.plot(x_, results, label=name) ax.plot(x_, results, label=name)
for name in dataset_names: for name in dataset_names:
......
# coding: utf-8 # coding: utf-8
"""Scikit-learn wrapper interface for LightGBM.""" """Scikit-learn wrapper interface for LightGBM."""
from __future__ import absolute_import
import copy import copy
import warnings import warnings
from inspect import signature
import numpy as np import numpy as np
from .basic import Dataset, LightGBMError, _ConfigAliases from .basic import Dataset, LightGBMError, _ConfigAliases
...@@ -12,11 +12,11 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase, ...@@ -12,11 +12,11 @@ from .compat import (SKLEARN_INSTALLED, _LGBMClassifierBase,
LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase,
_LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight, _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight,
_LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight, _LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight,
argc_, range_, zip_, string_type, DataFrame, DataTable) DataFrame, DataTable)
from .engine import train from .engine import train
class _ObjectiveFunctionWrapper(object): class _ObjectiveFunctionWrapper:
"""Proxy class for objective function.""" """Proxy class for objective function."""
def __init__(self, func): def __init__(self, func):
...@@ -69,7 +69,7 @@ class _ObjectiveFunctionWrapper(object): ...@@ -69,7 +69,7 @@ class _ObjectiveFunctionWrapper(object):
The value of the second order derivative (Hessian) for each sample point. The value of the second order derivative (Hessian) for each sample point.
""" """
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(self.func) argc = len(signature(self.func).parameters)
if argc == 2: if argc == 2:
grad, hess = self.func(labels, preds) grad, hess = self.func(labels, preds)
elif argc == 3: elif argc == 3:
...@@ -88,15 +88,15 @@ class _ObjectiveFunctionWrapper(object): ...@@ -88,15 +88,15 @@ class _ObjectiveFunctionWrapper(object):
num_class = len(grad) // num_data num_class = len(grad) // num_data
if num_class * num_data != len(grad): if num_class * num_data != len(grad):
raise ValueError("Length of grad and hess should equal to num_class * num_data") raise ValueError("Length of grad and hess should equal to num_class * num_data")
for k in range_(num_class): for k in range(num_class):
for i in range_(num_data): for i in range(num_data):
idx = k * num_data + i idx = k * num_data + i
grad[idx] *= weight[i] grad[idx] *= weight[i]
hess[idx] *= weight[i] hess[idx] *= weight[i]
return grad, hess return grad, hess
class _EvalFunctionWrapper(object): class _EvalFunctionWrapper:
"""Proxy class for evaluation function.""" """Proxy class for evaluation function."""
def __init__(self, func): def __init__(self, func):
...@@ -158,7 +158,7 @@ class _EvalFunctionWrapper(object): ...@@ -158,7 +158,7 @@ class _EvalFunctionWrapper(object):
Is eval result higher better, e.g. AUC is ``is_higher_better``. Is eval result higher better, e.g. AUC is ``is_higher_better``.
""" """
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(self.func) argc = len(signature(self.func).parameters)
if argc == 2: if argc == 2:
return self.func(labels, preds) return self.func(labels, preds)
elif argc == 3: elif argc == 3:
...@@ -340,7 +340,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -340,7 +340,7 @@ class LGBMModel(_LGBMModelBase):
params : dict params : dict
Parameter names mapped to their values. Parameter names mapped to their values.
""" """
params = super(LGBMModel, self).get_params(deep=deep) params = super().get_params(deep=deep)
params.update(self._other_params) params.update(self._other_params)
return params return params
...@@ -518,10 +518,10 @@ class LGBMModel(_LGBMModelBase): ...@@ -518,10 +518,10 @@ class LGBMModel(_LGBMModelBase):
# Separate built-in from callable evaluation metrics # Separate built-in from callable evaluation metrics
eval_metrics_callable = [_EvalFunctionWrapper(f) for f in eval_metric_list if callable(f)] eval_metrics_callable = [_EvalFunctionWrapper(f) for f in eval_metric_list if callable(f)]
eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, string_type)] eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, str)]
# register default metric for consistency with callable eval_metric case # register default metric for consistency with callable eval_metric case
original_metric = self._objective if isinstance(self._objective, string_type) else None original_metric = self._objective if isinstance(self._objective, str) else None
if original_metric is None: if original_metric is None:
# try to deduce from class instance # try to deduce from class instance
if isinstance(self, LGBMRegressor): if isinstance(self, LGBMRegressor):
...@@ -537,7 +537,7 @@ class LGBMModel(_LGBMModelBase): ...@@ -537,7 +537,7 @@ class LGBMModel(_LGBMModelBase):
original_metric = params.pop(metric_alias) original_metric = params.pop(metric_alias)
# concatenate metric from params (or default if not provided in params) and eval_metric # concatenate metric from params (or default if not provided in params) and eval_metric
original_metric = [original_metric] if isinstance(original_metric, (string_type, type(None))) else original_metric original_metric = [original_metric] if isinstance(original_metric, (str, type(None))) else original_metric
params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric
params['metric'] = [metric for metric in params['metric'] if metric is not None] params['metric'] = [metric for metric in params['metric'] if metric is not None]
...@@ -767,16 +767,11 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase): ...@@ -767,16 +767,11 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
verbose=True, feature_name='auto', categorical_feature='auto', verbose=True, feature_name='auto', categorical_feature='auto',
callbacks=None, init_model=None): callbacks=None, init_model=None):
"""Docstring is inherited from the LGBMModel.""" """Docstring is inherited from the LGBMModel."""
super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight, super().fit(X, y, sample_weight=sample_weight, init_score=init_score,
init_score=init_score, eval_set=eval_set, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_names=eval_names, eval_init_score=eval_init_score, eval_metric=eval_metric,
eval_sample_weight=eval_sample_weight, early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
eval_init_score=eval_init_score, categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name,
categorical_feature=categorical_feature,
callbacks=callbacks, init_model=init_model)
return self return self
_base_doc = LGBMModel.fit.__doc__ _base_doc = LGBMModel.fit.__doc__
...@@ -803,7 +798,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -803,7 +798,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
_LGBMCheckClassificationTargets(y) _LGBMCheckClassificationTargets(y)
self._le = _LGBMLabelEncoder().fit(y) self._le = _LGBMLabelEncoder().fit(y)
_y = self._le.transform(y) _y = self._le.transform(y)
self._class_map = dict(zip_(self._le.classes_, self._le.transform(self._le.classes_))) self._class_map = dict(zip(self._le.classes_, self._le.transform(self._le.classes_)))
if isinstance(self.class_weight, dict): if isinstance(self.class_weight, dict):
self._class_weight = {self._class_map[k]: v for k, v in self.class_weight.items()} self._class_weight = {self._class_map[k]: v for k, v in self.class_weight.items()}
...@@ -817,7 +812,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -817,7 +812,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
self._objective = "multiclass" self._objective = "multiclass"
if not callable(eval_metric): if not callable(eval_metric):
if isinstance(eval_metric, (string_type, type(None))): if isinstance(eval_metric, (str, type(None))):
eval_metric = [eval_metric] eval_metric = [eval_metric]
if self._n_classes > 2: if self._n_classes > 2:
for index, metric in enumerate(eval_metric): for index, metric in enumerate(eval_metric):
...@@ -844,17 +839,12 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -844,17 +839,12 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
else: else:
valid_sets[i] = (valid_x, self._le.transform(valid_y)) valid_sets[i] = (valid_x, self._le.transform(valid_y))
super(LGBMClassifier, self).fit(X, _y, sample_weight=sample_weight, super().fit(X, _y, sample_weight=sample_weight, init_score=init_score, eval_set=valid_sets,
init_score=init_score, eval_set=valid_sets, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_names=eval_names, eval_class_weight=eval_class_weight, eval_init_score=eval_init_score,
eval_sample_weight=eval_sample_weight, eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds,
eval_class_weight=eval_class_weight, verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature,
eval_init_score=eval_init_score, callbacks=callbacks, init_model=init_model)
eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name,
categorical_feature=categorical_feature,
callbacks=callbacks, init_model=init_model)
return self return self
_base_doc = LGBMModel.fit.__doc__ _base_doc = LGBMModel.fit.__doc__
...@@ -919,8 +909,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -919,8 +909,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
X_SHAP_values : array-like of shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects X_SHAP_values : array-like of shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects
If ``pred_contrib=True``, the feature contributions for each sample. If ``pred_contrib=True``, the feature contributions for each sample.
""" """
result = super(LGBMClassifier, self).predict(X, raw_score, start_iteration, num_iteration, result = super().predict(X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs)
pred_leaf, pred_contrib, **kwargs)
if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib): if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib):
warnings.warn("Cannot compute class probabilities or labels " warnings.warn("Cannot compute class probabilities or labels "
"due to the usage of customized objective function.\n" "due to the usage of customized objective function.\n"
...@@ -967,23 +956,18 @@ class LGBMRanker(LGBMModel): ...@@ -967,23 +956,18 @@ class LGBMRanker(LGBMModel):
elif len(eval_group) != len(eval_set): elif len(eval_group) != len(eval_set):
raise ValueError("Length of eval_group should be equal to eval_set") raise ValueError("Length of eval_group should be equal to eval_set")
elif (isinstance(eval_group, dict) elif (isinstance(eval_group, dict)
and any(i not in eval_group or eval_group[i] is None for i in range_(len(eval_group))) and any(i not in eval_group or eval_group[i] is None for i in range(len(eval_group)))
or isinstance(eval_group, list) or isinstance(eval_group, list)
and any(group is None for group in eval_group)): and any(group is None for group in eval_group)):
raise ValueError("Should set group for all eval datasets for ranking task; " raise ValueError("Should set group for all eval datasets for ranking task; "
"if you use dict, the index should start from 0") "if you use dict, the index should start from 0")
self._eval_at = eval_at self._eval_at = eval_at
super(LGBMRanker, self).fit(X, y, sample_weight=sample_weight, super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group,
init_score=init_score, group=group, eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight,
eval_set=eval_set, eval_names=eval_names, eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric,
eval_sample_weight=eval_sample_weight, early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name,
eval_init_score=eval_init_score, eval_group=eval_group, categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model)
eval_metric=eval_metric,
early_stopping_rounds=early_stopping_rounds,
verbose=verbose, feature_name=feature_name,
categorical_feature=categorical_feature,
callbacks=callbacks, init_model=init_model)
return self return self
_base_doc = LGBMModel.fit.__doc__ _base_doc = LGBMModel.fit.__doc__
......
# coding: utf-8 # coding: utf-8
"""Setup lightgbm package.""" """Setup lightgbm package."""
from __future__ import absolute_import
import io
import logging import logging
import os import os
import struct import struct
...@@ -329,8 +326,8 @@ if __name__ == "__main__": ...@@ -329,8 +326,8 @@ if __name__ == "__main__":
copy_file(os.path.join(CURRENT_DIR, os.path.pardir, 'VERSION.txt'), copy_file(os.path.join(CURRENT_DIR, os.path.pardir, 'VERSION.txt'),
os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'), os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'),
verbose=0) verbose=0)
version = io.open(os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'), encoding='utf-8').read().strip() version = open(os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'), encoding='utf-8').read().strip()
readme = io.open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8').read() readme = open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8').read()
sys.path.insert(0, CURRENT_DIR) sys.path.insert(0, CURRENT_DIR)
...@@ -368,8 +365,6 @@ if __name__ == "__main__": ...@@ -368,8 +365,6 @@ if __name__ == "__main__":
'Operating System :: Microsoft :: Windows', 'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX', 'Operating System :: POSIX',
'Operating System :: Unix', 'Operating System :: Unix',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.7',
......
...@@ -7,7 +7,7 @@ import numpy as np ...@@ -7,7 +7,7 @@ import numpy as np
from sklearn.datasets import load_svmlight_file from sklearn.datasets import load_svmlight_file
class FileLoader(object): class FileLoader:
def __init__(self, directory, prefix, config_file='train.conf'): def __init__(self, directory, prefix, config_file='train.conf'):
directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), directory) directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), directory)
......
...@@ -3,6 +3,7 @@ import copy ...@@ -3,6 +3,7 @@ import copy
import itertools import itertools
import math import math
import os import os
import pickle
import psutil import psutil
import random import random
import unittest import unittest
...@@ -14,11 +15,6 @@ from sklearn.datasets import load_svmlight_file, make_multilabel_classification ...@@ -14,11 +15,6 @@ from sklearn.datasets import load_svmlight_file, make_multilabel_classification
from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, roc_auc_score, average_precision_score from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, roc_auc_score, average_precision_score
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GroupKFold from sklearn.model_selection import train_test_split, TimeSeriesSplit, GroupKFold
try:
import cPickle as pickle
except ImportError:
import pickle
from .utils import load_boston, load_breast_cancer, load_digits, load_iris from .utils import load_boston, load_breast_cancer, load_digits, load_iris
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment