Unverified commit 90342e92, authored by Nikita Titov, committed by GitHub
Browse files

[python] allow to pass some params as pathlib.Path objects (#4440)

* allow to pass some params as pathlib.Path objects

* fix lint

* improve indentation
parent b09da434
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
""" """
import os from pathlib import Path
from .basic import Booster, Dataset, Sequence, register_logger from .basic import Booster, Dataset, Sequence, register_logger
from .callback import early_stopping, print_evaluation, record_evaluation, reset_parameter from .callback import early_stopping, print_evaluation, record_evaluation, reset_parameter
...@@ -23,11 +23,9 @@ except ImportError: ...@@ -23,11 +23,9 @@ except ImportError:
pass pass
dir_path = os.path.dirname(os.path.realpath(__file__)) _version_path = Path(__file__).parent.absolute() / 'VERSION.txt'
if _version_path.is_file():
if os.path.isfile(os.path.join(dir_path, 'VERSION.txt')): __version__ = _version_path.read_text(encoding='utf-8').strip()
with open(os.path.join(dir_path, 'VERSION.txt')) as version_file:
__version__ = version_file.read().strip()
__all__ = ['Dataset', 'Booster', 'CVBooster', 'Sequence', __all__ = ['Dataset', 'Booster', 'CVBooster', 'Sequence',
'register_logger', 'register_logger',
......
...@@ -3,12 +3,14 @@ ...@@ -3,12 +3,14 @@
import abc import abc
import ctypes import ctypes
import json import json
import os
import warnings import warnings
from collections import OrderedDict from collections import OrderedDict
from copy import deepcopy from copy import deepcopy
from functools import wraps from functools import wraps
from logging import Logger from logging import Logger
from os import SEEK_END
from os.path import getsize
from pathlib import Path
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
...@@ -243,7 +245,7 @@ def param_dict_to_str(data): ...@@ -243,7 +245,7 @@ def param_dict_to_str(data):
else: else:
return str(x) return str(x)
pairs.append(f"{key}={','.join(map(to_string, val))}") pairs.append(f"{key}={','.join(map(to_string, val))}")
elif isinstance(val, (str, NUMERIC_TYPES)) or is_numeric(val): elif isinstance(val, (str, Path, NUMERIC_TYPES)) or is_numeric(val):
pairs.append(f"{key}={val}") pairs.append(f"{key}={val}")
elif val is not None: elif val is not None:
raise TypeError(f'Unknown type of parameter:{key}, got:{type(val).__name__}') raise TypeError(f'Unknown type of parameter:{key}, got:{type(val).__name__}')
...@@ -251,23 +253,17 @@ def param_dict_to_str(data): ...@@ -251,23 +253,17 @@ def param_dict_to_str(data):
class _TempFile: class _TempFile:
"""Proxy class to workaround errors on Windows."""
def __enter__(self): def __enter__(self):
with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f: with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f:
self.name = f.name self.name = f.name
self.path = Path(self.name)
return self return self
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
if os.path.isfile(self.name): if self.path.is_file():
os.remove(self.name) self.path.unlink()
def readlines(self):
with open(self.name, "r+") as f:
ret = f.readlines()
return ret
def writelines(self, lines):
with open(self.name, "w+") as f:
f.writelines(lines)
class LightGBMError(Exception): class LightGBMError(Exception):
...@@ -584,12 +580,12 @@ def _load_pandas_categorical(file_name=None, model_str=None): ...@@ -584,12 +580,12 @@ def _load_pandas_categorical(file_name=None, model_str=None):
pandas_key = 'pandas_categorical:' pandas_key = 'pandas_categorical:'
offset = -len(pandas_key) offset = -len(pandas_key)
if file_name is not None: if file_name is not None:
max_offset = -os.path.getsize(file_name) max_offset = -getsize(file_name)
with open(file_name, 'rb') as f: with open(file_name, 'rb') as f:
while True: while True:
if offset < max_offset: if offset < max_offset:
offset = max_offset offset = max_offset
f.seek(offset, os.SEEK_END) f.seek(offset, SEEK_END)
lines = f.readlines() lines = f.readlines()
if len(lines) >= 2: if len(lines) >= 2:
break break
...@@ -685,7 +681,7 @@ class _InnerPredictor: ...@@ -685,7 +681,7 @@ class _InnerPredictor:
Parameters Parameters
---------- ----------
model_file : string or None, optional (default=None) model_file : string, pathlib.Path or None, optional (default=None)
Path to the model file. Path to the model file.
booster_handle : object or None, optional (default=None) booster_handle : object or None, optional (default=None)
Handle of Booster. Handle of Booster.
...@@ -698,7 +694,7 @@ class _InnerPredictor: ...@@ -698,7 +694,7 @@ class _InnerPredictor:
"""Prediction task""" """Prediction task"""
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(model_file), c_str(str(model_file)),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
...@@ -743,9 +739,9 @@ class _InnerPredictor: ...@@ -743,9 +739,9 @@ class _InnerPredictor:
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
Data source for prediction. Data source for prediction.
When data type is string, it represents the path of txt file. When data type is string or pathlib.Path, it represents the path of txt file.
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration to predict. Start index of the iteration to predict.
num_iteration : int, optional (default=-1) num_iteration : int, optional (default=-1)
...@@ -780,21 +776,19 @@ class _InnerPredictor: ...@@ -780,21 +776,19 @@ class _InnerPredictor:
predict_type = C_API_PREDICT_CONTRIB predict_type = C_API_PREDICT_CONTRIB
int_data_has_header = 1 if data_has_header else 0 int_data_has_header = 1 if data_has_header else 0
if isinstance(data, str): if isinstance(data, (str, Path)):
with _TempFile() as f: with _TempFile() as f:
_safe_call(_LIB.LGBM_BoosterPredictForFile( _safe_call(_LIB.LGBM_BoosterPredictForFile(
self.handle, self.handle,
c_str(data), c_str(str(data)),
ctypes.c_int(int_data_has_header), ctypes.c_int(int_data_has_header),
ctypes.c_int(predict_type), ctypes.c_int(predict_type),
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
c_str(self.pred_parameter), c_str(self.pred_parameter),
c_str(f.name))) c_str(f.name)))
lines = f.readlines() preds = np.loadtxt(f.name, dtype=np.float64)
nrow = len(lines) nrow = preds.shape[0]
preds = [float(token) for line in lines for token in line.split('\t')]
preds = np.array(preds, dtype=np.float64, copy=False)
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
preds, nrow = self.__pred_for_csr(data, start_iteration, num_iteration, predict_type) preds, nrow = self.__pred_for_csr(data, start_iteration, num_iteration, predict_type)
elif isinstance(data, scipy.sparse.csc_matrix): elif isinstance(data, scipy.sparse.csc_matrix):
...@@ -829,9 +823,9 @@ class _InnerPredictor: ...@@ -829,9 +823,9 @@ class _InnerPredictor:
def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type): def __get_num_preds(self, start_iteration, num_iteration, nrow, predict_type):
"""Get size of prediction result.""" """Get size of prediction result."""
if nrow > MAX_INT32: if nrow > MAX_INT32:
raise LightGBMError('LightGBM cannot perform prediction for data' raise LightGBMError('LightGBM cannot perform prediction for data '
f'with number of rows greater than MAX_INT32 ({MAX_INT32}).\n' f'with number of rows greater than MAX_INT32 ({MAX_INT32}).\n'
'You can split your data into chunks' 'You can split your data into chunks '
'and then concatenate predictions for them') 'and then concatenate predictions for them')
n_preds = ctypes.c_int64(0) n_preds = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterCalcNumPredict( _safe_call(_LIB.LGBM_BoosterCalcNumPredict(
...@@ -1133,9 +1127,9 @@ class Dataset: ...@@ -1133,9 +1127,9 @@ class Dataset:
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays
Data source of Dataset. Data source of Dataset.
If string, it represents the path to txt file. If string or pathlib.Path, it represents the path to txt file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None)
Label of the data. Label of the data.
reference : Dataset or None, optional (default=None) reference : Dataset or None, optional (default=None)
...@@ -1384,7 +1378,7 @@ class Dataset: ...@@ -1384,7 +1378,7 @@ class Dataset:
def _set_init_score_by_predictor(self, predictor, data, used_indices=None): def _set_init_score_by_predictor(self, predictor, data, used_indices=None):
data_has_header = False data_has_header = False
if isinstance(data, str): if isinstance(data, (str, Path)):
# check data has header or not # check data has header or not
data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header")) data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header"))
num_data = self.num_data() num_data = self.num_data()
...@@ -1395,7 +1389,7 @@ class Dataset: ...@@ -1395,7 +1389,7 @@ class Dataset:
is_reshape=False) is_reshape=False)
if used_indices is not None: if used_indices is not None:
assert not self.need_slice assert not self.need_slice
if isinstance(data, str): if isinstance(data, (str, Path)):
sub_init_score = np.empty(num_data * predictor.num_class, dtype=np.float32) sub_init_score = np.empty(num_data * predictor.num_class, dtype=np.float32)
assert num_data == len(used_indices) assert num_data == len(used_indices)
for i in range(len(used_indices)): for i in range(len(used_indices)):
...@@ -1472,10 +1466,10 @@ class Dataset: ...@@ -1472,10 +1466,10 @@ class Dataset:
elif reference is not None: elif reference is not None:
raise TypeError('Reference dataset should be None or dataset instance') raise TypeError('Reference dataset should be None or dataset instance')
# start construct data # start construct data
if isinstance(data, str): if isinstance(data, (str, Path)):
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromFile( _safe_call(_LIB.LGBM_DatasetCreateFromFile(
c_str(data), c_str(str(data)),
c_str(params_str), c_str(params_str),
ref_dataset, ref_dataset,
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
...@@ -1775,9 +1769,9 @@ class Dataset: ...@@ -1775,9 +1769,9 @@ class Dataset:
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, Sequence, list of Sequences or list of numpy arrays
Data source of Dataset. Data source of Dataset.
If string, it represents the path to txt file. If string or pathlib.Path, it represents the path to txt file.
label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None)
Label of the data. Label of the data.
weight : list, numpy 1-D array, pandas Series or None, optional (default=None) weight : list, numpy 1-D array, pandas Series or None, optional (default=None)
...@@ -1842,7 +1836,7 @@ class Dataset: ...@@ -1842,7 +1836,7 @@ class Dataset:
Parameters Parameters
---------- ----------
filename : string filename : string or pathlib.Path
Name of the output file. Name of the output file.
Returns Returns
...@@ -1852,7 +1846,7 @@ class Dataset: ...@@ -1852,7 +1846,7 @@ class Dataset:
""" """
_safe_call(_LIB.LGBM_DatasetSaveBinary( _safe_call(_LIB.LGBM_DatasetSaveBinary(
self.construct().handle, self.construct().handle,
c_str(filename))) c_str(str(filename))))
return self return self
def _update_params(self, params): def _update_params(self, params):
...@@ -2242,7 +2236,7 @@ class Dataset: ...@@ -2242,7 +2236,7 @@ class Dataset:
Returns Returns
------- -------
data : string, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, list of numpy arrays or None data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame, scipy.sparse, list of numpy arrays or None
Raw data used in the Dataset construction. Raw data used in the Dataset construction.
""" """
if self.handle is None: if self.handle is None:
...@@ -2442,7 +2436,7 @@ class Dataset: ...@@ -2442,7 +2436,7 @@ class Dataset:
Parameters Parameters
---------- ----------
filename : string filename : string or pathlib.Path
Name of the output file. Name of the output file.
Returns Returns
...@@ -2452,7 +2446,7 @@ class Dataset: ...@@ -2452,7 +2446,7 @@ class Dataset:
""" """
_safe_call(_LIB.LGBM_DatasetDumpText( _safe_call(_LIB.LGBM_DatasetDumpText(
self.construct().handle, self.construct().handle,
c_str(filename))) c_str(str(filename))))
return self return self
...@@ -2468,7 +2462,7 @@ class Booster: ...@@ -2468,7 +2462,7 @@ class Booster:
Parameters for Booster. Parameters for Booster.
train_set : Dataset or None, optional (default=None) train_set : Dataset or None, optional (default=None)
Training dataset. Training dataset.
model_file : string or None, optional (default=None) model_file : string, pathlib.Path or None, optional (default=None)
Path to the model file. Path to the model file.
model_str : string or None, optional (default=None) model_str : string or None, optional (default=None)
Model will be loaded from this string. Model will be loaded from this string.
...@@ -2561,7 +2555,7 @@ class Booster: ...@@ -2561,7 +2555,7 @@ class Booster:
out_num_iterations = ctypes.c_int(0) out_num_iterations = ctypes.c_int(0)
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(model_file), c_str(str(model_file)),
ctypes.byref(out_num_iterations), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
out_num_class = ctypes.c_int(0) out_num_class = ctypes.c_int(0)
...@@ -3200,7 +3194,7 @@ class Booster: ...@@ -3200,7 +3194,7 @@ class Booster:
Parameters Parameters
---------- ----------
filename : string filename : string or pathlib.Path
Filename to save Booster. Filename to save Booster.
num_iteration : int or None, optional (default=None) num_iteration : int or None, optional (default=None)
Index of the iteration that should be saved. Index of the iteration that should be saved.
...@@ -3226,7 +3220,7 @@ class Booster: ...@@ -3226,7 +3220,7 @@ class Booster:
ctypes.c_int(start_iteration), ctypes.c_int(start_iteration),
ctypes.c_int(num_iteration), ctypes.c_int(num_iteration),
ctypes.c_int(importance_type_int), ctypes.c_int(importance_type_int),
c_str(filename))) c_str(str(filename))))
_dump_pandas_categorical(self.pandas_categorical, filename) _dump_pandas_categorical(self.pandas_categorical, filename)
return self return self
...@@ -3400,9 +3394,9 @@ class Booster: ...@@ -3400,9 +3394,9 @@ class Booster:
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
Data source for prediction. Data source for prediction.
If string, it represents the path to txt file. If string or pathlib.Path, it represents the path to txt file.
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Start index of the iteration to predict. Start index of the iteration to predict.
If <= 0, starts from the first iteration. If <= 0, starts from the first iteration.
...@@ -3455,9 +3449,9 @@ class Booster: ...@@ -3455,9 +3449,9 @@ class Booster:
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse data : string, pathlib.Path, numpy array, pandas DataFrame, H2O DataTable's Frame or scipy.sparse
Data source for refit. Data source for refit.
If string, it represents the path to txt file. If string or pathlib.Path, it represents the path to txt file.
label : list, numpy 1-D array or pandas Series / one-column DataFrame label : list, numpy 1-D array or pandas Series / one-column DataFrame
Label for refit. Label for refit.
decay_rate : float, optional (default=0.9) decay_rate : float, optional (default=0.9)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import collections import collections
import copy import copy
from operator import attrgetter from operator import attrgetter
from pathlib import Path
import numpy as np import numpy as np
...@@ -76,7 +77,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -76,7 +77,7 @@ def train(params, train_set, num_boost_round=100,
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``. set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : string, Booster or None, optional (default=None) init_model : string, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continue training. Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of strings or 'auto', optional (default="auto")
Feature names. Feature names.
...@@ -161,7 +162,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -161,7 +162,7 @@ def train(params, train_set, num_boost_round=100,
if num_boost_round <= 0: if num_boost_round <= 0:
raise ValueError("num_boost_round should be greater than zero.") raise ValueError("num_boost_round should be greater than zero.")
if isinstance(init_model, str): if isinstance(init_model, (str, Path)):
predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
elif isinstance(init_model, Booster): elif isinstance(init_model, Booster):
predictor = init_model._to_predictor(dict(init_model.params, **params)) predictor = init_model._to_predictor(dict(init_model.params, **params))
...@@ -470,7 +471,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -470,7 +471,7 @@ def cv(params, train_set, num_boost_round=100,
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``. set ``metrics`` to the string ``"None"``.
init_model : string, Booster or None, optional (default=None) init_model : string, pathlib.Path, Booster or None, optional (default=None)
Filename of LightGBM model or Booster instance used for continue training. Filename of LightGBM model or Booster instance used for continue training.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of strings or 'auto', optional (default="auto")
Feature names. Feature names.
...@@ -545,7 +546,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -545,7 +546,7 @@ def cv(params, train_set, num_boost_round=100,
if num_boost_round <= 0: if num_boost_round <= 0:
raise ValueError("num_boost_round should be greater than zero.") raise ValueError("num_boost_round should be greater than zero.")
if isinstance(init_model, str): if isinstance(init_model, (str, Path)):
predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
elif isinstance(init_model, Booster): elif isinstance(init_model, Booster):
predictor = init_model._to_predictor(dict(init_model.params, **params)) predictor = init_model._to_predictor(dict(init_model.params, **params))
......
# coding: utf-8 # coding: utf-8
"""Find the path to LightGBM dynamic library files.""" """Find the path to LightGBM dynamic library files."""
import os from os import environ
from pathlib import Path
from platform import system from platform import system
from typing import List from typing import List
...@@ -13,27 +14,26 @@ def find_lib_path() -> List[str]: ...@@ -13,27 +14,26 @@ def find_lib_path() -> List[str]:
lib_path: list of strings lib_path: list of strings
List of all found library paths to LightGBM. List of all found library paths to LightGBM.
""" """
if os.environ.get('LIGHTGBM_BUILD_DOC', False): if environ.get('LIGHTGBM_BUILD_DOC', False):
# we don't need lib_lightgbm while building docs # we don't need lib_lightgbm while building docs
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = Path(__file__).parent.absolute()
dll_path = [curr_path, dll_path = [curr_path,
os.path.join(curr_path, '../../'), curr_path.parents[1],
os.path.join(curr_path, 'compile'), curr_path / 'compile',
os.path.join(curr_path, '../compile'), curr_path.parent / 'compile',
os.path.join(curr_path, '../../lib/')] curr_path.parents[1] / 'lib']
if system() in ('Windows', 'Microsoft'): if system() in ('Windows', 'Microsoft'):
dll_path.append(os.path.join(curr_path, '../compile/Release/')) dll_path.append(curr_path.parent / 'compile' / 'Release')
dll_path.append(os.path.join(curr_path, '../compile/windows/x64/DLL/')) dll_path.append(curr_path.parent / 'compile' / 'windows' / 'x64' / 'DLL')
dll_path.append(os.path.join(curr_path, '../../Release/')) dll_path.append(curr_path.parents[1] / 'Release')
dll_path.append(os.path.join(curr_path, '../../windows/x64/DLL/')) dll_path.append(curr_path.parents[1] / 'windows' / 'x64' / 'DLL')
dll_path = [os.path.join(p, 'lib_lightgbm.dll') for p in dll_path] dll_path = [p / 'lib_lightgbm.dll' for p in dll_path]
else: else:
dll_path = [os.path.join(p, 'lib_lightgbm.so') for p in dll_path] dll_path = [p / 'lib_lightgbm.so' for p in dll_path]
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [str(p) for p in dll_path if p.is_file()]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path_joined = '\n'.join(map(str, dll_path))
new_line = "\n" raise Exception(f'Cannot find lightgbm library file in following paths:\n{dll_path_joined}')
raise Exception(f'Cannot find lightgbm library file in following paths:{new_line}{new_line.join(dll_path)}')
return lib_path return lib_path
...@@ -256,7 +256,7 @@ _lgbmmodel_doc_fit = ( ...@@ -256,7 +256,7 @@ _lgbmmodel_doc_fit = (
callbacks : list of callback functions or None, optional (default=None) callbacks : list of callback functions or None, optional (default=None)
List of callback functions that are applied at each iteration. List of callback functions that are applied at each iteration.
See Callbacks in Python API for more information. See Callbacks in Python API for more information.
init_model : string, Booster, LGBMModel or None, optional (default=None) init_model : string, pathlib.Path, Booster, LGBMModel or None, optional (default=None)
Filename of LightGBM model, Booster instance or LGBMModel instance used for continue training. Filename of LightGBM model, Booster instance or LGBMModel instance used for continue training.
Returns Returns
......
...@@ -49,8 +49,8 @@ def test_basic(tmp_path): ...@@ -49,8 +49,8 @@ def test_basic(tmp_path):
assert bst.lower_bound() == pytest.approx(-2.9040190126976606) assert bst.lower_bound() == pytest.approx(-2.9040190126976606)
assert bst.upper_bound() == pytest.approx(3.3182142872462883) assert bst.upper_bound() == pytest.approx(3.3182142872462883)
tname = str(tmp_path / "svm_light.dat") tname = tmp_path / "svm_light.dat"
model_file = str(tmp_path / "model.txt") model_file = tmp_path / "model.txt"
bst.save_model(model_file) bst.save_model(model_file)
pred_from_matr = bst.predict(X_test) pred_from_matr = bst.predict(X_test)
...@@ -153,8 +153,8 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq): ...@@ -153,8 +153,8 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
X = data[:, :-1] X = data[:, :-1]
Y = data[:, -1] Y = data[:, -1]
npy_bin_fname = str(tmpdir / 'data_from_npy.bin') npy_bin_fname = tmpdir / 'data_from_npy.bin'
seq_bin_fname = str(tmpdir / 'data_from_seq.bin') seq_bin_fname = tmpdir / 'data_from_seq.bin'
# Create dataset from numpy array directly. # Create dataset from numpy array directly.
ds = lgb.Dataset(X, label=Y, params=params) ds = lgb.Dataset(X, label=Y, params=params)
...@@ -175,9 +175,9 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq): ...@@ -175,9 +175,9 @@ def test_sequence(tmpdir, sample_count, batch_size, include_0_and_nan, num_seq):
valid_X = valid_data[:, :-1] valid_X = valid_data[:, :-1]
valid_Y = valid_data[:, -1] valid_Y = valid_data[:, -1]
valid_npy_bin_fname = str(tmpdir / 'valid_data_from_npy.bin') valid_npy_bin_fname = tmpdir / 'valid_data_from_npy.bin'
valid_seq_bin_fname = str(tmpdir / 'valid_data_from_seq.bin') valid_seq_bin_fname = tmpdir / 'valid_data_from_seq.bin'
valid_seq2_bin_fname = str(tmpdir / 'valid_data_from_seq2.bin') valid_seq2_bin_fname = tmpdir / 'valid_data_from_seq2.bin'
valid_ds = lgb.Dataset(valid_X, label=valid_Y, params=params, reference=ds) valid_ds = lgb.Dataset(valid_X, label=valid_Y, params=params, reference=ds)
valid_ds.save_binary(valid_npy_bin_fname) valid_ds.save_binary(valid_npy_bin_fname)
...@@ -268,10 +268,10 @@ def test_add_features_equal_data_on_alternating_used_unused(tmp_path): ...@@ -268,10 +268,10 @@ def test_add_features_equal_data_on_alternating_used_unused(tmp_path):
d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct()
d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
d1.add_features_from(d2) d1.add_features_from(d2)
d1name = str(tmp_path / "d1.txt") d1name = tmp_path / "d1.txt"
d1._dump_text(d1name) d1._dump_text(d1name)
d = lgb.Dataset(X, feature_name=names).construct() d = lgb.Dataset(X, feature_name=names).construct()
dname = str(tmp_path / "d.txt") dname = tmp_path / "d.txt"
d._dump_text(dname) d._dump_text(dname)
with open(d1name, 'rt') as d1f: with open(d1name, 'rt') as d1f:
d1txt = d1f.read() d1txt = d1f.read()
...@@ -297,8 +297,8 @@ def test_add_features_same_booster_behaviour(tmp_path): ...@@ -297,8 +297,8 @@ def test_add_features_same_booster_behaviour(tmp_path):
for k in range(10): for k in range(10):
b.update() b.update()
b1.update() b1.update()
dname = str(tmp_path / "d.txt") dname = tmp_path / "d.txt"
d1name = str(tmp_path / "d1.txt") d1name = tmp_path / "d1.txt"
b1.save_model(d1name) b1.save_model(d1name)
b.save_model(dname) b.save_model(dname)
with open(dname, 'rt') as df: with open(dname, 'rt') as df:
...@@ -352,7 +352,7 @@ def test_cegb_affects_behavior(tmp_path): ...@@ -352,7 +352,7 @@ def test_cegb_affects_behavior(tmp_path):
base = lgb.Booster(train_set=ds) base = lgb.Booster(train_set=ds)
for k in range(10): for k in range(10):
base.update() base.update()
basename = str(tmp_path / "basename.txt") basename = tmp_path / "basename.txt"
base.save_model(basename) base.save_model(basename)
with open(basename, 'rt') as f: with open(basename, 'rt') as f:
basetxt = f.read() basetxt = f.read()
...@@ -364,7 +364,7 @@ def test_cegb_affects_behavior(tmp_path): ...@@ -364,7 +364,7 @@ def test_cegb_affects_behavior(tmp_path):
booster = lgb.Booster(train_set=ds, params=case) booster = lgb.Booster(train_set=ds, params=case)
for k in range(10): for k in range(10):
booster.update() booster.update()
casename = str(tmp_path / "casename.txt") casename = tmp_path / "casename.txt"
booster.save_model(casename) booster.save_model(casename)
with open(casename, 'rt') as f: with open(casename, 'rt') as f:
casetxt = f.read() casetxt = f.read()
...@@ -391,13 +391,13 @@ def test_cegb_scaling_equalities(tmp_path): ...@@ -391,13 +391,13 @@ def test_cegb_scaling_equalities(tmp_path):
for k in range(10): for k in range(10):
booster1.update() booster1.update()
booster2.update() booster2.update()
p1name = str(tmp_path / "p1.txt") p1name = tmp_path / "p1.txt"
# Reset booster1's parameters to p2, so the parameter section of the file matches. # Reset booster1's parameters to p2, so the parameter section of the file matches.
booster1.reset_parameter(p2) booster1.reset_parameter(p2)
booster1.save_model(p1name) booster1.save_model(p1name)
with open(p1name, 'rt') as f: with open(p1name, 'rt') as f:
p1txt = f.read() p1txt = f.read()
p2name = str(tmp_path / "p2.txt") p2name = tmp_path / "p2.txt"
booster2.save_model(p2name) booster2.save_model(p2name)
with open(p2name, 'rt') as f: with open(p2name, 'rt') as f:
p2txt = f.read() p2txt = f.read()
......
...@@ -24,7 +24,7 @@ class FileLoader: ...@@ -24,7 +24,7 @@ class FileLoader:
self.params[key] = value if key != 'num_trees' else int(value) self.params[key] = value if key != 'num_trees' else int(value)
def load_dataset(self, suffix, is_sparse=False): def load_dataset(self, suffix, is_sparse=False):
filename = self.path(suffix) filename = str(self.path(suffix))
if is_sparse: if is_sparse:
X, Y = load_svmlight_file(filename, dtype=np.float64, zero_based=True) X, Y = load_svmlight_file(filename, dtype=np.float64, zero_based=True)
return X, Y, filename return X, Y, filename
...@@ -62,7 +62,7 @@ class FileLoader: ...@@ -62,7 +62,7 @@ class FileLoader:
assert a == b, f assert a == b, f
def path(self, suffix): def path(self, suffix):
return str(self.directory / f'{self.prefix}{suffix}') return self.directory / f'{self.prefix}{suffix}'
def test_binary(): def test_binary():
......
...@@ -2261,7 +2261,7 @@ def test_forced_bins(): ...@@ -2261,7 +2261,7 @@ def test_forced_bins():
x[:, 0] = np.arange(0, 1, 0.01) x[:, 0] = np.arange(0, 1, 0.01)
x[:, 1] = -np.arange(0, 1, 0.01) x[:, 1] = -np.arange(0, 1, 0.01)
y = np.arange(0, 1, 0.01) y = np.arange(0, 1, 0.01)
forcedbins_filename = str( forcedbins_filename = (
Path(__file__).absolute().parents[2] / 'examples' / 'regression' / 'forced_bins.json' Path(__file__).absolute().parents[2] / 'examples' / 'regression' / 'forced_bins.json'
) )
params = {'objective': 'regression_l1', params = {'objective': 'regression_l1',
...@@ -2285,7 +2285,7 @@ def test_forced_bins(): ...@@ -2285,7 +2285,7 @@ def test_forced_bins():
est = lgb.train(params, lgb_x, num_boost_round=20) est = lgb.train(params, lgb_x, num_boost_round=20)
predicted = est.predict(new_x) predicted = est.predict(new_x)
assert len(np.unique(predicted)) == 3 assert len(np.unique(predicted)) == 3
params['forcedbins_filename'] = str( params['forcedbins_filename'] = (
Path(__file__).absolute().parents[2] / 'examples' / 'regression' / 'forced_bins2.json' Path(__file__).absolute().parents[2] / 'examples' / 'regression' / 'forced_bins2.json'
) )
params['max_bin'] = 11 params['max_bin'] = 11
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment