Commit eba6d200 authored by wxchan

Squash into one commit:

1. merge python-package
2. add dump model to json
3. fix bugs
4. clean code with pylint
5. update python examples
parent 19e085c9
# coding: utf-8
"""Find the path to lightgbm dynamic library files."""
import os
import sys
def find_lib_path():
"""Find the path to LightGBM library files.
Returns
-------
lib_path: list(string)
List of all found library paths to LightGBM
"""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../lib/'),
os.path.join(curr_path, '../../'),
os.path.join(curr_path, './lib/'),
os.path.join(sys.prefix, 'lightgbm')]
if os.name == 'nt':
dll_path.append(os.path.join(curr_path, '../../windows/x64/Dll/'))
dll_path.append(os.path.join(curr_path, './windows/x64/Dll/'))
dll_path = [os.path.join(p, 'lib_lightgbm.dll') for p in dll_path]
else:
dll_path = [os.path.join(p, 'lib_lightgbm.so') for p in dll_path]
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path:
raise Exception('Cannot find lightgbm library')
return lib_path
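As a quick usage sketch (illustrative, not part of the module; assumes the lightgbm/libpath.py layout used by setup.py below), the first discovered path can be handed straight to ctypes:

import ctypes
from lightgbm.libpath import find_lib_path

# load the first discovered lib_lightgbm shared library
_LIB = ctypes.cdll.LoadLibrary(find_lib_path()[0])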
# coding: utf-8
# pylint: disable = invalid-name, W0105
"""Scikit-Learn Wrapper interface for LightGBM."""
from __future__ import absolute_import
import numpy as np
from .basic import LightGBMError, Predictor, Dataset, Booster, is_str
from .engine import train
# sklearn
try:
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
SKLEARN_INSTALLED = True
LGBMModelBase = BaseEstimator
LGBMRegressorBase = RegressorMixin
LGBMClassifierBase = ClassifierMixin
LGBMLabelEncoder = LabelEncoder
except ImportError:
SKLEARN_INSTALLED = False
LGBMModelBase = object
LGBMClassifierBase = object
LGBMRegressorBase = object
LGBMLabelEncoder = None
def _point_wise_objective(func):
"""Decorate an objective function
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id
if you want to get i-th row y_pred in j-th class, the access way is y_pred[j*num_data+i]
and you should group grad and hess in this way as well
Parameters
----------
func: callable
Expects a callable with signature ``func(y_true, y_pred)``:
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples] or shape [n_samples * n_class] (for multi-class)
The predicted values
Returns
-------
new_func: callable
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
preds: array_like, shape [n_samples] or shape [n_samples * n_class]
The predicted values
dataset: ``dataset``
The training set from which the labels will be extracted using
``dataset.get_label()``
"""
def inner(preds, dataset):
"""internal function"""
labels = dataset.get_label()
grad, hess = func(labels, preds)
"""weighted for objective"""
weight = dataset.get_weight()
if weight is not None:
"""only one class"""
if len(weight) == len(grad):
grad = np.multiply(grad, weight)
hess = np.multiply(hess, weight)
else:
num_data = len(weight)
num_class = len(grad) // num_data
if num_class * num_data != len(grad):
raise ValueError("length of grad and hess should equal with num_class * num_data")
for k in range(num_class):
for i in range(num_data):
idx = k * num_data + i
grad[idx] *= weight[i]
hess[idx] *= weight[i]
return grad, hess
return inner
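For reference, here is a minimal custom objective matching the func(y_true, y_pred) contract described above; this hand-rolled squared-error sketch is illustrative, not shipped with the package:

import numpy as np

def squared_error(y_true, y_pred):
    # gradient and hessian of 0.5 * (y_pred - y_true) ** 2
    grad = y_pred - y_true
    hess = np.ones_like(y_true, dtype=float)
    return grad, hess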
class LGBMModel(LGBMModelBase):
"""Implementation of the Scikit-Learn API for LightGBM.
Parameters
----------
num_leaves : int
Maximum tree leaves for base learners.
max_depth : int
Maximum tree depth for base learners, -1 means no limit.
learning_rate : float
Boosting learning rate
n_estimators : int
Number of boosted trees to fit.
max_bin : int
Number of bins that feature values are bucketed into.
silent : boolean
Whether to print messages while running boosting.
objective : string or callable
Specify the learning task and the corresponding learning objective or
a custom objective function to be used (see note below).
nthread : int
Number of parallel threads
min_split_gain : float
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : int
Minimum sum of instance weight (hessian) needed in a child (leaf).
min_child_samples : int
Minimum number of data needed in a child (leaf).
subsample : float
Subsample ratio of the training instance.
subsample_freq : int
Frequency of subsample; <=0 means no subsampling.
colsample_bytree : float
Subsample ratio of columns when constructing each tree.
reg_alpha : float
L1 regularization term on weights
reg_lambda : float
L2 regularization term on weights
scale_pos_weight : float
Balancing of positive and negative weights.
is_unbalance : bool
Whether the training data is unbalanced (binary classification only).
seed : int
Random number seed.
Note
----
A custom objective function can be provided for the ``objective``
parameter. In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess``:
y_true: array_like of shape [n_samples]
The target values
y_pred: array_like of shape [n_samples] or shape [n_samples * n_class]
The predicted values
grad: array_like of shape [n_samples] or shape [n_samples * n_class]
The value of the gradient for each sample point.
hess: array_like of shape [n_samples] or shape [n_samples * n_class]
The value of the second derivative for each sample point.
For multi-class task, y_pred is grouped by class_id first, then by row_id;
to get the i-th row's y_pred in the j-th class, access y_pred[j * num_data + i],
and you should group grad and hess in the same way.
"""
def __init__(self, num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=10, max_bin=255,
silent=True, objective="regression",
nthread=-1, min_split_gain=0, min_child_weight=5, min_child_samples=10,
subsample=1, subsample_freq=1, colsample_bytree=1,
reg_alpha=0, reg_lambda=0, scale_pos_weight=1,
is_unbalance=False, seed=0):
if not SKLEARN_INSTALLED:
raise LightGBMError('sklearn needs to be installed in order to use this module')
self.num_leaves = num_leaves
self.max_depth = max_depth
self.learning_rate = learning_rate
self.n_estimators = n_estimators
self.max_bin = max_bin
self.silent = silent
self.objective = objective
self.nthread = nthread
self.min_split_gain = min_split_gain
self.min_child_weight = min_child_weight
self.min_child_samples = min_child_samples
self.subsample = subsample
self.subsample_freq = subsample_freq
self.colsample_bytree = colsample_bytree
self.reg_alpha = reg_alpha
self.reg_lambda = reg_lambda
self.scale_pos_weight = scale_pos_weight
self.is_unbalance = is_unbalance
self.seed = seed
self._Booster = None
if callable(self.objective):
self.fobj = _point_wise_objective(self.objective)
else:
self.fobj = None
def booster(self):
"""Get the underlying lightgbm Booster of this model.
This will raise an exception if fit has not been called.
Returns
-------
booster : a lightgbm booster of underlying model
"""
if self._Booster is None:
raise LightGBMError('need to call fit beforehand')
return self._Booster
def get_params(self, deep=False):
"""Get parameters"""
params = super(LGBMModel, self).get_params(deep=deep)
params['verbose'] = 0 if self.silent else 1
if self.nthread <= 0:
params.pop('nthread', None)
return params
def fit(self, X, y, eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True,
train_fields=None, valid_fields=None, other_params=None):
"""
Fit the gradient boosting model
Parameters
----------
X : array_like
Feature matrix
y : array_like
Labels
eval_set : list, optional
A list of (X, y) tuple pairs to use as a validation set for early-stopping
eval_metric : str, list of str, callable, optional
If a str, should be a built-in evaluation metric to use.
If callable, a custom evaluation metric. The call
signature is func(y_predicted, dataset), where dataset is a
Dataset object, so you may need to call its get_label
method. It must return (eval_name -> str, eval_result -> float, is_bigger_better -> bool).
early_stopping_rounds : int
Activates early stopping; validation error needs to improve at least once
every early_stopping_rounds rounds to continue training.
verbose : bool
If `verbose` and an evaluation set is used, writes the evaluation metric
results at each boosting stage.
train_fields : dict
Additional fields of the training data, e.g. train_fields['weight'] is the weight data.
Supported fields: weight, group, init_score
valid_fields : dict
Additional fields of the validation data,
e.g. valid_fields[0]['weight'] is the weight data of the first validation set.
Supported fields: weight, group, init_score
other_params: dict
Other parameters passed through to LightGBM
"""
evals_result = {}
params = self.get_params()
if other_params is not None:
params.update(other_params)
if self.fobj:
params["objective"] = "None"
else:
params["objective"] = self.objective
if eval_metric is None and eval_set is not None:
eval_metric = {
'regression': 'l2',
'binary': 'binary_logloss',
'lambdarank': 'ndcg',
'multiclass': 'multi_logloss'
}.get(self.objective, None)
if callable(eval_metric):
feval = eval_metric
elif is_str(eval_metric) or isinstance(eval_metric, list):
feval = None
params.update({'metric': eval_metric})
else:
feval = None
self._Booster = train(params, (X, y),
self.n_estimators, valid_datas=eval_set,
early_stopping_rounds=early_stopping_rounds,
evals_result=evals_result, fobj=self.fobj, feval=feval,
verbose_eval=verbose, train_fields=train_fields,
valid_fields=valid_fields)
if evals_result:
self.evals_result_ = evals_result
if early_stopping_rounds is not None:
self.best_iteration = self._Booster.best_iteration
return self
def predict(self, data, raw_score=False, num_iteration=0):
return self.booster().predict(data,
raw_score=raw_score,
num_iteration=num_iteration)
def apply(self, X, num_iteration=0):
"""Return the predicted leaf every tree for each sample.
Parameters
----------
X : array_like, shape=[n_samples, n_features]
Input features matrix.
num_iteration : int
Limit number of iterations in the prediction; defaults to 0 (use all trees).
Returns
-------
X_leaves : array_like, shape=[n_samples, n_trees]
"""
return self.booster().predict(X,
pred_leaf=True,
num_iteration=num_iteration)
def evals_result(self):
"""Return the evaluation results.
Returns
-------
evals_result : dictionary
"""
if self.evals_result_:
evals_result = self.evals_result_
else:
raise LightGBMError('No results.')
return evals_result
class LGBMRegressor(LGBMModel, LGBMRegressorBase):
__doc__ = """Implementation of the scikit-learn API for LightGBM regression.
""" + '\n'.join(LGBMModel.__doc__.split('\n')[2:])
class LGBMClassifier(LGBMModel, LGBMClassifierBase):
__doc__ = """Implementation of the scikit-learn API for LightGBM classification.
""" + '\n'.join(LGBMModel.__doc__.split('\n')[2:])
def fit(self, X, y, eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True,
train_fields=None, valid_fields=None, other_params=None):
self.classes_ = np.unique(y)
self.n_classes_ = len(self.classes_)
if other_params is None:
other_params = {}
if self.n_classes_ > 2:
# Switch to using a multiclass objective in the underlying LGBM instance
self.objective = "multiclass"
other_params['num_class'] = self.n_classes_
if eval_metric is None and eval_set is not None:
eval_metric = "multi_logloss"
else:
self.objective = "binary"
if eval_metric is None and eval_set is not None:
eval_metric = "binary_logloss"
self._le = LGBMLabelEncoder().fit(y)
training_labels = self._le.transform(y)
if eval_set is not None:
eval_set = list((x[0], self._le.transform(x[1])) for x in eval_set)
super(LGBMClassifier, self).fit(X, training_labels, eval_set,
eval_metric, early_stopping_rounds,
verbose, train_fields, valid_fields,
other_params)
return self
def predict(self, data, raw_score=False, num_iteration=0):
class_probs = self.booster().predict(data,
raw_score=raw_score,
num_iteration=num_iteration)
if len(class_probs.shape) > 1:
column_indexes = np.argmax(class_probs, axis=1)
else:
column_indexes = np.repeat(0, class_probs.shape[0])
column_indexes[class_probs > 0.5] = 1
return self._le.inverse_transform(column_indexes)
def predict_proba(self, data, raw_score=False, num_iteration=0):
class_probs = self.booster().predict(data,
raw_score=raw_score,
num_iteration=num_iteration)
if self.n_classes_ > 2:
return class_probs
else:
classone_probs = class_probs
classzero_probs = 1.0 - classone_probs
return np.vstack((classzero_probs, classone_probs)).transpose()
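Continuing the synthetic X, y from the regression sketch, classification follows the same flow; for the binary case, predict_proba stacks the implied class-zero column next to the raw scores, as implemented above:

clf = LGBMClassifier(n_estimators=20)
clf.fit(X, (y > y.mean()).astype(int))
proba = clf.predict_proba(X)   # shape [n_samples, 2]
labels = clf.predict(X)        # thresholded at 0.5, per predict() above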
def _group_wise_objective(func):
"""Decorate an objective function
Parameters
----------
func: callable
Expects a callable with signature ``func(y_true, group, y_pred)``:
y_true: array_like of shape [n_samples]
The target values
group : array_like
Group sizes of the data
y_pred: array_like of shape [n_samples] or shape [n_samples * n_class] (for multi-class)
The predicted values
Returns
-------
new_func: callable
The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``:
preds: array_like, shape [n_samples] or shape [n_samples * n_class]
The predicted values
dataset: ``dataset``
The training set from which the labels will be extracted using
``dataset.get_label()``
"""
def inner(preds, dataset):
"""internal function"""
labels = dataset.get_label()
group = dataset.get_group()
if group is None:
raise ValueError("group should not be None for ranking task")
grad, hess = func(labels, group, preds)
"""weighted for objective"""
weight = dataset.get_weight()
if weight is not None:
"""only one class"""
if len(weight) == len(grad):
grad = np.multiply(grad, weight)
hess = np.multiply(hess, weight)
else:
raise ValueError("lenght of grad and hess should equal with num_data")
return grad, hess
return inner
class LGBMRanker(LGBMModel):
__doc__ = """Implementation of the scikit-learn API for LightGBM ranking application.
""" + '\n'.join(LGBMModel.__doc__.split('\n')[2:])
def fit(self, X, y, eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True,
train_fields=None, valid_fields=None, other_params=None):
"""check group data"""
if "group" not in train_fields:
raise ValueError("should set group in train_fields for ranking task")
if eval_set is not None:
if valid_fields is None:
raise ValueError("valid_fields cannot be None when eval_set is not None")
elif len(valid_fields) != len(eval_set):
raise ValueError("lenght of valid_fields should equal with eval_set")
else:
for inner in valid_fields:
if "group" not in inner:
raise ValueError("should set group in valid_fields for ranking task")
if callable(self.objective):
self.fobj = _group_wise_objective(self.objective)
else:
self.objective = "lambdarank"
self.fobj = None
if eval_metric is None and eval_set is not None:
eval_metric = "ndcg"
super(LGBMRanker, self).fit(X, y, eval_set, eval_metric,
early_stopping_rounds, verbose,
train_fields, valid_fields,
other_params)
return self
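For ranking, group sizes must be supplied through train_fields, per the checks above; a synthetic sketch reusing X from the examples above (group sizes have to sum to the number of rows):

rk = LGBMRanker(n_estimators=20)
rk.fit(X, np.random.randint(0, 3, 100),
       train_fields={'group': np.array([50, 50])})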
# coding: utf-8
# pylint: disable=invalid-name, exec-used
"""Setup lightgbm package."""
from __future__ import absolute_import
import sys
import os
from setuptools import setup, find_packages
# import subprocess
sys.path.insert(0, '.')
CURRENT_DIR = os.path.dirname(__file__)
libpath_py = os.path.join(CURRENT_DIR, 'lightgbm/libpath.py')
libpath = {'__file__': libpath_py}
exec(compile(open(libpath_py, "rb").read(), libpath_py, 'exec'), libpath, libpath)
LIB_PATH = libpath['find_lib_path']()
print("Install lib_lightgbm from: %s" % LIB_PATH)
setup(name='lightgbm',
version='0.1',
description="LightGBM Python Package",
install_requires=[
'numpy',
'scipy',
],
maintainer='Guolin Ke',
maintainer_email='guolin.ke@microsoft.com',
zip_safe=False,
packages=find_packages(),
include_package_data=True,
data_files=[('lightgbm', LIB_PATH)],
url='https://github.com/Microsoft/LightGBM')
@@ -108,7 +108,7 @@ void Application::LoadData() {
// prediction is needed if using input initial model(continued train)
PredictFunction predict_fun = nullptr;
// need to continue training
if (boosting_->NumberOfSubModels() > 0) {
if (boosting_->NumberOfTotalModel() > 0) {
Predictor predictor(boosting_.get(), true, false);
predict_fun = predictor.GetPredictFunction();
}
@@ -139,40 +139,44 @@ void Application::LoadData() {
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init("training", train_data_->metadata(),
train_data_->num_data());
metric->Init(train_data_->metadata(), train_data_->num_data());
train_metric_.push_back(std::move(metric));
}
}
train_metric_.shrink_to_fit();
// Add validation data, if it exists
for (size_t i = 0; i < config_.io_config.valid_data_filenames.size(); ++i) {
// add
auto new_dataset = std::unique_ptr<Dataset>(
dataset_loader.LoadFromFileAlignWithOtherDataset(
config_.io_config.valid_data_filenames[i].c_str(),
train_data_.get())
);
valid_datas_.push_back(std::move(new_dataset));
// need save binary file
if (config_.io_config.is_save_binary_file) {
valid_datas_.back()->SaveBinaryFile(nullptr);
}
// add metric for validation data
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init(config_.io_config.valid_data_filenames[i].c_str(),
valid_datas_.back()->metadata(),
valid_datas_.back()->num_data());
valid_metrics_.back().push_back(std::move(metric));
if (config_.metric_types.size() > 0) {
// only need to construct validation data when there are metrics
// Add validation data, if it exists
for (size_t i = 0; i < config_.io_config.valid_data_filenames.size(); ++i) {
// add
auto new_dataset = std::unique_ptr<Dataset>(
dataset_loader.LoadFromFileAlignWithOtherDataset(
config_.io_config.valid_data_filenames[i].c_str(),
train_data_.get())
);
valid_datas_.push_back(std::move(new_dataset));
// need save binary file
if (config_.io_config.is_save_binary_file) {
valid_datas_.back()->SaveBinaryFile(nullptr);
}
// add metric for validation data
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init(valid_datas_.back()->metadata(),
valid_datas_.back()->num_data());
valid_metrics_.back().push_back(std::move(metric));
}
valid_metrics_.back().shrink_to_fit();
}
valid_metrics_.back().shrink_to_fit();
valid_datas_.shrink_to_fit();
valid_metrics_.shrink_to_fit();
}
valid_datas_.shrink_to_fit();
valid_metrics_.shrink_to_fit();
auto end_time = std::chrono::high_resolution_clock::now();
// output used time on each iteration
Log::Info("Finished loading data in %f seconds",
@@ -209,7 +213,7 @@ void Application::InitTrain() {
Common::ConstPtrInVectorWrapper<Metric>(train_metric_));
// add validation data into boosting
for (size_t i = 0; i < valid_datas_.size(); ++i) {
boosting_->AddDataset(valid_datas_[i].get(),
boosting_->AddValidDataset(valid_datas_[i].get(),
Common::ConstPtrInVectorWrapper<Metric>(valid_metrics_[i]));
}
Log::Info("Finished initializing training");
@@ -227,17 +231,15 @@ void Application::Train() {
// output used time per iteration
Log::Info("%f seconds elapsed, finished iteration %d", std::chrono::duration<double,
std::milli>(end_time - start_time) * 1e-3, iter + 1);
boosting_->SaveModelToFile(NO_LIMIT, is_finished, config_.io_config.output_model.c_str());
}
is_finished = true;
// save model to file
boosting_->SaveModelToFile(NO_LIMIT, is_finished, config_.io_config.output_model.c_str());
boosting_->SaveModelToFile(-1, config_.io_config.output_model.c_str());
Log::Info("Finished training");
}
void Application::Predict() {
boosting_->SetNumUsedModel(config_.io_config.num_model_predict);
boosting_->SetNumIterationForPred(config_.io_config.num_iteration_predict);
// create predictor
Predictor predictor(boosting_.get(), config_.io_config.is_predict_raw_score,
config_.io_config.is_predict_leaf_index);
......
@@ -15,7 +15,7 @@ BoostingType GetBoostingTypeFromModelFile(const char* filename) {
return BoostingType::kUnknow;
}
void LoadFileToBoosting(Boosting* boosting, const char* filename) {
void Boosting::LoadFileToBoosting(Boosting* boosting, const char* filename) {
if (boosting != nullptr) {
TextReader<size_t> model_reader(filename, true);
model_reader.ReadAllLines();
......
@@ -43,6 +43,7 @@ public:
* \brief one training iteration
*/
bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override {
is_update_score_cur_iter_ = false;
GBDT::TrainOneIter(gradient, hessian, false);
// normalize
Normalize();
@@ -58,22 +59,15 @@ public:
* \return training score
*/
const score_t* GetTrainingScore(data_size_t* out_len) override {
DroppingTrees();
if (!is_update_score_cur_iter_) {
// only drop one time in one iteration
DroppingTrees();
is_update_score_cur_iter_ = true;
}
*out_len = train_score_updater_->num_data() * num_class_;
return train_score_updater_->score();
}
/*!
* \brief save model to file
* \param num_used_model number of model that want to save, -1 means save all
* \param is_finish is training finished or not
* \param filename filename that want to save to
*/
void SaveModelToFile(int num_used_model, bool is_finish, const char* filename) override {
// only save model once when is_finish = true
if (is_finish && saved_model_size_ < 0) {
GBDT::SaveModelToFile(num_used_model, is_finish, filename);
}
}
/*!
* \brief Get Type name of this boosting object
*/
@@ -133,6 +127,8 @@ private:
double drop_rate_;
/*! \brief Random generator, used to select dropping trees */
Random random_for_drop_;
/*! \brief Flag whether the score was updated on the current iteration */
bool is_update_score_cur_iter_;
};
} // namespace LightGBM
......
@@ -16,7 +16,9 @@
namespace LightGBM {
GBDT::GBDT() : saved_model_size_(-1), num_used_model_(0) {
GBDT::GBDT()
:num_iteration_for_pred_(0),
num_init_iteration_(0) {
}
@@ -26,69 +28,94 @@ GBDT::~GBDT() {
void GBDT::Init(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) {
gbdt_config_ = config;
iter_ = 0;
saved_model_size_ = -1;
num_used_model_ = 0;
num_iteration_for_pred_ = 0;
max_feature_idx_ = 0;
num_class_ = config->num_class;
train_data_ = nullptr;
ResetTrainingData(config, train_data, object_function, training_metrics);
}
void GBDT::ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics) {
if (train_data_ != nullptr && !train_data_->CheckAlign(*train_data)) {
Log::Fatal("cannot reset training data, since new training data has different bin mappers");
}
gbdt_config_ = config;
early_stopping_round_ = gbdt_config_->early_stopping_round;
shrinkage_rate_ = gbdt_config_->learning_rate;
train_data_ = train_data;
num_class_ = config->num_class;
random_ = Random(gbdt_config_->bagging_seed);
// create tree learner
tree_learner_.clear();
for (int i = 0; i < num_class_; ++i) {
auto new_tree_learner = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(gbdt_config_->tree_learner_type, gbdt_config_->tree_config));
new_tree_learner->Init(train_data_);
new_tree_learner->Init(train_data);
// init tree learner
tree_learner_.push_back(std::move(new_tree_learner));
}
tree_learner_.shrink_to_fit();
object_function_ = object_function;
// push training metrics
training_metrics_.clear();
for (const auto& metric : training_metrics) {
training_metrics_.push_back(metric);
}
training_metrics_.shrink_to_fit();
// create score tracker
train_score_updater_.reset(new ScoreUpdater(train_data_, num_class_));
num_data_ = train_data_->num_data();
// create buffer for gradients and hessians
if (object_function_ != nullptr) {
gradients_ = std::vector<score_t>(num_data_ * num_class_);
hessians_ = std::vector<score_t>(num_data_ * num_class_);
}
sigmoid_ = -1.0f;
if (object_function_ != nullptr
if (object_function_ != nullptr
&& std::string(object_function_->GetName()) == std::string("binary")) {
// only binary classification need sigmoid transform
sigmoid_ = gbdt_config_->sigmoid;
}
// get max feature index
max_feature_idx_ = train_data_->num_total_features() - 1;
// get label index
label_idx_ = train_data_->label_idx();
// if need bagging, create buffer
if (gbdt_config_->bagging_fraction < 1.0 && gbdt_config_->bagging_freq > 0) {
out_of_bag_data_indices_ = std::vector<data_size_t>(num_data_);
bag_data_indices_ = std::vector<data_size_t>(num_data_);
} else {
out_of_bag_data_cnt_ = 0;
out_of_bag_data_indices_.clear();
bag_data_cnt_ = num_data_;
bag_data_indices_.clear();
if (train_data_ != train_data) {
// not same training data, need reset score and others
// create score tracker
train_score_updater_.reset(new ScoreUpdater(train_data, num_class_));
// update score
for (int i = 0; i < iter_; ++i) {
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
auto curr_tree = (i + num_init_iteration_) * num_class_ + curr_class;
train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
}
}
num_data_ = train_data->num_data();
// create buffer for gradients and hessians
if (object_function_ != nullptr) {
gradients_ = std::vector<score_t>(num_data_ * num_class_);
hessians_ = std::vector<score_t>(num_data_ * num_class_);
}
// get max feature index
max_feature_idx_ = train_data->num_total_features() - 1;
// get label index
label_idx_ = train_data->label_idx();
// if need bagging, create buffer
if (gbdt_config_->bagging_fraction < 1.0 && gbdt_config_->bagging_freq > 0) {
out_of_bag_data_indices_ = std::vector<data_size_t>(num_data_);
bag_data_indices_ = std::vector<data_size_t>(num_data_);
} else {
out_of_bag_data_cnt_ = 0;
out_of_bag_data_indices_.clear();
bag_data_cnt_ = num_data_;
bag_data_indices_.clear();
}
}
// initialize random generator
random_ = Random(gbdt_config_->bagging_seed);
train_data_ = train_data;
}
void GBDT::AddDataset(const Dataset* valid_data,
void GBDT::AddValidDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) {
if (iter_ > 0) {
Log::Fatal("Cannot add validation data after training started");
if (!train_data_->CheckAlign(*valid_data)) {
Log::Fatal("cannot add validation data, since it has different bin mappers with training data");
}
// for a validation dataset, we need its score and metric
auto new_score_updater = std::unique_ptr<ScoreUpdater>(new ScoreUpdater(valid_data, num_class_));
// update score
for (int i = 0; i < iter_; ++i) {
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
auto curr_tree = (i + num_init_iteration_) * num_class_ + curr_class;
new_score_updater->AddScore(models_[curr_tree].get(), curr_class);
}
}
valid_score_updater_.push_back(std::move(new_score_updater));
valid_metrics_.emplace_back();
if (early_stopping_round_ > 0) {
@@ -204,6 +231,25 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
void GBDT::RollbackOneIter() {
if (iter_ == 0) { return; }
int cur_iter = iter_ + num_init_iteration_ - 1;
// reset score
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
auto curr_tree = cur_iter * num_class_ + curr_class;
models_[curr_tree]->Shrinkage(-1.0);
train_score_updater_->AddScore(models_[curr_tree].get(), curr_class);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(models_[curr_tree].get(), curr_class);
}
}
// remove model
for (int curr_class = 0; curr_class < num_class_; ++curr_class) {
models_.pop_back();
}
--iter_;
}
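RollbackOneIter undoes the last iteration without recomputing scores from scratch: Shrinkage(-1.0) negates each of that iteration's trees, so re-adding them to the score updaters cancels their contribution. A toy Python sketch of the cancellation (made-up numbers):

scores = [0.5, -0.2, 0.1]        # cached scores after the last iteration
last_tree = [0.1, 0.3, -0.2]     # that iteration's tree outputs
# Shrinkage(-1.0) followed by AddScore amounts to subtraction
scores = [s + (-1.0) * t for s, t in zip(scores, last_tree)]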
bool GBDT::EvalAndCheckEarlyStopping() {
bool is_met_early_stopping = false;
// print message for metric
@@ -236,7 +282,7 @@ bool GBDT::OutputMetric(int iter) {
auto name = sub_metric->GetName();
auto scores = sub_metric->Eval(train_score_updater_->score());
for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), scores[k]);
Log::Info("Iteration:%d, training %s : %f", iter, name[k].c_str(), scores[k]);
}
}
}
@@ -248,7 +294,7 @@ bool GBDT::OutputMetric(int iter) {
if ((iter % gbdt_config_->output_freq) == 0) {
auto name = valid_metrics_[i][j]->GetName();
for (size_t k = 0; k < name.size(); ++k) {
Log::Info("Iteration: %d, %s : %f", iter, name[k].c_str(), test_scores[k]);
Log::Info("Iteration:%d, valid_%d %s : %f", iter, i + 1, name[k].c_str(), test_scores[k]);
}
}
if (!ret && early_stopping_round_ > 0) {
@@ -296,24 +342,23 @@ const score_t* GBDT::GetTrainingScore(data_size_t* out_len) {
return train_score_updater_->score();
}
void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const {
void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) {
CHECK(data_idx >= 0 && data_idx <= static_cast<int>(valid_metrics_.size()));
std::vector<double> ret;
const score_t* raw_scores = nullptr;
data_size_t num_data = 0;
if (data_idx == 0) {
raw_scores = train_score_updater_->score();
raw_scores = GetTrainingScore(out_len);
num_data = train_score_updater_->num_data();
} else {
auto used_idx = data_idx - 1;
raw_scores = valid_score_updater_[used_idx]->score();
num_data = valid_score_updater_[used_idx]->num_data();
*out_len = num_data * num_class_;
}
*out_len = num_data * num_class_;
if (num_class_ > 1) {
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
std::vector<double> tmp_result;
for (int j = 0; j < num_class_; ++j) {
@@ -325,12 +370,12 @@ void GBDT::GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len)
}
}
} else if(sigmoid_ > 0.0f){
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = static_cast<score_t>(1.0f / (1.0f + std::exp(-2.0f * sigmoid_ * raw_scores[i])));
}
} else {
#pragma omp parallel for schedule(guided)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data; ++i) {
out_result[i] = raw_scores[i];
}
@@ -348,55 +393,85 @@ void GBDT::Boosting() {
GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data());
}
void GBDT::SaveModelToFile(int num_used_model, bool is_finish, const char* filename) {
// first time to this function, open file
if (saved_model_size_ < 0) {
model_output_file_.open(filename);
// output model type
model_output_file_ << Name() << std::endl;
// output number of class
model_output_file_ << "num_class=" << num_class_ << std::endl;
// output label index
model_output_file_ << "label_index=" << label_idx_ << std::endl;
// output max_feature_idx
model_output_file_ << "max_feature_idx=" << max_feature_idx_ << std::endl;
// output objective name
if (object_function_ != nullptr) {
model_output_file_ << "objective=" << object_function_->GetName() << std::endl;
std::string GBDT::DumpModel() const {
std::stringstream ss;
ss << "{";
ss << "\"name\":\"" << Name() << "\"," << std::endl;
ss << "\"num_class\":" << num_class_ << "," << std::endl;
ss << "\"label_index\":" << label_idx_ << "," << std::endl;
ss << "\"max_feature_idx\":" << max_feature_idx_ << "," << std::endl;
if (object_function_ != nullptr) {
ss << "\"objective\":\"" << object_function_->GetName() << "\"," << std::endl;
}
ss << "\"sigmoid\":" << sigmoid_ << "," << std::endl;
ss << "\"tree_info\":[";
for (int i = 0; i < static_cast<int>(models_.size()); ++i) {
if (i > 0) {
ss << ",";
}
ss << "{";
ss << "\"tree_index\":" << i << ",";
ss << models_[i]->ToJSON();
ss << "}";
}
ss << "]," << std::endl;
std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance();
ss << "\"feature_importances\":{" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) {
if (i > 0) {
ss << ",";
}
// output sigmoid parameter
model_output_file_ << "sigmoid=" << sigmoid_ << std::endl;
model_output_file_ << std::endl;
saved_model_size_ = 0;
ss << "\"" << pairs[i].second << "\":" << pairs[i].first;
}
// already saved
if (!model_output_file_.is_open()) {
return;
ss << "}" << std::endl;
ss << "}" << std::endl;
return ss.str();
}
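Since DumpModel emits plain JSON, the result round-trips through any JSON parser; a Python sketch (the string below is a hand-written stand-in, not real model output):

import json

dumped = ('{"name":"gbdt","num_class":1,"label_index":0,'
          '"max_feature_idx":4,"sigmoid":-1,"tree_info":[],'
          '"feature_importances":{}}')
model = json.loads(dumped)
print(model["num_class"], len(model["tree_info"]))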
void GBDT::SaveModelToFile(int num_iteration, const char* filename) const {
/*! \brief File to write models */
std::ofstream output_file;
output_file.open(filename);
// output model type
output_file << Name() << std::endl;
// output number of class
output_file << "num_class=" << num_class_ << std::endl;
// output label index
output_file << "label_index=" << label_idx_ << std::endl;
// output max_feature_idx
output_file << "max_feature_idx=" << max_feature_idx_ << std::endl;
// output objective name
if (object_function_ != nullptr) {
output_file << "objective=" << object_function_->GetName() << std::endl;
}
if (num_used_model == NO_LIMIT) {
// output sigmoid parameter
output_file << "sigmoid=" << sigmoid_ << std::endl;
output_file << std::endl;
int num_used_model = 0;
if (num_iteration <= 0) {
num_used_model = static_cast<int>(models_.size());
} else {
num_used_model = num_used_model * num_class_;
num_used_model = num_iteration * num_class_;
}
int rest = num_used_model - early_stopping_round_ * num_class_;
num_used_model = std::min(num_used_model, static_cast<int>(models_.size()));
// output tree models
for (int i = saved_model_size_; i < rest; ++i) {
model_output_file_ << "Tree=" << i << std::endl;
model_output_file_ << models_[i]->ToString() << std::endl;
for (int i = 0; i < num_used_model; ++i) {
output_file << "Tree=" << i << std::endl;
output_file << models_[i]->ToString() << std::endl;
}
saved_model_size_ = std::max(saved_model_size_, rest);
model_output_file_.flush();
// training finished, can close file
if (is_finish) {
for (int i = saved_model_size_; i < num_used_model; ++i) {
model_output_file_ << "Tree=" << i << std::endl;
model_output_file_ << models_[i]->ToString() << std::endl;
}
model_output_file_ << std::endl << FeatureImportance() << std::endl;
model_output_file_.close();
std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance();
output_file << std::endl << "feature importances:" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) {
output_file << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
}
output_file.close();
}
void GBDT::LoadModelFromString(const std::string& model_str) {
@@ -452,10 +527,11 @@ void GBDT::LoadModelFromString(const std::string& model_str) {
}
}
Log::Info("Finished loading %d models", models_.size());
num_used_model_ = static_cast<int>(models_.size()) / num_class_;
num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
num_init_iteration_ = num_iteration_for_pred_;
}
std::string GBDT::FeatureImportance() const {
std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const {
std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0);
for (size_t iter = 0; iter < models_.size(); ++iter) {
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
@@ -475,18 +551,12 @@ std::string GBDT::FeatureImportance() const {
const std::pair<size_t, std::string>& rhs) {
return lhs.first > rhs.first;
});
std::stringstream str_buf;
// write to model file
str_buf << std::endl << "feature importances:" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) {
str_buf << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
}
return str_buf.str();
return pairs;
}
std::vector<double> GBDT::PredictRaw(const double* value) const {
std::vector<double> ret(num_class_, 0.0f);
for (int i = 0; i < num_used_model_; ++i) {
for (int i = 0; i < num_iteration_for_pred_; ++i) {
for (int j = 0; j < num_class_; ++j) {
ret[j] += models_[i * num_class_ + j]->Predict(value);
}
@@ -496,7 +566,7 @@ std::vector<double> GBDT::PredictRaw(const double* value) const {
std::vector<double> GBDT::Predict(const double* value) const {
std::vector<double> ret(num_class_, 0.0f);
for (int i = 0; i < num_used_model_; ++i) {
for (int i = 0; i < num_iteration_for_pred_; ++i) {
for (int j = 0; j < num_class_; ++j) {
ret[j] += models_[i * num_class_ + j]->Predict(value);
}
@@ -512,7 +582,7 @@ std::vector<double> GBDT::Predict(const double* value) const {
std::vector<int> GBDT::PredictLeafIndex(const double* value) const {
std::vector<int> ret;
for (int i = 0; i < num_used_model_; ++i) {
for (int i = 0; i < num_iteration_for_pred_; ++i) {
for (int j = 0; j < num_class_; ++j) {
ret.push_back(models_[i * num_class_ + j]->PredictLeafIndex(value));
}
......
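All of the prediction paths above walk the model vector in the same iteration-major order; a Python rendering of PredictRaw as a sketch (tree objects with a predict method are assumed, not a real API):

def predict_raw(trees, num_iteration_for_pred, num_class, x):
    ret = [0.0] * num_class
    for i in range(num_iteration_for_pred):
        for j in range(num_class):
            # the tree for iteration i, class j sits at i * num_class + j
            ret[j] += trees[i * num_class + j].predict(x)
    return ret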
@@ -35,12 +35,53 @@ public:
void Init(const BoostingConfig* gbdt_config, const Dataset* train_data, const ObjectiveFunction* object_function,
const std::vector<const Metric*>& training_metrics)
override;
/*!
* \brief Merge models from another boosting object;
* inserted trees are placed at the front of the current boosting object
* \param other The boosting object to merge from
*/
void MergeFrom(const Boosting* other) override {
auto other_gbdt = reinterpret_cast<const GBDT*>(other);
// tmp move to other vector
auto original_models = std::move(models_);
models_ = std::vector<std::unique_ptr<Tree>>();
// push model from other first
for (const auto& tree : other_gbdt->models_) {
auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
models_.push_back(std::move(new_tree));
}
num_init_iteration_ = static_cast<int>(models_.size()) / num_class_;
// push model in current object
for (const auto& tree : original_models) {
auto new_tree = std::unique_ptr<Tree>(new Tree(*(tree.get())));
models_.push_back(std::move(new_tree));
}
num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
}
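MergeFrom places the other booster's trees first, so num_init_iteration_ counts only the inserted model while num_iteration_for_pred_ covers the merged whole; schematically, as a Python sketch with placeholder trees:

other_trees = ['o0', 'o1']          # copied from the other booster
own_trees = ['s0', 's1', 's2']      # this booster's original trees
num_class = 1
merged = other_trees + own_trees    # the other booster's trees go first
num_init_iteration = len(other_trees) // num_class
num_iteration_for_pred = len(merged) // num_class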
/*!
* \brief Reset training data for current boosting
* \param train_data Training data
* \param object_function Training objective function
* \param training_metrics Training metric
*/
void ResetTrainingData(const BoostingConfig* config, const Dataset* train_data, const ObjectiveFunction* object_function, const std::vector<const Metric*>& training_metrics) override;
/*!
* \brief Reset shrinkage rate for current boosting
* \param shrinkage_rate New shrinkage rate (learning rate)
*/
void ResetShrinkageRate(double shrinkage_rate) override {
shrinkage_rate_ = shrinkage_rate;
}
/*!
* \brief Adding a validation dataset
* \param valid_data Validation dataset
* \param valid_metrics Metrics for validation dataset
*/
void AddDataset(const Dataset* valid_data,
void AddValidDataset(const Dataset* valid_data,
const std::vector<const Metric*>& valid_metrics) override;
/*!
* \brief Training logic
@@ -51,6 +92,13 @@ public:
*/
virtual bool TrainOneIter(const score_t* gradient, const score_t* hessian, bool is_eval) override;
/*!
* \brief Rollback one iteration
*/
void RollbackOneIter() override;
int GetCurrentIteration() const override { return iter_ + num_init_iteration_; }
bool EvalAndCheckEarlyStopping() override;
/*!
@@ -73,40 +121,48 @@ public:
* \param result used to store prediction result, should allocate memory before call this function
* \param out_len length of returned score
*/
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) const override;
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) override;
/*!
* \brief Predtion for one record without sigmoid transformation
* \brief Prediction for one record without sigmoid transformation
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
std::vector<double> PredictRaw(const double* feature_values) const override;
/*!
* \brief Predtion for one record with sigmoid transformation if enabled
* \brief Prediction for one record with sigmoid transformation if enabled
* \param feature_values Feature value on this record
* \return Prediction result for this record
*/
std::vector<double> Predict(const double* feature_values) const override;
/*!
* \brief Predtion for one record with leaf index
* \brief Prediction for one record with leaf index
* \param feature_values Feature value on this record
* \return Predicted leaf index for this record
*/
std::vector<int> PredictLeafIndex(const double* value) const override;
/*!
* \brief save model to file
* \param num_used_model number of model that want to save, -1 means save all
* \param is_finish is training finished or not
* \param filename filename that want to save to
* \brief Dump model to json format string
* \return Json format string of model
*/
virtual void SaveModelToFile(int num_used_model, bool is_finish, const char* filename) override;
std::string DumpModel() const override;
/*!
* \brief Save model to file
* \param num_iterations Number of iterations to save, -1 means save all
* \param filename Filename to save to
*/
virtual void SaveModelToFile(int num_iterations, const char* filename) const override;
/*!
* \brief Restore from a serialized string
*/
void LoadModelFromString(const std::string& model_str) override;
/*!
* \brief Get max feature index of this model
* \return Max feature index of this model
@@ -119,11 +175,12 @@ public:
*/
inline int LabelIdx() const override { return label_idx_; }
/*!
* \brief Get total number of models (trees)
* \return Total number of models
*/
inline int NumberOfSubModels() const override { return static_cast<int>(models_.size()); }
inline int NumberOfTotalModel() const override { return static_cast<int>(models_.size()); }
/*!
* \brief Get number of classes
@@ -132,14 +189,18 @@ public:
inline int NumberOfClasses() const override { return num_class_; }
/*!
* \brief Set number of used model for prediction
* \brief Set number of iterations for prediction
*/
inline void SetNumUsedModel(int num_used_model) {
if (num_used_model >= 0) {
num_used_model_ = static_cast<int>(num_used_model / num_class_);
inline void SetNumIterationForPred(int num_iteration) override {
if (num_iteration > 0) {
num_iteration_for_pred_ = num_iteration;
} else {
num_iteration_for_pred_ = static_cast<int>(models_.size()) / num_class_;
}
num_iteration_for_pred_ = std::min(num_iteration_for_pred_,
static_cast<int>(models_.size()) / num_class_);
}
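The setter above clamps the requested iteration count to what the model actually holds; the same logic as a Python sketch (hypothetical helper name):

def num_iteration_for_pred(requested, num_models, num_class):
    n = requested if requested > 0 else num_models // num_class
    return min(n, num_models // num_class)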
/*!
* \brief Get Type name of this boosting object
*/
@@ -178,7 +239,7 @@ protected:
* \brief Calculate feature importances
*/
std::string FeatureImportance() const;
std::vector<std::pair<size_t, std::string>> FeatureImportance() const;
/*! \brief current iteration */
int iter_;
/*! \brief Pointer to training data */
@@ -218,7 +279,7 @@ protected:
std::vector<data_size_t> bag_data_indices_;
/*! \brief Number of in-bag data */
data_size_t bag_data_cnt_;
/*! \brief Number of traning data */
/*! \brief Number of training data */
data_size_t num_data_;
/*! \brief Number of classes */
int num_class_;
@@ -226,19 +287,17 @@ protected:
Random random_;
/*!
* \brief Sigmoid parameter, used for prediction.
* if > 0 meas output score will transform by sigmoid function
* if > 0 means output score will transform by sigmoid function
*/
double sigmoid_;
/*! \brief Index of label column */
data_size_t label_idx_;
/*! \brief Saved number of models */
int saved_model_size_;
/*! \brief File to write models */
std::ofstream model_output_file_;
/*! \brief number of used model */
int num_used_model_;
int num_iteration_for_pred_;
/*! \brief Shrinkage rate for one iteration */
double shrinkage_rate_;
/*! \brief Number of loaded initial models */
int num_init_iteration_;
};
} // namespace LightGBM
......
@@ -16,6 +16,7 @@
#include <cstring>
#include <memory>
#include <stdexcept>
#include <mutex>
#include "./application/predictor.hpp"
@@ -28,75 +29,88 @@ public:
}
Booster(const Dataset* train_data,
std::vector<const Dataset*> valid_data,
std::vector<std::string> valid_names,
const char* parameters)
:train_data_(train_data), valid_datas_(valid_data) {
config_.LoadFromString(parameters);
const char* parameters) {
auto param = ConfigBase::Str2Map(parameters);
config_.Set(param);
// create boosting
if (config_.io_config.input_model.size() > 0) {
Log::Warning("continued train from model is not support for c_api, \
please use continued train with input score");
}
boosting_.reset(Boosting::CreateBoosting(config_.boosting_type, ""));
// create objective function
objective_fun_.reset(ObjectiveFunction::CreateObjectiveFunction(config_.objective_type,
config_.objective_config));
if (objective_fun_ == nullptr) {
Log::Warning("Using self-defined objective functions");
}
// create training metric
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(
Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init("training", train_data_->metadata(),
train_data_->num_data());
train_metric_.push_back(std::move(metric));
}
train_metric_.shrink_to_fit();
// add metric for validation data
for (size_t i = 0; i < valid_datas_.size(); ++i) {
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init(valid_names[i].c_str(),
valid_datas_[i]->metadata(),
valid_datas_[i]->num_data());
valid_metrics_.back().push_back(std::move(metric));
}
valid_metrics_.back().shrink_to_fit();
}
valid_metrics_.shrink_to_fit();
// initialize the objective function
if (objective_fun_ != nullptr) {
objective_fun_->Init(train_data_->metadata(), train_data_->num_data());
}
boosting_.reset(Boosting::CreateBoosting(config_.boosting_type, nullptr));
ConstructObjectAndTrainingMetrics(train_data);
// initialize the boosting
boosting_->Init(&config_.boosting_config, train_data_, objective_fun_.get(),
boosting_->Init(&config_.boosting_config, train_data, objective_fun_.get(),
Common::ConstPtrInVectorWrapper<Metric>(train_metric_));
// add validation data into boosting
for (size_t i = 0; i < valid_datas_.size(); ++i) {
boosting_->AddDataset(valid_datas_[i],
Common::ConstPtrInVectorWrapper<Metric>(valid_metrics_[i]));
}
}
void MergeFrom(const Booster* other) {
std::lock_guard<std::mutex> lock(mutex_);
boosting_->MergeFrom(other->boosting_.get());
}
~Booster() {
}
void ResetTrainingData(const Dataset* train_data) {
std::lock_guard<std::mutex> lock(mutex_);
train_data_ = train_data;
ConstructObjectAndTrainingMetrics(train_data_);
// initialize the boosting
boosting_->ResetTrainingData(&config_.boosting_config, train_data_,
objective_fun_.get(), Common::ConstPtrInVectorWrapper<Metric>(train_metric_));
}
void ResetConfig(const char* parameters) {
std::lock_guard<std::mutex> lock(mutex_);
auto param = ConfigBase::Str2Map(parameters);
if (param.count("num_class")) {
Log::Fatal("cannot change num class during training");
}
if (param.count("boosting_type")) {
Log::Fatal("cannot change boosting_type during training");
}
config_.Set(param);
if (param.size() == 1 && (param.count("learning_rate") || param.count("shrinkage_rate"))) {
// only need to set learning rate
boosting_->ResetShrinkageRate(config_.boosting_config.learning_rate);
} else {
ResetTrainingData(train_data_);
}
}
void AddValidData(const Dataset* valid_data) {
std::lock_guard<std::mutex> lock(mutex_);
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init(valid_data->metadata(), valid_data->num_data());
valid_metrics_.back().push_back(std::move(metric));
}
valid_metrics_.back().shrink_to_fit();
boosting_->AddValidDataset(valid_data,
Common::ConstPtrInVectorWrapper<Metric>(valid_metrics_.back()));
}
bool TrainOneIter() {
std::lock_guard<std::mutex> lock(mutex_);
return boosting_->TrainOneIter(nullptr, nullptr, false);
}
bool TrainOneIter(const float* gradients, const float* hessians) {
std::lock_guard<std::mutex> lock(mutex_);
return boosting_->TrainOneIter(gradients, hessians, false);
}
void PrepareForPrediction(int num_used_model, int predict_type) {
boosting_->SetNumUsedModel(num_used_model);
void RollbackOneIter() {
std::lock_guard<std::mutex> lock(mutex_);
boosting_->RollbackOneIter();
}
void PrepareForPrediction(int num_iteration, int predict_type) {
std::lock_guard<std::mutex> lock(mutex_);
boosting_->SetNumIterationForPred(num_iteration);
bool is_predict_leaf = false;
bool is_raw_score = false;
if (predict_type == C_API_PREDICT_LEAF_INDEX) {
@@ -109,6 +123,10 @@ public:
predictor_.reset(new Predictor(boosting_.get(), is_raw_score, is_predict_leaf));
}
void GetPredictAt(int data_idx, score_t* out_result, data_size_t* out_len) {
boosting_->GetPredictAt(data_idx, out_result, out_len);
}
std::vector<double> Predict(const std::vector<std::pair<int, double>>& features) {
return predictor_->GetPredictFunction()(features);
}
@@ -117,25 +135,64 @@ public:
predictor_->Predict(data_filename, result_filename, data_has_header);
}
void SaveModelToFile(int num_used_model, const char* filename) {
boosting_->SaveModelToFile(num_used_model, true, filename);
void SaveModelToFile(int num_iteration, const char* filename) {
boosting_->SaveModelToFile(num_iteration, filename);
}
const Boosting* GetBoosting() const { return boosting_.get(); }
const float* GetTrainingScore(int* out_len) const { return boosting_->GetTrainingScore(out_len); }
std::string DumpModel() {
return boosting_->DumpModel();
}
const inline int NumberOfClasses() const { return boosting_->NumberOfClasses(); }
int GetEvalCounts() const {
int ret = 0;
for (const auto& metric : train_metric_) {
ret += static_cast<int>(metric->GetName().size());
}
return ret;
}
int GetEvalNames(char** out_strs) const {
int idx = 0;
for (const auto& metric : train_metric_) {
for (const auto& name : metric->GetName()) {
std::strcpy(out_strs[idx], name.c_str());
++idx;
}
}
return idx;
}
const Boosting* GetBoosting() const { return boosting_.get(); }
private:
void ConstructObjectAndTrainingMetrics(const Dataset* train_data) {
// create objective function
objective_fun_.reset(ObjectiveFunction::CreateObjectiveFunction(config_.objective_type,
config_.objective_config));
if (objective_fun_ == nullptr) {
Log::Warning("Using self-defined objective functions");
}
// create training metric
train_metric_.clear();
for (auto metric_type : config_.metric_types) {
auto metric = std::unique_ptr<Metric>(
Metric::CreateMetric(metric_type, config_.metric_config));
if (metric == nullptr) { continue; }
metric->Init(train_data->metadata(), train_data->num_data());
train_metric_.push_back(std::move(metric));
}
train_metric_.shrink_to_fit();
// initialize the objective function
if (objective_fun_ != nullptr) {
objective_fun_->Init(train_data->metadata(), train_data->num_data());
}
}
const Dataset* train_data_;
std::unique_ptr<Boosting> boosting_;
/*! \brief All configs */
OverallConfig config_;
/*! \brief Training data */
const Dataset* train_data_;
/*! \brief Validation data */
std::vector<const Dataset*> valid_datas_;
/*! \brief Metric for training data */
std::vector<std::unique_ptr<Metric>> train_metric_;
/*! \brief Metrics for validation data */
@@ -144,7 +201,8 @@ private:
std::unique_ptr<ObjectiveFunction> objective_fun_;
/*! \brief Using predictor for prediction task */
std::unique_ptr<Predictor> predictor_;
/*! \brief mutex for threading safe call */
std::mutex mutex_;
};
}
@@ -152,17 +210,18 @@ private:
using namespace LightGBM;
DllExport const char* LGBM_GetLastError() {
return LastErrorMsg().c_str();
return LastErrorMsg();
}
DllExport int LGBM_CreateDatasetFromFile(const char* filename,
DllExport int LGBM_DatasetCreateFromFile(const char* filename,
const char* parameters,
const DatesetHandle* reference,
DatesetHandle* out) {
API_BEGIN();
OverallConfig config;
config.LoadFromString(parameters);
DatasetLoader loader(config.io_config, nullptr);
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
io_config.Set(param);
DatasetLoader loader(io_config, nullptr);
loader.SetHeader(filename);
if (reference == nullptr) {
*out = loader.LoadFromFile(filename);
@@ -173,16 +232,7 @@ DllExport int LGBM_CreateDatasetFromFile(const char* filename,
API_END();
}
DllExport int LGBM_CreateDatasetFromBinaryFile(const char* filename,
DatesetHandle* out) {
API_BEGIN();
OverallConfig config;
DatasetLoader loader(config.io_config, nullptr);
*out = loader.LoadFromBinFile(filename, 0, 1);
API_END();
}
DllExport int LGBM_CreateDatasetFromMat(const void* data,
DllExport int LGBM_DatasetCreateFromMat(const void* data,
int data_type,
int32_t nrow,
int32_t ncol,
@@ -191,15 +241,16 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
const DatesetHandle* reference,
DatesetHandle* out) {
API_BEGIN();
OverallConfig config;
config.LoadFromString(parameters);
DatasetLoader loader(config.io_config, nullptr);
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
io_config.Set(param);
DatasetLoader loader(io_config, nullptr);
std::unique_ptr<Dataset> ret;
auto get_row_fun = RowFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
if (reference == nullptr) {
// sample data first
Random rand(config.io_config.data_random_seed);
const int sample_cnt = static_cast<int>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt);
Random rand(io_config.data_random_seed);
const int sample_cnt = static_cast<int>(nrow < io_config.bin_construct_sample_cnt ? nrow : io_config.bin_construct_sample_cnt);
auto sample_indices = rand.Sample(nrow, sample_cnt);
std::vector<std::vector<double>> sample_values(ncol);
for (size_t i = 0; i < sample_indices.size(); ++i) {
@@ -213,10 +264,10 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
}
ret.reset(loader.CostructFromSampleData(sample_values, sample_cnt, nrow));
} else {
ret.reset(new Dataset(nrow, config.io_config.num_class));
ret.reset(new Dataset(nrow, io_config.num_class));
ret->CopyFeatureMapperFrom(
reinterpret_cast<const Dataset*>(*reference),
config.io_config.is_enable_sparse);
io_config.is_enable_sparse);
}
#pragma omp parallel for schedule(guided)
@@ -230,7 +281,7 @@ DllExport int LGBM_CreateDatasetFromMat(const void* data,
API_END();
}
DllExport int LGBM_CreateDatasetFromCSR(const void* indptr,
DllExport int LGBM_DatasetCreateFromCSR(const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
@@ -242,16 +293,17 @@ DllExport int LGBM_CreateDatasetFromCSR(const void* indptr,
const DatesetHandle* reference,
DatesetHandle* out) {
API_BEGIN();
OverallConfig config;
config.LoadFromString(parameters);
DatasetLoader loader(config.io_config, nullptr);
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
io_config.Set(param);
DatasetLoader loader(io_config, nullptr);
std::unique_ptr<Dataset> ret;
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int32_t nrow = static_cast<int32_t>(nindptr - 1);
if (reference == nullptr) {
// sample data first
Random rand(config.io_config.data_random_seed);
const int sample_cnt = static_cast<int>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt);
Random rand(io_config.data_random_seed);
const int sample_cnt = static_cast<int>(nrow < io_config.bin_construct_sample_cnt ? nrow : io_config.bin_construct_sample_cnt);
auto sample_indices = rand.Sample(nrow, sample_cnt);
std::vector<std::vector<double>> sample_values;
for (size_t i = 0; i < sample_indices.size(); ++i) {
@@ -274,10 +326,10 @@ DllExport int LGBM_CreateDatasetFromCSR(const void* indptr,
CHECK(num_col >= static_cast<int>(sample_values.size()));
ret.reset(loader.CostructFromSampleData(sample_values, sample_cnt, nrow));
} else {
ret.reset(new Dataset(nrow, config.io_config.num_class));
ret.reset(new Dataset(nrow, io_config.num_class));
ret->CopyFeatureMapperFrom(
reinterpret_cast<const Dataset*>(*reference),
config.io_config.is_enable_sparse);
io_config.is_enable_sparse);
}
#pragma omp parallel for schedule(guided)
@@ -291,7 +343,7 @@ DllExport int LGBM_CreateDatasetFromCSR(const void* indptr,
API_END();
}
DllExport int LGBM_CreateDatasetFromCSC(const void* col_ptr,
DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
int col_ptr_type,
const int32_t* indices,
const void* data,
@@ -303,17 +355,18 @@ DllExport int LGBM_CreateDatasetFromCSC(const void* col_ptr,
const DatesetHandle* reference,
DatesetHandle* out) {
API_BEGIN();
OverallConfig config;
config.LoadFromString(parameters);
DatasetLoader loader(config.io_config, nullptr);
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
io_config.Set(param);
DatasetLoader loader(io_config, nullptr);
std::unique_ptr<Dataset> ret;
auto get_col_fun = ColumnFunctionFromCSC(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem);
int32_t nrow = static_cast<int32_t>(num_row);
if (reference == nullptr) {
Log::Warning("Construct from CSC format is not efficient");
// sample data first
Random rand(config.io_config.data_random_seed);
const int sample_cnt = static_cast<int>(nrow < config.io_config.bin_construct_sample_cnt ? nrow : config.io_config.bin_construct_sample_cnt);
Random rand(io_config.data_random_seed);
const int sample_cnt = static_cast<int>(nrow < io_config.bin_construct_sample_cnt ? nrow : io_config.bin_construct_sample_cnt);
auto sample_indices = rand.Sample(nrow, sample_cnt);
std::vector<std::vector<double>> sample_values(ncol_ptr - 1);
#pragma omp parallel for schedule(guided)
@@ -323,10 +376,10 @@ DllExport int LGBM_CreateDatasetFromCSC(const void* col_ptr,
}
ret.reset(loader.CostructFromSampleData(sample_values, sample_cnt, nrow));
} else {
ret.reset(new Dataset(nrow, config.io_config.num_class));
ret.reset(new Dataset(nrow, io_config.num_class));
ret->CopyFeatureMapperFrom(
reinterpret_cast<const Dataset*>(*reference),
config.io_config.is_enable_sparse);
io_config.is_enable_sparse);
}
#pragma omp parallel for schedule(guided)
@@ -340,6 +393,26 @@ DllExport int LGBM_CreateDatasetFromCSC(const void* col_ptr,
API_END();
}
DllExport int LGBM_DatasetGetSubset(
const DatesetHandle* handle,
const int32_t* used_row_indices,
int32_t num_used_row_indices,
const char* parameters,
DatesetHandle* out) {
API_BEGIN();
auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config;
io_config.Set(param);
auto full_dataset = reinterpret_cast<const Dataset*>(*handle);
auto ret = std::unique_ptr<Dataset>(
full_dataset->Subset(used_row_indices,
num_used_row_indices,
io_config.is_enable_sparse));
ret->FinishLoad();
*out = ret.release();
API_END();
}
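// A minimal caller-side sketch of the subset API above, assuming the usual
// 0-on-success return convention of this C API; `full` stands for a
// hypothetical, previously constructed dataset handle:
//
//   int32_t rows[] = {0, 2, 5, 7};
//   DatesetHandle subset = nullptr;
//   if (LGBM_DatasetGetSubset(&full, rows, 4, "is_enable_sparse=true", &subset) == 0) {
//     // use subset (e.g. as a fold in cross validation), then release it
//     LGBM_DatasetFree(subset);
//   }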
DllExport int LGBM_DatasetFree(DatesetHandle handle) {
API_BEGIN();
delete reinterpret_cast<Dataset*>(handle);
@@ -387,6 +460,7 @@ DllExport int LGBM_DatasetGetField(DatesetHandle handle,
is_success = true;
}
if (!is_success) { throw std::runtime_error("Field not found"); }
if (*out_ptr == nullptr) { *out_len = 0; }
API_END();
}
@@ -410,28 +484,24 @@ DllExport int LGBM_DatasetGetNumFeature(DatesetHandle handle,
// ---- start of booster
DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
const DatesetHandle valid_datas[],
const char* valid_names[],
int n_valid_datas,
const char* parameters,
BoosterHandle* out) {
API_BEGIN();
const Dataset* p_train_data = reinterpret_cast<const Dataset*>(train_data);
std::vector<const Dataset*> p_valid_datas;
std::vector<std::string> p_valid_names;
for (int i = 0; i < n_valid_datas; ++i) {
p_valid_datas.emplace_back(reinterpret_cast<const Dataset*>(valid_datas[i]));
p_valid_names.emplace_back(valid_names[i]);
}
*out = new Booster(p_train_data, p_valid_datas, p_valid_names, parameters);
auto ret = std::unique_ptr<Booster>(new Booster(p_train_data, parameters));
*out = ret.release();
API_END();
}
DllExport int LGBM_BoosterLoadFromModelfile(
DllExport int LGBM_BoosterCreateFromModelfile(
const char* filename,
int64_t* out_num_iterations,
BoosterHandle* out) {
API_BEGIN();
*out = new Booster(filename);
auto ret = std::unique_ptr<Booster>(new Booster(filename));
*out_num_iterations = static_cast<int64_t>(ret->GetBoosting()->NumberOfTotalModel()
/ ret->GetBoosting()->NumberOfClasses());
*out = ret.release();
API_END();
}
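// The recovered iteration count follows from "one tree per class per boosting
// round": a model file holding 300 trees with 3 classes yields
// out_num_iterations = 300 / 3 = 100. A hedged usage sketch (the filename is
// illustrative, not part of this changeset):
//
//   int64_t num_iterations = 0;
//   BoosterHandle booster = nullptr;
//   LGBM_BoosterCreateFromModelfile("LightGBM_model.txt", &num_iterations, &booster);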
@@ -441,6 +511,47 @@ DllExport int LGBM_BoosterFree(BoosterHandle handle) {
API_END();
}
DllExport int LGBM_BoosterMerge(BoosterHandle handle,
BoosterHandle other_handle) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
Booster* ref_other_booster = reinterpret_cast<Booster*>(other_handle);
ref_booster->MergeFrom(ref_other_booster);
API_END();
}
DllExport int LGBM_BoosterAddValidData(BoosterHandle handle,
const DatesetHandle valid_data) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
const Dataset* p_dataset = reinterpret_cast<const Dataset*>(valid_data);
ref_booster->AddValidData(p_dataset);
API_END();
}
DllExport int LGBM_BoosterResetTrainingData(BoosterHandle handle,
const DatesetHandle train_data) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
const Dataset* p_dataset = reinterpret_cast<const Dataset*>(train_data);
ref_booster->ResetTrainingData(p_dataset);
API_END();
}
DllExport int LGBM_BoosterResetParameter(BoosterHandle handle, const char* parameters) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->ResetConfig(parameters);
API_END();
}
DllExport int LGBM_BoosterGetNumClasses(BoosterHandle handle, int64_t* out_len) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = ref_booster->GetBoosting()->NumberOfClasses();
API_END();
}
DllExport int LGBM_BoosterUpdateOneIter(BoosterHandle handle, int* is_finished) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
@@ -466,14 +577,50 @@ DllExport int LGBM_BoosterUpdateOneIterCustom(BoosterHandle handle,
API_END();
}
DllExport int LGBM_BoosterEval(BoosterHandle handle,
int data,
DllExport int LGBM_BoosterRollbackOneIter(BoosterHandle handle) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->RollbackOneIter();
API_END();
}
DllExport int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int64_t* out_iteration) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_iteration = ref_booster->GetBoosting()->GetCurrentIteration();
API_END();
}
/*!
* \brief Get the number of evaluation metrics
* \return Total number of evaluation results
*/
DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = ref_booster->GetEvalCounts();
API_END();
}
/*!
* \brief Get names of the evaluation metrics
* \return Total number of evaluation results
*/
DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, char** out_strs) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
*out_len = ref_booster->GetEvalNames(out_strs);
API_END();
}
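// A sketch of the intended two-step call pattern; caller-side allocation and
// the 256-byte name buffers are assumptions, not part of this API's contract:
//
//   int64_t n = 0;
//   LGBM_BoosterGetEvalCounts(booster, &n);
//   std::vector<std::vector<char>> bufs(n, std::vector<char>(256));
//   std::vector<char*> names(n);
//   for (int64_t i = 0; i < n; ++i) names[i] = bufs[i].data();
//   LGBM_BoosterGetEvalNames(booster, &n, names.data());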
DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
int data_idx,
int64_t* out_len,
float* out_results) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting();
auto result_buf = boosting->GetEvalAt(data);
auto result_buf = boosting->GetEvalAt(data_idx);
*out_len = static_cast<int64_t>(result_buf.size());
for (size_t i = 0; i < result_buf.size(); ++i) {
(out_results)[i] = static_cast<float>(result_buf[i]);
@@ -481,39 +628,27 @@ DllExport int LGBM_BoosterEval(BoosterHandle handle,
API_END();
}
DllExport int LGBM_BoosterGetScore(BoosterHandle handle,
int64_t* out_len,
const float** out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
int len = 0;
*out_result = ref_booster->GetTrainingScore(&len);
*out_len = static_cast<int64_t>(len);
API_END();
}
DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
int data,
int data_idx,
int64_t* out_len,
float* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
auto boosting = ref_booster->GetBoosting();
int len = 0;
boosting->GetPredictAt(data, out_result, &len);
ref_booster->GetPredictAt(data_idx, out_result, &len);
*out_len = static_cast<int64_t>(len);
API_END();
}
DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
int predict_type,
int64_t n_used_trees,
int data_has_header,
const char* data_filename,
int data_has_header,
int predict_type,
int64_t num_iteration,
const char* result_filename) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
ref_booster->PrepareForPrediction(static_cast<int>(num_iteration), predict_type);
bool bool_data_has_header = data_has_header > 0 ? true : false;
ref_booster->PredictForFile(data_filename, result_filename, bool_data_has_header);
API_END();
@@ -529,23 +664,32 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
int64_t nelem,
int64_t,
int predict_type,
int64_t n_used_trees,
double* out_result) {
int64_t num_iteration,
int64_t* out_len,
float* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
ref_booster->PrepareForPrediction(static_cast<int>(num_iteration), predict_type);
auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, data_type, nindptr, nelem);
int num_class = ref_booster->NumberOfClasses();
int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
if (predict_type == C_API_PREDICT_LEAF_INDEX) {
if (num_iteration > 0) {
num_preb_in_one_row *= static_cast<int>(num_iteration);
} else {
num_preb_in_one_row *= ref_booster->GetBoosting()->NumberOfTotalModel() / num_preb_in_one_row;
}
}
int nrow = static_cast<int>(nindptr - 1);
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
out_result[i * num_preb_in_one_row + j] = static_cast<float>(predicton_result[j]);
}
}
*out_len = nrow * num_preb_in_one_row;
API_END();
}
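// Output layout: out_result is row-major with a fixed stride of
// num_preb_in_one_row, so the j-th prediction for row i sits at
//
//   out_result[i * num_preb_in_one_row + j]
//
// For normal prediction the stride is the number of classes; for
// C_API_PREDICT_LEAF_INDEX it is one leaf index per tree used, which is why
// *out_len comes out as nrow * num_preb_in_one_row.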
@@ -556,31 +700,54 @@ DllExport int LGBM_BoosterPredictForMat(BoosterHandle handle,
int32_t ncol,
int is_row_major,
int predict_type,
int64_t n_used_trees,
double* out_result) {
int64_t num_iteration,
int64_t* out_len,
float* out_result) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->PrepareForPrediction(static_cast<int>(n_used_trees), predict_type);
ref_booster->PrepareForPrediction(static_cast<int>(num_iteration), predict_type);
auto get_row_fun = RowPairFunctionFromDenseMatric(data, nrow, ncol, data_type, is_row_major);
int num_class = ref_booster->NumberOfClasses();
int num_preb_in_one_row = ref_booster->GetBoosting()->NumberOfClasses();
if (predict_type == C_API_PREDICT_LEAF_INDEX) {
if (num_iteration > 0) {
num_preb_in_one_row *= static_cast<int>(num_iteration);
} else {
num_preb_in_one_row *= ref_booster->GetBoosting()->NumberOfTotalModel() / num_preb_in_one_row;
}
}
#pragma omp parallel for schedule(guided)
for (int i = 0; i < nrow; ++i) {
auto one_row = get_row_fun(i);
auto predicton_result = ref_booster->Predict(one_row);
for (int j = 0; j < num_class; ++j) {
out_result[i * num_class + j] = predicton_result[j];
for (int j = 0; j < static_cast<int>(predicton_result.size()); ++j) {
out_result[i * num_preb_in_one_row + j] = static_cast<float>(predicton_result[j]);
}
}
*out_len = nrow * num_preb_in_one_row;
API_END();
}
DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
int num_used_model,
int num_iteration,
const char* filename) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
ref_booster->SaveModelToFile(num_used_model, filename);
ref_booster->SaveModelToFile(num_iteration, filename);
API_END();
}
DllExport int LGBM_BoosterDumpModel(BoosterHandle handle,
int buffer_len,
int64_t* out_len,
char** out_str) {
API_BEGIN();
Booster* ref_booster = reinterpret_cast<Booster*>(handle);
std::string model = ref_booster->DumpModel();
*out_len = static_cast<int64_t>(model.size());
if (*out_len <= buffer_len) {
std::strcpy(*out_str, model.c_str());
}
API_END();
}
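// A sketch of the query-then-copy pattern this buffer contract suggests;
// the caller-side allocation is an assumption, not part of this changeset:
//
//   int64_t len = 0;
//   char stub = '\0';
//   char* ptr = &stub;
//   LGBM_BoosterDumpModel(booster, 0, &len, &ptr);  // first call reports the size
//   std::vector<char> buf(static_cast<size_t>(len) + 1);
//   ptr = buf.data();
//   LGBM_BoosterDumpModel(booster, static_cast<int>(buf.size()), &len, &ptr);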
......
@@ -5,14 +5,14 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
namespace LightGBM {
void OverallConfig::LoadFromString(const char* str) {
std::unordered_map<std::string, std::string> ConfigBase::Str2Map(const char* parameters) {
std::unordered_map<std::string, std::string> params;
auto args = Common::Split(str, " \t\n\r");
auto args = Common::Split(parameters, " \t\n\r");
for (auto arg : args) {
std::vector<std::string> tmp_strs = Common::Split(arg.c_str(), '=');
if (tmp_strs.size() == 2) {
@@ -27,7 +27,7 @@ void OverallConfig::LoadFromString(const char* str) {
}
}
ParameterAlias::KeyAliasTransform(&params);
Set(params);
return params;
}
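// Behavior sketch (illustrative input, not from this changeset): the string is
// split on whitespace, each token on '=', and keys are normalized through
// ParameterAlias::KeyAliasTransform, e.g.
//
//   auto m = ConfigBase::Str2Map("num_class=3 metric=l2 verbose=1");
//   // m["num_class"] == "3", m["metric"] == "l2", m["verbose"] == "1"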
void OverallConfig::Set(const std::unordered_map<std::string, std::string>& params) {
@@ -95,16 +95,15 @@ void OverallConfig::GetMetricType(const std::unordered_map<std::string, std::str
// split
std::vector<std::string> metrics = Common::Split(value.c_str(), ',');
// remove duplicates
std::unordered_map<std::string, int> metric_maps;
std::unordered_set<std::string> metric_sets;
for (auto& metric : metrics) {
std::transform(metric.begin(), metric.end(), metric.begin(), Common::tolower);
if (metric_maps.count(metric) <= 0) {
metric_maps[metric] = 1;
if (metric_sets.count(metric) <= 0) {
metric_sets.insert(metric);
}
}
for (auto& pair : metric_maps) {
std::string sub_metric_str = pair.first;
metric_types.push_back(sub_metric_str);
for (auto& metric : metric_sets) {
metric_types.push_back(metric);
}
metric_types.shrink_to_fit();
}
@@ -183,7 +182,7 @@ void IOConfig::Set(const std::unordered_map<std::string, std::string>& params) {
GetInt(params, "data_random_seed", &data_random_seed);
GetString(params, "data", &data_filename);
GetInt(params, "verbose", &verbosity);
GetInt(params, "num_model_predict", &num_model_predict);
GetInt(params, "num_iteration_predict", &num_iteration_predict);
GetInt(params, "bin_construct_sample_cnt", &bin_construct_sample_cnt);
GetBool(params, "is_pre_partition", &is_pre_partition);
GetBool(params, "is_enable_sparse", &is_enable_sparse);
@@ -214,6 +213,7 @@ void ObjectiveConfig::Set(const std::unordered_map<std::string, std::string>& pa
CHECK(max_position > 0);
GetInt(params, "num_class", &num_class);
CHECK(num_class >= 1);
GetDouble(params, "scale_pos_weight", &scale_pos_weight);
std::string tmp_str = "";
if (GetString(params, "label_gain", &tmp_str)) {
label_gain = Common::StringToDoubleArray(tmp_str, ',');
......
@@ -14,17 +14,16 @@
namespace LightGBM {
const char* Dataset::binary_file_token = "______LightGBM_Binary_File_Token______\n";
Dataset::Dataset() {
num_class_ = 1;
num_data_ = 0;
is_loading_from_binfile_ = false;
}
Dataset::Dataset(data_size_t num_data, int num_class) {
num_class_ = num_class;
num_data_ = num_data;
is_loading_from_binfile_ = false;
metadata_.Init(num_data_, num_class_, -1, -1);
}
@@ -56,6 +55,21 @@ void Dataset::CopyFeatureMapperFrom(const Dataset* dataset, bool is_enable_spars
num_features_ = static_cast<int>(features_.size());
num_total_features_ = dataset->num_total_features_;
feature_names_ = dataset->feature_names_;
label_idx_ = dataset->label_idx_;
}
Dataset* Dataset::Subset(const data_size_t* used_indices, data_size_t num_used_indices, bool is_enable_sparse) const {
auto ret = std::unique_ptr<Dataset>(new Dataset(num_used_indices, num_class_));
ret->CopyFeatureMapperFrom(this, is_enable_sparse);
#pragma omp parallel for schedule(guided)
for (int fidx = 0; fidx < num_features_; ++fidx) {
auto iterator = features_[fidx]->bin_data()->GetIterator(0);
for (data_size_t i = 0; i < num_used_indices; ++i) {
ret->features_[fidx]->PushBin(0, i, iterator->Get(used_indices[i]));
}
}
ret->metadata_.Init(metadata_, used_indices, num_used_indices);
return ret.release();
}
bool Dataset::SetFloatField(const char* field_name, const float* field_data, data_size_t num_element) {
@@ -78,6 +92,8 @@ bool Dataset::SetIntField(const char* field_name, const int* field_data, data_si
name = Common::Trim(name);
if (name == std::string("query") || name == std::string("group")) {
metadata_.SetQueryBoundaries(field_data, num_element);
} else if (name == std::string("query_id") || name == std::string("group_id")) {
metadata_.SetQueryId(field_data, num_element);
} else {
return false;
}
@@ -107,7 +123,7 @@ bool Dataset::GetIntField(const char* field_name, int64_t* out_len, const int**
name = Common::Trim(name);
if (name == std::string("query") || name == std::string("group")) {
*out_ptr = metadata_.query_boundaries();
*out_len = num_data_;
*out_len = metadata_.num_queries();
} else {
return false;
}
@@ -115,15 +131,27 @@
}
void Dataset::SaveBinaryFile(const char* bin_filename) {
bool is_file_existed = false;
FILE* file;
#ifdef _MSC_VER
fopen_s(&file, bin_filename, "rb");
#else
file = fopen(bin_filename, "rb");
#endif
if (file != NULL) {
is_file_existed = true;
Log::Warning("File %s existed, cannot save binary to it", bin_filename);
fclose(file);
}
if (!is_loading_from_binfile_) {
if (!is_file_existed) {
std::string bin_filename_str(data_filename_);
// if no filename was passed, just append ".bin" to the original filename
if (bin_filename == nullptr || bin_filename[0] == '\0') {
bin_filename_str.append(".bin");
bin_filename = bin_filename_str.c_str();
}
FILE* file;
#ifdef _MSC_VER
fopen_s(&file, bin_filename, "wb");
#else
@@ -133,7 +161,8 @@ void Dataset::SaveBinaryFile(const char* bin_filename) {
Log::Fatal("Cannot write binary data to %s ", bin_filename);
}
Log::Info("Saving data to binary file %s", bin_filename);
size_t size_of_token = std::strlen(binary_file_token);
fwrite(binary_file_token, sizeof(char), size_of_token, file);
// get size of header
size_t size_of_header = sizeof(num_data_) + sizeof(num_class_) + sizeof(num_features_) + sizeof(num_total_features_)
+ sizeof(size_t) + sizeof(int) * used_feature_map_.size();
......
@@ -142,18 +142,18 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
Please use an additional query file or pre-partition the data");
}
}
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_));
if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename);
}
auto dataset = std::unique_ptr<Dataset>(new Dataset());
data_size_t num_global_data = 0;
std::vector<data_size_t> used_data_indices;
auto dataset = std::unique_ptr<Dataset>(new Dataset());
dataset->data_filename_ = filename;
dataset->num_class_ = io_config_.num_class;
dataset->metadata_.Init(filename, dataset->num_class_);
bool is_loading_from_binfile = CheckCanLoadFromBin(filename);
if (!is_loading_from_binfile) {
auto bin_filename = CheckCanLoadFromBin(filename);
if (bin_filename.size() == 0) {
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_));
if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename);
}
dataset->data_filename_ = filename;
dataset->num_class_ = io_config_.num_class;
dataset->metadata_.Init(filename, dataset->num_class_);
if (!io_config_.use_two_round_loading) {
// read data to memory
auto text_data = LoadTextDataToMemory(filename, dataset->metadata_, rank, num_machines, &num_global_data, &used_data_indices);
@@ -185,8 +185,6 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
}
} else {
// load data from binary file
std::string bin_filename(filename);
bin_filename.append(".bin");
dataset.reset(LoadFromBinFile(bin_filename.c_str(), rank, num_machines));
}
// check meta data
@@ -199,18 +197,18 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac
Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, const Dataset* train_data) {
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_));
if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename);
}
data_size_t num_global_data = 0;
std::vector<data_size_t> used_data_indices;
auto dataset = std::unique_ptr<Dataset>(new Dataset());
dataset->data_filename_ = filename;
dataset->num_class_ = io_config_.num_class;
dataset->metadata_.Init(filename, dataset->num_class_);
bool is_loading_from_binfile = CheckCanLoadFromBin(filename);
if (!is_loading_from_binfile) {
auto bin_filename = CheckCanLoadFromBin(filename);
if (bin_filename.size() == 0) {
auto parser = std::unique_ptr<Parser>(Parser::CreateParser(filename, io_config_.has_header, 0, label_idx_));
if (parser == nullptr) {
Log::Fatal("Could not recognize data format of %s", filename);
}
dataset->data_filename_ = filename;
dataset->num_class_ = io_config_.num_class;
dataset->metadata_.Init(filename, dataset->num_class_);
if (!io_config_.use_two_round_loading) {
// read data in memory
auto text_data = LoadTextDataToMemory(filename, dataset->metadata_, 0, 1, &num_global_data, &used_data_indices);
@@ -234,8 +232,6 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename,
}
} else {
// load data from binary file
std::string bin_filename(filename);
bin_filename.append(".bin");
dataset.reset(LoadFromBinFile(bin_filename.c_str(), 0, 1));
}
// not need to check validation data
@@ -260,9 +256,19 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* bin_filename, int rank, int
// buffer to read binary file
size_t buffer_size = 16 * 1024 * 1024;
auto buffer = std::vector<char>(buffer_size);
// check token
size_t size_of_token = std::strlen(Dataset::binary_file_token);
size_t read_cnt = fread(buffer.data(), sizeof(char), size_of_token, file);
if (read_cnt != size_of_token) {
Log::Fatal("Binary file error: token has the wrong size");
}
if (std::string(buffer.data()) != std::string(Dataset::binary_file_token)) {
Log::Fatal("input file is not LightGBM binary file");
}
// read size of header
size_t read_cnt = fread(buffer.data(), sizeof(size_t), 1, file);
read_cnt = fread(buffer.data(), sizeof(size_t), 1, file);
if (read_cnt != 1) {
Log::Fatal("Binary file error: header has the wrong size");
@@ -401,7 +407,6 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* bin_filename, int rank, int
}
dataset->features_.shrink_to_fit();
fclose(file);
dataset->is_loading_from_binfile_ = true;
return dataset.release();
}
@@ -849,7 +854,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
}
/*! \brief Check whether data can be loaded from a binary file; returns the binary filename, or an empty string if it cannot */
bool DatasetLoader::CheckCanLoadFromBin(const char* filename) {
std::string DatasetLoader::CheckCanLoadFromBin(const char* filename) {
std::string bin_filename(filename);
bin_filename.append(".bin");
@@ -860,12 +865,32 @@ bool DatasetLoader::CheckCanLoadFromBin(const char* filename) {
#else
file = fopen(bin_filename.c_str(), "rb");
#endif
if (file == NULL) {
return false;
bin_filename = std::string(filename);
#ifdef _MSC_VER
fopen_s(&file, bin_filename.c_str(), "rb");
#else
file = fopen(bin_filename.c_str(), "rb");
#endif
if (file == NULL) {
Log::Fatal("cannot open data file %s", bin_filename.c_str());
}
}
size_t buffer_size = 256;
auto buffer = std::vector<char>(buffer_size);
// read size of token
size_t size_of_token = std::strlen(Dataset::binary_file_token);
size_t read_cnt = fread(buffer.data(), sizeof(char), size_of_token, file);
fclose(file);
if (read_cnt == size_of_token
&& std::string(buffer.data()) == std::string(Dataset::binary_file_token)) {
return bin_filename;
} else {
fclose(file);
return true;
return std::string();
}
}
}
\ No newline at end of file
@@ -50,6 +50,69 @@ void Metadata::Init(data_size_t num_data, int num_class, int weight_idx, int que
}
}
void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, data_size_t num_used_indices) {
num_data_ = num_used_indices;
num_class_ = fullset.num_class_;
label_ = std::vector<float>(num_used_indices);
for (data_size_t i = 0; i < num_used_indices; i++) {
label_[i] = fullset.label_[used_indices[i]];
}
if (fullset.weights_.size() > 0) {
weights_ = std::vector<float>(num_used_indices);
num_weights_ = num_used_indices;
for (data_size_t i = 0; i < num_used_indices; i++) {
weights_[i] = fullset.weights_[used_indices[i]];
}
} else {
num_weights_ = 0;
}
if (fullset.init_score_.size() > 0) {
init_score_ = std::vector<float>(num_used_indices);
num_init_score_ = num_used_indices;
for (data_size_t i = 0; i < num_used_indices; i++) {
init_score_[i] = fullset.init_score_[used_indices[i]];
}
} else {
num_init_score_ = 0;
}
if (fullset.query_boundaries_.size() > 0) {
std::vector<data_size_t> used_query;
data_size_t data_idx = 0;
for (data_size_t qid = 0; qid < num_queries_ && data_idx < num_used_indices; ++qid) {
data_size_t start = fullset.query_boundaries_[qid];
data_size_t end = fullset.query_boundaries_[qid + 1];
data_size_t len = end - start;
if (used_indices[data_idx] > start) {
continue;
} else if (used_indices[data_idx] == start) {
if (num_used_indices >= data_idx + len && used_indices[data_idx + len - 1] == end - 1) {
used_query.push_back(qid);
data_idx += len;
} else {
Log::Fatal("Data partition error, data didn't match queries");
}
} else {
Log::Fatal("Data partition error, data didn't match queries");
}
}
query_boundaries_ = std::vector<data_size_t>(used_query.size() + 1);
num_queries_ = static_cast<data_size_t>(used_query.size());
query_boundaries_[0] = 0;
for (data_size_t i = 0; i < num_queries_; ++i) {
data_size_t qid = used_query[i];
data_size_t len = fullset.query_boundaries_[qid + 1] - fullset.query_boundaries_[qid];
query_boundaries_[i + 1] = query_boundaries_[i] + len;
}
} else {
num_queries_ = 0;
}
}
void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
if (used_indices.size() <= 0) {
return;
@@ -196,6 +259,13 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
void Metadata::SetInitScore(const float* init_score, data_size_t len) {
std::lock_guard<std::mutex> lock(mutex_);
// passing nullptr (or len == 0) clears the stored init score
if (init_score == nullptr || len == 0) {
init_score_.clear();
num_init_score_ = 0;
return;
}
if (len != num_data_ * num_class_) {
Log::Fatal("Initial score size doesn't match data size");
}
@@ -208,6 +278,10 @@ void Metadata::SetInitScore(const float* init_score, data_size_t len) {
}
void Metadata::SetLabel(const float* label, data_size_t len) {
std::lock_guard<std::mutex> lock(mutex_);
if (label == nullptr) {
Log::Fatal("label cannot be nullptr");
}
if (num_data_ != len) {
Log::Fatal("len of label is not same with #data");
}
@@ -219,6 +293,13 @@
}
void Metadata::SetWeights(const float* weights, data_size_t len) {
std::lock_guard<std::mutex> lock(mutex_);
// passing nullptr (or len == 0) clears the stored weights
if (weights == nullptr || len == 0) {
weights_.clear();
num_weights_ = 0;
return;
}
if (num_data_ != len) {
Log::Fatal("len of weights is not same with #data");
}
@@ -232,6 +313,13 @@
}
void Metadata::SetQueryBoundaries(const data_size_t* query_boundaries, data_size_t len) {
std::lock_guard<std::mutex> lock(mutex_);
// passing nullptr (or len == 0) clears the stored query boundaries
if (query_boundaries == nullptr || len == 0) {
query_boundaries_.clear();
num_queries_ = 0;
return;
}
data_size_t sum = 0;
for (data_size_t i = 0; i < len; ++i) {
sum += query_boundaries[i];
@@ -248,6 +336,47 @@ void Metadata::SetQueryBoundaries(const data_size_t* query_boundaries, data_size
LoadQueryWeights();
}
void Metadata::SetQueryId(const data_size_t* query_id, data_size_t len) {
std::lock_guard<std::mutex> lock(mutex_);
// passing nullptr (or len == 0) clears the stored queries
if (query_id == nullptr || len == 0) {
query_boundaries_.clear();
queries_.clear();
num_queries_ = 0;
return;
}
if (num_data_ != len) {
Log::Fatal("len of query id is not same with #data");
}
if (queries_.size() > 0) { queries_.clear(); }
queries_ = std::vector<data_size_t>(num_data_);
for (data_size_t i = 0; i < num_data_; ++i) {
queries_[i] = query_id[i];
}
// need to convert query_id to query boundaries
std::vector<data_size_t> tmp_buffer;
data_size_t last_qid = -1;
data_size_t cur_cnt = 0;
for (data_size_t i = 0; i < num_data_; ++i) {
if (last_qid != queries_[i]) {
if (cur_cnt > 0) {
tmp_buffer.push_back(cur_cnt);
}
cur_cnt = 0;
last_qid = queries_[i];
}
++cur_cnt;
}
tmp_buffer.push_back(cur_cnt);
query_boundaries_ = std::vector<data_size_t>(tmp_buffer.size() + 1);
num_queries_ = static_cast<data_size_t>(tmp_buffer.size());
query_boundaries_[0] = 0;
for (size_t i = 0; i < tmp_buffer.size(); ++i) {
query_boundaries_[i + 1] = query_boundaries_[i] + tmp_buffer[i];
}
queries_.clear();
LoadQueryWeights();
}
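// A worked example of the conversion above (illustrative ids): for
// query_id = [0, 0, 0, 1, 1, 7] the per-query counts are [3, 2, 1], so
// query_boundaries_ becomes [0, 3, 5, 6] and num_queries_ = 3; the ids only
// need to be grouped together, not consecutive.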
void Metadata::LoadWeights() {
num_weights_ = 0;
......
@@ -279,7 +279,7 @@ inline VAL_T SparseBinIterator<VAL_T>::InnerGet(data_size_t idx) {
while (cur_pos_ < idx && i_delta_ < bin_data_->num_vals_) {
bin_data_->NextNonzero(&i_delta_, &cur_pos_);
}
if (cur_pos_ == idx && i_delta_ < bin_data_->num_vals_) {
if (cur_pos_ == idx && i_delta_ < bin_data_->num_vals_ && i_delta_ >= 0) {
return bin_data_->vals_[i_delta_];
} else {
return 0;
......
@@ -125,6 +125,43 @@ std::string Tree::ToString() {
return ss.str();
}
std::string Tree::ToJSON() {
std::stringstream ss;
ss << "\"num_leaves\":" << num_leaves_ << "," << std::endl;
ss << "\"tree_structure\":" << NodeToJSON(0) << std::endl;
return ss.str();
}
std::string Tree::NodeToJSON(int index) {
std::stringstream ss;
if (index >= 0) {
// non-leaf
ss << "{" << std::endl;
ss << "\"split_index\":" << index << "," << std::endl;
ss << "\"split_feature\":" << split_feature_real_.data()[index] << "," << std::endl;
ss << "\"split_gain\":" << split_gain_.data()[index] << "," << std::endl;
ss << "\"threshold\":" << threshold_.data()[index] << "," << std::endl;
ss << "\"internal_value\":" << internal_value_.data()[index] << "," << std::endl;
ss << "\"left_child\":" << NodeToJSON(left_child_.data()[index]) << "," << std::endl;
ss << "\"right_child\":" << NodeToJSON(right_child_.data()[index]) << std::endl;
ss << "}";
} else {
// leaf
index = ~index;
ss << "{" << std::endl;
ss << "\"leaf_index\":" << index << "," << std::endl;
ss << "\"leaf_parent\":" << leaf_parent_.data()[index] << "," << std::endl;
ss << "\"leaf_value\":" << leaf_value_.data()[index] << std::endl;
ss << "}";
}
return ss.str();
}
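// For orientation, the shape of the JSON emitted for a stump with one split
// and two leaves (values illustrative, whitespace condensed):
//
//   "num_leaves":2,
//   "tree_structure":{"split_index":0,"split_feature":5,"split_gain":0.81,
//     "threshold":0.25,"internal_value":0.0,
//     "left_child":{"leaf_index":0,"leaf_parent":0,"leaf_value":-0.1},
//     "right_child":{"leaf_index":1,"leaf_parent":0,"leaf_value":0.1}}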
Tree::Tree(const std::string& str) {
std::vector<std::string> lines = Common::Split(str.c_str(), '\n');
std::unordered_map<std::string, std::string> key_vals;
......
@@ -29,11 +29,8 @@ public:
}
void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
std::stringstream str_buf;
str_buf << test_name << "'s : " << PointWiseLossCalculator::Name();
name_.emplace_back(str_buf.str());
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back(PointWiseLossCalculator::Name());
num_data_ = num_data;
// get label
@@ -119,7 +116,7 @@ public:
}
inline static const char* Name() {
return "log loss";
return "logloss";
}
};
/*!
@@ -138,7 +135,7 @@ public:
}
inline static const char* Name() {
return "error rate";
return "error";
}
};
@@ -162,10 +159,8 @@ public:
return 1.0f;
}
void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
std::stringstream str_buf;
str_buf << test_name << "'s : AUC";
name_.emplace_back(str_buf.str());
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back("auc");
num_data_ = num_data;
// get label
......
@@ -23,10 +23,9 @@ public:
}
void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
std::stringstream str_buf;
str_buf << test_name << " : " << PointWiseLossCalculator::Name();
name_.emplace_back(str_buf.str());
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back(PointWiseLossCalculator::Name());
num_data_ = num_data;
// get label
label_ = metadata.label();
@@ -110,7 +109,7 @@ public:
}
inline static const char* Name() {
return "multi error";
return "multi_error";
}
};
@@ -130,7 +129,7 @@ public:
}
inline static const char* Name() {
return "multi logloss";
return "multi_logloss";
}
};
......
@@ -33,12 +33,9 @@ public:
~NDCGMetric() {
}
void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
void Init(const Metadata& metadata, data_size_t num_data) override {
for (auto k : eval_at_) {
std::stringstream str_buf;
str_buf << test_name << "'s : ";
str_buf << "NDCG@" + std::to_string(k) + " ";
name_.emplace_back(str_buf.str());
name_.emplace_back(std::string("ndcg@") + std::to_string(k));
}
num_data_ = num_data;
// get label
......
@@ -31,10 +31,8 @@ public:
return -1.0f;
}
void Init(const char* test_name, const Metadata& metadata, data_size_t num_data) override {
std::stringstream str_buf;
str_buf << test_name << " : " << PointWiseLossCalculator::Name();
name_.emplace_back(str_buf.str());
void Init(const Metadata& metadata, data_size_t num_data) override {
name_.emplace_back(PointWiseLossCalculator::Name());
num_data_ = num_data;
// get label
@@ -103,7 +101,7 @@ public:
}
inline static const char* Name() {
return "l2 loss";
return "l2";
}
};
@@ -116,7 +114,7 @@ public:
return std::fabs(score - label);
}
inline static const char* Name() {
return "l1 loss";
return "l1";
}
};
......
@@ -28,10 +28,6 @@ Linkers::Linkers(NetworkConfig config) {
// parse client list from file
ParseMachineList(config.machine_list_filename.c_str());
if (num_machines_ <= 1) {
return;
}
if (rank_ == -1) {
// get ip list of local machine
std::unordered_set<std::string> local_ip_list = TcpSocket::GetLocalIpList();
@@ -101,10 +97,15 @@ void Linkers::ParseMachineList(const char * filename) {
client_ips_.push_back(str_after_split[0]);
client_ports_.push_back(atoi(str_after_split[1].c_str()));
}
if (client_ips_.size() == 0) {
Log::Fatal("Machine list file doesn't contain any ip and port. \
Please check it again");
}
if (client_ips_.size() != static_cast<size_t>(num_machines_)) {
Log::Warning("World size is larger than the machine_list size, change world size to %d", client_ips_.size());
num_machines_ = static_cast<int>(client_ips_.size());
}
}
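// The machine list file parsed above is expected to hold one "ip port" pair
// per line; a sketch with illustrative addresses:
//
//   192.168.0.1 12400
//   192.168.0.2 12400
//   192.168.0.3 12400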
void Linkers::TryBind(int port) {
......