Commit c861be93 authored by Guolin Ke's avatar Guolin Ke
Browse files

Some bug fixes

parent 6b288215
...@@ -519,7 +519,7 @@ class Dataset(object): ...@@ -519,7 +519,7 @@ class Dataset(object):
params_str = param_dict_to_str(params) params_str = param_dict_to_str(params)
_safe_call(_LIB.LGBM_DatasetGetSubset( _safe_call(_LIB.LGBM_DatasetGetSubset(
ctypes.byref(self.handle), ctypes.byref(self.handle),
used_indices.data_as(ctypes.POINTER(ctypes.c_int32)), used_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_int32)),
used_indices.shape[0], used_indices.shape[0],
c_str(params_str), c_str(params_str),
ctypes.byref(ret.handle))) ctypes.byref(ret.handle)))
...@@ -808,6 +808,7 @@ class Booster(object): ...@@ -808,6 +808,7 @@ class Booster(object):
self.__need_reload_eval_info = True self.__need_reload_eval_info = True
self.__is_manage_handle = True self.__is_manage_handle = True
self.__train_data_name = "training" self.__train_data_name = "training"
self.__attr = {}
params = {} if params is None else params params = {} if params is None else params
if silent: if silent:
params["verbose"] = 0 params["verbose"] = 0
...@@ -1205,3 +1206,36 @@ class Booster(object): ...@@ -1205,3 +1206,36 @@ class Booster(object):
self.__higher_better_inner_eval.append(True) self.__higher_better_inner_eval.append(True)
else: else:
self.__higher_better_inner_eval.append(False) self.__higher_better_inner_eval.append(False)
def attr(self, key):
    """Get attribute string from the Booster.

    Parameters
    ----------
    key : str
        The key to get attribute from.

    Returns
    -------
    value : str
        The attribute value of the key; returns None if the attribute does not exist.
    """
    # dict.get already yields None for a missing key, matching the contract above.
    return self.__attr.get(key)
def set_attr(self, **kwargs):
    """Set the attribute of the Booster.

    Parameters
    ----------
    **kwargs
        The attributes to set. Setting a value to None deletes an attribute.
    """
    for name, val in kwargs.items():
        if val is None:
            # None means "delete"; pop with a default so a missing key is a no-op.
            self.__attr.pop(name, None)
            continue
        if not isinstance(val, STRING_TYPES):
            raise ValueError("Set Attr only accepts string values")
        self.__attr[name] = val
from __future__ import absolute_import from __future__ import absolute_import
import collections
class EarlyStopException(Exception): class EarlyStopException(Exception):
"""Exception of early stopping. """Exception of early stopping.
......
...@@ -16,7 +16,7 @@ def _construct_dataset(data, reference=None, ...@@ -16,7 +16,7 @@ def _construct_dataset(data, reference=None,
group = None group = None
init_score = None init_score = None
if other_fields is not None: if other_fields is not None:
if not is isinstance(other_fields, dict): if not isinstance(other_fields, dict):
raise TypeError("other filed data should be dict type") raise TypeError("other filed data should be dict type")
weight = None if 'weight' not in other_fields else other_fields['weight'] weight = None if 'weight' not in other_fields else other_fields['weight']
group = None if 'group' not in other_fields else other_fields['group'] group = None if 'group' not in other_fields else other_fields['group']
...@@ -127,7 +127,8 @@ def train(params, train_data, num_boost_round=100, ...@@ -127,7 +127,8 @@ def train(params, train_data, num_boost_round=100,
"""reduce cost for prediction training data""" """reduce cost for prediction training data"""
if valid_datas[i] is train_data: if valid_datas[i] is train_data:
is_valid_contain_train = True is_valid_contain_train = True
train_data_name = valid_names[i] if valid_names is not None:
train_data_name = valid_names[i]
continue continue
valid_set = _construct_dataset( valid_set = _construct_dataset(
valid_datas[i], valid_datas[i],
...@@ -136,7 +137,10 @@ def train(params, train_data, num_boost_round=100, ...@@ -136,7 +137,10 @@ def train(params, train_data, num_boost_round=100,
other_fields, other_fields,
predictor) predictor)
valid_sets.append(valid_set) valid_sets.append(valid_set)
name_valid_sets.append(valid_names[i]) if valid_names is not None:
name_valid_sets.append(valid_names[i])
else:
name_valid_sets.append('valid_'+str(i))
"""process callbacks""" """process callbacks"""
callbacks = [] if callbacks is None else callbacks callbacks = [] if callbacks is None else callbacks
...@@ -153,8 +157,8 @@ def train(params, train_data, num_boost_round=100, ...@@ -153,8 +157,8 @@ def train(params, train_data, num_boost_round=100,
if learning_rates is not None: if learning_rates is not None:
callbacks.append(callback.reset_learning_rate(learning_rates)) callbacks.append(callback.reset_learning_rate(learning_rates))
if evals_result is not None: if out_eval_result is not None:
callbacks.append(callback.record_evaluation(evals_result)) callbacks.append(callback.record_evaluation(out_eval_result))
callbacks_before_iter = [ callbacks_before_iter = [
cb for cb in callbacks if cb.__dict__.get('before_iteration', False)] cb for cb in callbacks if cb.__dict__.get('before_iteration', False)]
...@@ -203,7 +207,7 @@ def train(params, train_data, num_boost_round=100, ...@@ -203,7 +207,7 @@ def train(params, train_data, num_boost_round=100,
class CVBooster(object): class CVBooster(object):
""""Auxiliary datastruct to hold one fold of CV.""" """"Auxiliary datastruct to hold one fold of CV."""
def __init__(self, train_set, valid_test, param): def __init__(self, train_set, valid_test, params):
""""Initialize the CVBooster""" """"Initialize the CVBooster"""
self.train_set = train_set self.train_set = train_set
self.valid_test = valid_test self.valid_test = valid_test
...@@ -268,12 +272,12 @@ def _agg_cv_result(raw_results): ...@@ -268,12 +272,12 @@ def _agg_cv_result(raw_results):
metric_type[key] = one_line[3] metric_type[key] = one_line[3]
if key not in cvmap: if key not in cvmap:
cvmap[key] = [] cvmap[key] = []
cvmap[key].append(one_result[2]) cvmap[key].append(one_line[2])
results = [] results = []
for k, v in cvmap.items(): for k, v in cvmap.items():
v = np.array(v) v = np.array(v)
mean, std = np.mean(v), np.std(v) mean, std = np.mean(v), np.std(v)
results.extend(['cv_agg', k, mean, metric_type[k], std]) results.append(('cv_agg', k, mean, metric_type[k], std))
return results return results
def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False, def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
...@@ -339,9 +343,14 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False, ...@@ -339,9 +343,14 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
if not 'metric' in params: if not 'metric' in params:
params['metric'] = [] params['metric'] = []
else:
if is_str(params['metric']):
params['metric'] = params['metric'].split(',')
else:
params['metric'] = list(params['metric'])
if len(metric) > 0: if metrics is not None and len(metrics) > 0:
params['metric'].extend(metric) params['metric'].extend(metrics)
train_set = _construct_dataset(train_data, None, params, train_fields) train_set = _construct_dataset(train_data, None, params, train_fields)
...@@ -374,8 +383,7 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False, ...@@ -374,8 +383,7 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
evaluation_result_list=None)) evaluation_result_list=None))
for fold in cvfolds: for fold in cvfolds:
fold.update(fobj) fold.update(fobj)
res = aggcv([f.eval(feval) for f in cvfolds]) res = _agg_cv_result([f.eval(feval) for f in cvfolds])
for _, key, mean, _, std in res: for _, key, mean, _, std in res:
if key + '-mean' not in results: if key + '-mean' not in results:
results[key + '-mean'] = [] results[key + '-mean'] = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment