Commit 83007b1c authored by Guolin Ke's avatar Guolin Ke
Browse files

update some comments

parent 67ca6091
...@@ -65,13 +65,13 @@ DllExport int LGBM_DatasetCreateFromFile(const char* filename, ...@@ -65,13 +65,13 @@ DllExport int LGBM_DatasetCreateFromFile(const char* filename,
/*! /*!
* \brief create a dataset from CSR format * \brief create a dataset from CSR format
* \param indptr pointer to row headers * \param indptr pointer to row headers
* \param indptr_type * \param indptr_type type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex * \param indices findex
* \param data fvalue * \param data fvalue
* \param data_type * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nindptr number of rows in the matrix + 1 * \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix * \param nelem number of nonzero elements in the matrix
* \param num_col number of columns; when it's set to 0, then guess from data * \param num_col number of columns
* \param parameters additional parameters * \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means don't used * \param reference used to align bin mapper with other dataset, nullptr means don't used
* \param out created dataset * \param out created dataset
...@@ -92,13 +92,13 @@ DllExport int LGBM_DatasetCreateFromCSR(const void* indptr, ...@@ -92,13 +92,13 @@ DllExport int LGBM_DatasetCreateFromCSR(const void* indptr,
/*! /*!
* \brief create a dataset from CSC format * \brief create a dataset from CSC format
* \param col_ptr pointer to col headers * \param col_ptr pointer to col headers
* \param col_ptr_type * \param col_ptr_type type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex * \param indices findex
* \param data fvalue * \param data fvalue
* \param data_type * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param ncol_ptr number of rows in the matrix + 1 * \param ncol_ptr number of cols in the matrix + 1
* \param nelem number of nonzero elements in the matrix * \param nelem number of nonzero elements in the matrix
* \param num_row number of rows; when it's set to 0, then guess from data * \param num_row number of rows
* \param parameters additional parameters * \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means don't used * \param reference used to align bin mapper with other dataset, nullptr means don't used
* \param out created dataset * \param out created dataset
...@@ -119,7 +119,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr, ...@@ -119,7 +119,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
/*! /*!
* \brief create dataset from dense matrix * \brief create dataset from dense matrix
* \param data pointer to the data space * \param data pointer to the data space
* \param data_type 0 * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows * \param nrow number of rows
* \param ncol number of columns * \param ncol number of columns
* \param is_row_major 1 for row major, 0 for column major * \param is_row_major 1 for row major, 0 for column major
...@@ -139,7 +139,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data, ...@@ -139,7 +139,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data,
/*! /*!
* \brief Create subset of a data * \brief Create subset of a data
* \param full_data the full dataset * \param handle handle of full dataset
* \param used_row_indices Indices used in subset * \param used_row_indices Indices used in subset
* \param num_used_row_indices len of used_row_indices * \param num_used_row_indices len of used_row_indices
* \param parameters additional parameters * \param parameters additional parameters
...@@ -147,7 +147,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data, ...@@ -147,7 +147,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data,
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_DatasetGetSubset( DllExport int LGBM_DatasetGetSubset(
const DatesetHandle* full_data, const DatesetHandle* handle,
const int32_t* used_row_indices, const int32_t* used_row_indices,
int32_t num_used_row_indices, int32_t num_used_row_indices,
const char* parameters, const char* parameters,
...@@ -170,11 +170,13 @@ DllExport int LGBM_DatasetSaveBinary(DatesetHandle handle, ...@@ -170,11 +170,13 @@ DllExport int LGBM_DatasetSaveBinary(DatesetHandle handle,
/*! /*!
* \brief set vector to a content in info * \brief set vector to a content in info
* Note: group and group_id only work for C_API_DTYPE_INT32
* label and weight only work for C_API_DTYPE_FLOAT32
* \param handle a instance of dataset * \param handle a instance of dataset
* \param field_name field name, can be label, weight, group * \param field_name field name, can be label, weight, group, group_id
* \param field_data pointer to vector * \param field_data pointer to vector
* \param num_element number of element in field_data * \param num_element number of element in field_data
* \param type float32 or int32 * \param type C_API_DTYPE_FLOAT32 or C_API_DTYPE_INT32
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_DatasetSetField(DatesetHandle handle, DllExport int LGBM_DatasetSetField(DatesetHandle handle,
...@@ -189,7 +191,7 @@ DllExport int LGBM_DatasetSetField(DatesetHandle handle, ...@@ -189,7 +191,7 @@ DllExport int LGBM_DatasetSetField(DatesetHandle handle,
* \param field_name field name * \param field_name field name
* \param out_len used to set result length * \param out_len used to set result length
* \param out_ptr pointer to the result * \param out_ptr pointer to the result
* \param out_type float32 or int32 * \param out_type C_API_DTYPE_FLOAT32 or C_API_DTYPE_INT32
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_DatasetGetField(DatesetHandle handle, DllExport int LGBM_DatasetGetField(DatesetHandle handle,
...@@ -232,13 +234,13 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data, ...@@ -232,13 +234,13 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
/*! /*!
* \brief load an existing boosting from model file * \brief load an existing boosting from model file
* \param filename filename of model * \param filename filename of model
* \param out_num_total_model number of total models * \param out_num_iterations number of iterations of this booster
* \param out handle of created Booster * \param out handle of created Booster
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_BoosterCreateFromModelfile( DllExport int LGBM_BoosterCreateFromModelfile(
const char* filename, const char* filename,
int64_t* out_num_total_model, int64_t* out_num_iterations,
BoosterHandle* out); BoosterHandle* out);
...@@ -287,7 +289,8 @@ DllExport int LGBM_BoosterResetParameter(BoosterHandle handle, const char* param ...@@ -287,7 +289,8 @@ DllExport int LGBM_BoosterResetParameter(BoosterHandle handle, const char* param
/*! /*!
* \brief Get number of class * \brief Get number of class
* \param handle handle * \param handle handle
* \return number of class * \param out_len number of class
* \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_BoosterGetNumClasses(BoosterHandle handle, int64_t* out_len); DllExport int LGBM_BoosterGetNumClasses(BoosterHandle handle, int64_t* out_len);
...@@ -322,28 +325,34 @@ DllExport int LGBM_BoosterRollbackOneIter(BoosterHandle handle); ...@@ -322,28 +325,34 @@ DllExport int LGBM_BoosterRollbackOneIter(BoosterHandle handle);
/*! /*!
* \brief Get iteration of current boosting rounds * \brief Get iteration of current boosting rounds
* \return iteration of boosting rounds * \param out_iteration iteration of boosting rounds
* \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int64_t* out_iteration); DllExport int LGBM_BoosterGetCurrentIteration(BoosterHandle handle, int64_t* out_iteration);
/*! /*!
* \brief Get number of eval * \brief Get number of eval
* \return total number of eval result * \param out_len total number of eval results
* \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len); DllExport int LGBM_BoosterGetEvalCounts(BoosterHandle handle, int64_t* out_len);
/*! /*!
* \brief Get number of eval * \brief Get Name of eval
* \return total number of eval result * \param out_len total number of eval results
* \param out_strs names of eval result
* \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, char** out_strs); DllExport int LGBM_BoosterGetEvalNames(BoosterHandle handle, int64_t* out_len, char** out_strs);
/*! /*!
* \brief get evaluation for training data and validation data * \brief get evaluation for training data and validation data
Note: 1. you should call LGBM_BoosterGetEvalNames first to get the name of evaluation results
2. should pre-allocate memory for out_results, you can get its length by LGBM_BoosterGetEvalCounts
* \param handle handle * \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ... * \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result * \param out_len len of output result
* \param out_result the string containing evaluation statistics, should allocate memory before call this function * \param out_result float array contains result
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
*/ */
DllExport int LGBM_BoosterGetEval(BoosterHandle handle, DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
...@@ -353,7 +362,8 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle, ...@@ -353,7 +362,8 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
/*! /*!
* \brief Get prediction for training data and validation data * \brief Get prediction for training data and validation data
this can be used to support customized eval function this can be used to support customized eval function
Note: should pre-allocate memory for out_result, its length is equal to num_class * num_data
* \param handle handle * \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ... * \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result * \param out_len len of output result
...@@ -371,9 +381,9 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle, ...@@ -371,9 +381,9 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
* \param data_filename filename of data file * \param data_filename filename of data file
* \param data_has_header data file has header or not * \param data_has_header data file has header or not
* \param predict_type * \param predict_type
* 0:normal, with transform (if needed) * C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* 1:raw score * C_API_PREDICT_RAW_SCORE: raw score
* 2:leaf index * C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit * \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param result_filename filename of result file * \param result_filename filename of result file
* \return 0 when succeed, -1 when failure happens * \return 0 when succeed, -1 when failure happens
...@@ -387,19 +397,22 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle, ...@@ -387,19 +397,22 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
/*! /*!
* \brief make prediction for an new data set * \brief make prediction for an new data set
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle * \param handle handle
* \param indptr pointer to row headers * \param indptr pointer to row headers
* \param indptr_type * \param indptr_type type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex * \param indices findex
* \param data fvalue * \param data fvalue
* \param data_type * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nindptr number of rows in the matrix + 1 * \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix * \param nelem number of nonzero elements in the matrix
* \param num_col number of columns; when it's set to 0, then guess from data * \param num_col number of columns; when it's set to 0, then guess from data
* \param predict_type * \param predict_type
* 0:normal, with transform (if needed) * C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* 1:raw score * C_API_PREDICT_RAW_SCORE: raw score
* 2:leaf index * C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit * \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len len of output result * \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function * \param out_result used to set a pointer to array, should allocate memory before call this function
...@@ -421,16 +434,19 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle, ...@@ -421,16 +434,19 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
/*! /*!
* \brief make prediction for an new data set * \brief make prediction for an new data set
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle * \param handle handle
* \param data pointer to the data space * \param data pointer to the data space
* \param data_type * \param data_type type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows * \param nrow number of rows
* \param ncol number of columns * \param ncol number of columns
* \param is_row_major 1 for row major, 0 for column major * \param is_row_major 1 for row major, 0 for column major
* \param predict_type * \param predict_type
* 0:normal, with transform (if needed) * C_API_PREDICT_NORMAL: normal prediction, with transform (if needed)
* 1:raw score * C_API_PREDICT_RAW_SCORE: raw score
* 2:leaf index * C_API_PREDICT_LEAF_INDEX: leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit * \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len len of output result * \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function * \param out_result used to set a pointer to array, should allocate memory before call this function
......
...@@ -186,43 +186,42 @@ def c_int_array(data): ...@@ -186,43 +186,42 @@ def c_int_array(data):
class Predictor(object): class Predictor(object):
""""A Predictor of LightGBM. """"A Predictor of LightGBM.
""" """
def __init__(self,model_file=None, params=None, booster_handle=None, is_manage_handle=True): def __init__(self,model_file=None, booster_handle=None, is_manage_handle=True):
"""Initialize the Predictor. """Initialize the Predictor.
Parameters Parameters
---------- ----------
model_file : string model_file : string
Path to the model file. Path to the model file.
params : dict
Parameters for boosters.
""" """
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
self.__is_manage_handle = True self.__is_manage_handle = True
if model_file is not None: if model_file is not None:
"""Prediction task""" """Prediction task"""
out_num_total_model = ctypes.c_int64(0) out_num_iterations = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(model_file), c_str(model_file),
ctypes.byref(out_num_total_model), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
self.__num_total_model = out_num_total_model.value out_num_class = ctypes.c_int64(0)
tmp_out_len = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self.handle,
ctypes.byref(tmp_out_len))) ctypes.byref(out_num_class)))
self.num_class = tmp_out_len.value self.num_class = out_num_class.value
self.__num_total_model = out_num_iterations.value * self.num_class
elif booster_handle is not None: elif booster_handle is not None:
self.__is_manage_handle = is_manage_handle self.__is_manage_handle = is_manage_handle
self.handle = booster_handle self.handle = booster_handle
tmp_out_len = ctypes.c_int64(0) out_num_class = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self.handle,
ctypes.byref(tmp_out_len))) ctypes.byref(out_num_class)))
self.num_class = tmp_out_len.value self.num_class = out_num_class.value
out_num_iterations = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetCurrentIteration( _safe_call(_LIB.LGBM_BoosterGetCurrentIteration(
self.handle, self.handle,
ctypes.byref(tmp_out_len))) ctypes.byref(out_num_iterations)))
self.__num_total_model = self.num_class * tmp_out_len.value self.__num_total_model = out_num_iterations.value * self.num_class
else: else:
raise TypeError('Need Model file to create a booster') raise TypeError('Need Model file to create a booster')
...@@ -855,12 +854,11 @@ class Booster(object): ...@@ -855,12 +854,11 @@ class Booster(object):
self.__get_eval_info() self.__get_eval_info()
elif model_file is not None: elif model_file is not None:
"""Prediction task""" """Prediction task"""
out_num_total_model = ctypes.c_int64(0) out_num_iterations = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterCreateFromModelfile( _safe_call(_LIB.LGBM_BoosterCreateFromModelfile(
c_str(model_file), c_str(model_file),
ctypes.byref(out_num_total_model), ctypes.byref(out_num_iterations),
ctypes.byref(self.handle))) ctypes.byref(self.handle)))
self.__num_total_model = out_num_total_model.value
out_num_class = ctypes.c_int64(0) out_num_class = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterGetNumClasses( _safe_call(_LIB.LGBM_BoosterGetNumClasses(
self.handle, self.handle,
......
...@@ -385,7 +385,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr, ...@@ -385,7 +385,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
} }
DllExport int LGBM_DatasetGetSubset( DllExport int LGBM_DatasetGetSubset(
const DatesetHandle* full_data, const DatesetHandle* handle,
const int32_t* used_row_indices, const int32_t* used_row_indices,
int32_t num_used_row_indices, int32_t num_used_row_indices,
const char* parameters, const char* parameters,
...@@ -394,7 +394,7 @@ DllExport int LGBM_DatasetGetSubset( ...@@ -394,7 +394,7 @@ DllExport int LGBM_DatasetGetSubset(
auto param = ConfigBase::Str2Map(parameters); auto param = ConfigBase::Str2Map(parameters);
IOConfig io_config; IOConfig io_config;
io_config.Set(param); io_config.Set(param);
auto full_dataset = reinterpret_cast<const Dataset*>(*full_data); auto full_dataset = reinterpret_cast<const Dataset*>(*handle);
auto ret = std::unique_ptr<Dataset>( auto ret = std::unique_ptr<Dataset>(
full_dataset->Subset(used_row_indices, full_dataset->Subset(used_row_indices,
num_used_row_indices, num_used_row_indices,
...@@ -486,11 +486,12 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data, ...@@ -486,11 +486,12 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
DllExport int LGBM_BoosterCreateFromModelfile( DllExport int LGBM_BoosterCreateFromModelfile(
const char* filename, const char* filename,
int64_t* num_total_model, int64_t* out_num_iterations,
BoosterHandle* out) { BoosterHandle* out) {
API_BEGIN(); API_BEGIN();
auto ret = std::unique_ptr<Booster>(new Booster(filename)); auto ret = std::unique_ptr<Booster>(new Booster(filename));
*num_total_model = static_cast<int64_t>(ret->GetBoosting()->NumberOfTotalModel()); *out_num_iterations = static_cast<int64_t>(ret->GetBoosting()->NumberOfTotalModel()
/ ret->GetBoosting()->NumberOfClasses());
*out = ret.release(); *out = ret.release();
API_END(); API_END();
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <unordered_map> #include <unordered_set>
#include <algorithm> #include <algorithm>
namespace LightGBM { namespace LightGBM {
...@@ -95,16 +95,15 @@ void OverallConfig::GetMetricType(const std::unordered_map<std::string, std::str ...@@ -95,16 +95,15 @@ void OverallConfig::GetMetricType(const std::unordered_map<std::string, std::str
// split // split
std::vector<std::string> metrics = Common::Split(value.c_str(), ','); std::vector<std::string> metrics = Common::Split(value.c_str(), ',');
// remove duplicate // remove duplicate
std::unordered_map<std::string, int> metric_maps; std::unordered_set<std::string> metric_sets;
for (auto& metric : metrics) { for (auto& metric : metrics) {
std::transform(metric.begin(), metric.end(), metric.begin(), Common::tolower); std::transform(metric.begin(), metric.end(), metric.begin(), Common::tolower);
if (metric_maps.count(metric) <= 0) { if (metric_sets.count(metric) <= 0) {
metric_maps[metric] = 1; metric_sets.insert(metric);
} }
} }
for (auto& pair : metric_maps) { for (auto& metric : metric_sets) {
std::string sub_metric_str = pair.first; metric_types.push_back(metric);
metric_types.push_back(sub_metric_str);
} }
metric_types.shrink_to_fit(); metric_types.shrink_to_fit();
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment