Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
83007b1c
Commit
83007b1c
authored
Nov 27, 2016
by
Guolin Ke
Browse files
update some comments
parent
67ca6091
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
76 additions
and
62 deletions
+76
-62
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+50
-34
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+15
-17
src/c_api.cpp
src/c_api.cpp
+5
-4
src/io/config.cpp
src/io/config.cpp
+6
-7
No files found.
include/LightGBM/c_api.h
View file @
83007b1c
...
...
@@ -65,13 +65,13 @@ DllExport int LGBM_DatasetCreateFromFile(const char* filename,
/*!
* \brief create a dataset from CSR format
* \param indptr pointer to row headers
* \param indptr_type
* \param indptr_type
type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex
* \param data fvalue
* \param data_type
* \param data_type
type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_col number of columns
; when it's set to 0, then guess from data
* \param num_col number of columns
* \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means not used
* \param out created dataset
...
...
@@ -92,13 +92,13 @@ DllExport int LGBM_DatasetCreateFromCSR(const void* indptr,
/*!
* \brief create a dataset from CSC format
* \param col_ptr pointer to col headers
* \param col_ptr_type
* \param col_ptr_type
type of col_ptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex
* \param data fvalue
* \param data_type
* \param ncol_ptr number of
row
s in the matrix + 1
* \param data_type
type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param ncol_ptr number of
col
s in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_row number of rows
; when it's set to 0, then guess from data
* \param num_row number of rows
* \param parameters additional parameters
* \param reference used to align bin mapper with other dataset, nullptr means not used
* \param out created dataset
...
...
@@ -119,7 +119,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
/*!
* \brief create dataset from dense matrix
* \param data pointer to the data space
* \param data_type
0
* \param data_type
type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows
* \param ncol number of columns
* \param is_row_major 1 for row major, 0 for column major
...
...
@@ -139,7 +139,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data,
/*!
* \brief Create subset of a data
* \param
full_data the
full dataset
* \param
handle handle of
full dataset
* \param used_row_indices Indices used in subset
* \param num_used_row_indices len of used_row_indices
* \param parameters additional parameters
...
...
@@ -147,7 +147,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data,
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_DatasetGetSubset
(
const
DatesetHandle
*
full_data
,
const
DatesetHandle
*
handle
,
const
int32_t
*
used_row_indices
,
int32_t
num_used_row_indices
,
const
char
*
parameters
,
...
...
@@ -170,11 +170,13 @@ DllExport int LGBM_DatasetSaveBinary(DatesetHandle handle,
/*!
* \brief set vector to a content in info
* Note: group and group_id only work for C_API_DTYPE_INT32
* label and weight only work for C_API_DTYPE_FLOAT32
* \param handle an instance of dataset
* \param field_name field name, can be label, weight, group
* \param field_name field name, can be label, weight, group
, group_id
* \param field_data pointer to vector
* \param num_element number of element in field_data
* \param type
float32 or int
32
* \param type
C_API_DTYPE_FLOAT32 or C_API_DTYPE_INT
32
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_DatasetSetField
(
DatesetHandle
handle
,
...
...
@@ -189,7 +191,7 @@ DllExport int LGBM_DatasetSetField(DatesetHandle handle,
* \param field_name field name
* \param out_len used to set result length
* \param out_ptr pointer to the result
* \param out_type
float32 or int
32
* \param out_type
C_API_DTYPE_FLOAT32 or C_API_DTYPE_INT
32
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_DatasetGetField
(
DatesetHandle
handle
,
...
...
@@ -232,13 +234,13 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
/*!
* \brief load an existing boosting from model file
* \param filename filename of model
* \param out_num_
total_model number of total models
* \param out_num_
iterations number of iterations of this booster
* \param out handle of created Booster
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterCreateFromModelfile
(
const
char
*
filename
,
int64_t
*
out_num_
total_model
,
int64_t
*
out_num_
iterations
,
BoosterHandle
*
out
);
...
...
@@ -287,7 +289,8 @@ DllExport int LGBM_BoosterResetParameter(BoosterHandle handle, const char* param
/*!
* \brief Get number of class
* \param handle handle
* \return number of class
* \param out_len number of class
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetNumClasses
(
BoosterHandle
handle
,
int64_t
*
out_len
);
...
...
@@ -322,28 +325,34 @@ DllExport int LGBM_BoosterRollbackOneIter(BoosterHandle handle);
/*!
* \brief Get iteration of current boosting rounds
* \return iteration of boosting rounds
* \param out_iteration iteration of boosting rounds
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetCurrentIteration
(
BoosterHandle
handle
,
int64_t
*
out_iteration
);
/*!
* \brief Get number of eval
* \return total number of eval result
* \param out_len total number of eval results
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetEvalCounts
(
BoosterHandle
handle
,
int64_t
*
out_len
);
/*!
* \brief Get number of eval
* \return total number of eval result
* \brief Get Name of eval
* \param out_len total number of eval results
* \param out_strs names of eval result
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetEvalNames
(
BoosterHandle
handle
,
int64_t
*
out_len
,
char
**
out_strs
);
/*!
* \brief get evaluation for training data and validation data
Note: 1. you should call LGBM_BoosterGetEvalNames first to get the name of evaluation results
2. should pre-allocate memory for out_results, you can get its length by LGBM_BoosterGetEvalCounts
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
* \param out_result
the string containing evaluation statistics, should allocate memory before call this function
* \param out_result
float array containing the results
* \return 0 when succeed, -1 when failure happens
*/
DllExport
int
LGBM_BoosterGetEval
(
BoosterHandle
handle
,
...
...
@@ -353,7 +362,8 @@ DllExport int LGBM_BoosterGetEval(BoosterHandle handle,
/*!
* \brief Get prediction for training data and validation data
this can be used to support customized eval function
this can be used to support customized eval function
Note: should pre-allocate memory for out_result, its length is equal to num_class * num_data
* \param handle handle
* \param data_idx 0:training data, 1: 1st valid data, 2:2nd valid data ...
* \param out_len len of output result
...
...
@@ -371,9 +381,9 @@ DllExport int LGBM_BoosterGetPredict(BoosterHandle handle,
* \param data_filename filename of data file
* \param data_has_header data file has header or not
* \param predict_type
*
0:normal
, with transform (if needed)
*
1:
raw score
*
2:
leaf index
*
C_API_PREDICT_NORMAL: normal prediction
, with transform (if needed)
*
C_API_PREDICT_RAW_SCORE:
raw score
*
C_API_PREDICT_LEAF_INDEX:
leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param result_filename filename of result file
* \return 0 when succeed, -1 when failure happens
...
...
@@ -387,19 +397,22 @@ DllExport int LGBM_BoosterPredictForFile(BoosterHandle handle,
/*!
* \brief make prediction for a new data set
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param indptr pointer to row headers
* \param indptr_type
* \param indptr_type
type of indptr, can be C_API_DTYPE_INT32 or C_API_DTYPE_INT64
* \param indices findex
* \param data fvalue
* \param data_type
* \param data_type
type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nindptr number of rows in the matrix + 1
* \param nelem number of nonzero elements in the matrix
* \param num_col number of columns; when it's set to 0, then guess from data
* \param predict_type
*
0:normal
, with transform (if needed)
*
1:
raw score
*
2:
leaf index
*
C_API_PREDICT_NORMAL: normal prediction
, with transform (if needed)
*
C_API_PREDICT_RAW_SCORE:
raw score
*
C_API_PREDICT_LEAF_INDEX:
leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
...
...
@@ -421,16 +434,19 @@ DllExport int LGBM_BoosterPredictForCSR(BoosterHandle handle,
/*!
* \brief make prediction for a new data set
* Note: should pre-allocate memory for out_result,
* for normal and raw score: its length is equal to num_class * num_data
* for leaf index, its length is equal to num_class * num_data * num_iteration
* \param handle handle
* \param data pointer to the data space
* \param data_type
* \param data_type
type of data pointer, can be C_API_DTYPE_FLOAT32 or C_API_DTYPE_FLOAT64
* \param nrow number of rows
* \param ncol number of columns
* \param is_row_major 1 for row major, 0 for column major
* \param predict_type
*
0:normal
, with transform (if needed)
*
1:
raw score
*
2:
leaf index
*
C_API_PREDICT_NORMAL: normal prediction
, with transform (if needed)
*
C_API_PREDICT_RAW_SCORE:
raw score
*
C_API_PREDICT_LEAF_INDEX:
leaf index
* \param num_iteration number of iteration for prediction, <= 0 means no limit
* \param out_len len of output result
* \param out_result used to set a pointer to array, should allocate memory before call this function
...
...
python-package/lightgbm/basic.py
View file @
83007b1c
...
...
@@ -186,43 +186,42 @@ def c_int_array(data):
class
Predictor
(
object
):
""""A Predictor of LightGBM.
"""
def
__init__
(
self
,
model_file
=
None
,
params
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
def
__init__
(
self
,
model_file
=
None
,
booster_handle
=
None
,
is_manage_handle
=
True
):
"""Initialize the Predictor.
Parameters
----------
model_file : string
Path to the model file.
params : dict
Parameters for boosters.
"""
self
.
handle
=
ctypes
.
c_void_p
()
self
.
__is_manage_handle
=
True
if
model_file
is
not
None
:
"""Prediction task"""
out_num_
total_model
=
ctypes
.
c_int64
(
0
)
out_num_
iterations
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterCreateFromModelfile
(
c_str
(
model_file
),
ctypes
.
byref
(
out_num_
total_model
),
ctypes
.
byref
(
out_num_
iterations
),
ctypes
.
byref
(
self
.
handle
)))
self
.
__num_total_model
=
out_num_total_model
.
value
tmp_out_len
=
ctypes
.
c_int64
(
0
)
out_num_class
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetNumClasses
(
self
.
handle
,
ctypes
.
byref
(
tmp_out_len
)))
self
.
num_class
=
tmp_out_len
.
value
ctypes
.
byref
(
out_num_class
)))
self
.
num_class
=
out_num_class
.
value
self
.
__num_total_model
=
out_num_iterations
.
value
*
self
.
num_class
elif
booster_handle
is
not
None
:
self
.
__is_manage_handle
=
is_manage_handle
self
.
handle
=
booster_handle
tmp_out_len
=
ctypes
.
c_int64
(
0
)
out_num_class
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetNumClasses
(
self
.
handle
,
ctypes
.
byref
(
tmp_out_len
)))
self
.
num_class
=
tmp_out_len
.
value
ctypes
.
byref
(
out_num_class
)))
self
.
num_class
=
out_num_class
.
value
out_num_iterations
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetCurrentIteration
(
self
.
handle
,
ctypes
.
byref
(
tmp_out_len
)))
self
.
__num_total_model
=
self
.
num_class
*
tmp_out_len
.
value
ctypes
.
byref
(
out_num_iterations
)))
self
.
__num_total_model
=
out_num_iterations
.
value
*
self
.
num_class
else
:
raise
TypeError
(
'Need Model file to create a booster'
)
...
...
@@ -855,12 +854,11 @@ class Booster(object):
self
.
__get_eval_info
()
elif
model_file
is
not
None
:
"""Prediction task"""
out_num_
total_model
=
ctypes
.
c_int64
(
0
)
out_num_
iterations
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterCreateFromModelfile
(
c_str
(
model_file
),
ctypes
.
byref
(
out_num_
total_model
),
ctypes
.
byref
(
out_num_
iterations
),
ctypes
.
byref
(
self
.
handle
)))
self
.
__num_total_model
=
out_num_total_model
.
value
out_num_class
=
ctypes
.
c_int64
(
0
)
_safe_call
(
_LIB
.
LGBM_BoosterGetNumClasses
(
self
.
handle
,
...
...
src/c_api.cpp
View file @
83007b1c
...
...
@@ -385,7 +385,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
}
DllExport
int
LGBM_DatasetGetSubset
(
const
DatesetHandle
*
full_data
,
const
DatesetHandle
*
handle
,
const
int32_t
*
used_row_indices
,
int32_t
num_used_row_indices
,
const
char
*
parameters
,
...
...
@@ -394,7 +394,7 @@ DllExport int LGBM_DatasetGetSubset(
auto
param
=
ConfigBase
::
Str2Map
(
parameters
);
IOConfig
io_config
;
io_config
.
Set
(
param
);
auto
full_dataset
=
reinterpret_cast
<
const
Dataset
*>
(
*
full_data
);
auto
full_dataset
=
reinterpret_cast
<
const
Dataset
*>
(
*
handle
);
auto
ret
=
std
::
unique_ptr
<
Dataset
>
(
full_dataset
->
Subset
(
used_row_indices
,
num_used_row_indices
,
...
...
@@ -486,11 +486,12 @@ DllExport int LGBM_BoosterCreate(const DatesetHandle train_data,
DllExport
int
LGBM_BoosterCreateFromModelfile
(
const
char
*
filename
,
int64_t
*
num_total_model
,
int64_t
*
out_num_iterations
,
BoosterHandle
*
out
)
{
API_BEGIN
();
auto
ret
=
std
::
unique_ptr
<
Booster
>
(
new
Booster
(
filename
));
*
num_total_model
=
static_cast
<
int64_t
>
(
ret
->
GetBoosting
()
->
NumberOfTotalModel
());
*
out_num_iterations
=
static_cast
<
int64_t
>
(
ret
->
GetBoosting
()
->
NumberOfTotalModel
()
/
ret
->
GetBoosting
()
->
NumberOfClasses
());
*
out
=
ret
.
release
();
API_END
();
}
...
...
src/io/config.cpp
View file @
83007b1c
...
...
@@ -5,7 +5,7 @@
#include <vector>
#include <string>
#include <unordered_
map
>
#include <unordered_
set
>
#include <algorithm>
namespace
LightGBM
{
...
...
@@ -95,16 +95,15 @@ void OverallConfig::GetMetricType(const std::unordered_map<std::string, std::str
// split
std
::
vector
<
std
::
string
>
metrics
=
Common
::
Split
(
value
.
c_str
(),
','
);
// remove duplicates
std
::
unordered_
map
<
std
::
string
,
int
>
metric_
map
s
;
std
::
unordered_
set
<
std
::
string
>
metric_
set
s
;
for
(
auto
&
metric
:
metrics
)
{
std
::
transform
(
metric
.
begin
(),
metric
.
end
(),
metric
.
begin
(),
Common
::
tolower
);
if
(
metric_
map
s
.
count
(
metric
)
<=
0
)
{
metric_
maps
[
metric
]
=
1
;
if
(
metric_
set
s
.
count
(
metric
)
<=
0
)
{
metric_
sets
.
insert
(
metric
)
;
}
}
for
(
auto
&
pair
:
metric_maps
)
{
std
::
string
sub_metric_str
=
pair
.
first
;
metric_types
.
push_back
(
sub_metric_str
);
for
(
auto
&
metric
:
metric_sets
)
{
metric_types
.
push_back
(
metric
);
}
metric_types
.
shrink_to_fit
();
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment