Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
28972b86
"git@developer.sourcefind.cn:tianlh/lightgbm-dcu.git" did not exist on "611cf5d414a8273dfdd261651e2f127322a3db89"
Commit
28972b86
authored
Jan 02, 2017
by
Guolin Ke
Browse files
[python-package] fix tmp file access problem in windows
parent
7f778877
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
64 additions
and
34 deletions
+64
-34
include/LightGBM/boosting.h
include/LightGBM/boosting.h
+1
-1
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+2
-0
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+37
-8
python-package/lightgbm/engine.py
python-package/lightgbm/engine.py
+1
-1
src/boosting/gbdt.cpp
src/boosting/gbdt.cpp
+10
-9
src/boosting/gbdt.h
src/boosting/gbdt.h
+3
-7
src/c_api.cpp
src/c_api.cpp
+5
-5
tests/python_package_test/test_basic.py
tests/python_package_test/test_basic.py
+5
-3
No files found.
include/LightGBM/boosting.h
View file @
28972b86
...
@@ -136,7 +136,7 @@ public:
...
@@ -136,7 +136,7 @@ public:
* \brief Dump model to json format string
* \brief Dump model to json format string
* \return Json format string of model
* \return Json format string of model
*/
*/
virtual
std
::
string
DumpModel
()
const
=
0
;
virtual
std
::
string
DumpModel
(
int
num_iteration
)
const
=
0
;
/*!
/*!
* \brief Save model to file
* \brief Save model to file
...
...
include/LightGBM/c_api.h
View file @
28972b86
...
@@ -557,12 +557,14 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
...
@@ -557,12 +557,14 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
/*!
/*!
* \brief dump model to json
* \brief dump model to json
* \param handle handle
* \param handle handle
* \param num_iteration, <= 0 means save all
* \param buffer_len string buffer length, if buffer_len < out_len, re-allocate buffer
* \param buffer_len string buffer length, if buffer_len < out_len, re-allocate buffer
* \param out_len actual output length
* \param out_len actual output length
* \param out_str json format string of model, need to pre-allocate memory before call this
* \param out_str json format string of model, need to pre-allocate memory before call this
* \return 0 when succeed, -1 when failure happens
* \return 0 when succeed, -1 when failure happens
*/
*/
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
int
num_iteration
,
int
buffer_len
,
int
buffer_len
,
int64_t
*
out_len
,
int64_t
*
out_len
,
char
*
out_str
);
char
*
out_str
);
...
...
python-package/lightgbm/basic.py
View file @
28972b86
...
@@ -9,6 +9,7 @@ import sys
...
@@ -9,6 +9,7 @@ import sys
import
ctypes
import
ctypes
import
json
import
json
from
tempfile
import
NamedTemporaryFile
from
tempfile
import
NamedTemporaryFile
import
os
import
numpy
as
np
import
numpy
as
np
import
scipy.sparse
import
scipy.sparse
...
@@ -131,6 +132,22 @@ def param_dict_to_str(data):
...
@@ -131,6 +132,22 @@ def param_dict_to_str(data):
%
(
key
,
type
(
val
).
__name__
))
%
(
key
,
type
(
val
).
__name__
))
return
' '
.
join
(
pairs
)
return
' '
.
join
(
pairs
)
class
_temp_file
:
def
__enter__
(
self
):
with
NamedTemporaryFile
(
prefix
=
"lightgbm_tmp_"
,
delete
=
True
)
as
f
:
self
.
name
=
f
.
name
return
self
def
__exit__
(
self
,
exc_type
,
exc_val
,
exc_tb
):
if
os
.
path
.
isfile
(
self
.
name
):
os
.
remove
(
self
.
name
)
def
readlines
(
self
):
with
open
(
self
.
name
,
"r+"
)
as
f
:
ret
=
f
.
readlines
()
return
ret
def
writelines
(
self
,
lines
):
with
open
(
self
.
name
,
"w+"
)
as
f
:
ret
=
f
.
writelines
(
lines
)
"""marco definition of data type in c_api of LightGBM"""
"""marco definition of data type in c_api of LightGBM"""
C_API_DTYPE_FLOAT32
=
0
C_API_DTYPE_FLOAT32
=
0
C_API_DTYPE_FLOAT64
=
1
C_API_DTYPE_FLOAT64
=
1
...
@@ -276,7 +293,7 @@ class _InnerPredictor(object):
...
@@ -276,7 +293,7 @@ class _InnerPredictor(object):
if
num_iteration
>
self
.
num_total_iteration
:
if
num_iteration
>
self
.
num_total_iteration
:
num_iteration
=
self
.
num_total_iteration
num_iteration
=
self
.
num_total_iteration
if
is_str
(
data
):
if
is_str
(
data
):
with
NamedTemporaryFile
(
mode
=
'w+'
)
as
f
:
with
_temp_file
(
)
as
f
:
_safe_call
(
_LIB
.
LGBM_BoosterPredictForFile
(
_safe_call
(
_LIB
.
LGBM_BoosterPredictForFile
(
self
.
handle
,
self
.
handle
,
c_str
(
data
),
c_str
(
data
),
...
@@ -1336,7 +1353,7 @@ class Booster(object):
...
@@ -1336,7 +1353,7 @@ class Booster(object):
return
self
.
__deepcopy__
(
None
)
return
self
.
__deepcopy__
(
None
)
def
__deepcopy__
(
self
,
_
):
def
__deepcopy__
(
self
,
_
):
with
NamedTemporaryFile
(
mode
=
'w+'
)
as
f
:
with
_temp_file
(
)
as
f
:
self
.
save_model
(
f
.
name
)
self
.
save_model
(
f
.
name
)
return
Booster
(
model_file
=
f
.
name
)
return
Booster
(
model_file
=
f
.
name
)
...
@@ -1346,7 +1363,7 @@ class Booster(object):
...
@@ -1346,7 +1363,7 @@ class Booster(object):
this
.
pop
(
'train_set'
,
None
)
this
.
pop
(
'train_set'
,
None
)
this
.
pop
(
'valid_sets'
,
None
)
this
.
pop
(
'valid_sets'
,
None
)
if
handle
is
not
None
:
if
handle
is
not
None
:
with
NamedTemporaryFile
(
mode
=
'w+'
)
as
f
:
with
_temp_file
(
)
as
f
:
self
.
save_model
(
f
.
name
)
self
.
save_model
(
f
.
name
)
this
[
"handle"
]
=
f
.
readlines
()
this
[
"handle"
]
=
f
.
readlines
()
return
this
return
this
...
@@ -1356,9 +1373,8 @@ class Booster(object):
...
@@ -1356,9 +1373,8 @@ class Booster(object):
if
model
is
not
None
:
if
model
is
not
None
:
handle
=
ctypes
.
c_void_p
()
handle
=
ctypes
.
c_void_p
()
out_num_iterations
=
ctypes
.
c_int64
(
0
)
out_num_iterations
=
ctypes
.
c_int64
(
0
)
with
NamedTemporaryFile
(
mode
=
'w+'
)
as
f
:
with
_temp_file
(
)
as
f
:
f
.
writelines
(
model
)
f
.
writelines
(
model
)
f
.
flush
()
_safe_call
(
_LIB
.
LGBM_BoosterCreateFromModelfile
(
_safe_call
(
_LIB
.
LGBM_BoosterCreateFromModelfile
(
c_str
(
f
.
name
),
c_str
(
f
.
name
),
ctypes
.
byref
(
out_num_iterations
),
ctypes
.
byref
(
out_num_iterations
),
...
@@ -1570,27 +1586,37 @@ class Booster(object):
...
@@ -1570,27 +1586,37 @@ class Booster(object):
filename : str
filename : str
Filename to save
Filename to save
num_iteration: int
num_iteration: int
Number of iteration that want to save. < 0 means save
all
Number of iteration that want to save. < 0 means save
the best iteration(if have)
"""
"""
if
num_iteration
<=
0
:
num_iteration
=
self
.
best_iteration
_safe_call
(
_LIB
.
LGBM_BoosterSaveModel
(
_safe_call
(
_LIB
.
LGBM_BoosterSaveModel
(
self
.
handle
,
self
.
handle
,
num_iteration
,
num_iteration
,
c_str
(
filename
)))
c_str
(
filename
)))
def
dump_model
(
self
):
def
dump_model
(
self
,
num_iteration
=-
1
):
"""
"""
Dump model to json format
Dump model to json format
Parameters
----------
num_iteration: int
Number of iteration that want to dump. < 0 means dump to best iteration(if have)
Returns
Returns
-------
-------
Json format of model
Json format of model
"""
"""
if
num_iteration
<=
0
:
num_iteration
=
self
.
best_iteration
buffer_len
=
1
<<
20
buffer_len
=
1
<<
20
tmp_out_len
=
ctypes
.
c_int64
(
0
)
tmp_out_len
=
ctypes
.
c_int64
(
0
)
string_buffer
=
ctypes
.
create_string_buffer
(
buffer_len
)
string_buffer
=
ctypes
.
create_string_buffer
(
buffer_len
)
ptr_string_buffer
=
ctypes
.
c_char_p
(
*
[
ctypes
.
addressof
(
string_buffer
)])
ptr_string_buffer
=
ctypes
.
c_char_p
(
*
[
ctypes
.
addressof
(
string_buffer
)])
_safe_call
(
_LIB
.
LGBM_BoosterDumpModel
(
_safe_call
(
_LIB
.
LGBM_BoosterDumpModel
(
self
.
handle
,
self
.
handle
,
num_iteration
,
buffer_len
,
buffer_len
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
tmp_out_len
),
ptr_string_buffer
))
ptr_string_buffer
))
...
@@ -1601,6 +1627,7 @@ class Booster(object):
...
@@ -1601,6 +1627,7 @@ class Booster(object):
ptr_string_buffer
=
ctypes
.
c_char_p
(
*
[
ctypes
.
addressof
(
string_buffer
)])
ptr_string_buffer
=
ctypes
.
c_char_p
(
*
[
ctypes
.
addressof
(
string_buffer
)])
_safe_call
(
_LIB
.
LGBM_BoosterDumpModel
(
_safe_call
(
_LIB
.
LGBM_BoosterDumpModel
(
self
.
handle
,
self
.
handle
,
num_iteration
,
actual_len
,
actual_len
,
ctypes
.
byref
(
tmp_out_len
),
ctypes
.
byref
(
tmp_out_len
),
ptr_string_buffer
))
ptr_string_buffer
))
...
@@ -1616,7 +1643,7 @@ class Booster(object):
...
@@ -1616,7 +1643,7 @@ class Booster(object):
Data source for prediction
Data source for prediction
When data type is string, it represents the path of txt file
When data type is string, it represents the path of txt file
num_iteration : int
num_iteration : int
Used iteration for prediction
Used iteration for prediction
, < 0 means predict for best iteration(if have)
raw_score : bool
raw_score : bool
True for predict raw score
True for predict raw score
pred_leaf : bool
pred_leaf : bool
...
@@ -1631,6 +1658,8 @@ class Booster(object):
...
@@ -1631,6 +1658,8 @@ class Booster(object):
Prediction result
Prediction result
"""
"""
predictor
=
_InnerPredictor
(
booster_handle
=
self
.
handle
)
predictor
=
_InnerPredictor
(
booster_handle
=
self
.
handle
)
if
num_iteration
<=
0
:
num_iteration
=
self
.
best_iteration
return
predictor
.
predict
(
data
,
num_iteration
,
raw_score
,
pred_leaf
,
data_has_header
,
is_reshape
)
return
predictor
.
predict
(
data
,
num_iteration
,
raw_score
,
pred_leaf
,
data_has_header
,
is_reshape
)
def
_to_predictor
(
self
):
def
_to_predictor
(
self
):
...
...
python-package/lightgbm/engine.py
View file @
28972b86
...
@@ -190,7 +190,7 @@ def train(params, train_set, num_boost_round=100,
...
@@ -190,7 +190,7 @@ def train(params, train_set, num_boost_round=100,
if
booster
.
attr
(
'best_iteration'
)
is
not
None
:
if
booster
.
attr
(
'best_iteration'
)
is
not
None
:
booster
.
best_iteration
=
int
(
booster
.
attr
(
'best_iteration'
))
+
1
booster
.
best_iteration
=
int
(
booster
.
attr
(
'best_iteration'
))
+
1
else
:
else
:
booster
.
best_iteration
=
num_boost_round
booster
.
best_iteration
=
-
1
return
booster
return
booster
...
...
src/boosting/gbdt.cpp
View file @
28972b86
...
@@ -245,7 +245,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
...
@@ -245,7 +245,7 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
}
}
void
GBDT
::
RollbackOneIter
()
{
void
GBDT
::
RollbackOneIter
()
{
if
(
iter_
=
=
0
)
{
return
;
}
if
(
iter_
<
=
0
)
{
return
;
}
int
cur_iter
=
iter_
+
num_init_iteration_
-
1
;
int
cur_iter
=
iter_
+
num_init_iteration_
-
1
;
// reset score
// reset score
for
(
int
curr_class
=
0
;
curr_class
<
num_class_
;
++
curr_class
)
{
for
(
int
curr_class
=
0
;
curr_class
<
num_class_
;
++
curr_class
)
{
...
@@ -428,7 +428,7 @@ void GBDT::Boosting() {
...
@@ -428,7 +428,7 @@ void GBDT::Boosting() {
GetGradients
(
GetTrainingScore
(
&
num_score
),
gradients_
.
data
(),
hessians_
.
data
());
GetGradients
(
GetTrainingScore
(
&
num_score
),
gradients_
.
data
(),
hessians_
.
data
());
}
}
std
::
string
GBDT
::
DumpModel
()
const
{
std
::
string
GBDT
::
DumpModel
(
int
num_iteration
)
const
{
std
::
stringstream
str_buf
;
std
::
stringstream
str_buf
;
str_buf
<<
"{"
;
str_buf
<<
"{"
;
...
@@ -449,7 +449,11 @@ std::string GBDT::DumpModel() const {
...
@@ -449,7 +449,11 @@ std::string GBDT::DumpModel() const {
<<
std
::
endl
;
<<
std
::
endl
;
str_buf
<<
"
\"
tree_info
\"
:["
;
str_buf
<<
"
\"
tree_info
\"
:["
;
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
models_
.
size
());
++
i
)
{
int
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
if
(
num_iteration
>
0
)
{
num_used_model
=
std
::
min
(
num_iteration
*
num_class_
,
num_used_model
);
}
for
(
int
i
=
0
;
i
<
num_used_model
;
++
i
)
{
if
(
i
>
0
)
{
if
(
i
>
0
)
{
str_buf
<<
","
;
str_buf
<<
","
;
}
}
...
@@ -491,13 +495,10 @@ void GBDT::SaveModelToFile(int num_iteration, const char* filename) const {
...
@@ -491,13 +495,10 @@ void GBDT::SaveModelToFile(int num_iteration, const char* filename) const {
output_file
<<
"feature_names="
<<
Common
::
Join
(
feature_names
.
get
(),
" "
)
<<
std
::
endl
;
output_file
<<
"feature_names="
<<
Common
::
Join
(
feature_names
.
get
(),
" "
)
<<
std
::
endl
;
output_file
<<
std
::
endl
;
output_file
<<
std
::
endl
;
int
num_used_model
=
0
;
int
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
if
(
num_iteration
<=
0
)
{
if
(
num_iteration
>
0
)
{
num_used_model
=
static_cast
<
int
>
(
models_
.
size
());
num_used_model
=
std
::
min
(
num_iteration
*
num_class_
,
num_used_model
);
}
else
{
num_used_model
=
num_iteration
*
num_class_
;
}
}
num_used_model
=
std
::
min
(
num_used_model
,
static_cast
<
int
>
(
models_
.
size
()));
// output tree models
// output tree models
for
(
int
i
=
0
;
i
<
num_used_model
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_used_model
;
++
i
)
{
output_file
<<
"Tree="
<<
i
<<
std
::
endl
;
output_file
<<
"Tree="
<<
i
<<
std
::
endl
;
...
...
src/boosting/gbdt.h
View file @
28972b86
...
@@ -148,7 +148,7 @@ public:
...
@@ -148,7 +148,7 @@ public:
* \brief Dump model to json format string
* \brief Dump model to json format string
* \return Json format string of model
* \return Json format string of model
*/
*/
std
::
string
DumpModel
()
const
override
;
std
::
string
DumpModel
(
int
num_iteration
)
const
override
;
/*!
/*!
* \brief Save model to file
* \brief Save model to file
...
@@ -175,7 +175,6 @@ public:
...
@@ -175,7 +175,6 @@ public:
*/
*/
inline
int
LabelIdx
()
const
override
{
return
label_idx_
;
}
inline
int
LabelIdx
()
const
override
{
return
label_idx_
;
}
/*!
/*!
* \brief Get number of weak sub-models
* \brief Get number of weak sub-models
* \return Number of weak sub-models
* \return Number of weak sub-models
...
@@ -192,13 +191,10 @@ public:
...
@@ -192,13 +191,10 @@ public:
* \brief Set number of iterations for prediction
* \brief Set number of iterations for prediction
*/
*/
inline
void
SetNumIterationForPred
(
int
num_iteration
)
override
{
inline
void
SetNumIterationForPred
(
int
num_iteration
)
override
{
num_iteration_for_pred_
=
static_cast
<
int
>
(
models_
.
size
())
/
num_class_
;
if
(
num_iteration
>
0
)
{
if
(
num_iteration
>
0
)
{
num_iteration_for_pred_
=
num_iteration
;
num_iteration_for_pred_
=
std
::
min
(
num_iteration
,
num_iteration_for_pred_
);
}
else
{
num_iteration_for_pred_
=
static_cast
<
int
>
(
models_
.
size
())
/
num_class_
;
}
}
num_iteration_for_pred_
=
std
::
min
(
num_iteration_for_pred_
,
static_cast
<
int
>
(
models_
.
size
())
/
num_class_
);
}
}
inline
double
GetLeafValue
(
int
tree_idx
,
int
leaf_idx
)
const
{
inline
double
GetLeafValue
(
int
tree_idx
,
int
leaf_idx
)
const
{
...
...
src/c_api.cpp
View file @
28972b86
...
@@ -181,8 +181,8 @@ public:
...
@@ -181,8 +181,8 @@ public:
boosting_
->
SaveModelToFile
(
num_iteration
,
filename
);
boosting_
->
SaveModelToFile
(
num_iteration
,
filename
);
}
}
std
::
string
DumpModel
()
{
std
::
string
DumpModel
(
int
num_iteration
)
{
return
boosting_
->
DumpModel
();
return
boosting_
->
DumpModel
(
num_iteration
);
}
}
double
GetLeafValue
(
int
tree_idx
,
int
leaf_idx
)
const
{
double
GetLeafValue
(
int
tree_idx
,
int
leaf_idx
)
const
{
...
@@ -581,8 +581,7 @@ DllExport int LGBM_BoosterCreateFromModelfile(
...
@@ -581,8 +581,7 @@ DllExport int LGBM_BoosterCreateFromModelfile(
BoosterHandle
*
out
)
{
BoosterHandle
*
out
)
{
API_BEGIN
();
API_BEGIN
();
auto
ret
=
std
::
unique_ptr
<
Booster
>
(
new
Booster
(
filename
));
auto
ret
=
std
::
unique_ptr
<
Booster
>
(
new
Booster
(
filename
));
*
out_num_iterations
=
static_cast
<
int64_t
>
(
ret
->
GetBoosting
()
->
NumberOfTotalModel
()
*
out_num_iterations
=
static_cast
<
int64_t
>
(
ret
->
GetBoosting
()
->
GetCurrentIteration
());
/
ret
->
GetBoosting
()
->
NumberOfClasses
());
*
out
=
ret
.
release
();
*
out
=
ret
.
release
();
API_END
();
API_END
();
}
}
...
@@ -872,12 +871,13 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
...
@@ -872,12 +871,13 @@ DllExport int LGBM_BoosterSaveModel(BoosterHandle handle,
}
}
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
DllExport
int
LGBM_BoosterDumpModel
(
BoosterHandle
handle
,
int
num_iteration
,
int
buffer_len
,
int
buffer_len
,
int64_t
*
out_len
,
int64_t
*
out_len
,
char
*
out_str
)
{
char
*
out_str
)
{
API_BEGIN
();
API_BEGIN
();
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
Booster
*
ref_booster
=
reinterpret_cast
<
Booster
*>
(
handle
);
std
::
string
model
=
ref_booster
->
DumpModel
();
std
::
string
model
=
ref_booster
->
DumpModel
(
num_iteration
);
*
out_len
=
static_cast
<
int64_t
>
(
model
.
size
())
+
1
;
*
out_len
=
static_cast
<
int64_t
>
(
model
.
size
())
+
1
;
if
(
*
out_len
<=
buffer_len
)
{
if
(
*
out_len
<=
buffer_len
)
{
std
::
strcpy
(
out_str
,
model
.
c_str
());
std
::
strcpy
(
out_str
,
model
.
c_str
());
...
...
tests/python_package_test/test_basic.py
View file @
28972b86
# coding: utf-8
# coding: utf-8
# pylint: skip-file
# pylint: skip-file
import
unittest
,
tempfile
import
unittest
,
tempfile
,
os
import
numpy
as
np
import
numpy
as
np
from
sklearn.datasets
import
load_breast_cancer
from
sklearn.datasets
import
load_breast_cancer
from
sklearn.model_selection
import
train_test_split
from
sklearn.model_selection
import
train_test_split
...
@@ -31,9 +31,11 @@ class TestBasic(unittest.TestCase):
...
@@ -31,9 +31,11 @@ class TestBasic(unittest.TestCase):
bst
.
save_model
(
"model.txt"
)
bst
.
save_model
(
"model.txt"
)
pred_from_matr
=
bst
.
predict
(
X_test
)
pred_from_matr
=
bst
.
predict
(
X_test
)
with
tempfile
.
NamedTemporaryFile
()
as
f
:
with
tempfile
.
NamedTemporaryFile
()
as
f
:
tname
=
f
.
name
with
open
(
tname
,
"w+b"
)
as
f
:
np
.
savetxt
(
f
,
X_test
,
delimiter
=
','
)
np
.
savetxt
(
f
,
X_test
,
delimiter
=
','
)
f
.
flush
(
)
pred_from_file
=
bst
.
predict
(
tname
)
pred_from_file
=
bst
.
predict
(
f
.
name
)
os
.
remove
(
t
name
)
self
.
assertEqual
(
len
(
pred_from_matr
),
len
(
pred_from_file
))
self
.
assertEqual
(
len
(
pred_from_matr
),
len
(
pred_from_file
))
for
preds
in
zip
(
pred_from_matr
,
pred_from_file
):
for
preds
in
zip
(
pred_from_matr
,
pred_from_file
):
self
.
assertAlmostEqual
(
*
preds
,
places
=
5
)
self
.
assertAlmostEqual
(
*
preds
,
places
=
5
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment