Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
83a14174
Commit
83a14174
authored
Nov 26, 2016
by
Guolin Ke
Browse files
some bugs fixed
parent
0612dcc0
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
96 additions
and
8 deletions
+96
-8
include/LightGBM/c_api.h
include/LightGBM/c_api.h
+1
-1
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+7
-1
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+21
-5
src/c_api.cpp
src/c_api.cpp
+1
-1
src/io/dataset.cpp
src/io/dataset.cpp
+3
-0
src/io/metadata.cpp
src/io/metadata.cpp
+63
-0
No files found.
include/LightGBM/c_api.h
View file @
83a14174
...
...
@@ -149,7 +149,7 @@ DllExport int LGBM_DatasetCreateFromMat(const void* data,
DllExport
int
LGBM_DatasetGetSubset
(
const
DatesetHandle
*
full_data
,
const
int32_t
*
used_row_indices
,
const
int32_t
num_used_row_indices
,
int32_t
num_used_row_indices
,
const
char
*
parameters
,
DatesetHandle
*
out
);
...
...
include/LightGBM/dataset.h
View file @
83a14174
...
...
@@ -47,6 +47,13 @@ public:
*/
void
Init
(
const
char
*
data_filename
,
const
int
num_class
);
/*!
* \brief init as subset
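* \param metadata Metadata of the full dataset that the subset is taken from
* \param used_indices Row indices of the full dataset to keep in the subset
* \param num_used_indices Number of entries in used_indices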
*/
void
Init
(
const
Metadata
&
metadata
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
);
/*!
* \brief Initial with binary memory
* \param memory Pointer to memory
*/
...
...
@@ -77,7 +84,6 @@ public:
void
CheckOrPartition
(
data_size_t
num_all_data
,
const
std
::
vector
<
data_size_t
>&
used_data_indices
);
void
SetLabel
(
const
float
*
label
,
data_size_t
len
);
void
SetWeights
(
const
float
*
weights
,
data_size_t
len
);
...
...
python-package/lightgbm/basic.py
View file @
83a14174
...
...
@@ -410,6 +410,10 @@ class Dataset(object):
params: dict, optional
other parameters
"""
self
.
__label
=
None
self
.
__weight
=
None
self
.
__init_score
=
None
self
.
__group
=
None
if
data
is
None
:
self
.
handle
=
None
return
...
...
@@ -453,10 +457,6 @@ class Dataset(object):
self
.
__init_from_csr
(
csr
,
params_str
,
ref_dataset
)
except
:
raise
TypeError
(
'can not initialize Dataset from {}'
.
format
(
type
(
data
).
__name__
))
self
.
__label
=
None
self
.
__weight
=
None
self
.
__init_score
=
None
self
.
__group
=
None
if
label
is
not
None
:
self
.
set_label
(
label
)
if
self
.
get_label
()
is
None
:
...
...
@@ -505,6 +505,22 @@ class Dataset(object):
return
Dataset
(
data
,
label
=
label
,
max_bin
=
self
.
max_bin
,
reference
=
self
,
weight
=
weight
,
group_id
=
group_id
,
predictor
=
self
.
predictor
,
silent
=
silent
,
params
=
params
)
def subset(self, used_indices, params=None):
    """Create a new Dataset made of the selected rows of this Dataset.

    Parameters
    ----------
    used_indices : list of int
        Row indices to keep; converted to a 1-D int32 numpy array before
        being handed to the native library.
    params : dict, optional
        Other parameters, serialized with ``dict_to_str`` and passed to
        ``LGBM_DatasetGetSubset``.

    Returns
    -------
    Dataset
        A Dataset whose handle is filled in by ``LGBM_DatasetGetSubset``
        and whose ``max_bin`` and ``predictor`` are taken from this Dataset.

    Raises
    ------
    ValueError
        If the resulting subset reports no label.
    """
    row_idx = list_to_1d_numpy(used_indices, np.int32)
    # Dataset(None) returns early with an empty handle; the native call
    # below fills it in.
    subset_ds = Dataset(None)
    subset_ds.handle = ctypes.c_void_p()
    params_str = dict_to_str(params)
    full_handle_ref = ctypes.byref(self.handle)
    row_idx_ptr = row_idx.data_as(ctypes.POINTER(ctypes.c_int32))
    num_rows = row_idx.shape[0]
    status = _LIB.LGBM_DatasetGetSubset(
        full_handle_ref,
        row_idx_ptr,
        num_rows,
        c_str(params_str),
        ctypes.byref(subset_ds.handle))
    _safe_call(status)
    subset_ds.max_bin = self.max_bin
    subset_ds.predictor = self.predictor
    if subset_ds.get_label() is None:
        raise ValueError("label should not be None")
    return subset_ds
def
__init_from_np2d
(
self
,
mat
,
params_str
,
ref_dataset
):
"""
...
...
@@ -1102,7 +1118,7 @@ class Booster(object):
def
__inner_eval
(
self
,
data_name
,
data_idx
,
feval
=
None
):
"""
Evaulate traning or validation data
Evaulate tra
i
ning
or validation data
"""
if
data_idx
>=
self
.
__num_dataset
:
raise
ValueError
(
"data_idx should be smaller than number of dataset"
)
...
...
src/c_api.cpp
View file @
83a14174
...
...
@@ -387,7 +387,7 @@ DllExport int LGBM_DatasetCreateFromCSC(const void* col_ptr,
DllExport
int
LGBM_DatasetGetSubset
(
const
DatesetHandle
*
full_data
,
const
int32_t
*
used_row_indices
,
const
int32_t
num_used_row_indices
,
int32_t
num_used_row_indices
,
const
char
*
parameters
,
DatesetHandle
*
out
)
{
API_BEGIN
();
...
...
src/io/dataset.cpp
View file @
83a14174
...
...
@@ -55,6 +55,7 @@ void Dataset::CopyFeatureMapperFrom(const Dataset* dataset, bool is_enable_spars
num_features_
=
static_cast
<
int
>
(
features_
.
size
());
num_total_features_
=
dataset
->
num_total_features_
;
feature_names_
=
dataset
->
feature_names_
;
label_idx_
=
dataset
->
label_idx_
;
}
Dataset
*
Dataset
::
Subset
(
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
,
bool
is_enable_sparse
)
const
{
...
...
@@ -67,6 +68,8 @@ Dataset* Dataset::Subset(const data_size_t* used_indices, data_size_t num_used_i
ret
->
features_
[
fidx
]
->
PushBin
(
0
,
i
,
iterator
->
Get
(
used_indices
[
i
]));
}
}
ret
->
metadata_
.
Init
(
metadata_
,
used_indices
,
num_used_indices
);
return
ret
.
release
();
}
bool
Dataset
::
SetFloatField
(
const
char
*
field_name
,
const
float
*
field_data
,
data_size_t
num_element
)
{
...
...
src/io/metadata.cpp
View file @
83a14174
...
...
@@ -50,6 +50,69 @@ void Metadata::Init(data_size_t num_data, int num_class, int weight_idx, int que
}
}
void
Metadata
::
Init
(
const
Metadata
&
fullset
,
const
data_size_t
*
used_indices
,
data_size_t
num_used_indices
)
{
num_data_
=
num_used_indices
;
num_class_
=
fullset
.
num_class_
;
label_
=
std
::
vector
<
float
>
(
num_used_indices
);
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
label_
[
i
]
=
fullset
.
label_
[
used_indices
[
i
]];
}
if
(
fullset
.
weights_
.
size
()
>
0
)
{
weights_
=
std
::
vector
<
float
>
(
num_used_indices
);
num_weights_
=
num_used_indices
;
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
weights_
[
i
]
=
fullset
.
weights_
[
used_indices
[
i
]];
}
}
else
{
num_weights_
=
0
;
}
if
(
fullset
.
init_score_
.
size
()
>
0
)
{
init_score_
=
std
::
vector
<
float
>
(
num_used_indices
);
num_init_score_
=
num_used_indices
;
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
init_score_
[
i
]
=
fullset
.
init_score_
[
used_indices
[
i
]];
}
}
else
{
num_init_score_
=
0
;
}
if
(
fullset
.
query_boundaries_
.
size
()
>
0
)
{
std
::
vector
<
data_size_t
>
used_query
;
data_size_t
data_idx
=
0
;
for
(
data_size_t
qid
=
0
;
qid
<
num_queries_
&&
data_idx
<
num_used_indices
;
++
qid
)
{
data_size_t
start
=
fullset
.
query_boundaries_
[
qid
];
data_size_t
end
=
fullset
.
query_boundaries_
[
qid
+
1
];
data_size_t
len
=
end
-
start
;
if
(
used_indices
[
data_idx
]
>
start
)
{
continue
;
}
else
if
(
used_indices
[
data_idx
]
==
start
)
{
if
(
num_used_indices
>=
data_idx
+
len
&&
used_indices
[
data_idx
+
len
-
1
]
==
end
-
1
)
{
used_query
.
push_back
(
qid
);
data_idx
+=
len
;
}
else
{
Log
::
Fatal
(
"Data partition error, data didn't match queries"
);
}
}
else
{
Log
::
Fatal
(
"Data partition error, data didn't match queries"
);
}
}
query_boundaries_
=
std
::
vector
<
data_size_t
>
(
used_query
.
size
()
+
1
);
num_queries_
=
static_cast
<
data_size_t
>
(
used_query
.
size
());
query_boundaries_
[
0
]
=
0
;
for
(
data_size_t
i
=
0
;
i
<
num_queries_
;
++
i
)
{
data_size_t
qid
=
used_query
[
i
];
data_size_t
len
=
fullset
.
query_boundaries_
[
qid
+
1
]
-
fullset
.
query_boundaries_
[
qid
];
query_boundaries_
[
i
+
1
]
=
query_boundaries_
[
i
]
+
len
;
}
}
else
{
num_queries_
=
0
;
}
}
void
Metadata
::
PartitionLabel
(
const
std
::
vector
<
data_size_t
>&
used_indices
)
{
if
(
used_indices
.
size
()
<=
0
)
{
return
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment