Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
3ef3a489
"include/vscode:/vscode.git/clone" did not exist on "662d8a23502173db60e2d9d600c508e06d8ba173"
Commit
3ef3a489
authored
Jan 10, 2017
by
Guolin Ke
Browse files
change init_score to double type
parent
12a96334
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
112 additions
and
57 deletions
+112
-57
R-package/src/lightgbm_R.cpp
R-package/src/lightgbm_R.cpp
+8
-0
include/LightGBM/dataset.h
include/LightGBM/dataset.h
+8
-6
python-package/lightgbm/basic.py
python-package/lightgbm/basic.py
+21
-5
src/boosting/score_updater.hpp
src/boosting/score_updater.hpp
+8
-4
src/c_api.cpp
src/c_api.cpp
+5
-0
src/io/dataset.cpp
src/io/dataset.cpp
+21
-3
src/io/dense_bin.hpp
src/io/dense_bin.hpp
+4
-1
src/io/metadata.cpp
src/io/metadata.cpp
+37
-38
No files found.
R-package/src/lightgbm_R.cpp
View file @
3ef3a489
...
@@ -204,6 +204,8 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
...
@@ -204,6 +204,8 @@ SEXP LGBM_DatasetSetField_R(SEXP handle,
vec
[
i
]
=
static_cast
<
int32_t
>
(
R_INT_PTR
(
field_data
)[
i
]);
vec
[
i
]
=
static_cast
<
int32_t
>
(
R_INT_PTR
(
field_data
)[
i
]);
}
}
CHECK_CALL
(
LGBM_DatasetSetField
(
R_GET_PTR
(
handle
),
name
,
vec
.
data
(),
len
,
C_API_DTYPE_INT32
));
CHECK_CALL
(
LGBM_DatasetSetField
(
R_GET_PTR
(
handle
),
name
,
vec
.
data
(),
len
,
C_API_DTYPE_INT32
));
}
else
if
(
!
strcmp
(
"init_score"
,
name
))
{
CHECK_CALL
(
LGBM_DatasetSetField
(
R_GET_PTR
(
handle
),
name
,
R_REAL_PTR
(
field_data
),
len
,
C_API_DTYPE_FLOAT64
));
}
else
{
}
else
{
std
::
vector
<
float
>
vec
(
len
);
std
::
vector
<
float
>
vec
(
len
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
...
@@ -234,6 +236,12 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
...
@@ -234,6 +236,12 @@ SEXP LGBM_DatasetGetField_R(SEXP handle,
for
(
int
i
=
0
;
i
<
out_len
-
1
;
++
i
)
{
for
(
int
i
=
0
;
i
<
out_len
-
1
;
++
i
)
{
R_INT_PTR
(
field_data
)[
i
]
=
p_data
[
i
+
1
]
-
p_data
[
i
];
R_INT_PTR
(
field_data
)[
i
]
=
p_data
[
i
+
1
]
-
p_data
[
i
];
}
}
}
else
if
(
!
strcmp
(
"init_score"
,
name
))
{
auto
p_data
=
reinterpret_cast
<
const
double
*>
(
res
);
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
out_len
;
++
i
)
{
R_REAL_PTR
(
field_data
)[
i
]
=
p_data
[
i
];
}
}
else
{
}
else
{
auto
p_data
=
reinterpret_cast
<
const
float
*>
(
res
);
auto
p_data
=
reinterpret_cast
<
const
float
*>
(
res
);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
...
...
include/LightGBM/dataset.h
View file @
3ef3a489
...
@@ -94,8 +94,6 @@ public:
...
@@ -94,8 +94,6 @@ public:
* \brief Set initial scores
* \brief Set initial scores
* \param init_score Initial scores, this class will manage memory for init_score.
* \param init_score Initial scores, this class will manage memory for init_score.
*/
*/
void
SetInitScore
(
const
float
*
init_score
,
data_size_t
len
);
void
SetInitScore
(
const
double
*
init_score
,
data_size_t
len
);
void
SetInitScore
(
const
double
*
init_score
,
data_size_t
len
);
...
@@ -195,7 +193,7 @@ public:
...
@@ -195,7 +193,7 @@ public:
* \brief Get initial scores, if not exists, will return nullptr
* \brief Get initial scores, if not exists, will return nullptr
* \return Pointer of initial scores
* \return Pointer of initial scores
*/
*/
inline
const
float
*
init_score
()
const
{
inline
const
double
*
init_score
()
const
{
if
(
!
init_score_
.
empty
())
{
if
(
!
init_score_
.
empty
())
{
return
init_score_
.
data
();
return
init_score_
.
data
();
}
else
{
}
else
{
...
@@ -206,7 +204,7 @@ public:
...
@@ -206,7 +204,7 @@ public:
/*!
/*!
* \brief Get size of initial scores
* \brief Get size of initial scores
*/
*/
inline
data_size
_t
num_init_score
()
const
{
return
num_init_score_
;
}
inline
int64
_t
num_init_score
()
const
{
return
num_init_score_
;
}
/*! \brief Disable copy */
/*! \brief Disable copy */
Metadata
&
operator
=
(
const
Metadata
&
)
=
delete
;
Metadata
&
operator
=
(
const
Metadata
&
)
=
delete
;
...
@@ -239,9 +237,9 @@ private:
...
@@ -239,9 +237,9 @@ private:
/*! \brief Number of querys */
/*! \brief Number of querys */
data_size_t
num_queries_
;
data_size_t
num_queries_
;
/*! \brief Number of Initial score, used to check correct weight file */
/*! \brief Number of Initial score, used to check correct weight file */
data_size
_t
num_init_score_
;
int64
_t
num_init_score_
;
/*! \brief Initial score */
/*! \brief Initial score */
std
::
vector
<
float
>
init_score_
;
std
::
vector
<
double
>
init_score_
;
/*! \brief Queries data */
/*! \brief Queries data */
std
::
vector
<
data_size_t
>
queries_
;
std
::
vector
<
data_size_t
>
queries_
;
/*! \brief mutex for threading safe call */
/*! \brief mutex for threading safe call */
...
@@ -336,10 +334,14 @@ public:
...
@@ -336,10 +334,14 @@ public:
bool
SetFloatField
(
const
char
*
field_name
,
const
float
*
field_data
,
data_size_t
num_element
);
bool
SetFloatField
(
const
char
*
field_name
,
const
float
*
field_data
,
data_size_t
num_element
);
bool
SetDoubleField
(
const
char
*
field_name
,
const
double
*
field_data
,
data_size_t
num_element
);
bool
SetIntField
(
const
char
*
field_name
,
const
int
*
field_data
,
data_size_t
num_element
);
bool
SetIntField
(
const
char
*
field_name
,
const
int
*
field_data
,
data_size_t
num_element
);
bool
GetFloatField
(
const
char
*
field_name
,
data_size_t
*
out_len
,
const
float
**
out_ptr
);
bool
GetFloatField
(
const
char
*
field_name
,
data_size_t
*
out_len
,
const
float
**
out_ptr
);
bool
GetDoubleField
(
const
char
*
field_name
,
data_size_t
*
out_len
,
const
double
**
out_ptr
);
bool
GetIntField
(
const
char
*
field_name
,
data_size_t
*
out_len
,
const
int
**
out_ptr
);
bool
GetIntField
(
const
char
*
field_name
,
data_size_t
*
out_len
,
const
int
**
out_ptr
);
/*!
/*!
...
...
python-package/lightgbm/basic.py
View file @
3ef3a489
...
@@ -90,6 +90,14 @@ def cfloat32_array_to_numpy(cptr, length):
...
@@ -90,6 +90,14 @@ def cfloat32_array_to_numpy(cptr, length):
else
:
else
:
raise
RuntimeError
(
'Expected float pointer'
)
raise
RuntimeError
(
'Expected float pointer'
)
def
cfloat64_array_to_numpy
(
cptr
,
length
):
"""Convert a ctypes double pointer array to a numpy array.
"""
if
isinstance
(
cptr
,
ctypes
.
POINTER
(
ctypes
.
c_double
)):
return
np
.
fromiter
(
cptr
,
dtype
=
np
.
float64
,
count
=
length
)
else
:
raise
RuntimeError
(
'Expected double pointer'
)
def
cint32_array_to_numpy
(
cptr
,
length
):
def
cint32_array_to_numpy
(
cptr
,
length
):
"""Convert a ctypes float pointer array to a numpy array.
"""Convert a ctypes float pointer array to a numpy array.
...
@@ -162,7 +170,7 @@ C_API_PREDICT_LEAF_INDEX = 2
...
@@ -162,7 +170,7 @@ C_API_PREDICT_LEAF_INDEX = 2
"""data type of data field"""
"""data type of data field"""
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
FIELD_TYPE_MAPPER
=
{
"label"
:
C_API_DTYPE_FLOAT32
,
"weight"
:
C_API_DTYPE_FLOAT32
,
"weight"
:
C_API_DTYPE_FLOAT32
,
"init_score"
:
C_API_DTYPE_FLOAT
32
,
"init_score"
:
C_API_DTYPE_FLOAT
64
,
"group"
:
C_API_DTYPE_INT32
}
"group"
:
C_API_DTYPE_INT32
}
...
@@ -616,7 +624,6 @@ class Dataset(object):
...
@@ -616,7 +624,6 @@ class Dataset(object):
for
j
in
range_
(
self
.
predictor
.
num_class
):
for
j
in
range_
(
self
.
predictor
.
num_class
):
new_init_score
[
j
*
num_data
+
i
]
=
init_score
[
i
*
self
.
predictor
.
num_class
+
j
]
new_init_score
[
j
*
num_data
+
i
]
=
init_score
[
i
*
self
.
predictor
.
num_class
+
j
]
init_score
=
new_init_score
init_score
=
new_init_score
init_score
=
init_score
.
astype
(
dtype
=
np
.
float32
,
copy
=
False
)
self
.
set_init_score
(
init_score
)
self
.
set_init_score
(
init_score
)
elif
self
.
predictor
is
not
None
:
elif
self
.
predictor
is
not
None
:
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
raise
TypeError
(
'wrong predictor type {}'
.
format
(
type
(
self
.
predictor
).
__name__
))
...
@@ -813,16 +820,23 @@ class Dataset(object):
...
@@ -813,16 +820,23 @@ class Dataset(object):
ctypes
.
c_int
(
0
),
ctypes
.
c_int
(
0
),
ctypes
.
c_int
(
FIELD_TYPE_MAPPER
[
field_name
])))
ctypes
.
c_int
(
FIELD_TYPE_MAPPER
[
field_name
])))
return
return
dtype
=
np
.
int32
if
field_name
==
'group'
else
np
.
float32
dtype
=
np
.
float32
if
field_name
==
'group'
:
dtype
=
np
.
int32
elif
field_name
==
'init_score'
:
dtype
=
np
.
float64
data
=
list_to_1d_numpy
(
data
,
dtype
,
name
=
field_name
)
data
=
list_to_1d_numpy
(
data
,
dtype
,
name
=
field_name
)
if
data
.
dtype
==
np
.
float32
:
if
data
.
dtype
==
np
.
float32
:
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_float
))
type_data
=
C_API_DTYPE_FLOAT32
type_data
=
C_API_DTYPE_FLOAT32
elif
data
.
dtype
==
np
.
float64
:
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_double
))
type_data
=
C_API_DTYPE_FLOAT64
elif
data
.
dtype
==
np
.
int32
:
elif
data
.
dtype
==
np
.
int32
:
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
))
ptr_data
=
data
.
ctypes
.
data_as
(
ctypes
.
POINTER
(
ctypes
.
c_int32
))
type_data
=
C_API_DTYPE_INT32
type_data
=
C_API_DTYPE_INT32
else
:
else
:
raise
TypeError
(
"Excepted np.float32 or np.int32, meet type({})"
.
format
(
data
.
dtype
))
raise
TypeError
(
"Excepted np.float32
/64
or np.int32, meet type({})"
.
format
(
data
.
dtype
))
if
type_data
!=
FIELD_TYPE_MAPPER
[
field_name
]:
if
type_data
!=
FIELD_TYPE_MAPPER
[
field_name
]:
raise
TypeError
(
"Input type error for set_field"
)
raise
TypeError
(
"Input type error for set_field"
)
_safe_call
(
_LIB
.
LGBM_DatasetSetField
(
_safe_call
(
_LIB
.
LGBM_DatasetSetField
(
...
@@ -864,6 +878,8 @@ class Dataset(object):
...
@@ -864,6 +878,8 @@ class Dataset(object):
return
cint32_array_to_numpy
(
ctypes
.
cast
(
ret
,
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
tmp_out_len
.
value
)
return
cint32_array_to_numpy
(
ctypes
.
cast
(
ret
,
ctypes
.
POINTER
(
ctypes
.
c_int32
)),
tmp_out_len
.
value
)
elif
out_type
.
value
==
C_API_DTYPE_FLOAT32
:
elif
out_type
.
value
==
C_API_DTYPE_FLOAT32
:
return
cfloat32_array_to_numpy
(
ctypes
.
cast
(
ret
,
ctypes
.
POINTER
(
ctypes
.
c_float
)),
tmp_out_len
.
value
)
return
cfloat32_array_to_numpy
(
ctypes
.
cast
(
ret
,
ctypes
.
POINTER
(
ctypes
.
c_float
)),
tmp_out_len
.
value
)
elif
out_type
.
value
==
C_API_DTYPE_FLOAT64
:
return
cfloat64_array_to_numpy
(
ctypes
.
cast
(
ret
,
ctypes
.
POINTER
(
ctypes
.
c_double
)),
tmp_out_len
.
value
)
else
:
else
:
raise
TypeError
(
"Unknown type"
)
raise
TypeError
(
"Unknown type"
)
...
@@ -976,7 +992,7 @@ class Dataset(object):
...
@@ -976,7 +992,7 @@ class Dataset(object):
"""
"""
self
.
init_score
=
init_score
self
.
init_score
=
init_score
if
self
.
handle
is
not
None
and
init_score
is
not
None
:
if
self
.
handle
is
not
None
and
init_score
is
not
None
:
init_score
=
list_to_1d_numpy
(
init_score
,
name
=
'init_score'
)
init_score
=
list_to_1d_numpy
(
init_score
,
np
.
float64
,
name
=
'init_score'
)
self
.
set_field
(
'init_score'
,
init_score
)
self
.
set_field
(
'init_score'
,
init_score
)
def
set_group
(
self
,
group
):
def
set_group
(
self
,
group
):
...
...
src/boosting/score_updater.hpp
View file @
3ef3a489
...
@@ -20,18 +20,22 @@ public:
...
@@ -20,18 +20,22 @@ public:
*/
*/
ScoreUpdater
(
const
Dataset
*
data
,
int
num_class
)
:
data_
(
data
)
{
ScoreUpdater
(
const
Dataset
*
data
,
int
num_class
)
:
data_
(
data
)
{
num_data_
=
data
->
num_data
();
num_data_
=
data
->
num_data
();
size
_t
total_size
=
static_cast
<
size
_t
>
(
num_data_
)
*
num_class
;
int64
_t
total_size
=
static_cast
<
int64
_t
>
(
num_data_
)
*
num_class
;
score_
.
resize
(
total_size
);
score_
.
resize
(
total_size
);
// default start score is zero
// default start score is zero
std
::
fill
(
score_
.
begin
(),
score_
.
end
(),
0.0
f
);
#pragma omp parallel for schedule(static)
const
float
*
init_score
=
data
->
metadata
().
init_score
();
for
(
int64_t
i
=
0
;
i
<
total_size
;
++
i
)
{
score_
[
i
]
=
0.0
f
;
}
const
double
*
init_score
=
data
->
metadata
().
init_score
();
// if exists initial score, will start from it
// if exists initial score, will start from it
if
(
init_score
!=
nullptr
)
{
if
(
init_score
!=
nullptr
)
{
if
((
data
->
metadata
().
num_init_score
()
%
num_data_
)
!=
0
if
((
data
->
metadata
().
num_init_score
()
%
num_data_
)
!=
0
||
(
data
->
metadata
().
num_init_score
()
/
num_data_
)
!=
num_class
)
{
||
(
data
->
metadata
().
num_init_score
()
/
num_data_
)
!=
num_class
)
{
Log
::
Fatal
(
"number of class for initial score error"
);
Log
::
Fatal
(
"number of class for initial score error"
);
}
}
for
(
size_t
i
=
0
;
i
<
total_size
;
++
i
)
{
#pragma omp parallel for schedule(static)
for
(
int64_t
i
=
0
;
i
<
total_size
;
++
i
)
{
score_
[
i
]
=
init_score
[
i
];
score_
[
i
]
=
init_score
[
i
];
}
}
}
}
...
...
src/c_api.cpp
View file @
3ef3a489
...
@@ -536,6 +536,8 @@ DllExport int LGBM_DatasetSetField(DatasetHandle handle,
...
@@ -536,6 +536,8 @@ DllExport int LGBM_DatasetSetField(DatasetHandle handle,
is_success
=
dataset
->
SetFloatField
(
field_name
,
reinterpret_cast
<
const
float
*>
(
field_data
),
static_cast
<
int32_t
>
(
num_element
));
is_success
=
dataset
->
SetFloatField
(
field_name
,
reinterpret_cast
<
const
float
*>
(
field_data
),
static_cast
<
int32_t
>
(
num_element
));
}
else
if
(
type
==
C_API_DTYPE_INT32
)
{
}
else
if
(
type
==
C_API_DTYPE_INT32
)
{
is_success
=
dataset
->
SetIntField
(
field_name
,
reinterpret_cast
<
const
int
*>
(
field_data
),
static_cast
<
int32_t
>
(
num_element
));
is_success
=
dataset
->
SetIntField
(
field_name
,
reinterpret_cast
<
const
int
*>
(
field_data
),
static_cast
<
int32_t
>
(
num_element
));
}
else
if
(
type
==
C_API_DTYPE_FLOAT64
)
{
is_success
=
dataset
->
SetDoubleField
(
field_name
,
reinterpret_cast
<
const
double
*>
(
field_data
),
static_cast
<
int32_t
>
(
num_element
));
}
}
if
(
!
is_success
)
{
throw
std
::
runtime_error
(
"Input data type erorr or field not found"
);
}
if
(
!
is_success
)
{
throw
std
::
runtime_error
(
"Input data type erorr or field not found"
);
}
API_END
();
API_END
();
...
@@ -555,6 +557,9 @@ DllExport int LGBM_DatasetGetField(DatasetHandle handle,
...
@@ -555,6 +557,9 @@ DllExport int LGBM_DatasetGetField(DatasetHandle handle,
}
else
if
(
dataset
->
GetIntField
(
field_name
,
out_len
,
reinterpret_cast
<
const
int
**>
(
out_ptr
)))
{
}
else
if
(
dataset
->
GetIntField
(
field_name
,
out_len
,
reinterpret_cast
<
const
int
**>
(
out_ptr
)))
{
*
out_type
=
C_API_DTYPE_INT32
;
*
out_type
=
C_API_DTYPE_INT32
;
is_success
=
true
;
is_success
=
true
;
}
else
if
(
dataset
->
GetDoubleField
(
field_name
,
out_len
,
reinterpret_cast
<
const
double
**>
(
out_ptr
)))
{
*
out_type
=
C_API_DTYPE_FLOAT64
;
is_success
=
true
;
}
}
if
(
!
is_success
)
{
throw
std
::
runtime_error
(
"Field not found"
);
}
if
(
!
is_success
)
{
throw
std
::
runtime_error
(
"Field not found"
);
}
if
(
*
out_ptr
==
nullptr
)
{
*
out_len
=
0
;
}
if
(
*
out_ptr
==
nullptr
)
{
*
out_len
=
0
;
}
...
...
src/io/dataset.cpp
View file @
3ef3a489
...
@@ -77,7 +77,16 @@ bool Dataset::SetFloatField(const char* field_name, const float* field_data, dat
...
@@ -77,7 +77,16 @@ bool Dataset::SetFloatField(const char* field_name, const float* field_data, dat
metadata_
.
SetLabel
(
field_data
,
num_element
);
metadata_
.
SetLabel
(
field_data
,
num_element
);
}
else
if
(
name
==
std
::
string
(
"weight"
)
||
name
==
std
::
string
(
"weights"
))
{
}
else
if
(
name
==
std
::
string
(
"weight"
)
||
name
==
std
::
string
(
"weights"
))
{
metadata_
.
SetWeights
(
field_data
,
num_element
);
metadata_
.
SetWeights
(
field_data
,
num_element
);
}
else
if
(
name
==
std
::
string
(
"init_score"
))
{
}
else
{
return
false
;
}
return
true
;
}
bool
Dataset
::
SetDoubleField
(
const
char
*
field_name
,
const
double
*
field_data
,
data_size_t
num_element
)
{
std
::
string
name
(
field_name
);
name
=
Common
::
Trim
(
name
);
if
(
name
==
std
::
string
(
"init_score"
))
{
metadata_
.
SetInitScore
(
field_data
,
num_element
);
metadata_
.
SetInitScore
(
field_data
,
num_element
);
}
else
{
}
else
{
return
false
;
return
false
;
...
@@ -107,9 +116,18 @@ bool Dataset::GetFloatField(const char* field_name, data_size_t* out_len, const
...
@@ -107,9 +116,18 @@ bool Dataset::GetFloatField(const char* field_name, data_size_t* out_len, const
}
else
if
(
name
==
std
::
string
(
"weight"
)
||
name
==
std
::
string
(
"weights"
))
{
}
else
if
(
name
==
std
::
string
(
"weight"
)
||
name
==
std
::
string
(
"weights"
))
{
*
out_ptr
=
metadata_
.
weights
();
*
out_ptr
=
metadata_
.
weights
();
*
out_len
=
num_data_
;
*
out_len
=
num_data_
;
}
else
if
(
name
==
std
::
string
(
"init_score"
))
{
}
else
{
return
false
;
}
return
true
;
}
bool
Dataset
::
GetDoubleField
(
const
char
*
field_name
,
data_size_t
*
out_len
,
const
double
**
out_ptr
)
{
std
::
string
name
(
field_name
);
name
=
Common
::
Trim
(
name
);
if
(
name
==
std
::
string
(
"init_score"
))
{
*
out_ptr
=
metadata_
.
init_score
();
*
out_ptr
=
metadata_
.
init_score
();
*
out_len
=
num_data_
;
*
out_len
=
static_cast
<
data_size_t
>
(
metadata_
.
num_init_score
())
;
}
else
{
}
else
{
return
false
;
return
false
;
}
}
...
...
src/io/dense_bin.hpp
View file @
3ef3a489
...
@@ -20,7 +20,10 @@ public:
...
@@ -20,7 +20,10 @@ public:
:
num_data_
(
num_data
)
{
:
num_data_
(
num_data
)
{
data_
.
resize
(
num_data_
);
data_
.
resize
(
num_data_
);
VAL_T
default_bin_T
=
static_cast
<
VAL_T
>
(
default_bin
);
VAL_T
default_bin_T
=
static_cast
<
VAL_T
>
(
default_bin
);
std
::
fill
(
data_
.
begin
(),
data_
.
end
(),
default_bin_T
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
data_
[
i
]
=
default_bin_T
;
}
}
}
~
DenseBin
()
{
~
DenseBin
()
{
...
...
src/io/metadata.cpp
View file @
3ef3a489
...
@@ -36,7 +36,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
...
@@ -36,7 +36,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
}
}
weights_
=
std
::
vector
<
float
>
(
num_data_
);
weights_
=
std
::
vector
<
float
>
(
num_data_
);
num_weights_
=
num_data_
;
num_weights_
=
num_data_
;
std
::
fill
(
weights_
.
begin
(),
weights_
.
end
(),
0.0
f
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
weights_
[
i
]
=
0.0
f
;
}
}
}
if
(
query_idx
>=
0
)
{
if
(
query_idx
>=
0
)
{
if
(
!
query_boundaries_
.
empty
())
{
if
(
!
query_boundaries_
.
empty
())
{
...
@@ -45,7 +48,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
...
@@ -45,7 +48,10 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
}
}
if
(
!
query_weights_
.
empty
())
{
query_weights_
.
clear
();
}
if
(
!
query_weights_
.
empty
())
{
query_weights_
.
clear
();
}
queries_
=
std
::
vector
<
data_size_t
>
(
num_data_
);
queries_
=
std
::
vector
<
data_size_t
>
(
num_data_
);
std
::
fill
(
queries_
.
begin
(),
queries_
.
end
(),
0
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
queries_
[
i
]
=
0
;
}
}
}
}
}
...
@@ -53,6 +59,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
...
@@ -53,6 +59,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
num_data_
=
num_used_indices
;
num_data_
=
num_used_indices
;
label_
=
std
::
vector
<
float
>
(
num_used_indices
);
label_
=
std
::
vector
<
float
>
(
num_used_indices
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
label_
[
i
]
=
fullset
.
label_
[
used_indices
[
i
]];
label_
[
i
]
=
fullset
.
label_
[
used_indices
[
i
]];
}
}
...
@@ -60,6 +67,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
...
@@ -60,6 +67,7 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
if
(
!
fullset
.
weights_
.
empty
())
{
if
(
!
fullset
.
weights_
.
empty
())
{
weights_
=
std
::
vector
<
float
>
(
num_used_indices
);
weights_
=
std
::
vector
<
float
>
(
num_used_indices
);
num_weights_
=
num_used_indices
;
num_weights_
=
num_used_indices
;
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
weights_
[
i
]
=
fullset
.
weights_
[
used_indices
[
i
]];
weights_
[
i
]
=
fullset
.
weights_
[
used_indices
[
i
]];
}
}
...
@@ -68,9 +76,10 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
...
@@ -68,9 +76,10 @@ void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, da
}
}
if
(
!
fullset
.
init_score_
.
empty
())
{
if
(
!
fullset
.
init_score_
.
empty
())
{
int
num_class
=
static_cast
<
int
>
(
fullset
.
num_init_score_
)
/
fullset
.
num_data_
;
int
num_class
=
static_cast
<
int
>
(
fullset
.
num_init_score_
/
fullset
.
num_data_
);
init_score_
=
std
::
vector
<
float
>
(
num_used_indices
*
num_class
);
init_score_
=
std
::
vector
<
double
>
(
num_used_indices
*
num_class
);
num_init_score_
=
num_used_indices
*
num_class
;
num_init_score_
=
static_cast
<
int64_t
>
(
num_used_indices
)
*
num_class
;
#pragma omp parallel for schedule(static)
for
(
int
k
=
0
;
k
<
num_class
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class
;
++
k
)
{
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
for
(
data_size_t
i
=
0
;
i
<
num_used_indices
;
i
++
)
{
init_score_
[
k
*
num_data_
+
i
]
=
fullset
.
init_score_
[
k
*
fullset
.
num_data_
+
used_indices
[
i
]];
init_score_
[
k
*
num_data_
+
i
]
=
fullset
.
init_score_
[
k
*
fullset
.
num_data_
+
used_indices
[
i
]];
...
@@ -121,6 +130,7 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
...
@@ -121,6 +130,7 @@ void Metadata::PartitionLabel(const std::vector<data_size_t>& used_indices) {
auto
old_label
=
label_
;
auto
old_label
=
label_
;
num_data_
=
static_cast
<
data_size_t
>
(
used_indices
.
size
());
num_data_
=
static_cast
<
data_size_t
>
(
used_indices
.
size
());
label_
=
std
::
vector
<
float
>
(
num_data_
);
label_
=
std
::
vector
<
float
>
(
num_data_
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
label_
[
i
]
=
old_label
[
used_indices
[
i
]];
label_
[
i
]
=
old_label
[
used_indices
[
i
]];
}
}
...
@@ -201,7 +211,8 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
...
@@ -201,7 +211,8 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
auto
old_weights
=
weights_
;
auto
old_weights
=
weights_
;
num_weights_
=
num_data_
;
num_weights_
=
num_data_
;
weights_
=
std
::
vector
<
float
>
(
num_data_
);
weights_
=
std
::
vector
<
float
>
(
num_data_
);
for
(
size_t
i
=
0
;
i
<
used_data_indices
.
size
();
++
i
)
{
#pragma omp parallel for schedule(static)
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
used_data_indices
.
size
());
++
i
)
{
weights_
[
i
]
=
old_weights
[
used_data_indices
[
i
]];
weights_
[
i
]
=
old_weights
[
used_data_indices
[
i
]];
}
}
old_weights
.
clear
();
old_weights
.
clear
();
...
@@ -243,9 +254,10 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
...
@@ -243,9 +254,10 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
// get local initial scores
// get local initial scores
if
(
!
init_score_
.
empty
())
{
if
(
!
init_score_
.
empty
())
{
auto
old_scores
=
init_score_
;
auto
old_scores
=
init_score_
;
int
num_class
=
num_init_score_
/
num_all_data
;
int
num_class
=
static_cast
<
int
>
(
num_init_score_
/
num_all_data
);
num_init_score_
=
num_data_
*
num_class
;
num_init_score_
=
static_cast
<
int64_t
>
(
num_data_
)
*
num_class
;
init_score_
=
std
::
vector
<
float
>
(
num_init_score_
);
init_score_
=
std
::
vector
<
double
>
(
num_init_score_
);
#pragma omp parallel for schedule(static)
for
(
int
k
=
0
;
k
<
num_class
;
++
k
){
for
(
int
k
=
0
;
k
<
num_class
;
++
k
){
for
(
size_t
i
=
0
;
i
<
used_data_indices
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
used_data_indices
.
size
();
++
i
)
{
init_score_
[
k
*
num_data_
+
i
]
=
old_scores
[
k
*
num_all_data
+
used_data_indices
[
i
]];
init_score_
[
k
*
num_data_
+
i
]
=
old_scores
[
k
*
num_all_data
+
used_data_indices
[
i
]];
...
@@ -259,26 +271,6 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
...
@@ -259,26 +271,6 @@ void Metadata::CheckOrPartition(data_size_t num_all_data, const std::vector<data
}
}
}
}
void
Metadata
::
SetInitScore
(
const
float
*
init_score
,
data_size_t
len
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
// save to nullptr
if
(
init_score
==
nullptr
||
len
==
0
)
{
init_score_
.
clear
();
num_init_score_
=
0
;
return
;
}
if
((
len
%
num_data_
)
!=
0
)
{
Log
::
Fatal
(
"Initial score size doesn't match data size"
);
}
if
(
!
init_score_
.
empty
())
{
init_score_
.
clear
();
}
num_init_score_
=
len
;
init_score_
=
std
::
vector
<
float
>
(
len
);
for
(
data_size_t
i
=
0
;
i
<
len
;
++
i
)
{
init_score_
[
i
]
=
init_score
[
i
];
}
}
void
Metadata
::
SetInitScore
(
const
double
*
init_score
,
data_size_t
len
)
{
void
Metadata
::
SetInitScore
(
const
double
*
init_score
,
data_size_t
len
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
// save to nullptr
// save to nullptr
...
@@ -292,9 +284,10 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
...
@@ -292,9 +284,10 @@ void Metadata::SetInitScore(const double* init_score, data_size_t len) {
}
}
if
(
!
init_score_
.
empty
())
{
init_score_
.
clear
();
}
if
(
!
init_score_
.
empty
())
{
init_score_
.
clear
();
}
num_init_score_
=
len
;
num_init_score_
=
len
;
init_score_
=
std
::
vector
<
float
>
(
len
);
init_score_
=
std
::
vector
<
double
>
(
len
);
for
(
data_size_t
i
=
0
;
i
<
len
;
++
i
)
{
#pragma omp parallel for schedule(static)
init_score_
[
i
]
=
static_cast
<
float
>
(
init_score
[
i
]);
for
(
int64_t
i
=
0
;
i
<
num_init_score_
;
++
i
)
{
init_score_
[
i
]
=
init_score
[
i
];
}
}
}
}
...
@@ -308,6 +301,7 @@ void Metadata::SetLabel(const float* label, data_size_t len) {
...
@@ -308,6 +301,7 @@ void Metadata::SetLabel(const float* label, data_size_t len) {
}
}
if
(
!
label_
.
empty
())
{
label_
.
clear
();
}
if
(
!
label_
.
empty
())
{
label_
.
clear
();
}
label_
=
std
::
vector
<
float
>
(
num_data_
);
label_
=
std
::
vector
<
float
>
(
num_data_
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_data_
;
++
i
)
{
label_
[
i
]
=
label
[
i
];
label_
[
i
]
=
label
[
i
];
}
}
...
@@ -327,6 +321,7 @@ void Metadata::SetWeights(const float* weights, data_size_t len) {
...
@@ -327,6 +321,7 @@ void Metadata::SetWeights(const float* weights, data_size_t len) {
if
(
!
weights_
.
empty
())
{
weights_
.
clear
();
}
if
(
!
weights_
.
empty
())
{
weights_
.
clear
();
}
num_weights_
=
num_data_
;
num_weights_
=
num_data_
;
weights_
=
std
::
vector
<
float
>
(
num_weights_
);
weights_
=
std
::
vector
<
float
>
(
num_weights_
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
weights_
[
i
]
=
weights
[
i
];
weights_
[
i
]
=
weights
[
i
];
}
}
...
@@ -342,6 +337,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
...
@@ -342,6 +337,7 @@ void Metadata::SetQuery(const data_size_t* query, data_size_t len) {
return
;
return
;
}
}
data_size_t
sum
=
0
;
data_size_t
sum
=
0
;
#pragma omp parallel for schedule(static) reduction(+:sum)
for
(
data_size_t
i
=
0
;
i
<
len
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
len
;
++
i
)
{
sum
+=
query
[
i
];
sum
+=
query
[
i
];
}
}
...
@@ -413,6 +409,7 @@ void Metadata::LoadWeights() {
...
@@ -413,6 +409,7 @@ void Metadata::LoadWeights() {
Log
::
Info
(
"Loading weights..."
);
Log
::
Info
(
"Loading weights..."
);
num_weights_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
num_weights_
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
weights_
=
std
::
vector
<
float
>
(
num_weights_
);
weights_
=
std
::
vector
<
float
>
(
num_weights_
);
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_weights_
;
++
i
)
{
double
tmp_weight
=
0.0
f
;
double
tmp_weight
=
0.0
f
;
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp_weight
);
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp_weight
);
...
@@ -435,26 +432,28 @@ void Metadata::LoadInitialScore() {
...
@@ -435,26 +432,28 @@ void Metadata::LoadInitialScore() {
// use first line to count number class
// use first line to count number class
int
num_class
=
static_cast
<
int
>
(
Common
::
Split
(
reader
.
Lines
()[
0
].
c_str
(),
'\t'
).
size
());
int
num_class
=
static_cast
<
int
>
(
Common
::
Split
(
reader
.
Lines
()[
0
].
c_str
(),
'\t'
).
size
());
data_size_t
num_line
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
data_size_t
num_line
=
static_cast
<
data_size_t
>
(
reader
.
Lines
().
size
());
num_init_score_
=
static_cast
<
data_size_t
>
(
num_line
*
num_class
);
num_init_score_
=
static_cast
<
int64_t
>
(
num_line
)
*
num_class
;
init_score_
=
std
::
vector
<
float
>
(
num_init_score_
);
double
tmp
=
0.0
f
;
init_score_
=
std
::
vector
<
double
>
(
num_init_score_
);
if
(
num_class
==
1
)
{
if
(
num_class
==
1
)
{
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_line
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_line
;
++
i
)
{
double
tmp
=
0.0
f
;
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
Common
::
Atof
(
reader
.
Lines
()[
i
].
c_str
(),
&
tmp
);
init_score_
[
i
]
=
static_cast
<
float
>
(
tmp
);
init_score_
[
i
]
=
static_cast
<
double
>
(
tmp
);
}
}
}
else
{
}
else
{
std
::
vector
<
std
::
string
>
oneline_init_score
;
std
::
vector
<
std
::
string
>
oneline_init_score
;
#pragma omp parallel for schedule(static)
for
(
data_size_t
i
=
0
;
i
<
num_line
;
++
i
)
{
for
(
data_size_t
i
=
0
;
i
<
num_line
;
++
i
)
{
double
tmp
=
0.0
f
;
oneline_init_score
=
Common
::
Split
(
reader
.
Lines
()[
i
].
c_str
(),
'\t'
);
oneline_init_score
=
Common
::
Split
(
reader
.
Lines
()[
i
].
c_str
(),
'\t'
);
if
(
static_cast
<
int
>
(
oneline_init_score
.
size
())
!=
num_class
)
{
if
(
static_cast
<
int
>
(
oneline_init_score
.
size
())
!=
num_class
)
{
Log
::
Fatal
(
"Invalid initial score file. Redundant or insufficient columns."
);
Log
::
Fatal
(
"Invalid initial score file. Redundant or insufficient columns."
);
}
}
for
(
int
k
=
0
;
k
<
num_class
;
++
k
)
{
for
(
int
k
=
0
;
k
<
num_class
;
++
k
)
{
Common
::
Atof
(
oneline_init_score
[
k
].
c_str
(),
&
tmp
);
Common
::
Atof
(
oneline_init_score
[
k
].
c_str
(),
&
tmp
);
init_score_
[
k
*
num_line
+
i
]
=
static_cast
<
float
>
(
tmp
);
init_score_
[
k
*
num_line
+
i
]
=
static_cast
<
double
>
(
tmp
);
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment