Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tianlh
LightGBM-DCU
Commits
548cec82
Commit
548cec82
authored
Oct 21, 2025
by
Jeff Daily
Browse files
Merge branch 'master' into rocm3
parents
2f7bd8ef
5dbfcdc4
Changes
195
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
420 additions
and
375 deletions
+420
-375
src/treelearner/serial_tree_learner.h
src/treelearner/serial_tree_learner.h
+3
-3
src/treelearner/split_info.hpp
src/treelearner/split_info.hpp
+3
-3
src/treelearner/tree_learner.cpp
src/treelearner/tree_learner.cpp
+2
-0
src/treelearner/voting_parallel_tree_learner.cpp
src/treelearner/voting_parallel_tree_learner.cpp
+3
-2
swig/StringArray.hpp
swig/StringArray.hpp
+3
-3
tests/cpp_tests/test_array_args.cpp
tests/cpp_tests/test_array_args.cpp
+1
-0
tests/cpp_tests/test_arrow.cpp
tests/cpp_tests/test_arrow.cpp
+1
-0
tests/cpp_tests/test_byte_buffer.cpp
tests/cpp_tests/test_byte_buffer.cpp
+1
-0
tests/cpp_tests/test_chunked_array.cpp
tests/cpp_tests/test_chunked_array.cpp
+3
-0
tests/cpp_tests/test_serialize.cpp
tests/cpp_tests/test_serialize.cpp
+1
-0
tests/cpp_tests/test_single_row.cpp
tests/cpp_tests/test_single_row.cpp
+3
-1
tests/cpp_tests/test_stream.cpp
tests/cpp_tests/test_stream.cpp
+2
-0
tests/cpp_tests/testutils.cpp
tests/cpp_tests/testutils.cpp
+360
-359
tests/cpp_tests/testutils.h
tests/cpp_tests/testutils.h
+3
-3
tests/python_package_test/test_engine.py
tests/python_package_test/test_engine.py
+31
-1
No files found.
src/treelearner/serial_tree_learner.h
View file @
548cec82
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
*/
#ifndef LIGHTGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
#ifndef LIGHTGBM_
SRC_
TREELEARNER_SERIAL_TREE_LEARNER_H_
#define LIGHTGBM_TREELEARNER_SERIAL_TREE_LEARNER_H_
#define LIGHTGBM_
SRC_
TREELEARNER_SERIAL_TREE_LEARNER_H_
#include <LightGBM/dataset.h>
#include <LightGBM/dataset.h>
#include <LightGBM/tree.h>
#include <LightGBM/tree.h>
...
@@ -247,4 +247,4 @@ inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leaf_idx) con
...
@@ -247,4 +247,4 @@ inline data_size_t SerialTreeLearner::GetGlobalDataCountInLeaf(int leaf_idx) con
}
}
}
// namespace LightGBM
}
// namespace LightGBM
#endif // L
ightGBM
_TREELEARNER_SERIAL_TREE_LEARNER_H_
#endif // L
IGHTGBM_SRC
_TREELEARNER_SERIAL_TREE_LEARNER_H_
src/treelearner/split_info.hpp
View file @
548cec82
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Copyright (c) 2016 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
*/
#ifndef LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_
#ifndef LIGHTGBM_
SRC_
TREELEARNER_SPLIT_INFO_HPP_
#define LIGHTGBM_TREELEARNER_SPLIT_INFO_HPP_
#define LIGHTGBM_
SRC_
TREELEARNER_SPLIT_INFO_HPP_
#include <LightGBM/meta.h>
#include <LightGBM/meta.h>
...
@@ -291,4 +291,4 @@ struct LightSplitInfo {
...
@@ -291,4 +291,4 @@ struct LightSplitInfo {
};
};
}
// namespace LightGBM
}
// namespace LightGBM
#endif // L
ightGBM
_TREELEARNER_SPLIT_INFO_HPP_
#endif // L
IGHTGBM_SRC
_TREELEARNER_SPLIT_INFO_HPP_
src/treelearner/tree_learner.cpp
View file @
548cec82
...
@@ -4,6 +4,8 @@
...
@@ -4,6 +4,8 @@
*/
*/
#include <LightGBM/tree_learner.h>
#include <LightGBM/tree_learner.h>
#include <string>
#include "gpu_tree_learner.h"
#include "gpu_tree_learner.h"
#include "linear_tree_learner.h"
#include "linear_tree_learner.h"
#include "parallel_tree_learner.h"
#include "parallel_tree_learner.h"
...
...
src/treelearner/voting_parallel_tree_learner.cpp
View file @
548cec82
...
@@ -4,7 +4,9 @@
...
@@ -4,7 +4,9 @@
*/
*/
#include <LightGBM/utils/common.h>
#include <LightGBM/utils/common.h>
#include <algorithm>
#include <cstring>
#include <cstring>
#include <functional>
#include <tuple>
#include <tuple>
#include <vector>
#include <vector>
...
@@ -13,8 +15,7 @@
...
@@ -13,8 +15,7 @@
namespace
LightGBM
{
namespace
LightGBM
{
template
<
typename
TREELEARNER_T
>
template
<
typename
TREELEARNER_T
>
VotingParallelTreeLearner
<
TREELEARNER_T
>::
VotingParallelTreeLearner
(
const
Config
*
config
)
VotingParallelTreeLearner
<
TREELEARNER_T
>::
VotingParallelTreeLearner
(
const
Config
*
config
)
:
TREELEARNER_T
(
config
)
{
:
TREELEARNER_T
(
config
)
{
top_k_
=
this
->
config_
->
top_k
;
top_k_
=
this
->
config_
->
top_k
;
}
}
...
...
swig/StringArray.hpp
View file @
548cec82
...
@@ -4,8 +4,8 @@
...
@@ -4,8 +4,8 @@
*
*
* Author: Alberto Ferreira
* Author: Alberto Ferreira
*/
*/
#ifndef LIGHTGBM_SWIG_STRING
_
ARRAY_H_
#ifndef LIGHTGBM_SWIG_STRINGARRAY_H
PP
_
#define LIGHTGBM_SWIG_STRING
_
ARRAY_H_
#define LIGHTGBM_SWIG_STRINGARRAY_H
PP
_
#include <algorithm>
#include <algorithm>
#include <new>
#include <new>
...
@@ -137,4 +137,4 @@ class StringArray {
...
@@ -137,4 +137,4 @@ class StringArray {
std
::
vector
<
char
*>
_array
;
std
::
vector
<
char
*>
_array
;
};
};
#endif // LIGHTGBM_SWIG_STRING
_
ARRAY_H_
#endif // LIGHTGBM_SWIG_STRINGARRAY_H
PP
_
tests/cpp_tests/test_array_args.cpp
View file @
548cec82
...
@@ -8,6 +8,7 @@
...
@@ -8,6 +8,7 @@
#include <LightGBM/utils/array_args.h>
#include <LightGBM/utils/array_args.h>
#include <random>
#include <random>
#include <vector>
using
LightGBM
::
data_size_t
;
using
LightGBM
::
data_size_t
;
using
LightGBM
::
score_t
;
using
LightGBM
::
score_t
;
...
...
tests/cpp_tests/test_arrow.cpp
View file @
548cec82
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
#include <cmath>
#include <cmath>
#include <cstdlib>
#include <cstdlib>
#include <vector>
using
LightGBM
::
ArrowChunkedArray
;
using
LightGBM
::
ArrowChunkedArray
;
using
LightGBM
::
ArrowTable
;
using
LightGBM
::
ArrowTable
;
...
...
tests/cpp_tests/test_byte_buffer.cpp
View file @
548cec82
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <LightGBM/utils/byte_buffer.h>
#include <LightGBM/utils/byte_buffer.h>
#include <memory>
#include <random>
#include <random>
using
LightGBM
::
ByteBuffer
;
using
LightGBM
::
ByteBuffer
;
...
...
tests/cpp_tests/test_chunked_array.cpp
View file @
548cec82
...
@@ -5,6 +5,9 @@
...
@@ -5,6 +5,9 @@
* Author: Alberto Ferreira
* Author: Alberto Ferreira
*/
*/
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <vector>
#include "../include/LightGBM/utils/chunked_array.hpp"
#include "../include/LightGBM/utils/chunked_array.hpp"
using
LightGBM
::
ChunkedArray
;
using
LightGBM
::
ChunkedArray
;
...
...
tests/cpp_tests/test_serialize.cpp
View file @
548cec82
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
#include <LightGBM/dataset.h>
#include <LightGBM/dataset.h>
#include <iostream>
#include <iostream>
#include <string>
using
LightGBM
::
ByteBuffer
;
using
LightGBM
::
ByteBuffer
;
using
LightGBM
::
Dataset
;
using
LightGBM
::
Dataset
;
...
...
tests/cpp_tests/test_single_row.cpp
View file @
548cec82
...
@@ -7,8 +7,10 @@
...
@@ -7,8 +7,10 @@
#include <testutils.h>
#include <testutils.h>
#include <LightGBM/c_api.h>
#include <LightGBM/c_api.h>
#include <
iostrea
m>
#include <
algorith
m>
#include <fstream>
#include <fstream>
#include <iostream>
#include <vector>
using
LightGBM
::
TestUtils
;
using
LightGBM
::
TestUtils
;
...
...
tests/cpp_tests/test_stream.cpp
View file @
548cec82
...
@@ -10,6 +10,8 @@
...
@@ -10,6 +10,8 @@
#include <LightGBM/dataset.h>
#include <LightGBM/dataset.h>
#include <iostream>
#include <iostream>
#include <string>
#include <vector>
using
LightGBM
::
Dataset
;
using
LightGBM
::
Dataset
;
using
LightGBM
::
Log
;
using
LightGBM
::
Log
;
...
...
tests/cpp_tests/testutils.cpp
View file @
548cec82
...
@@ -3,437 +3,438 @@
...
@@ -3,437 +3,438 @@
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
*/
#include <gtest/gtest.h>
#include <testutils.h>
#include <testutils.h>
#include <LightGBM/c_api.h>
#include <LightGBM/c_api.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/random.h>
#include <gtest/gtest.h>
#include <string>
#include <string>
#include <thread>
#include <thread>
#include <utility>
#include <utility>
#include <vector>
using
LightGBM
::
Log
;
using
LightGBM
::
Log
;
using
LightGBM
::
Random
;
using
LightGBM
::
Random
;
namespace
LightGBM
{
namespace
LightGBM
{
/*!
/*!
* Creates a Dataset from the internal repository examples.
* Creates a Dataset from the internal repository examples.
*/
*/
int
TestUtils
::
LoadDatasetFromExamples
(
const
char
*
filename
,
const
char
*
config
,
DatasetHandle
*
out
)
{
int
TestUtils
::
LoadDatasetFromExamples
(
const
char
*
filename
,
const
char
*
config
,
DatasetHandle
*
out
)
{
std
::
string
fullPath
(
"examples/"
);
std
::
string
fullPath
(
"examples/"
);
fullPath
+=
filename
;
fullPath
+=
filename
;
Log
::
Info
(
"Debug sample data path: %s"
,
fullPath
.
c_str
());
Log
::
Info
(
"Debug sample data path: %s"
,
fullPath
.
c_str
());
return
LGBM_DatasetCreateFromFile
(
return
LGBM_DatasetCreateFromFile
(
fullPath
.
c_str
(),
fullPath
.
c_str
(),
config
,
config
,
nullptr
,
nullptr
,
out
);
out
);
}
}
/*!
/*!
* Creates fake data in the passed vectors.
* Creates fake data in the passed vectors.
*/
*/
void
TestUtils
::
CreateRandomDenseData
(
void
TestUtils
::
CreateRandomDenseData
(
int32_t
nrows
,
int32_t
nrows
,
int32_t
ncols
,
int32_t
ncols
,
int32_t
nclasses
,
int32_t
nclasses
,
std
::
vector
<
double
>*
features
,
std
::
vector
<
double
>*
features
,
std
::
vector
<
float
>*
labels
,
std
::
vector
<
float
>*
labels
,
std
::
vector
<
float
>*
weights
,
std
::
vector
<
float
>*
weights
,
std
::
vector
<
double
>*
init_scores
,
std
::
vector
<
double
>*
init_scores
,
std
::
vector
<
int32_t
>*
groups
)
{
std
::
vector
<
int32_t
>*
groups
)
{
Random
rand
(
42
);
Random
rand
(
42
);
features
->
reserve
(
nrows
*
ncols
);
features
->
reserve
(
nrows
*
ncols
);
for
(
int32_t
row
=
0
;
row
<
nrows
;
row
++
)
{
for
(
int32_t
row
=
0
;
row
<
nrows
;
row
++
)
{
for
(
int32_t
col
=
0
;
col
<
ncols
;
col
++
)
{
for
(
int32_t
col
=
0
;
col
<
ncols
;
col
++
)
{
features
->
push_back
(
rand
.
NextFloat
());
features
->
push_back
(
rand
.
NextFloat
());
}
}
}
CreateRandomMetadata
(
nrows
,
nclasses
,
labels
,
weights
,
init_scores
,
groups
);
}
}
/*!
CreateRandomMetadata
(
nrows
,
nclasses
,
labels
,
weights
,
init_scores
,
groups
);
* Creates fake data in the passed vectors.
}
*/
void
TestUtils
::
CreateRandomSparseData
(
/*!
int32_t
nrows
,
* Creates fake data in the passed vectors.
int32_t
ncols
,
*/
int32_t
nclasses
,
void
TestUtils
::
CreateRandomSparseData
(
float
sparse_percent
,
int32_t
nrows
,
std
::
vector
<
int32_t
>*
indptr
,
int32_t
ncols
,
std
::
vector
<
int32_t
>*
indices
,
int32_t
nclasses
,
std
::
vector
<
double
>*
values
,
float
sparse_percent
,
std
::
vector
<
float
>*
labels
,
std
::
vector
<
int32_t
>*
indptr
,
std
::
vector
<
float
>*
weights
,
std
::
vector
<
int32_t
>*
indices
,
std
::
vector
<
double
>*
init_scores
,
std
::
vector
<
double
>*
values
,
std
::
vector
<
int32_t
>*
groups
)
{
std
::
vector
<
float
>*
labels
,
Random
rand
(
42
);
std
::
vector
<
float
>*
weights
,
indptr
->
reserve
(
static_cast
<
int32_t
>
(
nrows
+
1
));
std
::
vector
<
double
>*
init_scores
,
indices
->
reserve
(
static_cast
<
int32_t
>
(
sparse_percent
*
nrows
*
ncols
));
std
::
vector
<
int32_t
>*
groups
)
{
values
->
reserve
(
static_cast
<
int32_t
>
(
sparse_percent
*
nrows
*
ncols
));
Random
rand
(
42
);
indptr
->
reserve
(
static_cast
<
int32_t
>
(
nrows
+
1
));
indptr
->
push_back
(
0
);
indices
->
reserve
(
static_cast
<
int32_t
>
(
sparse_percent
*
nrows
*
ncols
));
for
(
int32_t
row
=
0
;
row
<
nrows
;
row
++
)
{
values
->
reserve
(
static_cast
<
int32_t
>
(
sparse_percent
*
nrows
*
ncols
));
for
(
int32_t
col
=
0
;
col
<
ncols
;
col
++
)
{
float
rnd
=
rand
.
NextFloat
();
indptr
->
push_back
(
0
);
if
(
rnd
<
sparse_percent
)
{
for
(
int32_t
row
=
0
;
row
<
nrows
;
row
++
)
{
indices
->
push_back
(
col
);
for
(
int32_t
col
=
0
;
col
<
ncols
;
col
++
)
{
values
->
push_back
(
rand
.
NextFloat
());
float
rnd
=
rand
.
NextFloat
();
}
if
(
rnd
<
sparse_percent
)
{
indices
->
push_back
(
col
);
values
->
push_back
(
rand
.
NextFloat
());
}
}
indptr
->
push_back
(
static_cast
<
int32_t
>
(
indices
->
size
()
-
1
));
}
}
indptr
->
push_back
(
static_cast
<
int32_t
>
(
indices
->
size
()
-
1
));
}
CreateRandomMetadata
(
nrows
,
nclasses
,
labels
,
weights
,
init_scores
,
groups
);
}
CreateRandomMetadata
(
nrows
,
nclasses
,
labels
,
weights
,
init_scores
,
groups
);
/*!
* Creates fake data in the passed vectors.
*/
void
TestUtils
::
CreateRandomMetadata
(
int32_t
nrows
,
int32_t
nclasses
,
std
::
vector
<
float
>*
labels
,
std
::
vector
<
float
>*
weights
,
std
::
vector
<
double
>*
init_scores
,
std
::
vector
<
int32_t
>*
groups
)
{
Random
rand
(
42
);
labels
->
reserve
(
nrows
);
if
(
weights
)
{
weights
->
reserve
(
nrows
);
}
if
(
init_scores
)
{
init_scores
->
reserve
(
nrows
*
nclasses
);
}
if
(
groups
)
{
groups
->
reserve
(
nrows
);
}
}
/*!
int32_t
group
=
0
;
* Creates fake data in the passed vectors.
*/
for
(
int32_t
row
=
0
;
row
<
nrows
;
row
++
)
{
void
TestUtils
::
CreateRandomMetadata
(
int32_t
nrows
,
labels
->
push_back
(
rand
.
NextFloat
());
int32_t
nclasses
,
std
::
vector
<
float
>*
labels
,
std
::
vector
<
float
>*
weights
,
std
::
vector
<
double
>*
init_scores
,
std
::
vector
<
int32_t
>*
groups
)
{
Random
rand
(
42
);
labels
->
reserve
(
nrows
);
if
(
weights
)
{
if
(
weights
)
{
weights
->
reserve
(
nrows
);
weights
->
push_back
(
rand
.
NextFloat
()
);
}
}
if
(
init_scores
)
{
if
(
init_scores
)
{
init_scores
->
reserve
(
nrows
*
nclasses
);
for
(
int32_t
i
=
0
;
i
<
nclasses
;
i
++
)
{
init_scores
->
push_back
(
rand
.
NextFloat
());
}
}
}
if
(
groups
)
{
if
(
groups
)
{
groups
->
reserve
(
nrows
);
if
(
rand
.
NextFloat
()
>
0.95
)
{
group
++
;
}
groups
->
push_back
(
group
);
}
}
}
}
void
TestUtils
::
StreamDenseDataset
(
DatasetHandle
dataset_handle
,
int32_t
nrows
,
int32_t
ncols
,
int32_t
nclasses
,
int32_t
batch_count
,
const
std
::
vector
<
double
>*
features
,
const
std
::
vector
<
float
>*
labels
,
const
std
::
vector
<
float
>*
weights
,
const
std
::
vector
<
double
>*
init_scores
,
const
std
::
vector
<
int32_t
>*
groups
)
{
int
result
=
LGBM_DatasetSetWaitForManualFinish
(
dataset_handle
,
1
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetSetWaitForManualFinish result code: "
<<
result
;
Log
::
Info
(
" Begin StreamDenseDataset"
);
if
((
nrows
%
batch_count
)
!=
0
)
{
Log
::
Fatal
(
"This utility method only handles nrows that are a multiple of batch_count"
);
}
int32_t
group
=
0
;
const
double
*
features_ptr
=
features
->
data
();
const
float
*
labels_ptr
=
labels
->
data
();
const
float
*
weights_ptr
=
nullptr
;
if
(
weights
)
{
weights_ptr
=
weights
->
data
();
}
for
(
int32_t
row
=
0
;
row
<
nrows
;
row
++
)
{
// Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
labels
->
push_back
(
rand
.
NextFloat
());
std
::
vector
<
double
>
init_score_batch
;
if
(
weights
)
{
const
double
*
init_scores_ptr
=
nullptr
;
weights
->
push_back
(
rand
.
NextFloat
());
if
(
init_scores
)
{
}
init_score_batch
.
reserve
(
nclasses
*
batch_count
);
if
(
init_scores
)
{
init_scores_ptr
=
init_score_batch
.
data
();
for
(
int32_t
i
=
0
;
i
<
nclasses
;
i
++
)
{
init_scores
->
push_back
(
rand
.
NextFloat
());
}
}
if
(
groups
)
{
if
(
rand
.
NextFloat
()
>
0.95
)
{
group
++
;
}
groups
->
push_back
(
group
);
}
}
}
}
void
TestUtils
::
StreamDenseDataset
(
DatasetHandle
dataset_handle
,
const
int32_t
*
groups_ptr
=
nullptr
;
int32_t
nrows
,
if
(
groups
)
{
int32_t
ncols
,
groups_ptr
=
groups
->
data
();
int32_t
nclasses
,
}
int32_t
batch_count
,
const
std
::
vector
<
double
>*
features
,
const
std
::
vector
<
float
>*
labels
,
const
std
::
vector
<
float
>*
weights
,
const
std
::
vector
<
double
>*
init_scores
,
const
std
::
vector
<
int32_t
>*
groups
)
{
int
result
=
LGBM_DatasetSetWaitForManualFinish
(
dataset_handle
,
1
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetSetWaitForManualFinish result code: "
<<
result
;
Log
::
Info
(
" Begin StreamDenseDataset"
);
if
((
nrows
%
batch_count
)
!=
0
)
{
Log
::
Fatal
(
"This utility method only handles nrows that are a multiple of batch_count"
);
}
const
double
*
features_ptr
=
features
->
data
();
auto
start_time
=
std
::
chrono
::
steady_clock
::
now
();
const
float
*
labels_ptr
=
labels
->
data
();
const
float
*
weights_ptr
=
nullptr
;
if
(
weights
)
{
weights_ptr
=
weights
->
data
();
}
// Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
for
(
int32_t
i
=
0
;
i
<
nrows
;
i
+=
batch_count
)
{
std
::
vector
<
double
>
init_score_batch
;
const
double
*
init_scores_ptr
=
nullptr
;
if
(
init_scores
)
{
if
(
init_scores
)
{
init_score_batch
.
reserve
(
nclasses
*
batch_count
);
init_scores_ptr
=
CreateInitScoreBatch
(
&
init_score_batch
,
i
,
nrows
,
nclasses
,
batch_count
,
init_scores
);
init_scores_ptr
=
init_score_batch
.
data
();
}
}
const
int32_t
*
groups_ptr
=
nullptr
;
result
=
LGBM_DatasetPushRowsWithMetadata
(
dataset_handle
,
if
(
groups
)
{
features_ptr
,
groups_ptr
=
groups
->
data
();
1
,
batch_count
,
ncols
,
i
,
labels_ptr
,
weights_ptr
,
init_scores_ptr
,
groups_ptr
,
0
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetPushRowsWithMetadata result code: "
<<
result
;
if
(
result
!=
0
)
{
FAIL
()
<<
"LGBM_DatasetPushRowsWithMetadata failed"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
}
auto
start_time
=
std
::
chrono
::
steady_clock
::
now
();
features_ptr
+=
batch_count
*
ncols
;
labels_ptr
+=
batch_count
;
for
(
int32_t
i
=
0
;
i
<
nrows
;
i
+=
batch_count
)
{
if
(
weights_ptr
)
{
if
(
init_scores
)
{
weights_ptr
+=
batch_count
;
init_scores_ptr
=
CreateInitScoreBatch
(
&
init_score_batch
,
i
,
nrows
,
nclasses
,
batch_count
,
init_scores
);
}
}
if
(
groups_ptr
)
{
groups_ptr
+=
batch_count
;
result
=
LGBM_DatasetPushRowsWithMetadata
(
dataset_handle
,
features_ptr
,
1
,
batch_count
,
ncols
,
i
,
labels_ptr
,
weights_ptr
,
init_scores_ptr
,
groups_ptr
,
0
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetPushRowsWithMetadata result code: "
<<
result
;
if
(
result
!=
0
)
{
FAIL
()
<<
"LGBM_DatasetPushRowsWithMetadata failed"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
features_ptr
+=
batch_count
*
ncols
;
labels_ptr
+=
batch_count
;
if
(
weights_ptr
)
{
weights_ptr
+=
batch_count
;
}
if
(
groups_ptr
)
{
groups_ptr
+=
batch_count
;
}
}
}
}
auto
cur_time
=
std
::
chrono
::
steady_clock
::
now
();
auto
cur_time
=
std
::
chrono
::
steady_clock
::
now
();
Log
::
Info
(
" Time: %d"
,
cur_time
-
start_time
);
Log
::
Info
(
" Time: %d"
,
cur_time
-
start_time
);
}
void
TestUtils
::
StreamSparseDataset
(
DatasetHandle
dataset_handle
,
int32_t
nrows
,
int32_t
nclasses
,
int32_t
batch_count
,
const
std
::
vector
<
int32_t
>*
indptr
,
const
std
::
vector
<
int32_t
>*
indices
,
const
std
::
vector
<
double
>*
values
,
const
std
::
vector
<
float
>*
labels
,
const
std
::
vector
<
float
>*
weights
,
const
std
::
vector
<
double
>*
init_scores
,
const
std
::
vector
<
int32_t
>*
groups
)
{
int
result
=
LGBM_DatasetSetWaitForManualFinish
(
dataset_handle
,
1
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetSetWaitForManualFinish result code: "
<<
result
;
Log
::
Info
(
" Begin StreamSparseDataset"
);
if
((
nrows
%
batch_count
)
!=
0
)
{
Log
::
Fatal
(
"This utility method only handles nrows that are a multiple of batch_count"
);
}
}
void
TestUtils
::
StreamSparseDataset
(
DatasetHandle
dataset_handle
,
const
int32_t
*
indptr_ptr
=
indptr
->
data
();
int32_t
nrows
,
const
int32_t
*
indices_ptr
=
indices
->
data
();
int32_t
nclasses
,
const
double
*
values_ptr
=
values
->
data
();
int32_t
batch_count
,
const
float
*
labels_ptr
=
labels
->
data
();
const
std
::
vector
<
int32_t
>*
indptr
,
const
float
*
weights_ptr
=
nullptr
;
const
std
::
vector
<
int32_t
>*
indices
,
if
(
weights
)
{
const
std
::
vector
<
double
>*
values
,
weights_ptr
=
weights
->
data
();
const
std
::
vector
<
float
>*
labels
,
}
const
std
::
vector
<
float
>*
weights
,
const
std
::
vector
<
double
>*
init_scores
,
const
std
::
vector
<
int32_t
>*
groups
)
{
int
result
=
LGBM_DatasetSetWaitForManualFinish
(
dataset_handle
,
1
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetSetWaitForManualFinish result code: "
<<
result
;
Log
::
Info
(
" Begin StreamSparseDataset"
);
if
((
nrows
%
batch_count
)
!=
0
)
{
Log
::
Fatal
(
"This utility method only handles nrows that are a multiple of batch_count"
);
}
const
int32_t
*
indptr_ptr
=
indptr
->
data
();
const
int32_t
*
groups_ptr
=
nullptr
;
const
int32_t
*
indices_ptr
=
indices
->
data
();
if
(
groups
)
{
const
double
*
values_ptr
=
values
->
data
();
groups_ptr
=
groups
->
data
();
const
float
*
labels_ptr
=
labels
->
data
();
}
const
float
*
weights_ptr
=
nullptr
;
if
(
weights
)
{
weights_ptr
=
weights
->
data
();
}
const
int32_t
*
groups_ptr
=
nullptr
;
auto
start_time
=
std
::
chrono
::
steady_clock
::
now
();
if
(
groups
)
{
groups_ptr
=
groups
->
data
();
}
auto
start_time
=
std
::
chrono
::
steady_clock
::
now
();
// Use multiple threads to test concurrency
int
thread_count
=
2
;
if
(
nrows
==
batch_count
)
{
thread_count
=
1
;
// If pushing all rows in 1 batch, we cannot have multiple threads
}
std
::
vector
<
std
::
thread
>
threads
;
threads
.
reserve
(
thread_count
);
for
(
int32_t
t
=
0
;
t
<
thread_count
;
++
t
)
{
std
::
thread
th
(
TestUtils
::
PushSparseBatch
,
dataset_handle
,
nrows
,
nclasses
,
batch_count
,
indptr
,
indptr_ptr
,
indices_ptr
,
values_ptr
,
labels_ptr
,
weights_ptr
,
init_scores
,
groups_ptr
,
thread_count
,
t
);
threads
.
push_back
(
std
::
move
(
th
));
}
// Use multiple threads to test concurrency
for
(
auto
&
t
:
threads
)
t
.
join
();
int
thread_count
=
2
;
if
(
nrows
==
batch_count
)
{
thread_count
=
1
;
// If pushing all rows in 1 batch, we cannot have multiple threads
}
std
::
vector
<
std
::
thread
>
threads
;
threads
.
reserve
(
thread_count
);
for
(
int32_t
t
=
0
;
t
<
thread_count
;
++
t
)
{
std
::
thread
th
(
TestUtils
::
PushSparseBatch
,
dataset_handle
,
nrows
,
nclasses
,
batch_count
,
indptr
,
indptr_ptr
,
indices_ptr
,
values_ptr
,
labels_ptr
,
weights_ptr
,
init_scores
,
groups_ptr
,
thread_count
,
t
);
threads
.
push_back
(
std
::
move
(
th
));
}
for
(
auto
&
t
:
threads
)
t
.
join
();
auto
cur_time
=
std
::
chrono
::
steady_clock
::
now
();
Log
::
Info
(
" Time: %d"
,
cur_time
-
start_time
);
}
auto
cur_time
=
std
::
chrono
::
steady_clock
::
now
();
/*!
Log
::
Info
(
" Time: %d"
,
cur_time
-
start_time
);
* Pushes data from 1 thread into a Dataset based on thread_id and nrows.
* e.g. with 100 rows, thread 0 will push rows 0-49, and thread 2 will push rows 50-99.
* Note that rows are still pushed in microbatches within their range.
*/
void
TestUtils
::
PushSparseBatch
(
DatasetHandle
dataset_handle
,
int32_t
nrows
,
int32_t
nclasses
,
int32_t
batch_count
,
const
std
::
vector
<
int32_t
>*
indptr
,
const
int32_t
*
indptr_ptr
,
const
int32_t
*
indices_ptr
,
const
double
*
values_ptr
,
const
float
*
labels_ptr
,
const
float
*
weights_ptr
,
const
std
::
vector
<
double
>*
init_scores
,
const
int32_t
*
groups_ptr
,
int32_t
thread_count
,
int32_t
thread_id
)
{
int32_t
threadChunkSize
=
nrows
/
thread_count
;
int32_t
startIndex
=
threadChunkSize
*
thread_id
;
int32_t
stopIndex
=
startIndex
+
threadChunkSize
;
indptr_ptr
+=
threadChunkSize
*
thread_id
;
labels_ptr
+=
threadChunkSize
*
thread_id
;
if
(
weights_ptr
)
{
weights_ptr
+=
threadChunkSize
*
thread_id
;
}
if
(
groups_ptr
)
{
groups_ptr
+=
threadChunkSize
*
thread_id
;
}
}
/*!
for
(
int32_t
i
=
startIndex
;
i
<
stopIndex
;
i
+=
batch_count
)
{
* Pushes data from 1 thread into a Dataset based on thread_id and nrows.
// Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
* e.g. with 100 rows, thread 0 will push rows 0-49, and thread 2 will push rows 50-99.
std
::
vector
<
double
>
init_score_batch
;
* Note that rows are still pushed in microbatches within their range.
const
double
*
init_scores_ptr
=
nullptr
;
*/
if
(
init_scores
)
{
void
TestUtils
::
PushSparseBatch
(
DatasetHandle
dataset_handle
,
init_score_batch
.
reserve
(
nclasses
*
batch_count
);
int32_t
nrows
,
init_scores_ptr
=
CreateInitScoreBatch
(
&
init_score_batch
,
i
,
nrows
,
nclasses
,
batch_count
,
init_scores
);
int32_t
nclasses
,
}
int32_t
batch_count
,
const
std
::
vector
<
int32_t
>*
indptr
,
int32_t
nelem
=
indptr
->
at
(
i
+
batch_count
-
1
)
-
indptr
->
at
(
i
);
const
int32_t
*
indptr_ptr
,
const
int32_t
*
indices_ptr
,
int
result
=
LGBM_DatasetPushRowsByCSRWithMetadata
(
dataset_handle
,
const
double
*
values_ptr
,
indptr_ptr
,
const
float
*
labels_ptr
,
2
,
const
float
*
weights_ptr
,
indices_ptr
,
const
std
::
vector
<
double
>*
init_scores
,
values_ptr
,
const
int32_t
*
groups_ptr
,
1
,
int32_t
thread_count
,
batch_count
+
1
,
int32_t
thread_id
)
{
nelem
,
int32_t
threadChunkSize
=
nrows
/
thread_count
;
i
,
int32_t
startIndex
=
threadChunkSize
*
thread_id
;
labels_ptr
,
int32_t
stopIndex
=
startIndex
+
threadChunkSize
;
weights_ptr
,
init_scores_ptr
,
indptr_ptr
+=
threadChunkSize
*
thread_id
;
groups_ptr
,
labels_ptr
+=
threadChunkSize
*
thread_id
;
thread_id
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetPushRowsByCSRWithMetadata result code: "
<<
result
;
if
(
result
!=
0
)
{
FAIL
()
<<
"LGBM_DatasetPushRowsByCSRWithMetadata failed"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
indptr_ptr
+=
batch_count
;
labels_ptr
+=
batch_count
;
if
(
weights_ptr
)
{
if
(
weights_ptr
)
{
weights_ptr
+=
threadChunkSize
*
thread_id
;
weights_ptr
+=
batch_count
;
}
}
if
(
groups_ptr
)
{
if
(
groups_ptr
)
{
groups_ptr
+=
threadChunkSize
*
thread_id
;
groups_ptr
+=
batch_count
;
}
}
}
for
(
int32_t
i
=
startIndex
;
i
<
stopIndex
;
i
+=
batch_count
)
{
}
// Since init_scores are in a column format, but need to be pushed as rows, we have to extract each batch
std
::
vector
<
double
>
init_score_batch
;
const
double
*
init_scores_ptr
=
nullptr
;
void
TestUtils
::
AssertMetadata
(
const
Metadata
*
metadata
,
if
(
init_scores
)
{
const
std
::
vector
<
float
>*
ref_labels
,
init_score_batch
.
reserve
(
nclasses
*
batch_count
);
const
std
::
vector
<
float
>*
ref_weights
,
init_scores_ptr
=
CreateInitScoreBatch
(
&
init_score_batch
,
i
,
nrows
,
nclasses
,
batch_count
,
init_scores
);
const
std
::
vector
<
double
>*
ref_init_scores
,
}
const
std
::
vector
<
int32_t
>*
ref_groups
)
{
const
float
*
labels
=
metadata
->
label
();
int32_t
nelem
=
indptr
->
at
(
i
+
batch_count
-
1
)
-
indptr
->
at
(
i
);
auto
nTotal
=
static_cast
<
int32_t
>
(
ref_labels
->
size
());
for
(
auto
i
=
0
;
i
<
nTotal
;
i
++
)
{
int
result
=
LGBM_DatasetPushRowsByCSRWithMetadata
(
dataset_handle
,
EXPECT_EQ
(
ref_labels
->
at
(
i
),
labels
[
i
])
<<
"Inserted data: "
<<
ref_labels
->
at
(
i
)
<<
" at "
<<
i
;
indptr_ptr
,
if
(
ref_labels
->
at
(
i
)
!=
labels
[
i
])
{
2
,
FAIL
()
<<
"Mismatched labels"
;
// This forces an immediate failure, which EXPECT_EQ does not
indices_ptr
,
values_ptr
,
1
,
batch_count
+
1
,
nelem
,
i
,
labels_ptr
,
weights_ptr
,
init_scores_ptr
,
groups_ptr
,
thread_id
);
EXPECT_EQ
(
0
,
result
)
<<
"LGBM_DatasetPushRowsByCSRWithMetadata result code: "
<<
result
;
if
(
result
!=
0
)
{
FAIL
()
<<
"LGBM_DatasetPushRowsByCSRWithMetadata failed"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
indptr_ptr
+=
batch_count
;
labels_ptr
+=
batch_count
;
if
(
weights_ptr
)
{
weights_ptr
+=
batch_count
;
}
if
(
groups_ptr
)
{
groups_ptr
+=
batch_count
;
}
}
}
}
}
const
float
*
weights
=
metadata
->
weights
();
void
TestUtils
::
AssertMetadata
(
const
Metadata
*
metadata
,
if
(
weights
)
{
const
std
::
vector
<
float
>*
ref_labels
,
if
(
!
ref_weights
)
{
const
std
::
vector
<
float
>*
ref_weights
,
FAIL
()
<<
"Expected null weights"
;
const
std
::
vector
<
double
>*
ref_init_scores
,
}
const
std
::
vector
<
int32_t
>*
ref_groups
)
{
const
float
*
labels
=
metadata
->
label
();
auto
nTotal
=
static_cast
<
int32_t
>
(
ref_labels
->
size
());
for
(
auto
i
=
0
;
i
<
nTotal
;
i
++
)
{
for
(
auto
i
=
0
;
i
<
nTotal
;
i
++
)
{
EXPECT_EQ
(
ref_
label
s
->
at
(
i
),
label
s
[
i
])
<<
"Inserted data: "
<<
ref_
label
s
->
at
(
i
)
<<
" at "
<<
i
;
EXPECT_EQ
(
ref_
weight
s
->
at
(
i
),
weight
s
[
i
])
<<
"Inserted data: "
<<
ref_
weight
s
->
at
(
i
);
if
(
ref_
label
s
->
at
(
i
)
!=
label
s
[
i
])
{
if
(
ref_
weight
s
->
at
(
i
)
!=
weight
s
[
i
])
{
FAIL
()
<<
"Mismatched
label
s"
;
// This forces an immediate failure, which EXPECT_EQ does not
FAIL
()
<<
"Mismatched
weight
s"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
}
}
}
}
else
if
(
ref_weights
)
{
FAIL
()
<<
"Expected non-null weights"
;
}
const
float
*
weights
=
metadata
->
weights
();
const
double
*
init_scores
=
metadata
->
init_score
();
if
(
weights
)
{
if
(
init_scores
)
{
if
(
!
ref_weights
)
{
if
(
!
ref_init_scores
)
{
FAIL
()
<<
"Expected null weights"
;
FAIL
()
<<
"Expected null init_scores"
;
}
for
(
auto
i
=
0
;
i
<
nTotal
;
i
++
)
{
EXPECT_EQ
(
ref_weights
->
at
(
i
),
weights
[
i
])
<<
"Inserted data: "
<<
ref_weights
->
at
(
i
);
if
(
ref_weights
->
at
(
i
)
!=
weights
[
i
])
{
FAIL
()
<<
"Mismatched weights"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
}
}
else
if
(
ref_weights
)
{
FAIL
()
<<
"Expected non-null weights"
;
}
}
for
(
size_t
i
=
0
;
i
<
ref_init_scores
->
size
();
i
++
)
{
const
double
*
init_scores
=
metadata
->
init_score
();
EXPECT_EQ
(
ref_init_scores
->
at
(
i
),
init_scores
[
i
])
<<
"Inserted data: "
<<
ref_init_scores
->
at
(
i
)
<<
" Index: "
<<
i
;
if
(
init_scores
)
{
if
(
ref_init_scores
->
at
(
i
)
!=
init_scores
[
i
])
{
if
(
!
ref_init_scores
)
{
FAIL
()
<<
"Mismatched init_scores"
;
// This forces an immediate failure, which EXPECT_EQ does not
FAIL
()
<<
"Expected null init_scores"
;
}
for
(
size_t
i
=
0
;
i
<
ref_init_scores
->
size
();
i
++
)
{
EXPECT_EQ
(
ref_init_scores
->
at
(
i
),
init_scores
[
i
])
<<
"Inserted data: "
<<
ref_init_scores
->
at
(
i
)
<<
" Index: "
<<
i
;
if
(
ref_init_scores
->
at
(
i
)
!=
init_scores
[
i
])
{
FAIL
()
<<
"Mismatched init_scores"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
}
}
}
else
if
(
ref_init_scores
)
{
FAIL
()
<<
"Expected non-null init_scores"
;
}
}
}
else
if
(
ref_init_scores
)
{
FAIL
()
<<
"Expected non-null init_scores"
;
}
const
int32_t
*
query_boundaries
=
metadata
->
query_boundaries
();
const
int32_t
*
query_boundaries
=
metadata
->
query_boundaries
();
if
(
query_boundaries
)
{
if
(
query_boundaries
)
{
if
(
!
ref_groups
)
{
if
(
!
ref_groups
)
{
FAIL
()
<<
"Expected null query_boundaries"
;
FAIL
()
<<
"Expected null query_boundaries"
;
}
}
// Calculate expected boundaries
// Calculate expected boundaries
std
::
vector
<
int32_t
>
ref_query_boundaries
;
std
::
vector
<
int32_t
>
ref_query_boundaries
;
ref_query_boundaries
.
push_back
(
0
);
ref_query_boundaries
.
push_back
(
0
);
int
group_val
=
ref_groups
->
at
(
0
);
int
group_val
=
ref_groups
->
at
(
0
);
for
(
auto
i
=
1
;
i
<
nTotal
;
i
++
)
{
for
(
auto
i
=
1
;
i
<
nTotal
;
i
++
)
{
if
(
ref_groups
->
at
(
i
)
!=
group_val
)
{
if
(
ref_groups
->
at
(
i
)
!=
group_val
)
{
ref_query_boundaries
.
push_back
(
i
);
ref_query_boundaries
.
push_back
(
i
);
group_val
=
ref_groups
->
at
(
i
);
group_val
=
ref_groups
->
at
(
i
);
}
}
}
ref_query_boundaries
.
push_back
(
nTotal
);
}
ref_query_boundaries
.
push_back
(
nTotal
);
for
(
size_t
i
=
0
;
i
<
ref_query_boundaries
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
ref_query_boundaries
.
size
();
i
++
)
{
EXPECT_EQ
(
ref_query_boundaries
[
i
],
query_boundaries
[
i
])
<<
"Inserted data: "
<<
ref_query_boundaries
[
i
];
EXPECT_EQ
(
ref_query_boundaries
[
i
],
query_boundaries
[
i
])
<<
"Inserted data: "
<<
ref_query_boundaries
[
i
];
if
(
ref_query_boundaries
[
i
]
!=
query_boundaries
[
i
])
{
if
(
ref_query_boundaries
[
i
]
!=
query_boundaries
[
i
])
{
FAIL
()
<<
"Mismatched query_boundaries"
;
// This forces an immediate failure, which EXPECT_EQ does not
FAIL
()
<<
"Mismatched query_boundaries"
;
// This forces an immediate failure, which EXPECT_EQ does not
}
}
}
}
else
if
(
ref_groups
)
{
FAIL
()
<<
"Expected non-null query_boundaries"
;
}
}
}
else
if
(
ref_groups
)
{
FAIL
()
<<
"Expected non-null query_boundaries"
;
}
}
}
const double* TestUtils::CreateInitScoreBatch(std::vector<double>* init_score_batch,
                                              int32_t index,
                                              int32_t nrows,
                                              int32_t nclasses,
                                              int32_t batch_count,
                                              const std::vector<double>* original_init_scores) {
  // Slice [index, index + batch_count) rows out of the column-major init-score
  // buffer, keeping the result column-major as well: all rows of class 0 first,
  // then all rows of class 1, and so on.
  init_score_batch->clear();
  for (int32_t class_idx = 0; class_idx < nclasses; ++class_idx) {
    // Column-major layout: class `class_idx` occupies rows [nrows * class_idx, nrows * (class_idx + 1)).
    const int32_t column_offset = nrows * class_idx;
    for (int32_t row_idx = index; row_idx < index + batch_count; ++row_idx) {
      init_score_batch->push_back(original_init_scores->at(row_idx + column_offset));
    }
  }
  // Caller-owned vector backs the returned pointer, so it stays valid as long
  // as `init_score_batch` is alive and unmodified.
  return init_score_batch->data();
}
}
// namespace LightGBM
}
// namespace LightGBM
tests/cpp_tests/testutils.h
View file @
548cec82
...
@@ -2,8 +2,8 @@
...
@@ -2,8 +2,8 @@
* Copyright (c) 2022 Microsoft Corporation. All rights reserved.
* Copyright (c) 2022 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
*/
*/
#ifndef LIGHTGBM_TESTUTILS_H_
#ifndef LIGHTGBM_
TESTS_CPP_TESTS_
TESTUTILS_H_
#define LIGHTGBM_TESTUTILS_H_
#define LIGHTGBM_
TESTS_CPP_TESTS_
TESTUTILS_H_
#include <LightGBM/c_api.h>
#include <LightGBM/c_api.h>
#include <LightGBM/dataset.h>
#include <LightGBM/dataset.h>
...
@@ -121,4 +121,4 @@ class TestUtils {
...
@@ -121,4 +121,4 @@ class TestUtils {
int32_t
thread_id
);
int32_t
thread_id
);
};
};
}
// namespace LightGBM
}
// namespace LightGBM
#endif // LIGHTGBM_TESTUTILS_H_
#endif // LIGHTGBM_
TESTS_CPP_TESTS_
TESTUTILS_H_
tests/python_package_test/test_engine.py
View file @
548cec82
...
@@ -16,7 +16,14 @@ import psutil
...
@@ -16,7 +16,14 @@ import psutil
import
pytest
import
pytest
from
scipy.sparse
import
csr_matrix
,
isspmatrix_csc
,
isspmatrix_csr
from
scipy.sparse
import
csr_matrix
,
isspmatrix_csc
,
isspmatrix_csr
from
sklearn.datasets
import
load_svmlight_file
,
make_blobs
,
make_classification
,
make_multilabel_classification
from
sklearn.datasets
import
load_svmlight_file
,
make_blobs
,
make_classification
,
make_multilabel_classification
from
sklearn.metrics
import
average_precision_score
,
log_loss
,
mean_absolute_error
,
mean_squared_error
,
roc_auc_score
from
sklearn.metrics
import
(
average_precision_score
,
log_loss
,
mean_absolute_error
,
mean_squared_error
,
r2_score
,
roc_auc_score
,
)
from
sklearn.model_selection
import
GroupKFold
,
TimeSeriesSplit
,
train_test_split
from
sklearn.model_selection
import
GroupKFold
,
TimeSeriesSplit
,
train_test_split
import
lightgbm
as
lgb
import
lightgbm
as
lgb
...
@@ -4049,6 +4056,29 @@ def test_average_precision_metric():
...
@@ -4049,6 +4056,29 @@ def test_average_precision_metric():
assert
res
[
"training"
][
"average_precision"
][
-
1
]
==
pytest
.
approx
(
1
)
assert
res
[
"training"
][
"average_precision"
][
-
1
]
==
pytest
.
approx
(
1
)
def test_r2_metric():
    # Compare LightGBM's built-in "r2" metric against sklearn's r2_score.
    X, y = make_synthetic_regression()
    params = {"objective": "regression", "metric": "r2", "verbose": -1}
    eval_history = {}
    dtrain = lgb.Dataset(X, label=y)
    booster = lgb.train(
        params,
        dtrain,
        num_boost_round=1,
        valid_sets=[dtrain],
        callbacks=[lgb.record_evaluation(eval_history)],
    )
    lgb_r2 = eval_history["training"]["r2"][-1]
    predictions = booster.predict(X)
    expected_r2 = r2_score(y, predictions)
    assert lgb_r2 == pytest.approx(expected_r2)
    # Sanity checks: the score should be neither trivially 0 nor a perfect 1.
    assert lgb_r2 != 0
    assert lgb_r2 != 1
    # When the target is constant and the model fits it exactly, R2 should be 1.
    y = y.copy()
    y[:] = 1
    constant_dtrain = lgb.Dataset(X, label=y)
    lgb.train(
        params,
        constant_dtrain,
        num_boost_round=1,
        valid_sets=[constant_dtrain],
        callbacks=[lgb.record_evaluation(eval_history)],
    )
    assert eval_history["training"]["r2"][-1] == pytest.approx(1)
def
test_reset_params_works_with_metric_num_class_and_boosting
():
def
test_reset_params_works_with_metric_num_class_and_boosting
():
X
,
y
=
load_breast_cancer
(
return_X_y
=
True
)
X
,
y
=
load_breast_cancer
(
return_X_y
=
True
)
dataset_params
=
{
"max_bin"
:
150
}
dataset_params
=
{
"max_bin"
:
150
}
...
...
Prev
1
…
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment