test_single_row.cpp 7.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
/*!
 * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */

#include <gtest/gtest.h>
#include <testutils.h>
#include <LightGBM/c_api.h>

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <thread>
#include <vector>
14
15
16

using LightGBM::TestUtils;

17
void test_predict_type(int predict_type, int num_predicts) {
18
19
20
21
22
23
24
25
26
27
28
29
    // Load some test data
    int result;

    DatasetHandle train_dataset;
    result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.train", "max_bin=15", &train_dataset);
    EXPECT_EQ(0, result) << "LoadDatasetFromExamples train result code: " << result;

    BoosterHandle booster_handle;
    result = LGBM_BoosterCreate(train_dataset, "app=binary metric=auc num_leaves=31 verbose=0", &booster_handle);
    EXPECT_EQ(0, result) << "LGBM_BoosterCreate result code: " << result;

    for (int i = 0; i < 51; i++) {
30
        int produced_empty_tree;
31
32
        result = LGBM_BoosterUpdateOneIter(
            booster_handle,
33
            &produced_empty_tree);
34
35
36
37
38
39
40
41
        EXPECT_EQ(0, result) << "LGBM_BoosterUpdateOneIter result code: " << result;
    }

    int n_features;
    result = LGBM_BoosterGetNumFeature(
        booster_handle,
        &n_features);
    EXPECT_EQ(0, result) << "LGBM_BoosterGetNumFeature result code: " << result;
42
    EXPECT_EQ(28, n_features) << "LGBM_BoosterGetNumFeature number of features: " << n_features;
43
44
45
46
47
48

    // Run a single row prediction and compare with regular Mat prediction:
    int64_t output_size;
    result = LGBM_BoosterCalcNumPredict(
        booster_handle,
        1,
49
        predict_type,          // predict_type
50
51
52
53
        0,                     // start_iteration
        -1,                    // num_iteration
        &output_size);
    EXPECT_EQ(0, result) << "LGBM_BoosterCalcNumPredict result code: " << result;
54
    EXPECT_EQ(num_predicts, output_size) << "LGBM_BoosterCalcNumPredict output size: " << output_size;
55

56
    std::ifstream test_file("examples/binary_classification/binary.test");
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
    std::vector<double> test;
    double x;
    int test_set_size = 0;
    while (test_file >> x) {
        if (test_set_size % (n_features + 1) == 0) {
            // Drop the result from the dataset, we only care about checking that prediction results are equal
            // in both cases
            test_file >> x;
            test_set_size++;
        }
        test.push_back(x);
        test_set_size++;
    }
    EXPECT_EQ(test_set_size % (n_features + 1), 0) << "Test size mismatch with dataset size (%)";
    test_set_size /= (n_features + 1);
    EXPECT_EQ(test_set_size, 500) << "Improperly parsed test file (test_set_size)";
    EXPECT_EQ(test.size(), test_set_size * n_features) << "Improperly parsed test file (test len)";

    std::vector<double> mat_output(output_size * test_set_size, -1);
    int64_t written;
    result = LGBM_BoosterPredictForMat(
        booster_handle,
        &test[0],
        C_API_DTYPE_FLOAT64,
        test_set_size,         // nrow
        n_features,            // ncol
        1,                     // is_row_major
84
        predict_type,          // predict_type
85
86
87
88
89
90
91
        0,                     // start_iteration
        -1,                    // num_iteration
        "",
        &written,
        &mat_output[0]);
    EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;

92
    // Test LGBM_BoosterPredictForMat in multi-threaded mode
93
    const int kNThreads = 10;
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
    const int numIterations = 5;
    std::vector<std::thread> predict_for_mat_threads(kNThreads);
    for (int i = 0; i < kNThreads; i++) {
        predict_for_mat_threads[i] = std::thread(
            [
                i, test_set_size, output_size, n_features,
                    test = &test[0], booster_handle, predict_type, numIterations
            ]() {
                for (int j = 0; j < numIterations; j++) {
                    int result;
                    std::vector<double> mat_output(output_size * test_set_size, -1);
                    int64_t written;
                    result = LGBM_BoosterPredictForMat(
                        booster_handle,
                        &test[0],
                        C_API_DTYPE_FLOAT64,
                        test_set_size,         // nrow
                        n_features,            // ncol
                        1,                     // is_row_major
                        predict_type,          // predict_type
                        0,                     // start_iteration
                        -1,                    // num_iteration
                        "",
                        &written,
                        &mat_output[0]);
                    EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;
                }
            });
    }
    for (std::thread& t : predict_for_mat_threads) {
        t.join();
    }

    // Now let's run with the single row fast prediction API:
128
129
130
131
    FastConfigHandle fast_configs[kNThreads];
    for (int i = 0; i < kNThreads; i++) {
        result = LGBM_BoosterPredictForMatSingleRowFastInit(
            booster_handle,
132
            predict_type,          // predict_type
133
134
135
136
137
138
139
140
141
142
            0,                     // start_iteration
            -1,                    // num_iteration
            C_API_DTYPE_FLOAT64,
            n_features,
            "",
            &fast_configs[i]);
        EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMatSingleRowFastInit result code: " << result;
    }

    std::vector<double> single_row_output(output_size * test_set_size, -1);
143
    std::vector<std::thread> single_row_threads(kNThreads);
144
145
    int batch_size = (test_set_size + kNThreads - 1) / kNThreads;  // round up
    for (int i = 0; i < kNThreads; i++) {
146
        single_row_threads[i] = std::thread(
147
148
            [
                i, batch_size, test_set_size, output_size, n_features,
149
150
                    test = &test[0], fast_configs = &fast_configs[0], single_row_output = &single_row_output[0]
            ]() {
151
152
153
154
155
156
157
158
159
160
161
162
                int result;
                int64_t written;
                for (int j = i * batch_size; j < std::min((i + 1) * batch_size, test_set_size); j++) {
                    result = LGBM_BoosterPredictForMatSingleRowFast(
                        fast_configs[i],
                        &test[j * n_features],
                        &written,
                        &single_row_output[j * output_size]);
                    EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMatSingleRowFast result code: " << result;
                    EXPECT_EQ(written, output_size) << "LGBM_BoosterPredictForMatSingleRowFast unexpected written output size";
                }
            });
163
164
      }
    for (std::thread& t : single_row_threads) {
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
        t.join();
    }

    EXPECT_EQ(single_row_output, mat_output) << "LGBM_BoosterPredictForMatSingleRowFast output mismatch with LGBM_BoosterPredictForMat";

    // Free all:
    for (int i = 0; i < kNThreads; i++) {
        result = LGBM_FastConfigFree(fast_configs[i]);
        EXPECT_EQ(0, result) << "LGBM_FastConfigFree result code: " << result;
    }

    result = LGBM_BoosterFree(booster_handle);
    EXPECT_EQ(0, result) << "LGBM_BoosterFree result code: " << result;

    result = LGBM_DatasetFree(train_dataset);
    EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result;
}
182
183
184
185
186
187
188
189

// Single-row vs. matrix prediction consistency for the normal prediction type.
TEST(SingleRow, Normal) {
    // One output value per row is expected for this prediction type.
    const int expected_outputs_per_row = 1;
    test_predict_type(C_API_PREDICT_NORMAL, expected_outputs_per_row);
}

// Single-row vs. matrix prediction consistency for the feature-contribution prediction type.
TEST(SingleRow, Contrib) {
    // 29 output values per row are expected here; with the 28 features checked by
    // test_predict_type this is presumably one value per feature plus one extra column.
    const int expected_outputs_per_row = 29;
    test_predict_type(C_API_PREDICT_CONTRIB, expected_outputs_per_row);
}