/*!
 * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */

#include <gtest/gtest.h>
#include <testutils.h>
#include <LightGBM/c_api.h>

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <thread>
#include <vector>

using LightGBM::TestUtils;

15
void test_predict_type(int predict_type, int num_predicts) {
16
17
18
19
20
21
22
23
24
25
26
27
    // Load some test data
    int result;

    DatasetHandle train_dataset;
    result = TestUtils::LoadDatasetFromExamples("binary_classification/binary.train", "max_bin=15", &train_dataset);
    EXPECT_EQ(0, result) << "LoadDatasetFromExamples train result code: " << result;

    BoosterHandle booster_handle;
    result = LGBM_BoosterCreate(train_dataset, "app=binary metric=auc num_leaves=31 verbose=0", &booster_handle);
    EXPECT_EQ(0, result) << "LGBM_BoosterCreate result code: " << result;

    for (int i = 0; i < 51; i++) {
28
        int produced_empty_tree;
29
30
        result = LGBM_BoosterUpdateOneIter(
            booster_handle,
31
            &produced_empty_tree);
32
33
34
35
36
37
38
39
        EXPECT_EQ(0, result) << "LGBM_BoosterUpdateOneIter result code: " << result;
    }

    int n_features;
    result = LGBM_BoosterGetNumFeature(
        booster_handle,
        &n_features);
    EXPECT_EQ(0, result) << "LGBM_BoosterGetNumFeature result code: " << result;
40
    EXPECT_EQ(28, n_features) << "LGBM_BoosterGetNumFeature number of features: " << n_features;
41
42
43
44
45
46

    // Run a single row prediction and compare with regular Mat prediction:
    int64_t output_size;
    result = LGBM_BoosterCalcNumPredict(
        booster_handle,
        1,
47
        predict_type,          // predict_type
48
49
50
51
        0,                     // start_iteration
        -1,                    // num_iteration
        &output_size);
    EXPECT_EQ(0, result) << "LGBM_BoosterCalcNumPredict result code: " << result;
52
    EXPECT_EQ(num_predicts, output_size) << "LGBM_BoosterCalcNumPredict output size: " << output_size;
53

54
    std::ifstream test_file("examples/binary_classification/binary.test");
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
    std::vector<double> test;
    double x;
    int test_set_size = 0;
    while (test_file >> x) {
        if (test_set_size % (n_features + 1) == 0) {
            // Drop the result from the dataset, we only care about checking that prediction results are equal
            // in both cases
            test_file >> x;
            test_set_size++;
        }
        test.push_back(x);
        test_set_size++;
    }
    EXPECT_EQ(test_set_size % (n_features + 1), 0) << "Test size mismatch with dataset size (%)";
    test_set_size /= (n_features + 1);
    EXPECT_EQ(test_set_size, 500) << "Improperly parsed test file (test_set_size)";
    EXPECT_EQ(test.size(), test_set_size * n_features) << "Improperly parsed test file (test len)";

    std::vector<double> mat_output(output_size * test_set_size, -1);
    int64_t written;
    result = LGBM_BoosterPredictForMat(
        booster_handle,
        &test[0],
        C_API_DTYPE_FLOAT64,
        test_set_size,         // nrow
        n_features,            // ncol
        1,                     // is_row_major
82
        predict_type,          // predict_type
83
84
85
86
87
88
89
        0,                     // start_iteration
        -1,                    // num_iteration
        "",
        &written,
        &mat_output[0]);
    EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;

90
    // Test LGBM_BoosterPredictForMat in multi-threaded mode
91
    const int kNThreads = 10;
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
    const int numIterations = 5;
    std::vector<std::thread> predict_for_mat_threads(kNThreads);
    for (int i = 0; i < kNThreads; i++) {
        predict_for_mat_threads[i] = std::thread(
            [
                i, test_set_size, output_size, n_features,
                    test = &test[0], booster_handle, predict_type, numIterations
            ]() {
                for (int j = 0; j < numIterations; j++) {
                    int result;
                    std::vector<double> mat_output(output_size * test_set_size, -1);
                    int64_t written;
                    result = LGBM_BoosterPredictForMat(
                        booster_handle,
                        &test[0],
                        C_API_DTYPE_FLOAT64,
                        test_set_size,         // nrow
                        n_features,            // ncol
                        1,                     // is_row_major
                        predict_type,          // predict_type
                        0,                     // start_iteration
                        -1,                    // num_iteration
                        "",
                        &written,
                        &mat_output[0]);
                    EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMat result code: " << result;
                }
            });
    }
    for (std::thread& t : predict_for_mat_threads) {
        t.join();
    }

    // Now let's run with the single row fast prediction API:
126
127
128
129
    FastConfigHandle fast_configs[kNThreads];
    for (int i = 0; i < kNThreads; i++) {
        result = LGBM_BoosterPredictForMatSingleRowFastInit(
            booster_handle,
130
            predict_type,          // predict_type
131
132
133
134
135
136
137
138
139
140
            0,                     // start_iteration
            -1,                    // num_iteration
            C_API_DTYPE_FLOAT64,
            n_features,
            "",
            &fast_configs[i]);
        EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMatSingleRowFastInit result code: " << result;
    }

    std::vector<double> single_row_output(output_size * test_set_size, -1);
141
    std::vector<std::thread> single_row_threads(kNThreads);
142
143
    int batch_size = (test_set_size + kNThreads - 1) / kNThreads;  // round up
    for (int i = 0; i < kNThreads; i++) {
144
        single_row_threads[i] = std::thread(
145
146
            [
                i, batch_size, test_set_size, output_size, n_features,
147
148
                    test = &test[0], fast_configs = &fast_configs[0], single_row_output = &single_row_output[0]
            ]() {
149
150
151
152
153
154
155
156
157
158
159
160
                int result;
                int64_t written;
                for (int j = i * batch_size; j < std::min((i + 1) * batch_size, test_set_size); j++) {
                    result = LGBM_BoosterPredictForMatSingleRowFast(
                        fast_configs[i],
                        &test[j * n_features],
                        &written,
                        &single_row_output[j * output_size]);
                    EXPECT_EQ(0, result) << "LGBM_BoosterPredictForMatSingleRowFast result code: " << result;
                    EXPECT_EQ(written, output_size) << "LGBM_BoosterPredictForMatSingleRowFast unexpected written output size";
                }
            });
161
162
      }
    for (std::thread& t : single_row_threads) {
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
        t.join();
    }

    EXPECT_EQ(single_row_output, mat_output) << "LGBM_BoosterPredictForMatSingleRowFast output mismatch with LGBM_BoosterPredictForMat";

    // Free all:
    for (int i = 0; i < kNThreads; i++) {
        result = LGBM_FastConfigFree(fast_configs[i]);
        EXPECT_EQ(0, result) << "LGBM_FastConfigFree result code: " << result;
    }

    result = LGBM_BoosterFree(booster_handle);
    EXPECT_EQ(0, result) << "LGBM_BoosterFree result code: " << result;

    result = LGBM_DatasetFree(train_dataset);
    EXPECT_EQ(0, result) << "LGBM_DatasetFree result code: " << result;
}
180
181
182
183
184
185
186
187

// Normal prediction mode: the shared check expects one output value per row.
TEST(SingleRow, Normal) {
    constexpr int kOutputsPerRow = 1;
    test_predict_type(C_API_PREDICT_NORMAL, kOutputsPerRow);
}

// Contribution prediction mode: the shared check expects 29 output values per row.
// NOTE(review): the model has 28 features, so this is presumably one contribution per
// feature plus one extra term — confirm against the C API documentation.
TEST(SingleRow, Contrib) {
    constexpr int kOutputsPerRow = 29;
    test_predict_type(C_API_PREDICT_CONTRIB, kOutputsPerRow);
}