basic.cc 11.3 KB
Newer Older
yangzhong's avatar
yangzhong committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

/// \file
/// \brief Basic functionality unit tests.

#include <algorithm>
#include <cstddef>
#include <deque>
#include <future>
#include <iostream>
#include <limits>
#include <queue>
#include <string>
#include <unordered_set>
#include <vector>

#include "../loadgen.h"
#include "../query_sample_library.h"
#include "../system_under_test.h"
#include "../test_settings.h"
#include "loadgen_test.h"

/// \brief Correctness unit tests.
namespace unit_tests {

/// \defgroup LoadgenTestsBasic Test Coverage: Basic

/// \brief Implements the client interfaces of the loadgen and
/// has some basic sanity checks that are enabled for all tests.
/// \details The same object is handed to mlperf::StartTest for both of its
/// first two arguments. Each interface call updates the bookkeeping members
/// below (load counts, issue counts, query sizes, samples per flush interval)
/// and is then forwarded to an overrideable *Ext hook. It also implements
/// the TestProxy concept.
struct SystemUnderTestBasic : public mlperf::QuerySampleLibrary,
                              public mlperf::SystemUnderTest {
  const std::string& Name() const override { return name_; }

  size_t TotalSampleCount() override { return total_sample_count_; }
  size_t PerformanceSampleCount() override { return performance_sample_count_; }

  /// \brief Records every loaded sample, then calls LoadSamplesToRamExt.
  /// \details The load counter is accessed with at(), so an index beyond
  /// samples_load_count_ throws std::out_of_range.
  void LoadSamplesToRam(
      const std::vector<mlperf::QuerySampleIndex>& samples) override {
    for (auto s : samples) {
      samples_load_count_.at(s)++;
      loaded_samples_.push_back(s);
    }
    LoadSamplesToRamExt(samples);
  }
  /// \brief Hook called after LoadSamplesToRam updated its bookkeeping.
  virtual void LoadSamplesToRamExt(
      const std::vector<mlperf::QuerySampleIndex>& samples) {}

  /// \brief Verifies samples are unloaded in the order they were loaded,
  /// then calls UnloadSamplesFromRamExt.
  /// \details Reports a failure (instead of invoking undefined behavior or
  /// wrapping a counter) when a sample is unloaded while nothing is loaded or
  /// while its load count is already zero.
  void UnloadSamplesFromRam(
      const std::vector<mlperf::QuerySampleIndex>& samples) override {
    for (auto s : samples) {
      // front() and pop_front() are undefined on an empty deque, so an
      // unload without any loaded sample is reported and the queue is left
      // untouched.
      const bool nothing_loaded = loaded_samples_.empty();
      FAIL_IF(nothing_loaded) &&
          FAIL_MSG("Unloading a sample while no samples are loaded:") &&
          FAIL_EXP(s);
      if (!nothing_loaded) {
        FAIL_IF(loaded_samples_.front() != s) &&
            FAIL_EXP(loaded_samples_.front()) && FAIL_EXP(s);
        loaded_samples_.pop_front();
      }

      size_t& load_count = samples_load_count_.at(s);
      const size_t prev_load_count = load_count;
      FAIL_IF(prev_load_count == 0) && FAIL_EXP(prev_load_count);
      // Never decrement below zero: a wrapped counter would look "loaded" to
      // the check in IssueQuery and hide follow-up errors.
      if (prev_load_count != 0) {
        load_count--;
      }
    }
    UnloadSamplesFromRamExt(samples);
  }
  /// \brief Hook called after UnloadSamplesFromRam updated its bookkeeping.
  virtual void UnloadSamplesFromRamExt(
      const std::vector<mlperf::QuerySampleIndex>& samples) {}

  /// \brief Records the query, completes all of its samples immediately and
  /// then calls IssueQueryExt.
  /// \details Fails if a sample is issued while its load count is zero.
  /// samples_between_flushes_ must be non-empty; RunTest seeds it.
  void IssueQuery(const std::vector<mlperf::QuerySample>& samples) override {
    std::vector<mlperf::QuerySampleResponse> responses;
    query_sizes_.push_back(samples.size());
    samples_between_flushes_.back() += samples.size();
    responses.reserve(samples.size());
    for (auto s : samples) {
      FAIL_IF(samples_load_count_.at(s.index) == 0) &&
          FAIL_MSG("Issued unloaded sample:") && FAIL_EXP(s.index);
      samples_issue_count_.at(s.index)++;
      issued_samples_.push_back(s.index);
      // Responses carry only the sample id; the two zeros are presumably the
      // response data pointer and size - confirm against QuerySampleResponse.
      responses.push_back({s.id, 0, 0});
    }
    mlperf::QuerySamplesComplete(responses.data(), responses.size());
    IssueQueryExt(samples);
  }
  /// \brief Hook called after IssueQuery completed the query's samples.
  virtual void IssueQueryExt(const std::vector<mlperf::QuerySample>& samples) {}

  /// \brief Starts a new samples-per-flush counter, then calls
  /// FlushQueriesExt.
  void FlushQueries() override {
    samples_between_flushes_.push_back(0);
    FlushQueriesExt();
  }
  /// \brief Hook called after FlushQueries opened a new flush interval.
  virtual void FlushQueriesExt() {}

  /// \brief Sizes the per-sample counters to total_sample_count_, seeds the
  /// first flush interval and starts the loadgen.
  virtual void RunTest() {
    samples_load_count_.resize(total_sample_count_, 0);
    samples_issue_count_.resize(total_sample_count_, 0);
    samples_between_flushes_.resize(1, 0);
    mlperf::StartTest(this, this, test_settings_, log_settings_);
  }

  /// \brief Hook for post-run verification; does nothing by default.
  virtual void EndTest() {}

 protected:
  mlperf::TestSettings test_settings_;
  mlperf::LogSettings log_settings_;

  std::string name_{"BasicSUT"};
  // Zero-initialized so RunTest never reads an indeterminate value when a
  // derived test forgets to set them.
  size_t total_sample_count_{0};
  size_t performance_sample_count_{0};
  // Index of every issued sample, in issue order.
  std::vector<mlperf::QuerySampleIndex> issued_samples_;
  // Currently loaded samples, in load order (front is the oldest).
  std::deque<mlperf::QuerySampleIndex> loaded_samples_;
  // Per-sample counters, indexed by sample index.
  std::vector<size_t> samples_load_count_;
  std::vector<size_t> samples_issue_count_;

  // Size of each issued query, in issue order.
  std::vector<size_t> query_sizes_;
  // Number of samples issued between consecutive FlushQueries calls.
  std::vector<size_t> samples_between_flushes_;
};

/// \brief Shared set up for the accuracy-mode tests.
/// \details Derives the QSL sizes from the requested query shape and
/// configures the loadgen settings for an AccuracyOnly run.
struct SystemUnderTestAccuracy : public SystemUnderTestBasic {
  /// \param samples_per_query Samples in each multi-stream query.
  /// \param samples_per_query_remainder Extra samples added to the
  /// performance set on top of 16 full queries.
  /// \param accuracy_remainder Extra samples added to the total set on top
  /// of 32 performance sets.
  /// \param scenario Scenario stored in the test settings.
  virtual void SetUpTest(size_t samples_per_query,
                         size_t samples_per_query_remainder,
                         size_t accuracy_remainder,
                         mlperf::TestScenario scenario) {
    constexpr size_t kQueriesPerPerformanceSet = 16;
    constexpr size_t kPerformanceSetsInTotal = 32;
    constexpr double kServerTargetQps = 1e3;

    // Loadgen configuration.
    test_settings_.mode = mlperf::TestMode::AccuracyOnly;
    test_settings_.scenario = scenario;
    test_settings_.server_target_qps = kServerTargetQps;
    test_settings_.multi_stream_samples_per_query = samples_per_query;
    log_settings_.log_output.prefix_with_datetime = false;

    // QSL sizes: the total set is built from the performance set size.
    const size_t perf_count =
        samples_per_query * kQueriesPerPerformanceSet +
        samples_per_query_remainder;
    performance_sample_count_ = perf_count;
    total_sample_count_ =
        perf_count * kPerformanceSetsInTotal + accuracy_remainder;
  }
};

/// \brief Verifies all samples from the QSL are included at least once
/// in accuracy mode.
/// \details EndTest sorts the issued sample indices and checks that they
/// start at 0, end at total_sample_count_ - 1 and have no gaps. Repeated
/// indices are reported as failures as well.
/// \ingroup LoadgenTestsBasic
struct TestAccuracyIncludesAllSamples : public SystemUnderTestAccuracy {
  void EndTest() override {
    std::sort(issued_samples_.begin(), issued_samples_.end());

    FAIL_IF(issued_samples_.size() < total_sample_count_) &&
        FAIL_EXP(issued_samples_.size()) && FAIL_EXP(total_sample_count_);
    // front()/back() are undefined on an empty vector. If nothing was issued
    // the size check above has already reported it (or there was nothing to
    // issue), so there is nothing left to inspect.
    if (issued_samples_.empty()) {
      return;
    }
    FAIL_IF(issued_samples_.front() != 0) && FAIL_EXP(issued_samples_.front());
    FAIL_IF(issued_samples_.back() != total_sample_count_ - 1) &&
        FAIL_EXP(issued_samples_.back()) && FAIL_EXP(total_sample_count_);

    // Walk the sorted indices. Starting prev at -1 means the first sample
    // only counts as a discontinuity when it is not 0 (s - prev == s + 1,
    // also under unsigned wrap-around).
    mlperf::QuerySampleIndex prev = -1;
    size_t discontinuities = 0;
    size_t dupes = 0;
    for (auto s : issued_samples_) {
      if (s == prev) {
        dupes++;
      } else if (s - prev > 1) {
        discontinuities++;
      }
      prev = s;
    }

    FAIL_IF(discontinuities != 0) && FAIL_EXP(discontinuities);
    FAIL_IF(dupes != 0) && FAIL_EXP(dupes);
  }
};

// Registers TestAccuracyIncludesAllSamples under the name
// AccuracyIncludesAllSamples. The trailing 4, 0, 0 presumably become
// SetUpTest's samples_per_query, samples_per_query_remainder and
// accuracy_remainder, with the scenario supplied by the macro - confirm
// against loadgen_test.h.
REGISTER_TEST_ALL_SCENARIOS(AccuracyIncludesAllSamples,
                            TestProxy<TestAccuracyIncludesAllSamples>(), 4, 0,
                            0);

/// \brief Verifies samples from the QSL aren't included too many times.
/// \details This is a regression test for:
/// https://github.com/mlperf/inference/pull/386
/// The root cause was using different values for samples_per_query while
/// generating queries for the GNMT dataset.
/// The test overrides the QSL sizes with 3003 total and 1001 performance
/// samples and fails if any sample index is issued more than once.
/// \ingroup LoadgenTestsBasic
struct TestAccuracyDupesAreLimitted : public SystemUnderTestAccuracy {
  /// \brief Applies the common accuracy set up, then overrides the QSL sizes.
  /// \details The unnamed bool parameter is not used.
  void SetUpTest(bool, mlperf::TestScenario scenario) {
    SystemUnderTestAccuracy::SetUpTest(4, 0, 0, scenario);
    total_sample_count_ = 3003;
    performance_sample_count_ = 1001;
  }

  void EndTest() override {
    std::sort(issued_samples_.begin(), issued_samples_.end());

    FAIL_IF(issued_samples_.size() < total_sample_count_) &&
        FAIL_EXP(issued_samples_.size()) && FAIL_EXP(total_sample_count_);
    // front()/back() are undefined on an empty vector. If nothing was issued
    // the size check above has already reported it (or there was nothing to
    // issue), and no sample can exceed the duplicate limit.
    if (issued_samples_.empty()) {
      return;
    }
    FAIL_IF(issued_samples_.front() != 0) && FAIL_EXP(issued_samples_.front());
    FAIL_IF(issued_samples_.back() != total_sample_count_ - 1) &&
        FAIL_EXP(issued_samples_.back()) && FAIL_EXP(total_sample_count_);

    // Histogram of how often each sample index was issued. at() throws
    // std::out_of_range for an index >= total_sample_count_.
    std::vector<size_t> issue_counts(total_sample_count_, 0);
    for (auto s : issued_samples_) {
      issue_counts.at(s)++;
    }

    const size_t max_count = 1;
    for (size_t i = 0; i < issue_counts.size(); i++) {
      FAIL_IF(issue_counts[i] > max_count) && FAIL_EXP(i) &&
          FAIL_EXP(max_count) && FAIL_EXP(issue_counts[i]);
    }
  }
};

// Registers the test under the name TestAccuracyDupesAreLimitted. The `true`
// argument lines up with SetUpTest's unnamed (unused) bool parameter;
// presumably the macro appends the scenario - confirm against
// loadgen_test.h.
REGISTER_TEST_ALL_SCENARIOS(TestAccuracyDupesAreLimitted,
                            TestProxy<TestAccuracyDupesAreLimitted>(), true);

/// \brief Verifies offline + accuracy doesn't hang if the last set
/// in the accuracy series is smaller than others.
/// \details EndTest inspects the number of samples issued between
/// consecutive FlushQueries calls: every interval except the last non-empty
/// one must match the first, and the last one must be smaller.
/// \ingroup LoadgenTestsBasic
struct TestOfflineRemainderAccuracySet : public SystemUnderTestAccuracy {
  // An accuracy_remainder of 7 makes total_sample_count_ (64 * 32 + 7) not a
  // multiple of performance_sample_count_ (4 * 16 = 64).
  void SetUpTest() {
    SystemUnderTestAccuracy::SetUpTest(4, 0, 7, mlperf::TestScenario::Offline);
  }

  void EndTest() override {
    // Alias of the member: the pop_back() calls below modify the recorded
    // data in place.
    auto& flush_samples = samples_between_flushes_;

    // NOTE(review): the pops and front()/back() below need at least two
    // entries; this relies on ABORT_TEST stopping the test - confirm.
    FAIL_IF(flush_samples.size() < 3) && FAIL_EXP(flush_samples.size()) &&
        BAD_TEST_MSG("Test should generate multiple query sets.") && ABORT_TEST;

    // The last counter will be 0, since a test ends with a call to
    // FlushQueries.
    FAIL_IF(flush_samples.back() != 0) && FAIL_EXP(flush_samples.back()) &&
        FAIL_MSG(
            "Detected stray calls to IssueQuery after the last call to "
            "FlushQuery.");
    flush_samples.pop_back();

    // Verify the test ran with a smaller last accuracy set.
    size_t first_size = flush_samples.front();
    size_t last_size = flush_samples.back();
    FAIL_IF(first_size <= last_size) && FAIL_EXP(first_size) &&
        FAIL_EXP(last_size) && BAD_TEST_MSG();

    flush_samples.pop_back();  // Don't check the last set for equality.
    // All remaining sets must have the same size as the first one.
    for (size_t query_size : flush_samples) {
      FAIL_IF(query_size != first_size) && FAIL_EXP(query_size) &&
          FAIL_EXP(first_size);
    }
  }
};

// Registers TestOfflineRemainderAccuracySet under the name
// Offline_RemainderAccuracySets. No extra arguments are passed, matching the
// parameterless SetUpTest.
REGISTER_TEST(Offline_RemainderAccuracySets,
              TestProxy<TestOfflineRemainderAccuracySet>());

/// \brief Verifies all queries only contain samples that are contiguous,
/// even if the set size is not a multiple of samples_per_query.
/// \ingroup LoadgenTestsBasic
struct TestMultiStreamContiguousRemainderQuery
    : public SystemUnderTestAccuracy {
  void SetUpTest(mlperf::TestScenario scenario) {
    SystemUnderTestAccuracy::SetUpTest(4, 1, 0, scenario);
    first_qsl_offsets_.resize(total_sample_count_, kBadQslOffset);

    auto spq = test_settings_.multi_stream_samples_per_query;
    FAIL_IF(performance_sample_count_ % spq == 0) &&
        FAIL_EXP(performance_sample_count_) && FAIL_EXP(spq) &&
        BAD_TEST_MSG("There is no remainder.");
  }

  void LoadSamplesToRamExt(
      const std::vector<mlperf::QuerySampleIndex>& samples) override {
    FAIL_IF(loaded_samples_.size() != samples.size()) &&
        FAIL_MSG("Contiguous sample order is likely ambiguous.");
    for (size_t i = 0; i < samples.size(); i++) {
      auto& offset = first_qsl_offsets_.at(samples.at(i));
      // Samples may be loaded into multiple slots for padding purposes,
      // so make sure to only index the first time a sample appears in a
      // loaded set.
      if (offset == kBadQslOffset) {
        offset = i;
      }
    }
  }

  void UnloadSamplesFromRamExt(
      const std::vector<mlperf::QuerySampleIndex>& samples) override {
    FAIL_IF(!loaded_samples_.empty()) &&
        FAIL_MSG("Contiguous sample order is likely ambiguous.");
    for (size_t i = 0; i < samples.size(); i++) {
      first_qsl_offsets_.at(samples.at(i)) = kBadQslOffset;
    }
  }

  void IssueQueryExt(const std::vector<mlperf::QuerySample>& samples) override {
    size_t expected_offset = first_qsl_offsets_[samples[0].index];
    for (auto s : samples) {
      FAIL_IF(loaded_samples_[expected_offset] != s.index) &&
          FAIL_MSG("Samples are not contiguous.");
      expected_offset++;
    }
  }

  void FlushQueriesExt() override {}

  void EndTest() override {}

 private:
  static const size_t kBadQslOffset;
  std::vector<size_t> first_qsl_offsets_;
};

// Out-of-class definition of the sentinel declared in
// TestMultiStreamContiguousRemainderQuery. The largest size_t value marks a
// sample that has no recorded offset in the loaded set.
constexpr size_t TestMultiStreamContiguousRemainderQuery::kBadQslOffset =
    std::numeric_limits<size_t>::max();

// Registers TestMultiStreamContiguousRemainderQuery under the name
// MultiStream_RemainderQueryContiguous. The MultiStream scenario lines up
// with SetUpTest's single scenario parameter.
REGISTER_TEST(MultiStream_RemainderQueryContiguous,
              TestProxy<TestMultiStreamContiguousRemainderQuery>(),
              mlperf::TestScenario::MultiStream);
}  // namespace unit_tests