hash_table.cpp 7.26 KB
Newer Older
1
2
3
4
5
// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision:$
// $Date:$
6
// -------------------------------------------------------------
7
8
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
9
// -------------------------------------------------------------
10
11
12
13
14
15
16

/**
 * @file hash_table.cpp
 *
 * @brief Implements a basic hash table that stores one value per key.
 */

traveller59's avatar
traveller59 committed
17
#include <cuhash/debugging.h>
18
#include <cuhash/hash_table.h>
19
20
21
22
23
24

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <cuda_runtime_api.h>
traveller59's avatar
traveller59 committed
25
#include <cuhash/cuda_util.h>
26
#include <limits>
27

traveller59's avatar
traveller59 committed
28
namespace cuhash {
29
30
31
32
33
34

char buffer[256];

//! @name Internal
/// @{
dim3 ComputeGridDim(unsigned n) {
35
36
37
38
39
40
41
  // Round up in order to make sure all items are hashed in.
  dim3 grid((n + kBlockSize - 1) / kBlockSize);
  if (grid.x > kGridSize) {
    grid.y = (grid.x + kGridSize - 1) / kGridSize;
    grid.x = kGridSize;
  }
  return grid;
42
43
}

44
unsigned ComputeMaxIterations(const unsigned n, const unsigned table_size,
45
                              const unsigned num_functions) {
46
  float lg_input_size = (float)(log((double)n) / log(2.0));
47
48
49

// #define CONSTANT_ITERATIONS
#ifdef CONSTANT_ITERATIONS
50
51
52
  // Set the maximum number of iterations to 7lg(N).
  const unsigned MAX_ITERATION_CONSTANT = 7;
  unsigned max_iterations = MAX_ITERATION_CONSTANT * lg_input_size;
53
#else
54
55
56
57
58
59
60
61
  // Use an empirical formula for determining what the maximum number of
  // iterations should be.  Works OK in most situations.
  float load_factor = float(n) / table_size;
  float ln_load_factor = (float)(log(load_factor) / log(2.71828183));

  unsigned max_iterations =
      (unsigned)(4.0 * ceil(-1.0 / (0.028255 + 1.1594772 * ln_load_factor) *
                            lg_input_size));
62
#endif
63
  return max_iterations;
64
65
66
}
/// @}

67
68
69
70
HashTable::HashTable()
    : table_size_(0), d_contents_(NULL), stash_count_(0), d_failures_(NULL) {
  CUDA_CHECK_ERROR("Failed in constructor.\n");
}
71
72

bool HashTable::Initialize(const unsigned max_table_entries,
73
                           const float space_usage,
74
                           const unsigned num_functions) {
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
  Release();

  // Determine the minimum amount of slots the table requires,
  // and whether the space_usage is within range.
  float minimum_space_usage;
  if (num_functions < 2 || num_functions > 5) {
    char message[256] = "Number of hash functions must be from 2 to 5; "
                        "others are unimplemented.";
    PrintMessage(message, true);
    return false;
  } else {
    minimum_space_usage = kMinimumSpaceUsages[num_functions];
  }

  if (space_usage < minimum_space_usage) {
    sprintf(buffer, "Minimum possible space usage for %u functions is %f.",
            num_functions, minimum_space_usage);
    PrintMessage(buffer);
    return false;
  }

  num_hash_functions_ = num_functions;
  table_size_ = unsigned(ceil(max_table_entries * space_usage));

  // Allocate memory.
  const unsigned slots_to_allocate = table_size_ + kStashSize;
  CUDA_SAFE_CALL(
      cudaMalloc((void **)&d_contents_, sizeof(Entry) * slots_to_allocate));
  CUDA_SAFE_CALL(cudaMalloc((void **)&d_failures_, sizeof(unsigned)));
  if (!d_contents_ || !d_failures_) {
    fprintf(stderr, "Failed to allocate %u slots.\n", slots_to_allocate);
    return false;
  }
  CUDA_CHECK_ERROR("Failed to initialize.\n");

  return true;
111
112
113
}

void HashTable::Release() {
114
  table_size_ = 0;
115

116
117
  CUDA_SAFE_CALL(cudaFree(d_contents_));
  CUDA_SAFE_CALL(cudaFree(d_failures_));
118

119
120
  d_contents_ = NULL;
  d_failures_ = NULL;
121

122
  CUDA_CHECK_ERROR("Failed during release.\n");
123
124
}

125
bool HashTable::Build(const unsigned n, const unsigned *d_keys,
126
                      const unsigned *d_values) {
127
128
129
130
  unsigned max_iterations =
      ComputeMaxIterations(n, table_size_, num_hash_functions_);
  unsigned num_failures = 1;
  unsigned num_attempts = 0;
131

132
133
  // Storage for statistics collection.
  unsigned *d_iterations_taken = NULL;
134
#ifdef TRACK_ITERATIONS
135
136
  CUDA_SAFE_CALL(
      cudaMalloc((void **)&d_iterations_taken, sizeof(unsigned) * n));
137
138
#endif

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
  // Track how many items ended up in the stash.
  unsigned *d_stash_count = NULL;
  CUDA_SAFE_CALL(cudaMalloc((void **)&d_stash_count, sizeof(unsigned)));
  CUDA_CHECK_ERROR("Failed before main build loop.\n");

  // Main build loop.
  while (num_failures && ++num_attempts < kMaxRestartAttempts) {
    CUDA_SAFE_CALL(cudaMemset(d_stash_count, 0, sizeof(unsigned)));

    // Generate new hash functions.
    if (num_hash_functions_ == 2)
      constants_2_.Generate(n, d_keys, table_size_);
    else if (num_hash_functions_ == 3)
      constants_3_.Generate(n, d_keys, table_size_);
    else if (num_hash_functions_ == 4)
      constants_4_.Generate(n, d_keys, table_size_);
    else
      constants_5_.Generate(n, d_keys, table_size_);

    stash_constants_.x = std::max(1u, generate_random_uint32()) % kPrimeDivisor;
    stash_constants_.y = generate_random_uint32() % kPrimeDivisor;
    stash_count_ = 0;

    // Initialize memory.
    unsigned slots_in_table = table_size_ + kStashSize;
    CUDAWrapper::ClearTable(slots_in_table, kEntryEmpty, d_contents_);

    num_failures = 0;

    CUDAWrapper::CallCuckooHash(
        n, num_hash_functions_, d_keys, d_values, table_size_, constants_2_,
        constants_3_, constants_4_, constants_5_, max_iterations, d_contents_,
        stash_constants_, d_stash_count, d_failures_, d_iterations_taken);

    // Check if successful.
    CUDA_SAFE_CALL(cudaMemcpy(&num_failures, d_failures_, sizeof(unsigned),
                              cudaMemcpyDeviceToHost));
176
177

#ifdef COUNT_UNINSERTED
178
179
    if (num_failures) {
      printf("Failed to insert %u items.\n", num_failures);
180
    }
181
182
#endif
  }
183

184
185
186
187
188
189
  // Copy out the stash size.
  CUDA_SAFE_CALL(cudaMemcpy(&stash_count_, d_stash_count, sizeof(unsigned),
                            cudaMemcpyDeviceToHost));
  if (stash_count_ && num_failures == 0) {
    // sprintf(buffer, "Stash size: %u", stash_count_);
    // PrintMessage(buffer, true);
190
191

#ifdef _DEBUG
192
193
194
195
    PrintStashContents(d_contents_ + table_size_);
#endif
  }
  CUDA_SAFE_CALL(cudaFree(d_stash_count));
196
197

#ifdef TRACK_ITERATIONS
198
199
200
201
  if (num_failures == 0) {
    OutputBuildStatistics(n, d_iterations_taken);
  }
  CUDA_SAFE_CALL(cudaFree(d_iterations_taken));
202
203
#endif

204
205
206
207
208
209
210
211
212
213
214
215
  // Dump some info if a restart was required.
  if (num_attempts >= kMaxRestartAttempts) {
    sprintf(buffer, "Completely failed to build");
    PrintMessage(buffer, true);
  } else if (num_attempts > 1) {
    sprintf(buffer, "Needed %u attempts to build, you can ignore this message.",
            num_attempts);
    PrintMessage(buffer, true);
  }

  CUDA_CHECK_ERROR("Error occurred during hash table build.\n");
  return num_failures == 0;
216
217
}

218
void HashTable::Retrieve(const unsigned n_queries, const unsigned *d_keys,
219
                         unsigned *d_values) {
220
221
222
223
  CUDAWrapper::CallHashRetrieve(n_queries, num_hash_functions_, d_keys,
                                table_size_, d_contents_, constants_2_,
                                constants_3_, constants_4_, constants_5_,
                                stash_constants_, stash_count_, d_values);
224
225
}

226
}; // namespace cuhash
227
228
229
230
231
232

// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: