debugging.cu 7.77 KB
Newer Older
1
2
3
4
5
// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision:$
// $Date:$
6
// -------------------------------------------------------------
7
8
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
9
// -------------------------------------------------------------
10
11
12
13
14
15
16
17

/**
 * @file
 * debugging.cu
 *
 * @brief Debugging/statistics/performance utilities for hash tables.
 */

traveller59's avatar
traveller59 committed
18
19
20
#include <cuhash/debugging.h>
#include <cuhash/definitions.h>
#include <cuhash/hash_table.cuh>
21
22

#include <algorithm>
traveller59's avatar
traveller59 committed
23
#include <cuhash/cuda_util.h>
24

traveller59's avatar
traveller59 committed
25
namespace cuhash {
26
27
28
29
30
31
32

//! Debugging function: Takes statistics on the hash functions' distribution.
/*! Determines:
 *    - How many unique slots each key has.
 *    - How many keys hash into each slot.
 *    - Whether any keys failed to get a full set of slots.
 */
33
34
35
36
37
__global__ void take_hash_function_statistics_kernel(
    const unsigned *keys, const unsigned n_entries, const unsigned table_size,
    const uint2 *constants, const unsigned num_functions,
    unsigned *num_slots_available, unsigned *num_hashing_in, unsigned *failed) {
  unsigned thread_index = threadIdx.x + blockIdx.x * blockDim.x +
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
                          blockIdx.y * blockDim.x * gridDim.x;

  if (thread_index >= n_entries)
    return;
  unsigned key = keys[thread_index];

  // Determine all of the locations the key hashes into.
  // Also count how many keys hash into each location.
  unsigned locations[kMaxHashFunctions];
  for (unsigned i = 0; i < num_functions; ++i) {
    locations[i] = hash_function_inner(constants[i], key) % table_size;

    if (num_hashing_in != NULL) {
      atomicAdd(num_hashing_in + locations[i], 1);
    }
  }

  // Determine whether all of the locations were different.
  unsigned num_slots = 1;
  for (unsigned i = 1; i < num_functions; ++i) {
    bool matched = false;
    for (unsigned j = 0; j < i; ++j) {
      if (locations[i] == locations[j]) {
        matched = true;
        break;
      }
    }
    if (!matched) {
      num_slots++;
    }
  }

  if (num_slots_available != NULL) {
    num_slots_available[thread_index] = num_slots;
  }

  if (failed != NULL && num_slots != num_functions) {
    *failed = 1;
  }
}

79
80
81
82
void TakeHashFunctionStatistics(const unsigned num_keys, const unsigned *d_keys,
                                const unsigned table_size,
                                const uint2 *constants,
                                const unsigned kNumHashFunctions) {
83
84
85
86
87
88
89
90
91
  char buffer[16000];
  PrintMessage("Hash function constants: ");

  for (unsigned i = 0; i < kNumHashFunctions; ++i) {
    sprintf(buffer, "\t%10u, %10u", constants[i].x, constants[i].y);
    PrintMessage(buffer);
  }

  unsigned *d_num_hashing_in = NULL;
92
93
94
95
96
97
#ifdef COUNT_HOW_MANY_HASH_INTO_EACH_SLOT
  CUDA_SAFE_CALL(
      cudaMalloc((void **)&d_num_hashing_in, sizeof(unsigned) * table_size));
  CUDA_SAFE_CALL(
      cudaMemset(d_num_hashing_in, 0, sizeof(unsigned) * table_size));
#endif
98
99

  unsigned *d_num_slots_available = NULL;
100
101
102
103
#ifdef COUNT_HOW_MANY_HAVE_CYCLES
  CUDA_SAFE_CALL(
      cudaMalloc((void **)&d_num_slots_available, sizeof(unsigned) * num_keys));
#endif
104
  uint2 *d_constants = NULL;
105
106
107
108
109
110
111
112
113
114
  CUDA_SAFE_CALL(
      cudaMalloc((void **)&d_constants, sizeof(uint2) * kNumHashFunctions));
  CUDA_SAFE_CALL(cudaMemcpy(d_constants, constants,
                            sizeof(uint2) * kNumHashFunctions,
                            cudaMemcpyHostToDevice));

  take_hash_function_statistics_kernel<<<ComputeGridDim(num_keys),
                                         kBlockSize>>>(
      d_keys, num_keys, table_size, d_constants, kNumHashFunctions,
      d_num_slots_available, d_num_hashing_in, NULL);
115
116
  CUDA_SAFE_CALL(cudaFree(d_constants));

117
#ifdef COUNT_HOW_MANY_HASH_INTO_EACH_SLOT
118
  unsigned *num_hashing_in = new unsigned[table_size];
119
  CUDA_SAFE_CALL(cudaMemcpy(num_hashing_in, d_num_hashing_in,
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
                            sizeof(unsigned) * table_size,
                            cudaMemcpyDeviceToHost));

  /*
  // Print how many items hash into each slot.
  // Used to make sure items are spread evenly throughout the table.
  buffer[0] = '\0';
  PrintMessage("Num hashing into each: ", true);
  for (unsigned i = 0; i < table_size; ++i) {
    sprintf(buffer, "%s\t%2u", buffer, num_hashing_in[i]);
    if (i % 25 == 24) {
      PrintMessage(buffer, true);
      buffer[0] = '\0';
    }
  }
  PrintMessage(buffer,true);
  */

  // Print a histogram of how many items are hashed into each slot.  Shows
  // if average number of items hashing into each slot is low.
  std::sort(num_hashing_in, num_hashing_in + table_size);
  int count = 1;
  unsigned previous = num_hashing_in[0];
  sprintf(buffer, "Num items hashing into a slot:\t");
  PrintMessage(buffer);
  for (unsigned i = 1; i < table_size; ++i) {
    if (num_hashing_in[i] != previous) {
      sprintf(buffer, "\t(%u, %u)", previous, count);
      PrintMessage(buffer);
      previous = num_hashing_in[i];
      count = 1;
    } else {
      count++;
    }
  }
  sprintf(buffer, "\t(%u, %u)", previous, count);
  PrintMessage(buffer);

158
  delete[] num_hashing_in;
159
  CUDA_SAFE_CALL(cudaFree(d_num_hashing_in));
160
#endif
161

162
#ifdef COUNT_HOW_MANY_HAVE_CYCLES
163
  unsigned *num_slots_available = new unsigned[num_keys];
164
  CUDA_SAFE_CALL(cudaMemcpy(num_slots_available, d_num_slots_available,
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
                            sizeof(unsigned) * num_keys,
                            cudaMemcpyDeviceToHost));

  static const unsigned kHistogramSize = kNumHashFunctions + 1;
  unsigned *histogram = new unsigned[kHistogramSize];
  memset(histogram, 0, sizeof(unsigned) * kHistogramSize);
  for (unsigned i = 0; i < num_keys; ++i) {
    histogram[num_slots_available[i]]++;
  }

  sprintf(buffer, "Slots assigned to each key: ");
  for (unsigned i = 1; i < kHistogramSize; ++i) {
    sprintf(buffer, "%s(%u, %u) ", buffer, i, histogram[i]);
  }
  PrintMessage(buffer);

181
182
  delete[] histogram;
  delete[] num_slots_available;
183
  CUDA_SAFE_CALL(cudaFree(d_num_slots_available));
184
#endif
185
186
}

187
188
189
bool CheckAssignedSameSlot(const unsigned N, const unsigned num_keys,
                           const unsigned *d_keys, const unsigned table_size,
                           uint2 *constants) {
190
  unsigned *d_cycle_exists = NULL;
191
  uint2 *d_constants = NULL;
192

193
194
  CUDA_SAFE_CALL(cudaMalloc((void **)&d_cycle_exists, sizeof(unsigned)));
  CUDA_SAFE_CALL(cudaMalloc((void **)&d_constants, sizeof(uint2) * N));
195
196

  CUDA_SAFE_CALL(cudaMemset(d_cycle_exists, 0, sizeof(unsigned)));
197
  CUDA_SAFE_CALL(cudaMemcpy(d_constants, constants, sizeof(uint2) * N,
198
199
200
                            cudaMemcpyHostToDevice));

  // Check if all keys were given a full set of N slots by the functions.
201
202
203
  take_hash_function_statistics_kernel<<<ComputeGridDim(num_keys),
                                         kBlockSize>>>(
      d_keys, num_keys, table_size, d_constants, N, NULL, NULL, d_cycle_exists);
204
205

  unsigned cycle_exists;
206
  CUDA_SAFE_CALL(cudaMemcpy(&cycle_exists, d_cycle_exists, sizeof(unsigned),
207
208
209
210
211
212
213
214
215
                            cudaMemcpyDeviceToHost));

  CUDA_SAFE_CALL(cudaFree(d_cycle_exists));
  CUDA_SAFE_CALL(cudaFree(d_constants));

  return (cycle_exists != 0);
}

void PrintStashContents(const Entry *d_stash) {
traveller59's avatar
traveller59 committed
216
  Entry *stash = new Entry[cuhash::kStashSize];
217
218
  CUDA_SAFE_CALL(cudaMemcpy(stash, d_stash, sizeof(Entry) * cuhash::kStashSize,
                            cudaMemcpyDeviceToHost));
traveller59's avatar
traveller59 committed
219
  for (unsigned i = 0; i < cuhash::kStashSize; ++i) {
220
221
    if (get_key(stash[i]) != kKeyEmpty) {
      char buffer[256];
222
223
      sprintf(buffer, "Stash[%u]: %u = %u", i, get_key(stash[i]),
              get_value(stash[i]));
224
225
226
      PrintMessage(buffer, true);
    }
  }
227
  delete[] stash;
228
229
}

230
}; // namespace cuhash
231
232
233
234
235
236

// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: