hash_table.h 8.7 KB
Newer Older
1
2
3
4
5
// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision:$
// $Date:$
6
// -------------------------------------------------------------
7
8
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
9
// -------------------------------------------------------------
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

/**
 * @file hash_table.h
 *
 * @brief Header for a basic hash table that stores one value per key.
 */

#ifndef CUDAHT__CUCKOO__SRC__LIBRARY__HASH_TABLE__H
#define CUDAHT__CUCKOO__SRC__LIBRARY__HASH_TABLE__H

#include "definitions.h"
#include "hash_functions.h"

#include <cstdio>

25
26
27
/** \addtogroup cudpp_app
 * @{
 */
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

/** \addtogroup cudpp_hash_data_structures
 * @{
 */

/* --------------------------------------------------------------------------
   Doxygen definitions.
   -------------------------------------------------------------------------- */
/*! @namespace CudaHT
 *  @brief Encapsulates the hash table library.
 */

/*! @namespace CuckooHashing
 *  @brief Encapsulates the cuckoo hash table that uses stashes.
 */

/* -------------------------------------------------------------------------
   Hash table code.
   ------------------------------------------------------------------------- */
traveller59's avatar
traveller59 committed
47
namespace cuhash {
48
49
50
51

//! Compute how many thread blocks are required for the given number of threads.
dim3 ComputeGridDim(unsigned threads);

52
53
//! Compute how long an eviction chain is allowed to become for a given input
//! size.
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*! \param[in] num_keys       Number of keys in the input.
 *  \param[in] table_size     Number of slots in the hash table.
 *  \param[in] num_functions  Number of hash functions being used.
 *  \returns The number of iterations that should be allowed.
 *
 *  The latter two parameters are only needed when using an empirical
 *  formula for computing the chain length.
 */
unsigned ComputeMaxIterations(const unsigned num_keys,
                              const unsigned table_size,
                              const unsigned num_functions);

//! Basic hash table that stores one value for each key.
/*! The input consists of two unsigned arrays of keys and values.
 *  None of the keys are expected to be repeated.
 *
 *  @todo Templatize the interface without forcing the header file to
 *  have CUDA calls.
 *  @ingroup cudpp_app
 */
class HashTable {
75
public:
76
77
  HashTable();

78
  virtual ~HashTable() { Release(); }
79
80
81
82
83
84
85
86
87
88
89

  //! Initialize the hash table's memory. Must be called before \ref
  //! Build() and after the random number generator has been seeded.
  /*! @param[in] max_input_size   Largest expected number of items in the input.
   *  @param[in] space_usage Size of the hash table relative to the
   *                         input. Bigger tables are faster to build
   *                         and retrieve from.
   *  @param[in] num_functions Number of hash functions to use. May be
   *                           2-5. More hash functions make it easier
   *                           to build the table, but increase
   *                           retrieval times.
90
   *  @returns Whether the hash table was initialized successfully (true)
91
92
93
94
95
96
97
   *           or not (false).
   *
   *  The minimum space usage is dependent on the number of functions
   *  being used; for two through five functions, the minimum space
   *  usage is 2.1, 1.1, 1.03, and 1.02 respectively.
   */
  virtual bool Initialize(const unsigned max_input_size,
98
99
                          const float space_usage = 1.25,
                          const unsigned num_functions = 4);
100
101
102
103
104
105

  //! Free all memory.
  virtual void Release();

  //! Build the hash table.
  /*! @param[in] input_size   Number of key-value pairs being inserted.
106
   *  @param[in] d_keys       Device memory array containing all of the input
107
108
   *                          keys.
   *  @param[in] d_vals       Device memory array containing the keys' values.
109
   *  @returns Whether the hash table was built successfully (true) or
110
111
112
113
   *           not (false).
   *
   *  Several attempts are allowed to build the hash table in case of failure.
   *  The input keys are expected to be completely unique.
114
   *  To reduce the chance of a failure, increase the space usage or number of
115
   *  functions.
traveller59's avatar
traveller59 committed
116
   *  Keys are not allowed to be equal to cuhash::kKeyEmpty.
117
   */
118
  virtual bool Build(const unsigned input_size, const unsigned *d_keys,
119
120
121
122
123
124
125
126
127
128
129
                     const unsigned *d_vals);

  //! Query the hash table.
  /*! @param[in] n_queries        Number of keys in the query set.
   *  @param[in] d_query_keys     Device memory array containing all of
   *                              the query keys.
   *  @param[in] d_query_results  Values for the query keys.
   *
   *  kNotFound is returned for any query key that failed to be found
   *  in the table.
   */
130
131
  virtual void Retrieve(const unsigned n_queries, const unsigned *d_query_keys,
                        unsigned *d_query_results);
132
133
134
135
136
137
138

  //! @name Accessors
  /// @brief Mainly needed to use the __device__ CudaHT::retrieve()
  /// function directly.
  /// @{

  //! Returns how many slots the hash table has.
139
  inline unsigned get_table_size() const { return table_size_; }
140
141

  //! Returns how many items are stored in the stash.
142
  inline unsigned get_stash_count() const { return stash_count_; }
143
144

  //! Returns the constants used by the stash.
145
  inline uint2 get_stash_constants() const { return stash_constants_; }
146
147

  //! Returns the hash table contents.
148
  inline const Entry *get_contents() const { return d_contents_; }
149
150

  //! Returns the number of hash functions being used.
151
  inline unsigned get_num_hash_functions() const { return num_hash_functions_; }
152
153

  //! When using two hash functions, returns the constants.
154
  inline Functions<2> get_constants_2() const { return constants_2_; }
155
156

  //! When using three hash functions, returns the constants.
157
  inline Functions<3> get_constants_3() const { return constants_3_; }
158
159

  //! When using four hash functions, returns the constants.
160
  inline Functions<4> get_constants_4() const { return constants_4_; }
161
162

  //! When using five hash functions, returns the constants.
163
  inline Functions<5> get_constants_5() const { return constants_5_; }
164
165

  /// @}
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
  inline Entry *data() { return d_contents_; }
  inline const Entry *data() const { return d_contents_; }

protected:
  unsigned table_size_;         //!< Size of the hash table.
  unsigned num_hash_functions_; //!< Number of hash functions being used.
  Entry *d_contents_; //!< Device memory: The hash table contents.  The stash is
                      //!< stored at the end.
  unsigned stash_count_;  //!< Number of key-value pairs currently stored.
  uint2 stash_constants_; //!< Hash function constants for the stash.

  Functions<2> constants_2_; //!< Constants for a set of two hash functions.
  Functions<3> constants_3_; //!< Constants for a set of three hash functions.
  Functions<4> constants_4_; //!< Constants for a set of four hash functions.
  Functions<5> constants_5_; //!< Constants for a set of five hash functions.

  unsigned *d_failures_; //!< Device memory: General use error flag.
183
184
185
186
187
188
189
};

/*! @name Internal
 *  @{
 */
namespace CUDAWrapper {
//! Fills a 64-bit array with a particular value.
190
191
void ClearTable(const unsigned slots_in_table, const Entry fill_value,
                Entry *d_array);
192
193

//! Calls the Cuckoo Hash construction kernel.
194
195
196
197
198
199
200
201
202
void CallCuckooHash(const unsigned n_entries, const unsigned num_hash_functions,
                    const unsigned *d_keys, const unsigned *d_values,
                    const unsigned table_size, const Functions<2> constants_2,
                    const Functions<3> constants_3,
                    const Functions<4> constants_4,
                    const Functions<5> constants_5,
                    const unsigned max_iteration_attempts, Entry *d_contents,
                    uint2 stash_constants, unsigned *d_stash_count,
                    unsigned *d_failures, unsigned *d_iterations_taken);
203
204

//! Calls the kernel that performs retrievals.
205
206
207
208
209
210
211
212
213
214
void CallHashRetrieve(const unsigned n_queries,
                      const unsigned num_hash_functions,
                      const unsigned *keys_in, const unsigned table_size,
                      const Entry *table, const Functions<2> constants_2,
                      const Functions<3> constants_3,
                      const Functions<4> constants_4,
                      const Functions<5> constants_5,
                      const uint2 stash_constants, const unsigned stash_count,
                      unsigned *values_out);
}; // namespace CUDAWrapper
215
216
/// @}

217
}; // namespace cuhash
218
219
220
221
222
223
224
225
226
227
228

/** @} */ // end hash table data structures
/** @} */ // end cudpp_app

#endif

// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: