Commit ceb47f1d authored by limm's avatar limm
Browse files

add third_party

parent 1cb25232
#include <iostream>
#include <string>
#include <parallel_hashmap/phmap.h>
using phmap::flat_hash_map;
// Minimal phmap::flat_hash_map walkthrough: build a map from an initializer
// list, print every entry, then add one more entry and print it.
int main()
{
    // Start with three name -> e-mail pairs.
    flat_hash_map<std::string, std::string> email =
    {
        { "tom", "tom@gmail.com"},
        { "jeff", "jk@gmail.com"},
        { "jim", "jimg@microsoft.com"}
    };

    // Walk the table with explicit iterators and print each pair.
    for (auto it = email.begin(); it != email.end(); ++it)
        std::cout << it->first << "'s email is: " << it->second << "\n";

    // operator[] creates the entry on first use ...
    email["bill"] = "bg@whatever.com";

    // ... and returns the stored value on the next lookup.
    const std::string &bills_address = email["bill"];
    std::cout << "bill's email is: " << bills_address << "\n";
    return 0;
}
#include <inttypes.h>
#ifdef STL_UNORDERED
#include <unordered_map>
#define MAPNAME std::unordered_map
#define EXTRAARGS
#elif defined(PHMAP_FLAT)
#include "parallel_hashmap/phmap.h"
#define MAPNAME phmap::flat_hash_map
#define NMSP phmap
#define EXTRAARGS
#else
#include "parallel_hashmap/phmap.h"
#if 1
#include <mutex>
#define MTX std::mutex
#elif 0
// Abseil's mutexes are very efficient (at least on windows)
#include "absl/synchronization/mutex.h"
#define MTX phmap::AbslMutex
#elif 1
#include <boost/thread/locks.hpp>
#if 1
#include <boost/thread/mutex.hpp>
#define MTX boost::mutex // faster if all we do is exclusive locks like this bench
#else
#include <boost/thread/shared_mutex.hpp>
#define MTX boost::upgrade_mutex
#endif
#elif 1
#include <windows.h>
// Thin wrapper giving a Windows SRWLOCK the lock()/unlock() member functions
// so it can be plugged in as the MTX mutex type of this benchmark.
// Only exclusive acquisition is exposed.
class srwlock {
SRWLOCK _lock;
public:
// Initializes the underlying SRWLOCK.
srwlock() { InitializeSRWLock(&_lock); }
// Acquires the lock in exclusive mode.
void lock() { AcquireSRWLockExclusive(&_lock); }
// Releases an exclusive acquisition made by lock().
void unlock() { ReleaseSRWLockExclusive(&_lock); }
};
#define MTX srwlock
#else
// spinlocks - slow!
#include <atomic>
// Busy-waiting lock built on std::atomic_flag, exposing lock()/unlock() so it
// can be used as the MTX mutex type.
class spinlock {
    std::atomic_flag busy_ = ATOMIC_FLAG_INIT;
public:
    // Spins until test_and_set() reports the flag was previously clear.
    void lock()
    {
        for (;;) {
            if (!busy_.test_and_set(std::memory_order_acquire))
                return;
        }
    }

    // Clears the flag so another thread's lock() can succeed.
    void unlock()
    {
        busy_.clear(std::memory_order_release);
    }
};
#define MTX spinlock
#endif
#define MAPNAME phmap::parallel_flat_hash_map
#define NMSP phmap
#define MT_SUPPORT 1
#if MT_SUPPORT == 1
// create the parallel_flat_hash_map without internal mutexes, for when
// we programatically ensure that each thread uses different internal submaps
// --------------------------------------------------------------------------
#define EXTRAARGS , NMSP::priv::hash_default_hash<K>, \
NMSP::priv::hash_default_eq<K>, \
std::allocator<std::pair<const K, V>>, 4, NMSP::NullMutex
#elif MT_SUPPORT == 2
// create the parallel_flat_hash_map with internal mutexes, for when
// we read/write the same parallel_flat_hash_map from multiple threads,
// without any special precautions.
// --------------------------------------------------------------------------
#define EXTRAARGS , NMSP::priv::hash_default_hash<K>, \
NMSP::priv::hash_default_eq<K>, \
std::allocator<std::pair<const K, V>>, 4, MTX
#else
#define EXTRAARGS
#endif
#endif
#define phmap_xstr(s) phmap_str(s)
#define phmap_str(s) #s
// Map type under test: MAPNAME<K, V> followed by whatever additional template
// arguments the EXTRAARGS macro expands to (possibly none).
template <class K, class V>
using HashT = MAPNAME<K, V EXTRAARGS>;
// 64-bit integer -> 64-bit integer table used by the integer benchmarks.
using hash_t = HashT<int64_t, int64_t>;
// C-string pointer -> 64-bit integer table used by the *string benchmarks.
using str_hash_t = HashT<const char *, int64_t>;
// Label printed on every timing line (see out()): the stringified MAPNAME.
const char *program_slug = phmap_xstr(MAPNAME); // "_4";
#include <atomic>
#include <cassert>
#include <ctime>
#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <thread>
#include <chrono>
#include <ostream>
#include "parallel_hashmap/meminfo.h"
#include <vector>
using std::vector;
// Returns the magnitude of x: negative inputs are negated, everything else is
// returned unchanged.
int64_t _abs(int64_t x)
{
    if (x < 0)
        return -x;
    return x;
}
#ifdef _MSC_VER
#pragma warning(disable : 4996)
#endif // _MSC_VER
// --------------------------------------------------------------------------
// Small stopwatch on top of std::chrono::high_resolution_clock.
// reset() records the current time; elapsed() reports the whole milliseconds
// that have passed since the recorded time.
class Timer
{
    using clock_type = std::chrono::high_resolution_clock;
    using millis     = std::chrono::milliseconds;

public:
    // Starts timing immediately only when `run` is true.
    explicit Timer(bool run = false)
    {
        if (run)
            reset();
    }

    // Records "now" as the new starting point.
    void reset()
    {
        _start = clock_type::now();
    }

    // Time since the recorded starting point, truncated to milliseconds.
    millis elapsed() const
    {
        const auto delta = clock_type::now() - _start;
        return std::chrono::duration_cast<millis>(delta);
    }

private:
    clock_type::time_point _start;
};
// --------------------------------------------------------------------------
// from: https://github.com/preshing/RandomSequence
// --------------------------------------------------------------------------
// Generator of a sequence of unsigned 32-bit values derived from two seeds
// (adapted from https://github.com/preshing/RandomSequence).
// Each next() call runs an incrementing index through permuteQPR twice, with
// an offset addition and an XOR in between.
class RSU
{
    unsigned int m_index;
    unsigned int m_intermediateOffset;

    // Quadratic-residue style mapping modulo the prime 4294967291.
    static unsigned int permuteQPR(unsigned int x)
    {
        static const unsigned int prime = 4294967291u;
        if (x >= prime)
            return x; // The 5 integers out of range are mapped to themselves.

        // Square in 64 bits so the product cannot overflow before the modulo.
        const unsigned long long wide = x;
        const unsigned int residue = static_cast<unsigned int>((wide * wide) % prime);
        if (x <= prime / 2)
            return residue;
        return prime - residue;
    }

public:
    // Derives the starting index and the offset from the two seeds.
    RSU(unsigned int seedBase, unsigned int seedOffset)
        : m_index(permuteQPR(permuteQPR(seedBase) + 0x682f0161))
        , m_intermediateOffset(permuteQPR(permuteQPR(seedOffset) + 0x46790905))
    {
    }

    // Returns the next value of the sequence and advances the index.
    unsigned int next()
    {
        const unsigned int current = m_index++;
        const unsigned int mixed = (permuteQPR(current) + m_intermediateOffset) ^ 0x5bf03635;
        return permuteQPR(mixed);
    }
};
// --------------------------------------------------------------------------
// Allocates (with malloc) a NUL-terminated decimal rendering of `num` and
// returns it; the caller owns the buffer and releases it with free().
// Returns nullptr when the allocation fails.
//
// The full 64-bit value is printed. The digit count is computed with integer
// arithmetic so the buffer size never depends on floating-point rounding.
char * new_string_from_integer(uint64_t num)
{
    // Count decimal digits: at least one, plus one per remaining power of ten.
    int ndigits = 1;
    for (uint64_t rest = num; rest >= 10; rest /= 10)
        ++ndigits;

    const size_t capacity = (size_t)ndigits + 1; // digits + terminating NUL
    char * str = (char *)malloc(capacity);
    if (str == nullptr)
        return nullptr;

    // snprintf bounds the write to the buffer that was just sized.
    snprintf(str, capacity, "%llu", (unsigned long long)num);
    return str;
}
// --------------------------------------------------------------------------
// Overwrites every element of `v`: element i receives i * 10 plus a
// pseudo-random digit in [0, 9]. The C generator is re-seeded with 1 first so
// every run produces the same contents.
template <class T>
void _fill(vector<T> &v)
{
    srand(1); // for a fair/deterministic comparison
    size_t position = 0;
    for (T &slot : v)
    {
        slot = (T)(position * 10 + rand() % 10);
        ++position;
    }
}
// --------------------------------------------------------------------------
// Randomly permutes `v` in place: working from the back, each position is
// swapped with a rand()-chosen position among those not yet fixed.
// Draws from rand() without re-seeding it.
template <class T>
void _shuffle(vector<T> &v)
{
    size_t remaining = v.size();
    while (remaining >= 2)
    {
        const size_t pick = static_cast<unsigned>(rand()) % remaining;
        std::swap(v[remaining - 1], v[pick]);
        --remaining;
    }
}
// --------------------------------------------------------------------------
// Fills `v` with _fill(), shuffles it with _shuffle(), then inserts every
// element of `v` into `hash` as a key with mapped value 0.
// Returns the Timer that was started just before the insertion loop.
template <class T, class HT>
Timer _fill_random(vector<T> &v, HT &hash)
{
    _fill<T>(v);
    _shuffle<T>(v);

    Timer timer(true);
    for (const T &key : v)
        hash.insert(typename HT::value_type(key, 0));
    return timer;
}
// --------------------------------------------------------------------------
// Prints one CSV timing line to stdout:
//   <test>,time,<cnt>,<program_slug>,<elapsed seconds>
// The count is printed as an unsigned int; the unnamed bool is ignored.
void out(const char* test, int64_t cnt, const Timer &t, bool = false)
{
    const double seconds = (double)t.elapsed().count() / 1000;
    printf("%s,time,%u,%s,%f\n", test, (unsigned int)cnt, program_slug, (float)seconds);
}
// --------------------------------------------------------------------------
// Tracks the largest memory figure and the largest key count seen so far.
// With final == false the running maxima are updated from `mem` and `cnt`;
// with final == true the recorded peaks are printed and nothing is updated.
// The first (name) argument is unused.
void outmem(const char*, int64_t cnt, uint64_t mem, bool final = false)
{
    static uint64_t max_mem = 0;
    static uint64_t max_keys = 0;

    if (!final)
    {
        // Sampling call: remember new maxima only.
        if (mem > max_mem)
            max_mem = mem;
        if ((uint64_t)cnt > max_keys)
            max_keys = cnt;
        return;
    }

    // Reporting call: memory is shown in decimal gigabytes.
    const double gigabytes = max_mem / ((double)1000 * 1000 * 1000);
    printf("peak memory usage for %u values: %.2f GB\n", (unsigned int)max_keys, gigabytes);
}
// State shared between the benchmark code and the memlog() sampling thread.

// Set by main() when the benchmark is finished; polled by memlog() as its loop
// condition. Atomic because it is written and read from different threads.
static std::atomic<bool> all_done(false);
// Per-thread counters of inserted keys (index 0 is used by the
// single-threaded path); summed by total_num_keys().
// NOTE(review): these counters and loop_idx are also read by memlog() while
// other threads update them, without synchronization - confirm that
// approximate values are acceptable for progress reporting.
static int64_t s_num_keys[16] = { 0 };
// Index of the insertion round currently running in _fill_random2().
static int64_t loop_idx = 0;
// Number of keys inserted per round in _fill_random2().
static int64_t inner_cnt = 0;
// Name of the running benchmark, passed to out() and outmem().
static const char *test = "random";
// --------------------------------------------------------------------------
// Single-threaded insertion round: takes `cnt` values from `rsu`, inserts each
// as a key with mapped value 0 into `hash`, and counts every insertion attempt
// in s_num_keys[0].
template <class HT>
void _fill_random_inner(int64_t cnt, HT &hash, RSU &rsu)
{
    int64_t remaining = cnt;
    while (remaining-- > 0)
    {
        hash.insert(typename HT::value_type(rsu.next(), 0));
        ++s_num_keys[0];
    }
}
// --------------------------------------------------------------------------
// Multi-threaded insertion round: 8 worker threads insert keys drawn from the
// `rsu` sequence into `hash`.
//
// Every worker receives its own copy of the generator and walks the same
// sequence of `cnt` keys, inserting only the keys selected for it:
//   - MT_SUPPORT == 1: keys whose submap index divided by `modulo` equals the
//     thread index, so each thread writes to its own group of submaps;
//   - MT_SUPPORT == 2: keys whose position in the sequence, modulo the thread
//     count, equals the thread index.
// When MT_SUPPORT is 0 or undefined the worker body is empty.
// Each worker counts its insertions in s_num_keys[thread_idx].
// On return, all workers have been joined and `rsu` has been advanced by `cnt`.
template <class HT>
void _fill_random_inner_mt(int64_t cnt, HT &hash, RSU &rsu)
{
constexpr int64_t num_threads = 8; // has to be a power of two
std::unique_ptr<std::thread> threads[num_threads];
// Worker body; `rsu_` is a by-value copy, so each thread replays the same keys.
auto thread_fn = [&hash, cnt, num_threads](size_t thread_idx, RSU rsu_) {
#if MT_SUPPORT
// Number of submaps assigned to each thread.
// NOTE(review): this is 0 if subcnt() is smaller than num_threads, which would
// make the division below divide by zero - confirm the configured submap count.
size_t modulo = hash.subcnt() / num_threads; // subcnt() returns the number of submaps
for (int64_t i=0; i<cnt; ++i) // iterate over all values
{
unsigned int key = rsu_.next(); // get next key to insert
#if MT_SUPPORT == 1
size_t hashval = hash.hash(key); // compute its hash
size_t idx = hash.subidx(hashval); // compute the submap index for this hash
if (idx / modulo == thread_idx) // if the submap is suitable for this thread
#elif MT_SUPPORT == 2
if (i % num_threads == thread_idx)
#endif
{
hash.insert(typename HT::value_type(key, 0)); // insert the value
++(s_num_keys[thread_idx]); // increment count of inserted values
}
}
#endif
};
// create and start 8 threads - each will insert in their own submaps
// thread 0 will insert the keys whose hash direct them to submap0 or submap1
// thread 1 will insert the keys whose hash direct them to submap2 or submap3
// --------------------------------------------------------------------------
for (size_t i=0; i<num_threads; ++i)
threads[i].reset(new std::thread(thread_fn, i, rsu));
// rsu passed by value to threads... we need to increment the reference object
// so that the caller's next round continues with fresh keys
for (int64_t i=0; i<cnt; ++i)
rsu.next();
// wait for the threads to finish their work and exit
for (size_t i=0; i<num_threads; ++i)
threads[i]->join();
}
// --------------------------------------------------------------------------
// Returns the sum of all per-thread insertion counters in s_num_keys.
size_t total_num_keys()
{
    size_t total = 0;
    for (const auto &per_thread : s_num_keys)
        total += per_thread;
    return total;
}
// --------------------------------------------------------------------------
// "random" benchmark driver: inserts about `cnt` generated keys into `hash` in
// 10 rounds of cnt / 10 keys, printing a timing line after every round.
// Uses the multi-threaded round when MT_SUPPORT is non-zero, the
// single-threaded one otherwise. Afterwards it writes the final table size to
// stderr, records the current process memory with outmem(), and returns the
// Timer started before the first round.
template <class HT>
Timer _fill_random2(int64_t cnt, HT &hash)
{
    test = "random";

    const unsigned int seed = 76687;
    RSU rsu(seed, seed + 1);

    Timer timer(true);

    const int64_t num_loops = 10;
    inner_cnt = cnt / num_loops;

    // Start every per-thread counter from zero.
    for (auto &counter : s_num_keys)
        counter = 0;

    loop_idx = 0;
    while (loop_idx < num_loops)
    {
#if 1 && MT_SUPPORT
        // multithreaded insert
        _fill_random_inner_mt(inner_cnt, hash, rsu);
#else
        _fill_random_inner(inner_cnt, hash, rsu);
#endif
        out(test, total_num_keys(), timer);
        ++loop_idx;
    }

    fprintf(stderr, "inserted %.2lfM\n", (double)hash.size() / 1000000);
    outmem(test, total_num_keys(), spp::GetProcessMemoryUsed());
    return timer;
}
// --------------------------------------------------------------------------
// Lookup benchmark: populates `hash` from `v` via _fill_random(), then for
// every element performs one lookup of that element and one lookup of a
// rand()-derived key below v.size() * 10. `num_present` receives the number of
// lookups that found an entry. Returns the Timer started before the lookups.
template <class T, class HT>
Timer _lookup(vector<T> &v, HT &hash, size_t &num_present)
{
    _fill_random(v, hash);
    num_present = 0;
    const size_t max_val = v.size() * 10;

    Timer timer(true);
    for (const T &known : v)
    {
        const bool hit_known = hash.find(known) != hash.end();
        num_present += (size_t)hit_known;
        const bool hit_random = hash.find((T)(rand() % max_val)) != hash.end();
        num_present += (size_t)hit_random;
    }
    return timer;
}
// --------------------------------------------------------------------------
// Erase benchmark: populates `hash` from `v` via _fill_random(), reshuffles
// `v` so keys are not erased in insertion order, then erases every key.
// Returns the Timer started before the erase loop.
template <class T, class HT>
Timer _delete(vector<T> &v, HT &hash)
{
    _fill_random(v, hash);
    _shuffle(v); // don't delete in insertion order

    Timer timer(true);
    for (const T &key : v)
        hash.erase(key);
    return timer;
}
// --------------------------------------------------------------------------
// Body of the memory-sampling thread. Roughly once per millisecond it reads
// the process memory usage and forwards it to outmem() when either
//   - usage moved by more than 3% since the last recorded sample, or by more
//     than 1% since the previous sample, or
//   - a new insertion round (loop_idx) has started since the last record.
// Runs until all_done becomes true.
void memlog()
{
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    uint64_t nbytes_old_out = spp::GetProcessMemoryUsed();
    uint64_t nbytes_old = spp::GetProcessMemoryUsed(); // last non outputted mem measurement
    outmem(test, 0, nbytes_old);
    int64_t last_loop = 0;

    // Absolute difference of two unsigned readings, without wrap-around.
    auto abs_diff = [](uint64_t a, uint64_t b) -> uint64_t {
        return a > b ? a - b : b - a;
    };

    while (!all_done)
    {
        const uint64_t nbytes = spp::GetProcessMemoryUsed();
        if ((double)abs_diff(nbytes, nbytes_old_out) / nbytes_old_out > 0.03 ||
            (double)abs_diff(nbytes, nbytes_old) / nbytes_old > 0.01)
        {
            // On a sharp *increase*, also record the previous reading against
            // the previous key count. The explicit "grew" test keeps a shrink
            // from being seen as a huge increase through unsigned wrap-around,
            // and the zero check keeps "keys - 1" from wrapping before any key
            // has been inserted.
            const bool grew = nbytes > nbytes_old;
            if (grew && (double)(nbytes - nbytes_old) / nbytes_old > 0.03)
            {
                const size_t keys = total_num_keys();
                outmem(test, keys > 0 ? keys - 1 : 0, nbytes_old);
            }
            outmem(test, total_num_keys(), nbytes);
            nbytes_old_out = nbytes;
            last_loop = loop_idx;
        }
        else if (loop_idx > last_loop)
        {
            // A new insertion round started: record a sample regardless.
            outmem(test, total_num_keys(), nbytes);
            nbytes_old_out = nbytes;
            last_loop = loop_idx;
        }
        nbytes_old = nbytes;
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
}
// --------------------------------------------------------------------------
// Benchmark entry point.
// Usage: <program> <num_keys> <bench_name>
// Both arguments must be given to override the defaults (100000000 keys,
// "random"). Recognized benchmark names: sequential, random, lookup, delete,
// sequentialstring, randomstring, deletestring.
// A background thread (memlog) samples memory usage while the benchmark runs;
// the peak is printed once the benchmark has finished.
// Returns 1 for a negative key count, 0 otherwise.
int main(int argc, char ** argv)
{
    int64_t num_keys = 100000000;
    const char *bench_name = "random";
    int64_t i, value = 0;
    if(argc > 2)
    {
        // strtoll keeps the full 64-bit range of num_keys.
        num_keys = std::strtoll(argv[1], nullptr, 10);
        bench_name = argv[2];
        if (num_keys < 0)
        {
            fprintf(stderr, "invalid number of keys: %s\n", argv[1]);
            return 1;
        }
    }
    hash_t hash;
    str_hash_t str_hash;
    srand(1); // for a fair/deterministic comparison
    Timer timer(true);
#if MT_SUPPORT
    // Tag the output label when a parallel map is benchmarked multi-threaded.
    if (!strcmp(program_slug,"absl::parallel_flat_hash_map") ||
        !strcmp(program_slug,"phmap::parallel_flat_hash_map"))
        program_slug = phmap_xstr(MAPNAME) "_mt";
#endif
    std::thread t1(memlog);
    try
    {
        if(!strcmp(bench_name, "sequential"))
        {
            for(i = 0; i < num_keys; i++)
                hash.insert(hash_t::value_type(i, value));
        }
#if 0
        else if(!strcmp(bench_name, "random"))
        {
            vector<int64_t> v(num_keys);
            timer = _fill_random(v, hash);
            out("random", num_keys, timer);
        }
#endif
        else if(!strcmp(bench_name, "random"))
        {
            fprintf(stderr, "size = %zu\n", sizeof(hash));
            timer = _fill_random2(num_keys, hash);
        }
        else if(!strcmp(bench_name, "lookup"))
        {
            vector<int64_t> v(num_keys);
            size_t num_present;
            timer = _lookup(v, hash, num_present);
            //fprintf(stderr, "found %zu\n", num_present);
        }
        else if(!strcmp(bench_name, "delete"))
        {
            vector<int64_t> v(num_keys);
            timer = _delete(v, hash);
        }
        else if(!strcmp(bench_name, "sequentialstring"))
        {
            for(i = 0; i < num_keys; i++)
                str_hash.insert(str_hash_t::value_type(new_string_from_integer(i), value));
        }
        else if(!strcmp(bench_name, "randomstring"))
        {
            for(i = 0; i < num_keys; i++)
                str_hash.insert(str_hash_t::value_type(new_string_from_integer((int)rand()), value));
        }
        else if(!strcmp(bench_name, "deletestring"))
        {
            for(i = 0; i < num_keys; i++)
                str_hash.insert(str_hash_t::value_type(new_string_from_integer(i), value));
            timer.reset();
            for(i = 0; i < num_keys; i++)
                str_hash.erase(new_string_from_integer(i));
        }
        //printf("%f\n", (float)((double)timer.elapsed().count() / 1000));
        fflush(stdout);
        //std::this_thread::sleep_for(std::chrono::seconds(1000));
    }
    catch (...)
    {
        // Keep going so the sampling thread is still stopped and joined, but
        // make the failure visible instead of discarding it.
        fprintf(stderr, "benchmark '%s' aborted by an exception\n", bench_name);
    }
    all_done = true;
    // Wait for the sampling thread before reporting: it updates the peak
    // statistics that the final outmem() call prints.
    t1.join();
    outmem(test, 0, 0, true);
    return 0;
}
#include <iostream>
#include "btree_fwd.h"
#include <parallel_hashmap/btree.h>
int main()
{
// initialise map with some values using an initializer_list
phmap::btree_map<std::string, int> map =
{ { "John", 35 },
{ "Jane", 32 },
{ "Joe", 30 },
};
// add a couple more values using operator[]()
map["lucy"] = 18;
map["Andre"] = 20;
auto it = map.find("Joe");
map.erase(it);
map.insert(std::make_pair("Alex", 16));
map.emplace("Emily", 18); // emplace uses pair template constructor
for (auto& p: map)
std::cout << p.first << ", " << p.second << '\n';
IntString map2; // IntString is declared in btree_fwd.h
map2.emplace(std::piecewise_construct, std::forward_as_tuple(0), std::forward_as_tuple(10, 'c'));
map2.try_emplace(1, 10, 'a'); // phmap::btree_map supports c++17 API
for (auto& p: map2)
std::cout << p.first << ", " << p.second << '\n';
// create a btree_set of tuples
using X = std::tuple<float, std::string>;
phmap::btree_set<X> set;
for (int i=0; i<10; ++i)
set.insert(X((float)i, std::to_string(i)));
set.emplace(15.0f, "15");
set.erase(X(1.0f, "1"));
for (auto& e: set)
std::cout << std::get<0>(e) << ", \"" << std::get<1>(e) << "\" \n";
return 0;
}
#include <string>
#include <parallel_hashmap/phmap_fwd_decl.h>
using IntString = phmap::btree_map<int, std::string>;
#include <iostream>
#include <parallel_hashmap/phmap_dump.h>
void dump_load_uint64_uint32() {
phmap::flat_hash_map<uint64_t, uint32_t> mp1 = { {100, 99}, {300, 299} };
for (const auto& n : mp1)
std::cout << n.first << "'s value is: " << n.second << "\n";
{
phmap::BinaryOutputArchive ar_out("./dump.data");
mp1.phmap_dump(ar_out);
}
phmap::flat_hash_map<uint64_t, uint32_t> mp2;
{
phmap::BinaryInputArchive ar_in("./dump.data");
mp2.phmap_load(ar_in);
}
for (const auto& n : mp2)
std::cout << n.first << "'s value is: " << n.second << "\n";
}
void dump_load_parallel_flat_hash_map() {
phmap::parallel_flat_hash_map<uint64_t, uint32_t> mp1 = {
{100, 99}, {300, 299}, {101, 992} };
for (const auto& n : mp1)
std::cout << "key: " << n.first << ", value: " << n.second << "\n";
{
phmap::BinaryOutputArchive ar_out("./dump.data");
mp1.phmap_dump(ar_out);
}
phmap::parallel_flat_hash_map<uint64_t, uint32_t> mp2;
{
phmap::BinaryInputArchive ar_in("./dump.data");
mp2.phmap_load(ar_in);
}
for (const auto& n : mp2)
std::cout << "key: " << n.first << ", value: " << n.second << "\n";
}
// Runs both dump/load demonstrations in sequence. Both use the same file,
// "./dump.data", so the second run overwrites the first run's output.
int main()
{
dump_load_uint64_uint32();
dump_load_parallel_flat_hash_map();
return 0;
}
/*
*
* Example of dumping a map, containing values which are phmap maps or sets
* building this requires c++17 support
*
*/
#include <iostream>
#include <parallel_hashmap/phmap_dump.h>
// Map from K to a set of V (a flat_hash_map whose values are flat_hash_sets),
// with helpers to write the whole structure to a binary archive and read it
// back, plus a stream printer.
template <class K, class V>
class MyMap : public phmap::flat_hash_map<K, phmap::flat_hash_set<V>>
{
public:
    using Set = phmap::flat_hash_set<V>;

    // Writes the number of entries, then each key followed by its set, to
    // the file `filename`.
    void dump(const std::string &filename)
    {
        phmap::BinaryOutputArchive ar_out (filename.c_str());
        ar_out.dump(this->size());
        for (auto& [k, v] : *this)
        {
            ar_out.dump(k);
            v.dump(ar_out);
        }
    }

    // Reads entries previously written by dump() from `filename` and
    // inserts (or overwrites) them in this map.
    void load(const std::string & filename)
    {
        phmap::BinaryInputArchive ar_in(filename.c_str());
        // Start from 0 so that, if the count cannot be read, the loop below
        // does not run on an indeterminate value.
        size_t size = 0;
        ar_in.load(&size);
        this->reserve(size);
        while (size--)
        {
            K k;
            Set v;
            ar_in.load(&k);
            v.load(ar_in);
            this->insert_or_assign(std::move(k), std::move(v));
        }
    }

    // Adds `v` to the set stored under `k`, creating the set if needed.
    void insert(K k, V v)
    {
        Set &set = (*this)[k];
        set.insert(v);
    }

    // Prints one line per key in the form "key: [v1, v2, ]".
    friend std::ostream& operator<<(std::ostream& os, const MyMap& map)
    {
        for (const auto& [k, m] : map)
        {
            os << k << ": [";
            for (const auto& x : m)
                os << x << ", ";
            os << "]\n";
        }
        return os;
    }
};
// Fills a MyMap with a few key/value pairs, prints it, writes it to
// "test_archive", clears it, reads it back and prints it again.
int main()
{
    MyMap<size_t, size_t> m;

    // (key, value) pairs, inserted in this exact order.
    static const size_t pairs[][2] = {
        {1, 5}, {1, 8}, {2, 3}, {1, 15}, {1, 27}, {2, 10}, {2, 13}
    };
    for (const auto &kv : pairs)
        m.insert(kv[0], kv[1]);

    std::cout << m << "\n";

    m.dump("test_archive");
    m.clear();
    m.load("test_archive");

    std::cout << m << "\n";
    return 0;
}
#include <map>
#include <unordered_map>
#include <string>
#include <iostream>
#include <chrono>
#include <vector>
#include <parallel_hashmap/phmap.h>
#include <sstream>
template <typename T>
using milliseconds = std::chrono::duration<T, std::milli>;
// type containing std::string. Seems to take a long time to construct (and maybe move)
// ------------------------------------------------------------------------------------
// Benchmark payload holding two std::string members and three integers, all
// set through default member initializers. Declares only the default
// constructor and the move operations; copy operations are not declared.
class custom_type
{
std::string one = "one";
std::string two = "two";
std::uint32_t three = 3;
std::uint64_t four = 4;
std::uint64_t five = 5;
public:
custom_type() = default;
// Make object movable and non-copyable
custom_type(custom_type &&) = default;
custom_type& operator=(custom_type &&) = default;
// should be automatically deleted per http://www.slideshare.net/ripplelabs/howard-hinnant-accu2014
// (declaring the move operations is what suppresses the implicit copies)
//custom_type(custom_type const&) = delete;
//custom_type& operator=(custom_type const&) = delete;
};
// type containing only integrals. should be faster to create.
// -----------------------------------------------------------
// Benchmark payload holding only integer members (one 32-bit, three 64-bit),
// all set through default member initializers. Declares only the default
// constructor and the move operations; copy operations are not declared.
class custom_type_2
{
std::uint32_t three = 3;
std::uint64_t four = 4;
std::uint64_t five = 5;
std::uint64_t six = 6;
public:
custom_type_2() = default;
// Make object movable and non-copyable
custom_type_2(custom_type_2 &&) = default;
custom_type_2& operator=(custom_type_2 &&) = default;
// should be automatically deleted per http://www.slideshare.net/ripplelabs/howard-hinnant-accu2014
// (declaring the move operations is what suppresses the implicit copies)
//custom_type_2(custom_type_2 const&) = delete;
//custom_type_2& operator=(custom_type_2 const&) = delete;
};
// convert std::size_t to appropriate key
// --------------------------------------
// Functor turning a loop index into a key of type K. The primary template only
// declares the call operator; the specializations below supply it for the key
// types used by the benchmark.
template <class K>
struct GenKey
{
    K operator()(std::size_t j);
};

// String keys: the decimal text of the index.
template <>
struct GenKey<std::string>
{
    std::string operator()(std::size_t j)
    {
        std::ostringstream text;
        text << j;
        return text.str();
    }
};

// Integer keys: the index converted to int.
template <>
struct GenKey<int>
{
    int operator()(std::size_t j)
    {
        const int key = (int)j;
        return key;
    }
};
// emplace key + large struct
// --------------------------
template <class Map, class K, class V, class T> struct _emplace
{
void operator()(Map &m, std::size_t j);
};
// "void" template parameter -> use emplace
template <class Map, class K, class V> struct _emplace<Map, K, V, void>
{
void operator()(Map &m, std::size_t j)
{
m.emplace(GenKey<K>()(j), V());
}
};
// "int" template parameter -> use emplace_back for std::vector
template <class Map, class K, class V> struct _emplace<Map, K, V, int>
{
void operator()(Map &m, std::size_t j)
{
m.emplace_back(GenKey<K>()(j), V());
}
};
// The test itself
// ---------------
// Times `iterations` rounds of: create an empty Map, insert `container_size`
// generated entries through the INSERT policy, and add its final size to a
// running total. Writes "<map_name><elapsed> ms" to std::clog, preceded by an
// " invalid count" line if the total differs from iterations * container_size.
template <class Map, class K, class V, class T, template <class, class, class, class> class INSERT>
void _test(std::size_t iterations, std::size_t container_size, const char *map_name)
{
    using hr_clock = std::chrono::high_resolution_clock;

    std::size_t count = 0;
    const auto started = hr_clock::now();

    INSERT<Map, K, V, T> insert;
    for (std::size_t round = 0; round < iterations; ++round)
    {
        Map m;
        std::size_t j = 0;
        while (j < container_size)
        {
            insert(m, j);
            ++j;
        }
        count += m.size();
    }

    const auto finished = hr_clock::now();
    const auto elapsed = milliseconds<double>(finished - started).count();

    const std::size_t expected = iterations*container_size;
    if (count != expected)
        std::clog << " invalid count: " << count << "\n";
    std::clog << map_name << std::fixed << int(elapsed) << " ms\n";
}
template <class K, class V, template <class, class, class, class> class INSERT>
void test(std::size_t iterations, std::size_t container_size)
{
std::clog << "bench: iterations: " << iterations << " / container_size: " << container_size << "\n";
_test<std::map<K, V>, K, V, void, INSERT>(iterations, container_size, " std::map: ");
_test<std::unordered_map<K, V>, K, V, void, INSERT>(iterations, container_size, " std::unordered_map: ");
_test<phmap::flat_hash_map<K, V>, K, V, void, INSERT>(iterations, container_size, " phmap::flat_hash_map: ");
_test<std::vector<std::pair<K, V>>, K, V, int, INSERT> (iterations, container_size, " std::vector<std::pair>: ");
std::clog << "\n";
}
// Runs the emplace benchmark for two key/value combinations, each at three
// container sizes, with 100000 iterations per configuration.
int main()
{
    const std::size_t iterations = 100000;

    // test with custom_type_2 (int key + 32 byte value). This is representative
    // of the hash table insertion speed.
    // -------------------------------------------------------------------------
    std::clog << "\n\ntesting with <int, custom_type_2>\n";
    std::clog << "---------------------------------\n";
    const std::size_t int_key_sizes[] = { 10, 100, 500 };
    for (const std::size_t container_size : int_key_sizes)
        test<int, custom_type_2, _emplace>(iterations, container_size);

    // test with custom_type, which contains two std::string values, and use
    // a generated string key. This is not very indicative of the speed of the
    // hash itself, as a good chunk of the time is spent creating the keys and
    // values (as shown by the long times even for std::vector).
    // -----------------------------------------------------------------------
    std::clog << "\ntesting with <string, custom_type>\n";
    std::clog << "---------------------------------\n";
    const std::size_t string_key_sizes[] = { 1, 10, 50 };
    for (const std::size_t container_size : string_key_sizes)
        test<std::string, custom_type, _emplace>(iterations, container_size);
}
/*
* Make sure that the phmap.h header builds fine when included in two separate
* source files
*/
#include <string>
#include <parallel_hashmap/phmap.h>
using phmap::flat_hash_map;
int main()
{
// Create an unordered_map of three strings (that map to strings)
using Map = flat_hash_map<std::string, std::string>;
Map email =
{
{ "tom", "tom@gmail.com"},
{ "jeff", "jk@gmail.com"},
{ "jim", "jimg@microsoft.com"}
};
extern void f2(Map&);
f2(email);
return 0;
}
/*
* Make sure that the phmap.h header builds fine when included in two separate
* source files
*/
#include <iostream>
#include <string>
#include <parallel_hashmap/phmap.h>
using phmap::flat_hash_map;
using Map = flat_hash_map<std::string, std::string>;
// Prints every entry of `email`, then adds an entry for "bill" and prints it.
void f2(Map& email)
{
    // Walk the table with explicit iterators and print each pair.
    for (auto it = email.begin(); it != email.end(); ++it)
        std::cout << it->first << "'s email is: " << it->second << "\n";

    // operator[] creates the entry on first use ...
    email["bill"] = "bg@whatever.com";

    // ... and returns the stored value on the next lookup.
    const std::string &bills_address = email["bill"];
    std::cout << "bill's email is: " << bills_address << "\n";
}
#include "hash_std.h" // defines Person with std::hash specialization
#include <iostream>
#include <parallel_hashmap/phmap.h>
// Stores Person objects in a phmap::flat_hash_set and prints them.
int main()
{
    // Person has a std::hash specialization (see hash_std.h), so it can be
    // used directly as the element type of a phmap hash set.
    // ----------------------------------------------------------------
    phmap::flat_hash_set<Person> persons =
    {
        { "John", "Mitchell", 35 },
        { "Jane", "Smith", 32 },
        { "Jane", "Smith", 30 },
    };

    for (auto it = persons.begin(); it != persons.end(); ++it)
        std::cout << it->_first << ' ' << it->_last << " (" << it->_age << ")" << '\n';
}
#ifndef phmap_example_hash_std_
#define phmap_example_hash_std_
#include <parallel_hashmap/phmap_utils.h> // minimal header providing phmap::HashState()
#include <string>
using std::string;
// Plain record with a first name, a last name and an age.
// Two Persons compare equal when all three fields match.
struct Person
{
    bool operator==(const Person &o) const
    {
        // Compare the integer first, then the strings.
        if (_age != o._age)
            return false;
        if (_first != o._first)
            return false;
        return _last == o._last;
    }

    string _first;
    string _last;
    int _age;
};
namespace std
{
// inject specialization of std::hash for Person into namespace std
// An alternative is to provide a hash_value() friend function (see hash_value.h)
// ------------------------------------------------------------------------------
template<> struct hash<Person>
{
// Combines the first name, last name and age - the same three fields that
// Person::operator== compares - into one hash value, starting from seed 0.
std::size_t operator()(Person const &p) const
{
return phmap::HashState().combine(0, p._first, p._last, p._age);
}
};
}
#endif // phmap_example_hash_std_
#include "hash_value.h" // defines Person with a friend hash_value() function
#include <iostream>
#include <parallel_hashmap/phmap.h>
// Stores Person objects in a phmap::flat_hash_set and prints them.
int main()
{
    // Person provides a friend hash_value() function (see hash_value.h), so
    // it can be used directly as the element type of a phmap hash set.
    // ----------------------------------------------------------------
    phmap::flat_hash_set<Person> persons =
    {
        { "John", "Mitchell", 35 },
        { "Jane", "Smith", 32 },
        { "Jane", "Smith", 30 },
    };

    for (auto it = persons.begin(); it != persons.end(); ++it)
        std::cout << it->_first << ' ' << it->_last << " (" << it->_age << ")" << '\n';
}
#ifndef phmap_example_hash_value_
#define phmap_example_hash_value_
#include <parallel_hashmap/phmap_utils.h> // minimal header providing phmap::HashState()
#include <string>
using std::string;
// Plain record with a first name, a last name and an age.
// Two Persons compare equal when all three fields match. Hashing is provided
// through the friend function hash_value() rather than a std::hash
// specialization.
struct Person
{
    bool operator==(const Person &o) const
    {
        // Compare the integer first, then the strings.
        if (_age != o._age)
            return false;
        if (_first != o._first)
            return false;
        return _last == o._last;
    }

    // Demonstrates how to provide the hash function as a friend function of the class.
    // This can be used as an alternative to providing a std::hash<Person> specialization.
    // Combines the same three fields that operator== compares, starting from seed 0.
    // --------------------------------------------------------------------------------------
    friend size_t hash_value(const Person &p)
    {
        return phmap::HashState().combine(0, p._first, p._last, p._age);
    }

    string _first;
    string _last;
    int _age;
};
#endif // phmap_example_hash_value_
#include <string>
#include <array>
#include <cstdint>
#include <limits>
#include <random>
#include <utility>
#define PHMAP_ALLOCATOR_NOTHROW 1
#include <parallel_hashmap/phmap.h>
// this is probably the fastest high quality 64bit random number generator that exists.
// Implements Small Fast Counting v4 RNG from PractRand.
// this is probably the fastest high quality 64bit random number generator that exists.
// Implements Small Fast Counting v4 RNG from PractRand.
//
// Usable as a generator of 64-bit values via operator()(), with min()/max()
// reporting the full uint64_t range. operator()(bound) scales a draw into
// [0, bound) by taking the high 64 bits of the 128-bit product draw * bound.
class sfc64 {
public:
    using result_type = uint64_t;

    // no copy ctors so we don't accidentally get the same random again
    sfc64(sfc64 const&) = delete;
    sfc64& operator=(sfc64 const&) = delete;
    sfc64(sfc64&&) = default;
    sfc64& operator=(sfc64&&) = default;

    // Restores a generator from the four words returned by state().
    sfc64(std::array<uint64_t, 4> const& _state)
        : m_a(_state[0])
        , m_b(_state[1])
        , m_c(_state[2])
        , m_counter(_state[3]) {}

    static constexpr uint64_t(min)() {
        return (std::numeric_limits<uint64_t>::min)();
    }
    static constexpr uint64_t(max)() {
        return (std::numeric_limits<uint64_t>::max)();
    }

    // Default seed.
    sfc64()
        : sfc64(UINT64_C(0x853c49e6748fea9b)) {}

    // Seeds all three state words with `_seed` and discards the first 12
    // outputs.
    sfc64(uint64_t _seed)
        : m_a(_seed)
        , m_b(_seed)
        , m_c(_seed)
        , m_counter(1) {
        for (int i = 0; i < 12; ++i) {
            operator()();
        }
    }

    // Re-seeds from std::random_device.
    void seed() {
        *this = sfc64{std::random_device{}()};
    }

    // Returns the next 64-bit value and advances the state.
    uint64_t operator()() noexcept {
        auto const tmp = m_a + m_b + m_counter++;
        m_a = m_b ^ (m_b >> right_shift);
        m_b = m_c + (m_c << left_shift);
        m_c = rotl(m_c, rotation) + tmp;
        return tmp;
    }

    // Returns a value in [0, boundExcluded) (0 when boundExcluded is 0).
    // this is a bit biased, but for our use case that's not important.
    uint64_t operator()(uint64_t boundExcluded) noexcept {
#ifdef PHMAP_HAS_UMUL128
        uint64_t h;
        (void)umul128(operator()(), boundExcluded, &h);
        return h;
#else
        // Portable fallback so bounded draws stay random when no 128-bit
        // multiply helper is available.
        return mul_high(operator()(), boundExcluded);
#endif
    }

    // Snapshot of the four state words, accepted by the array constructor
    // and by state(s).
    std::array<uint64_t, 4> state() const {
        return {{m_a, m_b, m_c, m_counter}};
    }

    // Overwrites the state with a snapshot taken by state().
    void state(std::array<uint64_t, 4> const& s) {
        m_a = s[0];
        m_b = s[1];
        m_c = s[2];
        m_counter = s[3];
    }

private:
    template <typename T>
    T rotl(T const x, int k) {
        return (x << k) | (x >> (8 * sizeof(T) - k));
    }

    // High 64 bits of the 128-bit product a * b, computed from 32-bit halves
    // so that no intermediate sum overflows 64 bits.
    static uint64_t mul_high(uint64_t a, uint64_t b) noexcept {
        const uint64_t mask32 = 0xffffffffu;
        const uint64_t a_lo = a & mask32, a_hi = a >> 32;
        const uint64_t b_lo = b & mask32, b_hi = b >> 32;

        const uint64_t lo_lo = a_lo * b_lo;
        const uint64_t hi_lo = a_hi * b_lo;
        const uint64_t lo_hi = a_lo * b_hi;
        const uint64_t hi_hi = a_hi * b_hi;

        // Middle column including the carry out of the low column.
        const uint64_t cross = (lo_lo >> 32) + (hi_lo & mask32) + lo_hi;
        return hi_hi + (hi_lo >> 32) + (cross >> 32);
    }

    static constexpr int rotation = 24;
    static constexpr int right_shift = 11;
    static constexpr int left_shift = 3;
    uint64_t m_a;
    uint64_t m_b;
    uint64_t m_c;
    uint64_t m_counter;
};
int main()
{
// Create an unordered_map of three strings (that map to strings)
using Map = phmap::parallel_node_hash_map<int, int>;
static size_t const n = 50000000;
sfc64 rng(123);
size_t checksum = 0;
if (0)
{
size_t const max_rng = n / 20;
Map map;
for (size_t i = 0; i < n; ++i) {
checksum += ++map[static_cast<int>(rng(max_rng))];
}
}
if (0)
{
size_t const max_rng = n / 4;
Map map;
for (size_t i = 0; i < n; ++i) {
checksum += ++map[static_cast<int>(rng(max_rng))];
}
}
if (1)
{
size_t const max_rng = n / 2;
Map map;
for (size_t i = 0; i < n; ++i) {
checksum += ++map[static_cast<int>(rng(max_rng))];
}
}
if (0)
{
Map map;
for (size_t i = 0; i < n; ++i) {
checksum += ++map[static_cast<int>(rng())];
}
}
printf("%zu\n", checksum);
}
>ONE Homo sapiens alu
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
GGCGACAGAGCGAGACTCCG
>TWO IUB ambiguity codes
cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
>THREE Homo sapiens frequency
aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
ggaagtgaaaagataaatat
This source diff could not be displayed because it is too large. You can view the blob instead.
// ------------------------------------------------------------------
// run with: knucleotide 0 < ../examples/knucleotide-input.txt
// ------------------------------------------------------------------
//
// output should be:
//
// T 31.520
// A 29.600
// C 19.480
// G 19.400
//
// AT 9.922
// TT 9.602
// TA 9.402
// AA 8.402
// GA 6.321
// TC 6.301
// TG 6.201
// GT 6.041
// CT 5.961
// AG 5.841
// CA 5.461
// AC 5.441
// CC 4.041
// CG 4.021
// GC 3.701
// GG 3.341
//
// 54 GGT
// 24 GGTA
// 4 GGTATT
// 0 GGTATTTTAATT
// 0 GGTATTTTAATTTATAGT
// ------------------------------------------------------------------
#include <iostream>
#include <iomanip>
#include <cstdint>
#include <string>
#include <algorithm>
#include <map>
#include <thread>
#include <type_traits>
#include <cstring>
#include <array>
#include <vector>
#include <cassert>
#include <parallel_hashmap/phmap.h>
// ------------------------------------------------------------------
// Number of worker threads started by CalculateInThreads(). Calculate() also
// uses it as the stride between the start positions handled by one thread.
constexpr size_t thread_count = 4;
// Lookup tables translating between nucleotide letters and their 2-bit codes
// (A=0, C=1, T=2, G=3). A single const instance, `cfg`, is defined below.
struct Cfg {
    // code (0..3) -> upper-case letter
    unsigned char *to_char;
    // letter -> code, indexed by the letter's unsigned char value. Sized for
    // every possible unsigned char and zero-initialized, so letters other
    // than ACTG/actg map to 0 instead of reading an indeterminate value.
    unsigned char to_num[256] = {};
    // Sequence storage: raw letters while reading, 2-bit codes afterwards.
    using Data = std::vector<unsigned char>;
    Cfg() {
        static unsigned char letters[] = {'A', 'C', 'T', 'G'};
        to_char = letters;
        to_num[static_cast<unsigned char>('A')] = to_num[static_cast<unsigned char>('a')] = 0;
        to_num[static_cast<unsigned char>('C')] = to_num[static_cast<unsigned char>('c')] = 1;
        to_num[static_cast<unsigned char>('T')] = to_num[static_cast<unsigned char>('t')] = 2;
        to_num[static_cast<unsigned char>('G')] = to_num[static_cast<unsigned char>('g')] = 3;
    }
} const cfg;
// ------------------------------------------------------------------
// A k-mer of `size` nucleotides packed two bits per nucleotide into an
// integer, with the first nucleotide in the most significant used bits.
template <size_t size>
struct Key
{
    // select type to use for 'data', if hash key can fit on 32-bit integer
    // then use uint32_t else use uint64_t.
    using Data = typename std::conditional<size<=16, uint32_t, uint64_t>::type;

    // Two bits per nucleotide must fit in Data (at most 64 bits).
    static_assert(size >= 1 && size <= 32, "Key supports between 1 and 32 nucleotides");

    // Hash functor: the packed value itself is used as the hash.
    struct Hash {
        Data operator()(const Key& t)const{ return t._data; }
    };

    Key() : _data(0) {
    }

    // Builds a key from the first `size` characters of str, translating each
    // letter through cfg.to_num. str must hold at least `size` characters.
    Key(const char *str) {
        _data = 0;
        for(unsigned i = 0; i < size; ++i){
            _data <<= 2;
            // go through unsigned char so a negative char is not
            // sign-extended into a huge index
            _data |= cfg.to_num[static_cast<unsigned char>(str[i])];
        }
    }

    // initialize hash from input data: appends `size` already-encoded
    // (0..3) symbols to the current value; intended for a freshly
    // default-constructed key.
    void InitKey(const unsigned char *data) {
        for(unsigned i = 0; i < size; ++i){
            _data <<= 2;
            _data |= data[i];
        }
    }

    // updates the key with 1 byte (one encoded symbol); call MaskKey()
    // afterwards to drop the symbols that slid out of the window.
    void UpdateKey(const unsigned char data) {
        _data <<= 2;
        _data |= data;
    }

    // masks out excess information: keeps only the low 2 * size bits
    void MaskKey() {
        _data &= _mask;
    }

    // implicit casting operator to string: decodes the key back to letters
    operator std::string() const {
        std::string text(size, 'A');
        Data data = _data;
        // the last nucleotide lives in the lowest two bits, so fill backwards
        for(size_t i = size; i-- > 0; data >>= 2)
            text[i] = static_cast<char>(cfg.to_char[data & 3ull]);
        return text;
    }

    bool operator== (const Key& in) const {
        return _data == in._data;
    }

private:
    // Low 2 * size bits set. Written as a right shift of all-ones so the
    // shift count stays below the bit width even when the key fills Data
    // completely (size 16 or 32); a left shift by the full width is undefined.
    static constexpr Data _mask = Data(-1) >> (8 * sizeof(Data) - 2 * size);
    Data _data;
};
// ------------------------------------------------------------------
// Hash table mapping a k-mer key of `size` symbols to an unsigned occurrence
// counter, hashed with the key type's own Hash functor.
template <size_t size, typename K = Key<size> >
using HashTable = phmap::flat_hash_map<K, unsigned, typename K::Hash>;
// ------------------------------------------------------------------
// Counts the k-mers of length `size` that start at positions begin,
// begin + thread_count, begin + 2 * thread_count, ... of `input` (already
// translated to 2-bit codes) and adds the counts to `table`.
template <size_t size>
void Calculate(const Cfg::Data& input, size_t begin, HashTable<size>& table)
{
    // original implementation fully recomputes the hash key for each
    // insert to the hash table. This implementation only partially
    // updates the hash, this is the same with C GCC, Rust #6 and Rust #4

    const size_t total = input.size();
    // No complete window starts at `begin`: nothing to count (and nothing
    // may be read).
    if (total < size || begin > total - size)
        return;

    const unsigned char* const base = input.data();

    Key<size> key;
    // initialize key
    key.InitKey(base + begin);
    // use key to increment value
    ++table[key];

    // Moving the window forward by thread_count positions brings in
    // min(size, thread_count) new symbols, and those are the LAST symbols of
    // the new window. For size <= thread_count that is the whole window; for
    // longer k-mers they start `skip` symbols after the window start.
    constexpr size_t nsize = size < thread_count ? size : thread_count;
    constexpr size_t skip = size - nsize;
    const size_t last_start = total - size;

    for(size_t pos = begin + thread_count; pos <= last_start; pos += thread_count) {
        const unsigned char* const incoming = base + pos + skip;
        // update the key 1 byte at a time
        for(size_t i = 0; i < nsize; ++i)
            key.UpdateKey(incoming[i]);
        // then finally mask out excess information
        key.MaskKey();
        // then use key to increment value
        ++table[key];
    }
}
// ------------------------------------------------------------------
// Runs Calculate<size>() on thread_count threads, one per starting offset,
// and returns the sum of the per-thread tables.
template <size_t size>
HashTable<size> CalculateInThreads(const Cfg::Data& input)
{
    HashTable<size> partial[thread_count];
    std::thread workers[thread_count];

    // Worker t fills partial[t] starting from offset t.
    for(unsigned t = 0; t != thread_count; ++t)
        workers[t] = std::thread([&input, &partial, t] {
            Calculate<size>(input, t, partial[t]);
        });
    for(unsigned t = 0; t != thread_count; ++t)
        workers[t].join();

    // Fold every other table into the first one.
    HashTable<size>& merged = partial[0];
    for(unsigned t = 1; t != thread_count; ++t)
        for(const auto& entry : partial[t])
            merged[entry.first] += entry.second;

    // `merged` is a reference to an array element, so an explicit move is
    // needed to avoid copying the table on return.
    return std::move(merged);
}
// ------------------------------------------------------------------
// Prints every k-mer of length `size` found in `input` together with its
// relative frequency in percent, most frequent first, followed by an empty
// line. K-mers with equal counts are listed in ascending alphabetical order.
template <unsigned size>
void WriteFrequencies(const Cfg::Data& input)
{
    // we "receive" the returned object by move instead of copy.
    auto&& frequencies = CalculateInThreads<size>(input);

    // A container keyed by count would keep only one k-mer per distinct
    // count, so collect (count, text) pairs and sort them instead.
    std::vector<std::pair<unsigned, std::string>> ranked;
    ranked.reserve(frequencies.size());
    for(const auto& entry : frequencies)
        ranked.emplace_back(entry.second, static_cast<std::string>(entry.first));
    std::sort(ranked.begin(), ranked.end(),
        [](const std::pair<unsigned, std::string>& lhs,
           const std::pair<unsigned, std::string>& rhs) {
            if(lhs.first != rhs.first)
                return lhs.first > rhs.first;   // higher count first
            return lhs.second < rhs.second;     // ties: alphabetical
        });

    // Number of k-mer positions in the input (0 if the input is too short).
    const size_t sum = input.size() + 1 > size ? input.size() + 1 - size : 0;
    for(const auto& item : ranked)
        // multiply in double: 100 * count could wrap in unsigned arithmetic
        std::cout << item.second << ' ' << (sum ? 100.0 * item.first / sum : 0.0) << '\n';
    std::cout << '\n';
}
// ------------------------------------------------------------------
// Prints how many times the k-mer spelled by `text` (its first `size`
// characters) occurs in `input`, as "<count>\t<text>".
template <unsigned size>
void WriteCount( const Cfg::Data& input, const char *text ) {
    // the returned table is bound to a reference, which keeps it alive
    // without a copy
    auto&& table = CalculateInThreads<size>(input);
    const Key<size> wanted(text);
    const unsigned occurrences = table[wanted];
    std::cout << occurrences << '\t' << text << '\n';
}
// ------------------------------------------------------------------
// Reads a FASTA stream from stdin, extracts the sequence that follows the
// ">THREE" header, and prints 1- and 2-mer frequencies plus the counts of a
// few fixed k-mers.
int main()
{
    Cfg::Data data;
    std::array<char, 256> buf;

    // Skip everything up to and including the ">THREE" header line.
    // strncmp stops at the terminating NUL, so short lines never cause
    // stale or uninitialized buffer bytes to be compared.
    while(fgets(buf.data(), static_cast<int>(buf.size()), stdin) &&
          std::strncmp(buf.data(), ">THREE", 6) != 0) {
    }

    // Collect sequence lines until the next header (or end of input);
    // lines starting with ';' are skipped.
    while(fgets(buf.data(), static_cast<int>(buf.size()), stdin) && buf.front() != '>') {
        if(buf.front() != ';'){
            // Length of the text fgets actually stored, without the line
            // terminator. Bounded by the NUL terminator, so a chunk without
            // '\n' (last line, or a line longer than the buffer) does not
            // pull in leftover bytes from previous reads.
            const size_t len = std::strcspn(buf.data(), "\r\n");
            data.insert(data.end(), buf.data(), buf.data() + len);
        }
    }

    // Translate letters to 2-bit codes in place. Bytes outside the lookup
    // table are mapped to 0 rather than indexing past its end.
    std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) -> unsigned char {
        return c < sizeof(cfg.to_num) ? cfg.to_num[c] : static_cast<unsigned char>(0);
    });

    std::cout << std::setprecision(3) << std::setiosflags(std::ios::fixed);
    WriteFrequencies<1>(data);
    WriteFrequencies<2>(data);
    // value at left is the length of the passed string.
    WriteCount<3>(data, "GGT");
    WriteCount<4>(data, "GGTA");
    WriteCount<6>(data, "GGTATT");
    WriteCount<12>(data, "GGTATTTTAATT");
    WriteCount<18>(data, "GGTATTTTAATTTATAGT");
}
// ------------------------
// Windows specific example
// courtesy of @kanonka
// ------------------------
#include <windows.h>
#include "parallel_hashmap/phmap.h"
#include <cstring>
#include <vector>
#include <ppl.h>
class srwlock {
SRWLOCK _lock;
public:
srwlock() { InitializeSRWLock(&_lock); }
void lock() { AcquireSRWLockExclusive(&_lock); }
void unlock() { ReleaseSRWLockExclusive(&_lock); }
};
// String -> int parallel flat hash map using phmap's default hash and equality
// for std::string, std::allocator, and srwlock as the final template argument.
// NOTE(review): presumably the `8` selects 2^8 submaps and srwlock is the
// per-submap mutex type — confirm against the parallel-hashmap documentation.
using Map = phmap::parallel_flat_hash_map<std::string, int, phmap::priv::hash_default_hash<std::string>,
phmap::priv::hash_default_eq<std::string>,
std::allocator<std::pair<const std::string, int>>, 8, srwlock>;
class Dict
{
Map m_stringsMap;
public:
int addParallel(std::string&& str, volatile long* curIdx)
{
int newIndex = -1;
m_stringsMap.lazy_emplace_l(std::move(str),
[&](Map::value_type& p) { newIndex = p.second; }, // called only when key was already present
[&](const Map::constructor& ctor) // construct value_type in place when key not present
{ newIndex = InterlockedIncrement(curIdx); ctor(std::move(str), newIndex); });
return newIndex;
}
};
// Inserts 6,000,000 strings (1,000,000 distinct values) into a Dict from a
// PPL parallel_for and records the index returned for each one.
int main()
{
    const size_t totalSize = 6000000;
    std::vector<int> values(totalSize);
    Dict dict;
    volatile long index = 0;

    auto insert_one = [&](size_t i) {
        // i % 1000000 makes each distinct string occur several times
        values[i] = dict.addParallel("ab_uu_" + std::to_string(i % 1000000), &index);
    };
    concurrency::parallel_for(size_t(0), totalSize, insert_one);

    return 0;
}
#include <chrono>
#include <ctime>
#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <vector>
#include <random>
#include <parallel_hashmap/phmap.h>
#include <parallel_hashmap/btree.h>
// -------------------------------------------------------------------
// -------------------------------------------------------------------
// Scope timer: remembers the construction time and, on destruction, prints
// "<name>: <elapsed seconds>s" with three decimals to stdout.
class Timer
{
    using Clock = std::chrono::high_resolution_clock;

public:
    Timer(std::string name) : _name(name), _start(Clock::now()) {}

    ~Timer()
    {
        const Clock::time_point stop = Clock::now();
        const std::chrono::duration<float> elapsed = stop - _start;
        printf("%s: %.3fs\n", _name.c_str(), elapsed.count());
    }

private:
    std::string _name;          // label printed in front of the timing
    Clock::time_point _start;   // time of construction
};
// --------------------------------------------------------------------------
// from: https://github.com/preshing/RandomSequence
// --------------------------------------------------------------------------
// Generator of a sequence of 32-bit values derived from two seeds
// (from: https://github.com/preshing/RandomSequence).
class RSU
{
public:
    RSU(uint32_t seedBase, uint32_t seedOffset)
        : m_index(permuteQPR(permuteQPR(seedBase) + 0x682f0161))
        , m_intermediateOffset(permuteQPR(permuteQPR(seedOffset) + 0x46790905))
    {
    }

    // Returns the value for the current index and advances the index.
    uint32_t next()
    {
        const uint32_t current = m_index++;
        const uint32_t mixed = (permuteQPR(current) + m_intermediateOffset) ^ 0x5bf03635;
        return permuteQPR(mixed);
    }

private:
    // Maps x to its square modulo `prime`, or to prime minus that residue
    // when x lies in the upper half of [0, prime).
    static uint32_t permuteQPR(uint32_t x)
    {
        static const uint32_t prime = 4294967291u;
        if (x < prime) {
            const uint32_t residue = ((unsigned long long) x * x) % prime;
            return (x > prime / 2) ? prime - residue : residue;
        }
        return x; // The 5 integers out of range are mapped to themselves.
    }

    uint32_t m_index;
    uint32_t m_intermediateOffset;
};
// Callback that may reorder or otherwise modify the key vector in place before
// the timed insertion in test() (main() passes a no-op or a shuffle).
using Perturb = std::function<void (std::vector<uint64_t> &)>;
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
template<class Set, size_t N>
void test(const char *name, Perturb perturb1, Perturb /* perturb2 */)
{
//phmap::btree_set<uint64_t> s;
Set s;
unsigned int seed = 76687;
RSU rsu(seed, seed + 1);
for (uint32_t i=0; i<N; ++i)
s.insert(rsu.next());
std::vector<uint64_t> order(s.begin(), s.end()); // contains sorted, randomly generated keys (when using phmap::btree_set)
// or keys in the final order of a Set (when using Set).
perturb1(order); // either keep them in same order, or shuffle them
#if 0
order.resize(N/4);
perturb2(order);
#endif
Timer t(name); // start timer
Set c;
//c.reserve(order.size()); // whether this "reserve()" is present or not makes a huge difference
c.insert(order.begin(), order.end()); // time for inserting the same keys into the set
// should not depend on them being sorted or not.
}
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
// phmap::parallel_flat_hash_set of T with phmap's default hash, equality and
// allocator, and N forwarded as the fifth template argument.
// NOTE(review): main() labels N = 4 / 6 / 8 as "(16)" / "(64)" / "(256)", so N
// is presumably the log2 of the number of submaps — confirm.
template <class T, size_t N>
using pset = phmap::parallel_flat_hash_set<T,
phmap::priv::hash_default_hash<T>,
phmap::priv::hash_default_eq<T>,
phmap::priv::Allocator<T>, // alias for std::allocator
N>;
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
int main()
{
auto shuffle = [](std::vector<uint64_t> &order) {
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(order.begin(), order.end(), g);
};
auto noop = [](std::vector<uint64_t> &) {};
auto perturb2 = noop;
constexpr uint32_t num_keys = 10000000;
using T = uint64_t;
test<phmap::flat_hash_set<T>, num_keys>("flat_hash_set ordered ", noop, perturb2);
test<phmap::flat_hash_set<T>, num_keys>("flat_hash_set shuffled", shuffle, perturb2);
test<pset<T, 4>, num_keys>("parallel (16) ordered ", noop, perturb2);
test<pset<T, 4>, num_keys>("parallel (16) shuffled", shuffle, perturb2);
test<pset<T, 6>, num_keys>("parallel (64) ordered ", noop, perturb2);
test<pset<T, 6>, num_keys>("parallel (64) shuffled", shuffle, perturb2);
test<pset<T, 8>, num_keys>("parallel (256) ordered ", noop, perturb2);
test<pset<T, 8>, num_keys>("parallel (256) shuffled", shuffle, perturb2);
}
// Pull in whichever memory_resource header the toolchain provides and make
// its contents reachable as std::pmr.
#if __has_include(<experimental/memory_resource>)
#include <experimental/memory_resource>
namespace std
{
namespace pmr = experimental::pmr;
}
#elif __has_include(<memory_resource>)
#include <memory_resource>
#else
// Final fallback. This must be a plain #else: an #elif without a condition
// is itself a preprocessing error and would hide the message below.
#error <memory_resource> is missing
#endif
#include <parallel_hashmap/phmap.h>
// Small reproduction case: a struct owning a phmap::parallel_flat_hash_map
// whose allocator is std::pmr::polymorphic_allocator.
struct MyStruct
{
// parallel_flat_hash_map with std::hash / std::equal_to and a polymorphic
// allocator for its pair<const Key, Value> elements.
template<typename Key, typename Value>
using ParallelFlatHashMap = phmap::parallel_flat_hash_map<Key, Value, std::hash<Key>, std::equal_to<Key>,
std::pmr::polymorphic_allocator<std::pair<const Key, Value>>>;
ParallelFlatHashMap<uint32_t, uint32_t> hashMap;
// No compile errors
// (hashMap is default-constructed by this constructor.)
MyStruct()
{
}
// Compile errors
// NOTE(review): presumably the errors come from constructing hashMap from a
// memory_resource* — confirm. Also, because the parameter has a default
// value, this constructor and MyStruct() above are both viable with zero
// arguments, so default-constructing a MyStruct would be ambiguous.
MyStruct(std::pmr::memory_resource* memoryResource = std::pmr::get_default_resource())
: hashMap(memoryResource)
{
}
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment