Commit 3be9ece9 authored by Chris Waterson's avatar Chris Waterson Committed by GitHub
Browse files

Merge pull request #1106 from vmarkovtsev/master

Swivel: fastprep: replace pthread with std::thread
parents 8337978f a2adde40
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <assert.h> #include <assert.h>
#include <fcntl.h> #include <fcntl.h>
#include <pthread.h>
#include <stdio.h> #include <stdio.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/stat.h> #include <sys/stat.h>
...@@ -36,7 +35,9 @@ ...@@ -36,7 +35,9 @@
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <mutex>
#include <string> #include <string>
#include <thread>
#include <tuple> #include <tuple>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
...@@ -250,15 +251,14 @@ class CoocBuffer { ...@@ -250,15 +251,14 @@ class CoocBuffer {
std::vector<int> fds_; std::vector<int> fds_;
// Ensures that only one buffer file is getting written at a time. // Ensures that only one buffer file is getting written at a time.
pthread_mutex_t writer_mutex_; std::mutex writer_mutex_;
}; };
CoocBuffer::CoocBuffer(const std::string &output_dirname, const int num_shards, CoocBuffer::CoocBuffer(const std::string &output_dirname, const int num_shards,
const int shard_size) const int shard_size)
: output_dirname_(output_dirname), : output_dirname_(output_dirname),
num_shards_(num_shards), num_shards_(num_shards),
shard_size_(shard_size), shard_size_(shard_size) {
writer_mutex_(PTHREAD_MUTEX_INITIALIZER) {
for (int row = 0; row < num_shards_; ++row) { for (int row = 0; row < num_shards_; ++row) {
for (int col = 0; col < num_shards_; ++col) { for (int col = 0; col < num_shards_; ++col) {
char filename[256]; char filename[256];
...@@ -294,14 +294,11 @@ void CoocBuffer::AccumulateCoocs(const cooc_counts_t &coocs) { ...@@ -294,14 +294,11 @@ void CoocBuffer::AccumulateCoocs(const cooc_counts_t &coocs) {
bufs[bot_shard_idx].push_back(cooc_t{col_off, row_off, cnt}); bufs[bot_shard_idx].push_back(cooc_t{col_off, row_off, cnt});
} }
// XXX TODO: lock
for (int i = 0; i < static_cast<int>(fds_.size()); ++i) { for (int i = 0; i < static_cast<int>(fds_.size()); ++i) {
int rv = pthread_mutex_lock(&writer_mutex_); std::lock_guard<std::mutex> rv(writer_mutex_);
assert(rv == 0);
const int nbytes = bufs[i].size() * sizeof(cooc_t); const int nbytes = bufs[i].size() * sizeof(cooc_t);
int nwritten = write(fds_[i], bufs[i].data(), nbytes); int nwritten = write(fds_[i], bufs[i].data(), nbytes);
assert(nwritten == nbytes); assert(nwritten == nbytes);
pthread_mutex_unlock(&writer_mutex_);
} }
} }
...@@ -648,18 +645,13 @@ int main(int argc, char *argv[]) { ...@@ -648,18 +645,13 @@ int main(int argc, char *argv[]) {
token_to_id_map[vocab[i]] = i; token_to_id_map[vocab[i]] = i;
// Compute the co-occurrences // Compute the co-occurrences
std::vector<pthread_t> threads; std::vector<std::thread> threads;
threads.reserve(num_threads);
std::vector<CoocCounter*> counters; std::vector<CoocCounter*> counters;
const off_t nbytes_per_thread = input_size / num_threads; const off_t nbytes_per_thread = input_size / num_threads;
std::cout << "Running " << num_threads << " threads, each on " std::cout << "Running " << num_threads << " threads, each on "
<< nbytes_per_thread << " bytes" << std::endl; << nbytes_per_thread << " bytes" << std::endl;
pthread_attr_t attr;
if (pthread_attr_init(&attr) != 0) {
std::cerr << "unable to initalize pthreads" << std::endl;
return 1;
}
for (int i = 0; i < num_threads; ++i) { for (int i = 0; i < num_threads; ++i) {
// We could make this smarter and look around for newlines. But // We could make this smarter and look around for newlines. But
// realistically that's not going to change things much. // realistically that's not going to change things much.
...@@ -672,16 +664,16 @@ int main(int argc, char *argv[]) { ...@@ -672,16 +664,16 @@ int main(int argc, char *argv[]) {
counters.push_back(counter); counters.push_back(counter);
pthread_t thread; threads.emplace_back(CoocCounter::Run, counter);
pthread_create(&thread, &attr, CoocCounter::Run, counter);
threads.push_back(thread);
} }
// Wait for threads to finish and collect marginals. // Wait for threads to finish and collect marginals.
std::vector<double> marginals(vocab.size()); std::vector<double> marginals(vocab.size());
for (int i = 0; i < num_threads; ++i) { for (int i = 0; i < num_threads; ++i) {
pthread_join(threads[i], 0); if (i > 0) {
std::cout << "joining thread #" << (i + 1) << std::endl;
}
threads[i].join();
const std::vector<double>& counter_marginals = counters[i]->Marginals(); const std::vector<double>& counter_marginals = counters[i]->Marginals();
for (int j = 0; j < static_cast<int>(vocab.size()); ++j) for (int j = 0; j < static_cast<int>(vocab.size()); ++j)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment