Update files

688b6eac · SWHL · 688b6eac · 688b6eac · 688b6eac · 688b6eac
Commit 688b6eac authored Apr 07, 2023 by SWHL
20 changed files
--- a/cpp/thirdpart/kenlm/util/double-conversion/ieee.h
+++ b/cpp/thirdpart/kenlm/util/double-conversion/ieee.h
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#ifndef DOUBLE_CONVERSION_DOUBLE_H_
+#define DOUBLE_CONVERSION_DOUBLE_H_
+#include "diy-fp.h"
+namespace double_conversion {
+// Raw-bit conversions between floating-point values and same-sized unsigned
+// integers. Doubles and uint64_t are assumed to have the same endianness.
+static uint64_t double_to_uint64(double d) {
+  return BitCast<uint64_t>(d);
+}
+static double uint64_to_double(uint64_t d64) {
+  return BitCast<double>(d64);
+}
+static uint32_t float_to_uint32(float f) {
+  return BitCast<uint32_t>(f);
+}
+static float uint32_to_float(uint32_t d32) {
+  return BitCast<float>(d32);
+}
+// Bit-level view of a double: wraps the 64-bit encoding and exposes sign, exponent and significand.
+class Double {
+ public:
+  static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);  // Selects the sign bit.
+  static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);  // Selects the biased exponent field.
+  static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);  // Selects the stored significand bits.
+  static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);  // Implicit leading bit added for non-denormals.
+  static const int kPhysicalSignificandSize = 52;  // Excludes the hidden bit.
+  static const int kSignificandSize = 53;  // Physical size plus the hidden bit.
+  Double() : d64_(0) {}  // All bits zero.
+  explicit Double(double d) : d64_(double_to_uint64(d)) {}
+  explicit Double(uint64_t d64) : d64_(d64) {}
+  explicit Double(DiyFp diy_fp)
+    : d64_(DiyFpToUint64(diy_fp)) {}  // Encoded by DiyFpToUint64 below.
+  // The value encoded by this Double must be greater than or equal to +0.0.
+  // It must not be special (infinity, or NaN).
+  DiyFp AsDiyFp() const {
+    ASSERT(Sign() > 0);
+    ASSERT(!IsSpecial());
+    return DiyFp(Significand(), Exponent());
+  }
+  // The value encoded by this Double must be strictly greater than 0.
+  DiyFp AsNormalizedDiyFp() const {
+    ASSERT(value() > 0.0);
+    uint64_t f = Significand();
+    int e = Exponent();
+    // The current double could be a denormal.
+    while ((f & kHiddenBit) == 0) {
+      f <<= 1;
+      e--;
+    }
+    // Do the final shifts in one go.
+    f <<= DiyFp::kSignificandSize - kSignificandSize;
+    e -= DiyFp::kSignificandSize - kSignificandSize;
+    return DiyFp(f, e);
+  }
+  // Returns the raw bits of the double as a uint64.
+  uint64_t AsUint64() const {
+    return d64_;
+  }
+  // Returns the next greater double. Returns +infinity on input +infinity.
+  double NextDouble() const {
+    if (d64_ == kInfinity) return Double(kInfinity).value();
+    if (Sign() < 0 && Significand() == 0) {
+      // -0.0
+      return 0.0;
+    }
+    if (Sign() < 0) {
+      return Double(d64_ - 1).value();  // Negative: the next greater value has the smaller bit pattern.
+    } else {
+      return Double(d64_ + 1).value();
+    }
+  }
+  double PreviousDouble() const {  // Returns the next smaller double. Returns -infinity on input -infinity.
+    if (d64_ == (kInfinity | kSignMask)) return -Infinity();
+    if (Sign() < 0) {
+      return Double(d64_ + 1).value();  // Negative: the next smaller value has the larger bit pattern.
+    } else {
+      if (Significand() == 0) return -0.0;  // Stepping down from +0.0 yields -0.0.
+      return Double(d64_ - 1).value();
+    }
+  }
+  int Exponent() const {  // Exponent paired with Significand(); denormals all use kDenormalExponent.
+    if (IsDenormal()) return kDenormalExponent;
+    uint64_t d64 = AsUint64();
+    int biased_e =
+        static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
+    return biased_e - kExponentBias;
+  }
+  uint64_t Significand() const {  // Stored significand, with the hidden bit added unless denormal.
+    uint64_t d64 = AsUint64();
+    uint64_t significand = d64 & kSignificandMask;
+    if (!IsDenormal()) {
+      return significand + kHiddenBit;
+    } else {
+      return significand;
+    }
+  }
+  // Returns true if the double is a denormal (exponent field zero; this includes +/-0.0).
+  bool IsDenormal() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kExponentMask) == 0;
+  }
+  // We consider denormals not to be special.
+  // Hence only Infinity and NaN are special.
+  bool IsSpecial() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kExponentMask) == kExponentMask;
+  }
+  bool IsNan() const {  // Exponent field all ones and a non-zero stored significand.
+    uint64_t d64 = AsUint64();
+    return ((d64 & kExponentMask) == kExponentMask) &&
+        ((d64 & kSignificandMask) != 0);
+  }
+  bool IsInfinite() const {  // Exponent field all ones and a zero stored significand.
+    uint64_t d64 = AsUint64();
+    return ((d64 & kExponentMask) == kExponentMask) &&
+        ((d64 & kSignificandMask) == 0);
+  }
+  int Sign() const {  // Returns 1 when the sign bit is clear, -1 when it is set.
+    uint64_t d64 = AsUint64();
+    return (d64 & kSignMask) == 0? 1: -1;
+  }
+  // Precondition: the value encoded by this Double must be greater than or
+  // equal to +0.0.
+  DiyFp UpperBoundary() const {
+    ASSERT(Sign() > 0);
+    return DiyFp(Significand() * 2 + 1, Exponent() - 1);  // (2f + 1) * 2^(e - 1), i.e. (f + 1/2) * 2^e.
+  }
+  // Computes the two boundaries of this.
+  // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+  // exponent as m_plus.
+  // Precondition: the value encoded by this Double must be greater than 0.
+  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+    ASSERT(value() > 0.0);
+    DiyFp v = this->AsDiyFp();
+    DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+    DiyFp m_minus;
+    if (LowerBoundaryIsCloser()) {
+      m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+    } else {
+      m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+    }
+    m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));  // Bring m_minus to m_plus's exponent.
+    m_minus.set_e(m_plus.e());
+    *out_m_plus = m_plus;
+    *out_m_minus = m_minus;
+  }
+  bool LowerBoundaryIsCloser() const {
+    // If the significand is of the form f == 2^(p-1), i.e. all stored
+    // significand bits are zero, then the lower boundary is closer.
+    // Think of v = 1000e10 and v- = 9999e9.
+    // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+    // at a distance of 1e8.
+    // The only exception is for the smallest normal: the largest denormal is
+    // at the same distance as its successor.
+    // Note: denormals have the same exponent as the smallest normals.
+    bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
+    return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+  }
+  double value() const { return uint64_to_double(d64_); }  // The stored bits reinterpreted as a double.
+  // Returns the significand size for a given order of magnitude.
+  // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
+  // This function returns the number of significant binary digits v will have
+  // once it's encoded into a double. In almost all cases this is equal to
+  // kSignificandSize. The only exceptions are denormals. They start with
+  // leading zeroes and their effective significand-size is hence smaller.
+  static int SignificandSizeForOrderOfMagnitude(int order) {
+    if (order >= (kDenormalExponent + kSignificandSize)) {
+      return kSignificandSize;
+    }
+    if (order <= kDenormalExponent) return 0;
+    return order - kDenormalExponent;
+  }
+  static double Infinity() {  // Value of the kInfinity bit pattern.
+    return Double(kInfinity).value();
+  }
+  static double NaN() {  // Value of the kNaN bit pattern.
+    return Double(kNaN).value();
+  }
+ private:
+  static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;  // Bias includes the stored significand width.
+  static const int kDenormalExponent = -kExponentBias + 1;  // Exponent() result for denormals.
+  static const int kMaxExponent = 0x7FF - kExponentBias;  // DiyFpToUint64 returns infinity at or above this.
+  static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
+  static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);
+  const uint64_t d64_;  // Raw 64-bit encoding; fixed at construction.
+  static uint64_t DiyFpToUint64(DiyFp diy_fp) {  // Encodes diy_fp as double bits (sign bit always clear).
+    uint64_t significand = diy_fp.f();
+    int exponent = diy_fp.e();
+    while (significand > kHiddenBit + kSignificandMask) {  // Too wide: drop low bits.
+      significand >>= 1;
+      exponent++;
+    }
+    if (exponent >= kMaxExponent) {  // Too large: saturate to infinity.
+      return kInfinity;
+    }
+    if (exponent < kDenormalExponent) {  // Too small: return all-zero bits.
+      return 0;
+    }
+    while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {  // Shift up until the hidden bit is set or the denormal exponent is reached.
+      significand <<= 1;
+      exponent--;
+    }
+    uint64_t biased_exponent;
+    if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {  // Denormal: exponent field is 0.
+      biased_exponent = 0;
+    } else {
+      biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
+    }
+    return (significand & kSignificandMask) |
+        (biased_exponent << kPhysicalSignificandSize);
+  }
+  DISALLOW_COPY_AND_ASSIGN(Double);
+};
+// Bit-level view of a float: wraps the 32-bit encoding and exposes the same
+// kind of accessors that Double offers for 64-bit values.
+class Single {
+ public:
+  static const uint32_t kSignMask = 0x80000000;
+  static const uint32_t kExponentMask = 0x7F800000;
+  static const uint32_t kSignificandMask = 0x007FFFFF;
+  static const uint32_t kHiddenBit = 0x00800000;
+  static const int kPhysicalSignificandSize = 23;  // Excludes the hidden bit.
+  static const int kSignificandSize = 24;
+  Single() : d32_(0) {}
+  explicit Single(float f) : d32_(float_to_uint32(f)) {}
+  explicit Single(uint32_t d32) : d32_(d32) {}
+  // Precondition: the encoded value is greater than or equal to +0.0 and is
+  // neither infinity nor NaN.
+  DiyFp AsDiyFp() const {
+    ASSERT(Sign() > 0);
+    ASSERT(!IsSpecial());
+    return DiyFp(Significand(), Exponent());
+  }
+  // Returns the raw bits of the single as a uint32.
+  uint32_t AsUint32() const {
+    return d32_;
+  }
+  // Exponent that pairs with Significand(); every denormal reports
+  // kDenormalExponent.
+  int Exponent() const {
+    const uint32_t exponent_field = d32_ & kExponentMask;
+    if (exponent_field == 0) {
+      return kDenormalExponent;
+    }
+    const int biased_e =
+        static_cast<int>(exponent_field >> kPhysicalSignificandSize);
+    return biased_e - kExponentBias;
+  }
+  // Stored significand bits, with the hidden bit added unless denormal.
+  uint32_t Significand() const {
+    const uint32_t stored_bits = d32_ & kSignificandMask;
+    if (IsDenormal()) {
+      return stored_bits;
+    }
+    return stored_bits + kHiddenBit;
+  }
+  // True when the exponent field is zero.
+  bool IsDenormal() const {
+    return (d32_ & kExponentMask) == 0;
+  }
+  // Only infinity and NaN count as special; denormals do not.
+  bool IsSpecial() const {
+    return (d32_ & kExponentMask) == kExponentMask;
+  }
+  bool IsNan() const {
+    if ((d32_ & kExponentMask) != kExponentMask) {
+      return false;
+    }
+    return (d32_ & kSignificandMask) != 0;
+  }
+  bool IsInfinite() const {
+    if ((d32_ & kExponentMask) != kExponentMask) {
+      return false;
+    }
+    return (d32_ & kSignificandMask) == 0;
+  }
+  // Returns 1 when the sign bit is clear and -1 when it is set.
+  int Sign() const {
+    if ((d32_ & kSignMask) != 0) {
+      return -1;
+    }
+    return 1;
+  }
+  // Computes both boundaries of this value. The upper boundary (m_plus) is
+  // normalized; the lower boundary is expressed with m_plus's exponent.
+  // Precondition: the value encoded by this Single must be greater than 0.
+  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+    ASSERT(value() > 0.0);
+    DiyFp v = AsDiyFp();
+    DiyFp upper = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+    // A closer lower boundary needs one extra bit of resolution.
+    const int extra_bits = LowerBoundaryIsCloser() ? 2 : 1;
+    DiyFp lower;
+    lower = DiyFp((v.f() << extra_bits) - 1, v.e() - extra_bits);
+    lower.set_f(lower.f() << (lower.e() - upper.e()));
+    lower.set_e(upper.e());
+    *out_m_plus = upper;
+    *out_m_minus = lower;
+  }
+  // Precondition: the value encoded by this Single must be greater than or
+  // equal to +0.0.
+  DiyFp UpperBoundary() const {
+    ASSERT(Sign() > 0);
+    return DiyFp(2 * Significand() + 1, Exponent() - 1);
+  }
+  // The lower boundary is closer exactly when all stored significand bits are
+  // zero, except at the denormal exponent: there the predecessor is at the
+  // same distance as the successor.
+  bool LowerBoundaryIsCloser() const {
+    if ((AsUint32() & kSignificandMask) != 0) {
+      return false;
+    }
+    return Exponent() != kDenormalExponent;
+  }
+  float value() const {
+    return uint32_to_float(d32_);
+  }
+  static float Infinity() {
+    return Single(kInfinity).value();
+  }
+  static float NaN() {
+    return Single(kNaN).value();
+  }
+ private:
+  static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
+  static const int kDenormalExponent = -kExponentBias + 1;
+  static const int kMaxExponent = 0xFF - kExponentBias;
+  static const uint32_t kInfinity = 0x7F800000;
+  static const uint32_t kNaN = 0x7FC00000;
+  const uint32_t d32_;
+  DISALLOW_COPY_AND_ASSIGN(Single);
+};
+}  // namespace double_conversion
+#endif  // DOUBLE_CONVERSION_DOUBLE_H_
--- a/cpp/thirdpart/kenlm/util/double-conversion/strtod.cc
+++ b/cpp/thirdpart/kenlm/util/double-conversion/strtod.cc
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#include <stdarg.h>
+#include <limits.h>
+#include "strtod.h"
+#include "bignum.h"
+#include "cached-powers.h"
+#include "ieee.h"
+namespace double_conversion {
+// 2^53 = 9007199254740992.
+// Any integer with at most 15 decimal digits will hence fit into a double
+// (which has a 53bit significand) without loss of precision.
+static const int kMaxExactDoubleIntegerDecimalDigits = 15;
+// 2^64 = 18446744073709551616 > 10^19
+static const int kMaxUint64DecimalDigits = 19;
+// Max double: 1.7976931348623157 x 10^308
+// Min non-zero double: 4.9406564584124654 x 10^-324
+// Any x >= 10^309 is interpreted as +infinity.
+// Any x <= 10^-324 is interpreted as 0.
+// Note that 2.5e-324 (despite being smaller than the min double) will be read
+// as non-zero (equal to the min non-zero double).
+static const int kMaxDecimalPower = 309;
+static const int kMinDecimalPower = -324;
+// 2^64 - 1 = 18446744073709551615
+static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
+static const double exact_powers_of_ten[] = {
+  1.0,  // 10^0
+  10.0,
+  100.0,
+  1000.0,
+  10000.0,
+  100000.0,  // 10^5
+  1000000.0,
+  10000000.0,
+  100000000.0,
+  1000000000.0,
+  10000000000.0,  // 10^10
+  100000000000.0,
+  1000000000000.0,
+  10000000000000.0,
+  100000000000000.0,
+  1000000000000000.0,  // 10^15
+  10000000000000000.0,
+  100000000000000000.0,
+  1000000000000000000.0,
+  10000000000000000000.0,
+  100000000000000000000.0,  // 10^20
+  1000000000000000000000.0,
+  // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
+  10000000000000000000000.0
+};
+static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);  // Table holds 10^0 through 10^22.
+// Maximum number of significant digits in the decimal representation.
+// In fact the value is 772 (see conversions.cc), but to give us some margin
+// we round up to 780.
+static const int kMaxSignificantDecimalDigits = 780;
+// Returns the part of buffer that starts at its first non-'0' character. If
+// there is no such character the result is an empty vector positioned at the
+// start of buffer.
+static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
+  int first_kept = 0;
+  while (first_kept < buffer.length() && buffer[first_kept] == '0') {
+    ++first_kept;
+  }
+  if (first_kept < buffer.length()) {
+    return buffer.SubVector(first_kept, buffer.length());
+  }
+  return Vector<const char>(buffer.start(), 0);
+}
+// Returns the part of buffer that ends at its last non-'0' character. If
+// there is no such character the result is an empty vector positioned at the
+// start of buffer.
+static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
+  int kept_length = buffer.length();
+  while (kept_length > 0 && buffer[kept_length - 1] == '0') {
+    --kept_length;
+  }
+  if (kept_length > 0) {
+    return buffer.SubVector(0, kept_length);
+  }
+  return Vector<const char>(buffer.start(), 0);
+}
+// Copies the first kMaxSignificantDecimalDigits - 1 digits of buffer into
+// significant_buffer, writes '1' as the final digit, and stores the exponent
+// adjusted for the dropped digits in *significant_exponent.
+static void CutToMaxSignificantDigits(Vector<const char> buffer,
+                                       int exponent,
+                                       char* significant_buffer,
+                                       int* significant_exponent) {
+  const int last_index = kMaxSignificantDecimalDigits - 1;
+  int position = 0;
+  while (position < last_index) {
+    significant_buffer[position] = buffer[position];
+    ++position;
+  }
+  // The caller passes a trimmed buffer, so its final digit cannot be '0'.
+  ASSERT(buffer[buffer.length() - 1] != '0');
+  // A non-zero last digit stands in for the dropped tail; that is enough to
+  // guarantee correct rounding.
+  significant_buffer[last_index] = '1';
+  const int dropped_digits = buffer.length() - kMaxSignificantDecimalDigits;
+  *significant_exponent = exponent + dropped_digits;
+}
+// Strips leading and trailing zeros from buffer and limits the result to
+// kMaxSignificantDecimalDigits digits. The input storage is reused when no
+// cutting is needed; otherwise the shortened digits are written to
+// buffer_copy_space and *trimmed refers to that copy. *updated_exponent
+// receives the exponent adjusted for the removed trailing digits.
+static void TrimAndCut(Vector<const char> buffer, int exponent,
+                       char* buffer_copy_space, int space_size,
+                       Vector<const char>* trimmed, int* updated_exponent) {
+  Vector<const char> without_leading = TrimLeadingZeros(buffer);
+  Vector<const char> digits = TrimTrailingZeros(without_leading);
+  exponent += without_leading.length() - digits.length();
+  if (digits.length() <= kMaxSignificantDecimalDigits) {
+    // Short enough: hand back a view into the caller's storage.
+    *trimmed = digits;
+    *updated_exponent = exponent;
+    return;
+  }
+  (void) space_size;  // Mark variable as used.
+  ASSERT(space_size >= kMaxSignificantDecimalDigits);
+  CutToMaxSignificantDigits(digits, exponent,
+                            buffer_copy_space, updated_exponent);
+  *trimmed = Vector<const char>(buffer_copy_space,
+                               kMaxSignificantDecimalDigits);
+}
+// Converts leading decimal digits of buffer into a uint64 and reports how many
+// digits were consumed through *number_of_read_digits.
+// Reading stops once the accumulated value exceeds kMaxUint64 / 10 - 1, so a
+// string starting with "1844674407370955161" gets no further digit read.
+// Since 2^64 = 18446744073709551616 it would still be possible to read another
+// digit if it was less than or equal to 6, but this would complicate the code.
+static uint64_t ReadUint64(Vector<const char> buffer,
+                           int* number_of_read_digits) {
+  const uint64_t accumulate_limit = kMaxUint64 / 10 - 1;
+  uint64_t value = 0;
+  int consumed = 0;
+  for (; consumed < buffer.length() && value <= accumulate_limit; ++consumed) {
+    int digit = buffer[consumed] - '0';
+    ASSERT(0 <= digit && digit <= 9);
+    value = 10 * value + digit;
+  }
+  *number_of_read_digits = consumed;
+  return value;
+}
+// Reads a DiyFp from the buffer; the result is not necessarily normalized.
+// *remaining_decimals receives the number of digits that were not consumed.
+// When it is zero the returned DiyFp is accurate; otherwise the significand
+// has been rounded on the first unread digit and has an error of at most
+// 1/2 ulp.
+static void ReadDiyFp(Vector<const char> buffer,
+                      DiyFp* result,
+                      int* remaining_decimals) {
+  int read_digits;
+  uint64_t significand = ReadUint64(buffer, &read_digits);
+  const bool consumed_everything = (buffer.length() == read_digits);
+  if (!consumed_everything && buffer[read_digits] >= '5') {
+    // Round the significand up on the first dropped digit.
+    significand++;
+  }
+  // The binary exponent is always 0 here.
+  *result = DiyFp(significand, 0);
+  *remaining_decimals = buffer.length() - read_digits;
+}
+// Fast path: tries to compute trimmed * 10^exponent with plain double
+// arithmetic. Returns true and stores the value in *result on success;
+// returns false when the input is outside the range this path handles.
+static bool DoubleStrtod(Vector<const char> trimmed,
+                         int exponent,
+                         double* result) {
+#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
+  // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
+  // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
+  // result is not accurate.
+  // We know that Windows32 uses 64 bits and is therefore accurate.
+  // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
+  // the same problem.
+  return false;
+#endif
+  if (trimmed.length() > kMaxExactDoubleIntegerDecimalDigits) {
+    // The digits might not fit into a double without loss.
+    return false;
+  }
+  // From here on the digits fit into a double. If the power of ten fits too,
+  // a single multiplication or division gives the result, because IEEE
+  // guarantees that floating-point operations return the best possible
+  // approximation.
+  int read_digits;
+  if (exponent < 0) {
+    if (-exponent < kExactPowersOfTenSize) {
+      // 10^-exponent fits into a double.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result /= exact_powers_of_ten[-exponent];
+      return true;
+    }
+  } else if (exponent < kExactPowersOfTenSize) {
+    // 10^exponent fits into a double.
+    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+    ASSERT(read_digits == trimmed.length());
+    *result *= exact_powers_of_ten[exponent];
+    return true;
+  }
+  int spare_digits = kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
+  if ((0 <= exponent) &&
+      (exponent - spare_digits < kExactPowersOfTenSize)) {
+    // The digit string is short, so it can first be multiplied by
+    // 10^spare_digits; the rest of the exponent then fits into a double too.
+    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+    ASSERT(read_digits == trimmed.length());
+    *result *= exact_powers_of_ten[spare_digits];
+    *result *= exact_powers_of_ten[exponent - spare_digits];
+    return true;
+  }
+  return false;
+}
+// Returns 10^exponent as an exact DiyFp.
+// The given exponent must be in the range [1; kDecimalExponentDistance[.
+static DiyFp AdjustmentPowerOfTen(int exponent) {
+  ASSERT(0 < exponent);
+  ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
+  // The powers below are hardcoded for a decimal exponent distance of 8.
+  ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
+  uint64_t significand = 0;
+  int binary_exponent = 0;
+  switch (exponent) {
+    case 1:
+      significand = UINT64_2PART_C(0xa0000000, 00000000);
+      binary_exponent = -60;
+      break;
+    case 2:
+      significand = UINT64_2PART_C(0xc8000000, 00000000);
+      binary_exponent = -57;
+      break;
+    case 3:
+      significand = UINT64_2PART_C(0xfa000000, 00000000);
+      binary_exponent = -54;
+      break;
+    case 4:
+      significand = UINT64_2PART_C(0x9c400000, 00000000);
+      binary_exponent = -50;
+      break;
+    case 5:
+      significand = UINT64_2PART_C(0xc3500000, 00000000);
+      binary_exponent = -47;
+      break;
+    case 6:
+      significand = UINT64_2PART_C(0xf4240000, 00000000);
+      binary_exponent = -44;
+      break;
+    case 7:
+      significand = UINT64_2PART_C(0x98968000, 00000000);
+      binary_exponent = -40;
+      break;
+    default:
+      UNREACHABLE();
+  }
+  return DiyFp(significand, binary_exponent);
+}
+// Approximates buffer * 10^exponent with DiyFp arithmetic and tracked error.
+// If the function returns true then the result is the correct double. Otherwise
+// it is either the correct double or the double that is just below it.
+static bool DiyFpStrtod(Vector<const char> buffer,
+                        int exponent,
+                        double* result) {
+  DiyFp input;
+  int remaining_decimals;
+  ReadDiyFp(buffer, &input, &remaining_decimals);
+  // Since we may have dropped some digits the input is not accurate.
+  // If remaining_decimals is different from 0 then the error is at most
+  // .5 ulp (unit in the last place).
+  // We don't want to deal with fractions and therefore keep a common
+  // denominator.
+  const int kDenominatorLog = 3;
+  const int kDenominator = 1 << kDenominatorLog;
+  // Move the remaining decimals into the exponent.
+  exponent += remaining_decimals;
+  uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);  // 0.5 ulp in units of 1/kDenominator.
+  int old_e = input.e();
+  input.Normalize();
+  error <<= old_e - input.e();  // Scale the error by the same shift as the normalization.
+  ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
+  if (exponent < PowersOfTenCache::kMinDecimalExponent) {  // Below the cached range: report 0.0.
+    *result = 0.0;
+    return true;
+  }
+  DiyFp cached_power;
+  int cached_decimal_exponent;
+  PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
+                                                     &cached_power,
+                                                     &cached_decimal_exponent);
+  if (cached_decimal_exponent != exponent) {  // Bridge the gap with a small exact power of ten.
+    int adjustment_exponent = exponent - cached_decimal_exponent;
+    DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
+    input.Multiply(adjustment_power);
+    if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
+      // The product of input with the adjustment power fits into a 64 bit
+      // integer.
+      ASSERT(DiyFp::kSignificandSize == 64);
+    } else {
+      // The adjustment power is exact. There is hence only an error of 0.5.
+      error += kDenominator / 2;
+    }
+  }
+  input.Multiply(cached_power);
+  // The error introduced by a multiplication of a*b equals
+  //   error_a + error_b + error_a*error_b/2^64 + 0.5
+  // Substituting a with 'input' and b with 'cached_power' we have
+  //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
+  //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
+  int error_b = kDenominator / 2;
+  int error_ab = (error == 0 ? 0 : 1);  // We round up to 1.
+  int fixed_error = kDenominator / 2;
+  error += error_b + error_ab + fixed_error;
+  old_e = input.e();
+  input.Normalize();
+  error <<= old_e - input.e();  // Scale the error by the same shift as the normalization.
+  // See if the double's significand changes if we add/subtract the error.
+  int order_of_magnitude = DiyFp::kSignificandSize + input.e();
+  int effective_significand_size =
+      Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
+  int precision_digits_count =
+      DiyFp::kSignificandSize - effective_significand_size;  // Low bits that will not fit into the double.
+  if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
+    // This can only happen for very small denormals. In this case the
+    // half-way multiplied by the denominator exceeds the range of an uint64.
+    // Simply shift everything to the right.
+    int shift_amount = (precision_digits_count + kDenominatorLog) -
+        DiyFp::kSignificandSize + 1;
+    input.set_f(input.f() >> shift_amount);
+    input.set_e(input.e() + shift_amount);
+    // We add 1 for the lost precision of error, and kDenominator for
+    // the lost precision of input.f().
+    error = (error >> shift_amount) + 1 + kDenominator;
+    precision_digits_count -= shift_amount;
+  }
+  // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
+  ASSERT(DiyFp::kSignificandSize == 64);
+  ASSERT(precision_digits_count < 64);
+  uint64_t one64 = 1;
+  uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
+  uint64_t precision_bits = input.f() & precision_bits_mask;
+  uint64_t half_way = one64 << (precision_digits_count - 1);
+  precision_bits *= kDenominator;  // Bring both values to the same scale as error.
+  half_way *= kDenominator;
+  DiyFp rounded_input(input.f() >> precision_digits_count,
+                      input.e() + precision_digits_count);
+  if (precision_bits >= half_way + error) {  // Above half-way even after allowing for the error: round up.
+    rounded_input.set_f(rounded_input.f() + 1);
+  }
+  // If the last_bits are too close to the half-way case then we are too
+  // inaccurate and round down. In this case we return false so that we can
+  // fall back to a more precise algorithm.
+  *result = Double(rounded_input).value();
+  if (half_way - error < precision_bits && precision_bits < half_way + error) {
+    // Too imprecise. The caller will have to fall back to a slower version.
+    // However the returned number is guaranteed to be either the correct
+    // double, or the next-lower double.
+    return false;
+  } else {
+    return true;
+  }
+}
+// Compares buffer*10^exponent with diy_fp using exact big-number arithmetic.
+// Returns
+//   - -1 if buffer*10^exponent < diy_fp.
+//   -  0 if buffer*10^exponent == diy_fp.
+//   - +1 if buffer*10^exponent > diy_fp.
+// Preconditions:
+//   buffer.length() + exponent <= kMaxDecimalPower + 1
+//   buffer.length() + exponent > kMinDecimalPower
+//   buffer.length() <= kMaxSignificantDecimalDigits
+static int CompareBufferWithDiyFp(Vector<const char> buffer,
+                                  int exponent,
+                                  DiyFp diy_fp) {
+  ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
+  ASSERT(buffer.length() + exponent > kMinDecimalPower);
+  ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
+  // Make sure that the Bignum will be able to hold all our numbers.
+  // Our Bignum implementation has a separate field for exponents. Shifts will
+  // consume at most one bigit (< 64 bits).
+  // log2(10) == 3.3219...
+  ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
+  Bignum decimal_side;
+  Bignum binary_side;
+  decimal_side.AssignDecimalString(buffer);
+  binary_side.AssignUInt64(diy_fp.f());
+  // Scale whichever side keeps both numbers integral: a negative decimal
+  // exponent is applied to the other side as a positive power of ten.
+  if (exponent < 0) {
+    binary_side.MultiplyByPowerOfTen(-exponent);
+  } else {
+    decimal_side.MultiplyByPowerOfTen(exponent);
+  }
+  // The binary exponent is handled the same way, with shifts.
+  if (diy_fp.e() <= 0) {
+    decimal_side.ShiftLeft(-diy_fp.e());
+  } else {
+    binary_side.ShiftLeft(diy_fp.e());
+  }
+  return Bignum::Compare(decimal_side, binary_side);
+}
+// Stores an approximation of trimmed * 10^exponent in *guess.
+// Returns true if the guess is the correct double.
+// Returns false when the guess is either correct or the next-lower double.
+static bool ComputeGuess(Vector<const char> trimmed, int exponent,
+                         double* guess) {
+  // No digits left after trimming: the value is zero.
+  if (trimmed.length() == 0) {
+    *guess = 0.0;
+    return true;
+  }
+  // Magnitude at or beyond kMaxDecimalPower: infinity.
+  if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
+    *guess = Double::Infinity();
+    return true;
+  }
+  // Magnitude at or below kMinDecimalPower: zero.
+  if (exponent + trimmed.length() <= kMinDecimalPower) {
+    *guess = 0.0;
+    return true;
+  }
+  // Try the plain-double path first, then the DiyFp approximation.
+  if (DoubleStrtod(trimmed, exponent, guess)) {
+    return true;
+  }
+  if (DiyFpStrtod(trimmed, exponent, guess)) {
+    return true;
+  }
+  // An infinite guess is accepted as final.
+  return *guess == Double::Infinity();
+}
+// Converts buffer * 10^exponent to a double. The digits are first trimmed and
+// cut to at most kMaxSignificantDecimalDigits; if the quick guess is not
+// known to be exact, it is checked against its upper boundary to decide
+// between the guess and the next double (ties go to the even significand).
+double Strtod(Vector<const char> buffer, int exponent) {
+  char digit_storage[kMaxSignificantDecimalDigits];
+  Vector<const char> digits;
+  int trimmed_exponent;
+  TrimAndCut(buffer, exponent, digit_storage, kMaxSignificantDecimalDigits,
+             &digits, &trimmed_exponent);
+  double guess;
+  if (ComputeGuess(digits, trimmed_exponent, &guess)) {
+    return guess;
+  }
+  const int order = CompareBufferWithDiyFp(digits, trimmed_exponent,
+                                           Double(guess).UpperBoundary());
+  // Above the boundary always rounds up; exactly on it rounds towards even,
+  // i.e. up only when the guess has an odd significand.
+  const bool round_up =
+      order > 0 || (order == 0 && (Double(guess).Significand() & 1) != 0);
+  return round_up ? Double(guess).NextDouble() : guess;
+}
+float Strtof(Vector<const char> buffer, int exponent) {  // Single-precision counterpart of Strtod: converts buffer * 10^exponent to a float.
+  char copy_buffer[kMaxSignificantDecimalDigits];
+  Vector<const char> trimmed;
+  int updated_exponent;
+  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+             &trimmed, &updated_exponent);
+  exponent = updated_exponent;
+  double double_guess;
+  bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
+  float float_guess = static_cast<float>(double_guess);
+  if (float_guess == double_guess) {
+    // This shortcut triggers when the double guess is exactly representable as a float, e.g. for integer values.
+    return float_guess;
+  }
+  // We must catch double-rounding. Say the double has been rounded up, and is
+  // now a boundary of a float, and rounds up again. This is why we have to
+  // look at previous too.
+  // Example (in decimal numbers):
+  //    input: 12349
+  //    high-precision (4 digits): 1235
+  //    low-precision (3 digits):
+  //       when read from input: 123
+  //       when rounded from high precision: 124.
+  // To do this we simply look at the neighbors of the correct result and see
+  // if they would round to the same float. If the guess is not correct we have
+  // to look at four values (since two different doubles could be the correct
+  // double).
+  double double_next = Double(double_guess).NextDouble();
+  double double_previous = Double(double_guess).PreviousDouble();
+  float f1 = static_cast<float>(double_previous);
+  float f2 = float_guess;
+  float f3 = static_cast<float>(double_next);
+  float f4;
+  if (is_correct) {
+    f4 = f3;  // The guess is exact, so no fourth neighbor is needed.
+  } else {
+    double double_next2 = Double(double_next).NextDouble();
+    f4 = static_cast<float>(double_next2);
+  }
+  (void) f2;  // Mark variable as used (it is otherwise only read inside ASSERTs).
+  ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
+  // If the guess doesn't lie near a single-precision boundary we can simply
+  // return its float-value.
+  if (f1 == f4) {
+    return float_guess;
+  }
+  ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
+         (f1 == f2 && f2 != f3 && f3 == f4) ||
+         (f1 == f2 && f2 == f3 && f3 != f4));
+  // guess and next are the two possible candidates (in the same way that
+  // double_guess was the lower candidate for a double-precision guess).
+  float guess = f1;
+  float next = f4;
+  DiyFp upper_boundary;
+  if (guess == 0.0f) {  // Zero gets a hand-built boundary: half of min_float, expressed as a double.
+    float min_float = 1e-45f;  // Presumably the smallest positive float -- TODO confirm.
+    upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
+  } else {
+    upper_boundary = Single(guess).UpperBoundary();
+  }
+  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+  if (comparison < 0) {
+    return guess;
+  } else if (comparison > 0) {
+    return next;
+  } else if ((Single(guess).Significand() & 1) == 0) {
+    // Exactly on the boundary: round towards even, which here is the lower candidate.
+    return guess;
+  } else {
+    return next;
+  }
+}
+}  // namespace double_conversion
--- a/cpp/thirdpart/kenlm/util/double-conversion/strtod.h
+++ b/cpp/thirdpart/kenlm/util/double-conversion/strtod.h
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#ifndef DOUBLE_CONVERSION_STRTOD_H_
+#define DOUBLE_CONVERSION_STRTOD_H_
+#include "utils.h"
+namespace double_conversion {
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+double Strtod(Vector<const char> buffer, int exponent);  // Returns buffer * 10^exponent as a double.
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+float Strtof(Vector<const char> buffer, int exponent);  // Returns buffer * 10^exponent as a float.
+}  // namespace double_conversion
+#endif  // DOUBLE_CONVERSION_STRTOD_H_
--- a/cpp/thirdpart/kenlm/util/double-conversion/utils.h
+++ b/cpp/thirdpart/kenlm/util/double-conversion/utils.h
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#ifndef DOUBLE_CONVERSION_UTILS_H_
+#define DOUBLE_CONVERSION_UTILS_H_
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#ifndef ASSERT
+// Debug check that forwards to the standard assert(). The expansion carries
+// no semicolon of its own: call sites write "ASSERT(x);", and an embedded ';'
+// would turn that into two statements and break unbraced if/else bodies.
+#define ASSERT(condition)         \
+    assert(condition)
+#endif
+#ifndef UNIMPLEMENTED
+#define UNIMPLEMENTED() (abort())  // Terminates the process via abort().
+#endif  // UNIMPLEMENTED
+#ifndef DOUBLE_CONVERSION_NO_RETURN
+#ifdef _MSC_VER
+#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn)
+#else
+#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn))
+#endif  // _MSC_VER
+#endif  // DOUBLE_CONVERSION_NO_RETURN
+#ifndef UNREACHABLE
+#ifdef _MSC_VER
+void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();  // MSVC only: abort() wrapper declared with the noreturn attribute.
+inline void abort_noreturn() { abort(); }
+#define UNREACHABLE()   (abort_noreturn())
+#else
+#define UNREACHABLE()   (abort())
+#endif  // _MSC_VER
+#endif  // UNREACHABLE
+// Double operations detection based on target architecture.
+// Linux uses an 80-bit wide floating point stack on x86. This induces double
+// rounding, which in turn leads to wrong results.
+// An easy way to test if the floating-point operations are correct is to
+// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
+// the result is equal to 89255e-22.
+// The best way to test this is to create a division function and to compare
+// the output of the division with the expected result. (Inlining must be
+// disabled.)
+// On Linux, x86: 89255e-22 != Div_double(89255.0/1e22)
+#if defined(_M_X64) || defined(__x86_64__) || \
+    defined(__ARMEL__) || defined(__avr32__) || \
+    defined(__hppa__) || defined(__ia64__) || \
+    defined(__mips__) || \
+    defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
+    defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
+    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
+    defined(__SH4__) || defined(__alpha__) || \
+    defined(_MIPS_ARCH_MIPS32R2) || \
+    defined(__AARCH64EL__) || defined(__aarch64__) || \
+    defined(__riscv)
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#elif defined(__mc68000__)
+#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
+#if defined(_WIN32)
+// Windows uses a 64-bit wide floating point stack.
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#else
+#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
+#endif  // _WIN32
+#else
+#error Target architecture was not detected as supported by Double-Conversion.
+#endif  // architecture detection
+#if defined(__GNUC__)
+#define DOUBLE_CONVERSION_UNUSED __attribute__((unused))  // Marks a declaration as intentionally unused.
+#else
+#define DOUBLE_CONVERSION_UNUSED  // Expands to nothing on other compilers.
+#endif  // __GNUC__
+#if defined(_WIN32) && !defined(__MINGW32__)
+typedef signed char int8_t;  // Hand-written fixed-width aliases; <stdint.h> is only included in the #else branch.
+typedef unsigned char uint8_t;
+typedef short int16_t;  // NOLINT
+typedef unsigned short uint16_t;  // NOLINT
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+// intptr_t and friends are defined in crtdefs.h through stdio.h.
+#else
+#include <stdint.h>
+#endif  // _WIN32 && !__MINGW32__
+typedef uint16_t uc16;  // Unsigned 16-bit alias.
+// The following macro works on both 32 and 64-bit platforms.
+// Usage: instead of writing 0x1234567890123456
+//      write UINT64_2PART_C(0x12345678,90123456); the second part is given without "0x" because the macro pastes 0x and a 'u' suffix around it.
+#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
+// The expression ARRAY_SIZE(a) is a compile-time constant of type
+// size_t which represents the number of elements of the given
+// array. You should only use ARRAY_SIZE on statically allocated
+// arrays. The second divisor becomes 0 when sizeof(a) is not a multiple of the element size, turning such misuse into a division by zero.
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a)                                   \
+  ((sizeof(a) / sizeof(*(a))) /                         \
+  static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
+#endif  // ARRAY_SIZE
+// A macro to disallow the copy constructor and operator= functions.
+// This should be used in the private: declarations for a class; the two members are only declared here, never defined.
+#ifndef DISALLOW_COPY_AND_ASSIGN
+#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
+  TypeName(const TypeName&);                    \
+  void operator=(const TypeName&)
+#endif  // DISALLOW_COPY_AND_ASSIGN
+// A macro to disallow all the implicit constructors, namely the
+// default constructor, copy constructor and operator= functions.
+// It declares the default constructor and then expands DISALLOW_COPY_AND_ASSIGN.
+// This should be used in the private: declarations for a class
+// that wants to prevent anyone from instantiating it. This is
+// especially useful for classes containing only static methods.
+#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+  TypeName();                                    \
+  DISALLOW_COPY_AND_ASSIGN(TypeName)
+#endif  // DISALLOW_IMPLICIT_CONSTRUCTORS
+namespace double_conversion {
+static const int kCharSize = sizeof(char);  // Size of one char in bytes; used to scale the byte count passed to memmove in StringBuilder::AddSubstring.
+// Returns the larger of the two parameters. When neither compares less than
+// the other, the first parameter is returned.
+template <typename T>
+static T Max(T a, T b) {
+  if (a < b) {
+    return b;
+  }
+  return a;
+}
+// Returns the smaller of the two parameters. When neither compares less than
+// the other, the second parameter is returned.
+template <typename T>
+static T Min(T a, T b) {
+  if (a < b) {
+    return a;
+  }
+  return b;
+}
+// Returns strlen(string) as an int. In debug builds an ASSERT checks that the
+// length survives the round trip through int unchanged.
+inline int StrLength(const char* string) {
+  const size_t raw_length = strlen(string);
+  const int narrowed = static_cast<int>(raw_length);
+  ASSERT(raw_length == static_cast<size_t>(narrowed));
+  return narrowed;
+}
+// A simplified version of V8's Vector class: it stores only a start pointer
+// and a length, and never allocates or frees the elements it points to.
+template <typename T>
+class Vector {
+ public:
+  // Creates an empty vector (null start, zero length).
+  Vector() : start_(NULL), length_(0) {}
+
+  // Wraps 'len' elements beginning at 'data'. A null 'data' is accepted only
+  // together with a zero length; negative lengths are rejected by the ASSERT.
+  Vector(T* data, int len) : start_(data), length_(len) {
+    ASSERT((len > 0 && data != NULL) || len == 0);
+  }
+
+  // Returns a vector sharing this one's backing storage that covers the
+  // half-open index range ['from', 'to').
+  Vector<T> SubVector(int from, int to) {
+    ASSERT(to <= length_);
+    ASSERT(from < to);
+    ASSERT(0 <= from);
+    const int span = to - from;
+    return Vector<T>(start_ + from, span);
+  }
+
+  // Number of elements in the vector.
+  int length() const { return length_; }
+
+  // True when the vector holds no elements.
+  bool is_empty() const { return 0 == length_; }
+
+  // Pointer to the first element of the underlying data.
+  T* start() const { return start_; }
+
+  // Element access; the index is bounds-checked by ASSERT in debug mode.
+  T& operator[](int index) const {
+    ASSERT(index >= 0 && index < length_);
+    return *(start_ + index);
+  }
+
+  // First and last element (no bounds check).
+  T& first() { return *start_; }
+  T& last() { return *(start_ + (length_ - 1)); }
+
+ private:
+  T* start_;
+  int length_;
+};
+// Builds a result string inside a caller-supplied character buffer. Every
+// operation checks the buffer bounds with ASSERT, i.e. in debug mode only.
+// position_ is the next write index; a negative value marks the builder as
+// finalized.
+class StringBuilder {
+ public:
+  StringBuilder(char* buffer, int buffer_size)
+      : buffer_(buffer, buffer_size), position_(0) { }
+
+  // Terminates the string if the owner never called Finalize().
+  ~StringBuilder() {
+    if (!is_finalized()) {
+      Finalize();
+    }
+  }
+
+  // Capacity of the underlying buffer.
+  int size() const { return buffer_.length(); }
+
+  // Current write position. Must not be called after Finalize().
+  int position() const {
+    ASSERT(!is_finalized());
+    return position_;
+  }
+
+  // Rewinds the write position to the start of the buffer.
+  void Reset() { position_ = 0; }
+
+  // Appends one character. 0-characters are not allowed here; the string is
+  // terminated by Finalize() instead.
+  void AddCharacter(char c) {
+    ASSERT(c != '\0');
+    ASSERT(!is_finalized() && position_ < buffer_.length());
+    buffer_[position_] = c;
+    ++position_;
+  }
+
+  // Appends a whole 0-terminated string; its length is taken with strlen()
+  // (through StrLength).
+  void AddString(const char* s) {
+    const int length = StrLength(s);
+    AddSubstring(s, length);
+  }
+
+  // Appends the first 'n' characters of 's', which must be at least that
+  // long.
+  void AddSubstring(const char* s, int n) {
+    ASSERT(!is_finalized() && position_ + n < buffer_.length());
+    ASSERT(static_cast<size_t>(n) <= strlen(s));
+    char* destination = &buffer_[position_];
+    memmove(destination, s, n * kCharSize);
+    position_ += n;
+  }
+
+  // Appends 'count' copies of 'c'; a non-positive count appends nothing.
+  void AddPadding(char c, int count) {
+    int remaining = count;
+    while (remaining > 0) {
+      AddCharacter(c);
+      --remaining;
+    }
+  }
+
+  // 0-terminates the string, marks the builder as finalized and returns the
+  // start of the buffer.
+  char* Finalize() {
+    ASSERT(!is_finalized() && position_ < buffer_.length());
+    buffer_[position_] = '\0';
+    // A shorter strlen() would mean a 0-character slipped into the buffer
+    // while the string was being built.
+    ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
+    position_ = -1;
+    ASSERT(is_finalized());
+    return buffer_.start();
+  }
+
+ private:
+  Vector<char> buffer_;
+  int position_;
+
+  bool is_finalized() const { return position_ < 0; }
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
+};
+// The type-based aliasing rule allows the compiler to assume that pointers of
+// different types (for some definition of different) never alias each other.
+// Thus the following code does not work:
+//
+// float f = foo();
+// int fbits = *(int*)(&f);
+//
+// The compiler 'knows' that the int pointer can't refer to f since the types
+// don't match, so the compiler may cache f in a register, leaving random data
+// in fbits.  Using C++ style casts makes no difference, however a pointer to
+// char data is assumed to alias any other pointer.  This is the 'memcpy
+// exception'.
+//
+// BitCast uses the memcpy exception to move the bits from a variable of one
+// type to a variable of another type.  Of course the end result is likely to
+// be implementation dependent.  Most compilers (gcc-4.2 and MSVC 2005)
+// will completely optimize BitCast away.
+//
+// There is an additional use for BitCast.
+// Recent gccs will warn when they see casts that may result in breakage due to
+// the type-based aliasing rule.  If you have checked that there is no breakage
+// you can use BitCast to cast one pointer type to another.  This confuses gcc
+// enough that it can no longer see that you have cast one pointer type to
+// another thus avoiding the warning.
+template <class Dest, class Source>
+inline Dest BitCast(const Source& source) {
+  // Compile-time size check: the array type below gets a negative bound, and
+  // therefore fails to compile, whenever Dest and Source differ in size.
+  DOUBLE_CONVERSION_UNUSED
+      typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
+  // Copy the object representation byte by byte into a fresh Dest.
+  Dest result;
+  memmove(&result, &source, sizeof(result));
+  return result;
+}
+// Pointer overload: converts the pointer to its uintptr_t value first and
+// then bit-casts that integer to Dest.
+template <class Dest, class Source>
+inline Dest BitCast(Source* source) {
+  const uintptr_t address = reinterpret_cast<uintptr_t>(source);
+  return BitCast<Dest>(address);
+}
+}  // namespace double_conversion
+#endif  // DOUBLE_CONVERSION_UTILS_H_
--- a/cpp/thirdpart/kenlm/util/ersatz_progress.cc
+++ b/cpp/thirdpart/kenlm/util/ersatz_progress.cc
+#include "ersatz_progress.hh"
+#include <algorithm>
+#include <ostream>
+#include <limits>
+#include <string>
+namespace util {
+namespace { const unsigned char kWidth = 100; }  // Number of '*' marks Milestone() writes for a complete bar.
+const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+// Silent progress meter: out_ is NULL and next_ is the largest uint64_t.
+// stones_written_ is zeroed too, so every member has a defined value; the
+// move constructor copies it from the source object.
+ErsatzProgress::ErsatzProgress()
+  : current_(0),
+    next_(std::numeric_limits<uint64_t>::max()),
+    complete_(next_),
+    stones_written_(0),
+    out_(NULL) {}
+// Completes the bar on destruction, but only while an output stream is still
+// attached.
+ErsatzProgress::~ErsatzProgress() {
+  if (out_ == NULL) {
+    return;
+  }
+  Finished();
+}
+// Starts a meter that counts up to 'complete'. With a stream attached, the
+// optional message and the banner are printed right away; with a null stream
+// next_ is pushed to the largest uint64_t instead.
+ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
+  : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
+  if (out_) {
+    if (!message.empty()) {
+      *out_ << message << '\n';
+    }
+    *out_ << kProgressBanner;
+  } else {
+    next_ = std::numeric_limits<uint64_t>::max();
+  }
+}
+// Brings the printed bar up to date with current_ and schedules the next
+// update. Once all kWidth marks are out, the line is ended and the stream is
+// detached so nothing more is ever printed.
+void ErsatzProgress::Milestone() {
+  if (out_ == NULL) {
+    current_ = 0;
+    return;
+  }
+  if (complete_ == 0) {
+    return;
+  }
+  // Marks that should be visible by now, capped at the bar width.
+  const uint64_t scaled = (current_ * kWidth) / complete_;
+  const unsigned char stone = std::min(static_cast<uint64_t>(kWidth), scaled);
+  while (stones_written_ < stone) {
+    (*out_) << '*';
+    ++stones_written_;
+  }
+  if (stone != kWidth) {
+    // Smallest count that earns the next mark (rounded up); next_ never
+    // moves backwards.
+    const uint64_t upcoming = ((stone + 1) * complete_ + kWidth - 1) / kWidth;
+    next_ = std::max(next_, upcoming);
+    return;
+  }
+  (*out_) << std::endl;
+  next_ = std::numeric_limits<uint64_t>::max();
+  out_ = NULL;
+}
+} // namespace util
--- a/cpp/thirdpart/kenlm/util/ersatz_progress.hh
+++ b/cpp/thirdpart/kenlm/util/ersatz_progress.hh
+#ifndef UTIL_ERSATZ_PROGRESS_H
+#define UTIL_ERSATZ_PROGRESS_H
+#include <iostream>
+#include <string>
+#include <stdint.h>
+// Ersatz version of boost::progress so core language model doesn't depend on
+// boost.  Also adds option to print nothing.
+namespace util {
+extern const char kProgressBanner[];
+class ErsatzProgress {
+  public:
+    // Meter that prints nothing.
+    ErsatzProgress();
+
+    // Meter counting up to 'complete' and writing to 'to'. A null stream
+    // means no output, which lets a caller forward an optional ostream
+    // pointer unchanged.
+    explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
+
+#if __cplusplus >= 201103L
+    // Takes over the state of 'from' and silences it: its stream is cleared
+    // and its next_ is set to the largest uint64_t.
+    ErsatzProgress(ErsatzProgress &&from) noexcept
+      : current_(from.current_),
+        next_(from.next_),
+        complete_(from.complete_),
+        stones_written_(from.stones_written_),
+        out_(from.out_) {
+      from.next_ = static_cast<uint64_t>(-1);
+      from.out_ = nullptr;
+    }
+#endif
+
+    ~ErsatzProgress();
+
+    // Advances by one; Milestone() runs once next_ has been reached.
+    ErsatzProgress &operator++() {
+      ++current_;
+      if (current_ >= next_) {
+        Milestone();
+      }
+      return *this;
+    }
+
+    // Advances by 'amount'.
+    ErsatzProgress &operator+=(uint64_t amount) {
+      current_ += amount;
+      if (current_ >= next_) {
+        Milestone();
+      }
+      return *this;
+    }
+
+    // Jumps to the absolute position 'to'.
+    void Set(uint64_t to) {
+      current_ = to;
+      if (current_ >= next_) {
+        Milestone();
+      }
+    }
+
+    // Jumps straight to the configured total.
+    void Finished() {
+      Set(complete_);
+    }
+
+  private:
+    void Milestone();
+
+    uint64_t current_, next_, complete_;
+    unsigned char stones_written_;
+    std::ostream *out_;
+
+    // Copying is disabled: declared here, never defined.
+    ErsatzProgress(const ErsatzProgress &other);
+    ErsatzProgress &operator=(const ErsatzProgress &other);
+};
+} // namespace util
+#endif // UTIL_ERSATZ_PROGRESS_H
--- a/cpp/thirdpart/kenlm/util/exception.cc
+++ b/cpp/thirdpart/kenlm/util/exception.cc
+#include "exception.hh"
+#ifdef __GXX_RTTI
+#include <typeinfo>
+#endif
+#include <cerrno>
+#include <cstring>
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#include <io.h>
+#endif
+namespace util {
+Exception::Exception() throw() {}  // Nothing beyond default-constructing the members.
+Exception::~Exception() throw() {}  // Out-of-line definition of the virtual destructor.
+// Prefixes the message with the throw site, producing
+//   <file>:<line>[ in <func>] threw <name>[ because `<condition>'].\n
+// followed by whatever text was already stored in what_.
+//   func       - enclosing function name, or NULL when unavailable.
+//   child_name - spelling of the thrown class, or NULL to fall back to RTTI
+//                (or to "an exception" when RTTI is not available).
+//   condition  - text of the failed condition, or NULL.
+void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
+  /* The child class might have set some text, but we want this to come first.
+   * Another option would be passing this information to the constructor, but
+   * then child classes would have to accept constructor arguments and pass
+   * them down.
+   */
+  std::string old_text;
+  what_.swap(old_text);
+  what_ << file << ':' << line;
+  if (func) what_ << " in " << func;
+  // Always emit the separator so the line number and the exception name do
+  // not run together when no function name is available.
+  what_ << " threw ";
+  if (child_name) {
+    what_ << child_name;
+  } else {
+#ifdef __GXX_RTTI
+    // Dereference this: typeid(this) would name the static pointer type
+    // (Exception*), whereas typeid(*this) yields the dynamic class because
+    // Exception is polymorphic.
+    what_ << typeid(*this).name();
+#else
+    what_ << "an exception";
+#endif
+  }
+  if (condition) {
+    what_ << " because `" << condition << '\'';
+  }
+  what_ << ".\n";
+  what_ << old_text;
+}
+namespace {
+// strerror_r comes in two flavours with different return types. One overload
+// is provided per flavour so the call site compiles against either; the one
+// that ends up unused is marked to keep compilers quiet.
+#ifdef __GNUC__
+const char *HandleStrerror(int ret, const char *buf) __attribute__ ((unused));
+const char *HandleStrerror(const char *ret, const char * /*buf*/) __attribute__ ((unused));
+#endif
+// At least one of these functions will not be called.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+// XOPEN flavour: an int status is returned and the text lives in buf.
+// A non-zero status yields NULL.
+const char *HandleStrerror(int ret, const char *buf) {
+  if (ret != 0) {
+    return NULL;
+  }
+  return buf;
+}
+// GNU flavour: the returned pointer is the message itself.
+const char *HandleStrerror(const char *ret, const char * /*buf*/) {
+  return ret;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+} // namespace
+// Captures errno at construction time and, when a description can be
+// obtained, starts the message with that description followed by a space.
+ErrnoException::ErrnoException() throw() : errno_(errno) {
+  char buf[200];
+  buf[0] = '\0';
+#if defined(sun) || defined(_WIN32) || defined(_WIN64)
+  const char *description = strerror(errno);
+#else
+  const char *description = HandleStrerror(strerror_r(errno, buf, sizeof(buf)), buf);
+#endif
+  if (description != NULL) {
+    *this << description << ' ';
+  }
+}
+ErrnoException::~ErrnoException() throw() {}  // Out-of-line definition; nothing to release.
+OverflowException::OverflowException() throw() {}  // Adds no message text of its own.
+OverflowException::~OverflowException() throw() {}  // Out-of-line definition; nothing to release.
+#if defined(_WIN32) || defined(_WIN64)
+// Records GetLastError() and asks FormatMessageA for its text. If formatting
+// itself fails, both error codes are reported instead.
+WindowsException::WindowsException() throw() {
+  unsigned int original_error = GetLastError();
+  char description[256] = "";
+  if (FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, original_error, LANG_NEUTRAL, description, sizeof(description), NULL)) {
+    *this << "Windows error " << original_error << ": " << description;
+  } else {
+    *this << "Windows error " << GetLastError() << " while formatting Windows error " << original_error << ". ";
+  }
+}
+
+WindowsException::~WindowsException() throw() {}
+#endif
+} // namespace util
--- a/cpp/thirdpart/kenlm/util/exception.hh
+++ b/cpp/thirdpart/kenlm/util/exception.hh
+#ifndef UTIL_EXCEPTION_H
+#define UTIL_EXCEPTION_H
+#include "string_stream.hh"
+#include <exception>
+#include <limits>
+#include <string>
+#include <stdint.h>
+namespace util {
+template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);  // Declared ahead of the class so Exception can befriend it.
+class Exception : public std::exception {
+  public:
+    Exception() throw();
+    virtual ~Exception() throw();
+    const char *what() const throw() { return what_.str().c_str(); }  // NOTE(review): the pointer stays valid only if StringStream::str() returns a reference to storage owned by what_ -- confirm in string_stream.hh.
+    // For use by the UTIL_THROW macros: puts the throw site in front of the message text.
+    void SetLocation(
+        const char *file,
+        unsigned int line,
+        const char *func,
+        const char *child_name,
+        const char *condition);
+  private:
+    template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
+    // This helps restrict operator<< defined below: its return type only resolves for classes that expose ExceptionTag.
+    template <class T> struct ExceptionTag {
+      typedef T Identity;
+    };
+    StringStream what_;  // Accumulates the message returned by what().
+};
+/* Streaming operator for Exception and every class derived from it: appends
+ * data to the message and hands back the same exception object, keeping its
+ * concrete type.  The return type names Except::ExceptionTag, so through
+ * SFINAE the template only takes part in overload resolution for the
+ * Exception family -- an ersatz boost::enable_if.
+ */
+template <class Except, class Data>
+typename Except::template ExceptionTag<Except&>::Identity
+operator<<(Except &e, const Data &data) {
+  e.what_ << data;
+  return e;
+}
+#ifdef __GNUC__
+#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
+#else
+#ifdef _WIN32
+#define UTIL_FUNC_NAME __FUNCTION__
+#else
+#define UTIL_FUNC_NAME NULL  // No function name available; SetLocation then skips the " in <func>" part.
+#endif  // _WIN32
+#endif  // __GNUC__
+/* Create an instance of Exception, add the message Modify, and throw it.
+ * Modify is appended to the what() message and can contain << for ostream
+ * operations.
+ *
+ * do .. while kludge to swallow trailing ; character
+ * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
+ * Arg can be a constructor argument to the exception.
+ *
+ * Condition is the text of the failed condition, or NULL; it is passed on to
+ * SetLocation together with the stringified exception type.
+ *
+ * NOTE(review): UTIL_THROW and UTIL_THROW2 expand with their own trailing
+ * ';', so "UTIL_THROW(...);" leaves an extra empty statement behind and
+ * cannot be the unbraced body of an if that has an else.  UTIL_THROW_ARG does
+ * not have this quirk.
+ */
+#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
+  Exception UTIL_e Arg; \
+  UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
+  UTIL_e << Modify; \
+  throw UTIL_e; \
+} while (0)
+#define UTIL_THROW_ARG(Exception, Arg, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
+#define UTIL_THROW(Exception, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, , Modify);
+#define UTIL_THROW2(Modify) \
+  UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
+#if __GNUC__ >= 3
+#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)  // Branch hint: x is expected to be false.
+#else
+#define UTIL_UNLIKELY(x) (x)
+#endif  // __GNUC__ >= 3
+#if __GNUC__ >= 3
+#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1)  // Branch hint: x is expected to be true.
+#else
+#define UTIL_LIKELY(x) (x)
+#endif  // __GNUC__ >= 3
+#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
+  if (UTIL_UNLIKELY(Condition)) { \
+    UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
+  } \
+} while (0)  // Throws only when Condition holds; the condition's source text becomes part of the message.
+#define UTIL_THROW_IF(Condition, Exception, Modify) \
+  UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
+#define UTIL_THROW_IF2(Condition, Modify) \
+  UTIL_THROW_IF_ARG(Condition, util::Exception, , Modify)
+// Exception that records errno at construction time and adds its description to the message.
+class ErrnoException : public Exception {
+  public:
+    ErrnoException() throw();
+    virtual ~ErrnoException() throw();
+    int Error() const throw() { return errno_; }  // The errno value captured by the constructor.
+  private:
+    int errno_;
+};
+// Thrown when a file is missing or could not be opened for some other reason.
+class FileOpenException : public Exception {
+  public:
+    FileOpenException() throw() {}
+    ~FileOpenException() throw() {}
+};
+// Utilities for overflow checking: the exception type thrown by CheckOverflowInternal below.
+class OverflowException : public Exception {
+  public:
+    OverflowException() throw();
+    ~OverflowException() throw();
+};
+// Generic case: converts value to std::size_t and throws OverflowException
+// when it does not fit. The condition is kept as a single expression because
+// UTIL_THROW_IF copies its source text into the exception message.
+template <unsigned len>
+inline std::size_t CheckOverflowInternal(uint64_t value) {
+  UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected.  This model is too big for 32-bit code.");
+  return static_cast<std::size_t>(value);
+}
+// Specialization chosen by CheckOverflow when std::size_t is 8 bytes wide:
+// the value is passed through without a range check.
+template <>
+inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
+  return static_cast<std::size_t>(value);
+}
+inline std::size_t CheckOverflow(uint64_t value) {  // Converts value to std::size_t, dispatching on sizeof(std::size_t).
+  return CheckOverflowInternal<sizeof(std::size_t)>(value);
+}
+#if defined(_WIN32) || defined(_WIN64)
+/* Thrown for Windows specific operations. */
+class WindowsException : public Exception {
+  public:
+    WindowsException() throw();
+    ~WindowsException() throw();
+};
+#endif  // _WIN32 || _WIN64
+} // namespace util
+#endif // UTIL_EXCEPTION_H
--- a/cpp/thirdpart/kenlm/util/fake_ostream.hh
+++ b/cpp/thirdpart/kenlm/util/fake_ostream.hh
+#ifndef UTIL_FAKE_OSTREAM_H
+#define UTIL_FAKE_OSTREAM_H
+#include "float_to_string.hh"
+#include "integer_to_string.hh"
+#include "string_piece.hh"
+#include <cassert>
+#include <limits>
+#include <stdint.h>
+namespace util {
+/* Like std::ostream but without being incredibly slow.
+ * Supports most of the built-in types except for long double.
+ *
+ * The FakeOStream class is intended to be inherited from.  The inherting class
+ * should provide:
+ * public:
+ *   Derived &flush();
+ *   Derived &write(const void *data, std::size_t length);
+ *
+ * private: or protected:
+ *   friend class FakeOStream;
+ *   char *Ensure(std::size_t amount);
+ *   void AdvanceTo(char *to);
+ *
+ * The Ensure function makes enough space for an in-place write and returns
+ * where to write.  The AdvanceTo function happens after the write, saying how
+ * much was actually written.
+ *
+ * Precondition:
+ * amount <= kToStringMaxBytes for in-place writes.
+ */
+template <class Derived> class FakeOStream {
+  public:
+    FakeOStream() {}
+    // This also covers std::string and char*
+    Derived &operator<<(StringPiece str) {
+      return C().write(str.data(), str.size());
+    }
+    // Handle integers by size and signedness.
+  private:
+    template <class Arg> struct EnableIfKludge {
+      typedef Derived type;
+    };
+    template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed, bool IsInteger = std::numeric_limits<From>::is_integer> struct Coerce {};
+    template <class From> struct Coerce<From, 2, false, true> { typedef uint16_t To; };
+    template <class From> struct Coerce<From, 4, false, true> { typedef uint32_t To; };
+    template <class From> struct Coerce<From, 8, false, true> { typedef uint64_t To; };
+    template <class From> struct Coerce<From, 2, true, true> { typedef int16_t To; };
+    template <class From> struct Coerce<From, 4, true, true> { typedef int32_t To; };
+    template <class From> struct Coerce<From, 8, true, true> { typedef int64_t To; };
+  public:
+    template <class From> typename EnableIfKludge<typename Coerce<From>::To>::type &operator<<(const From value) {
+      return CallToString(static_cast<typename Coerce<From>::To>(value));
+    }
+    // Character types that get copied as bytes instead of displayed as integers.
+    Derived &operator<<(char val) { return put(val); }
+    Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
+    Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
+    Derived &operator<<(bool val) { return put(val + '0'); }
+    // enums will fall back to int but are not caught by the template.
+    Derived &operator<<(int val) { return CallToString(static_cast<typename Coerce<int>::To>(val)); }
+    Derived &operator<<(float val) { return CallToString(val); }
+    Derived &operator<<(double val) { return CallToString(val); }
+    // This is here to catch all the other pointer types.
+    // Generic pointers are formatted by CallToString.
+    Derived &operator<<(const void *value) {
+      return CallToString(value);
+    }
+    // C strings are written as text; without these overloads they would bind
+    // to the const void* overload above.
+    Derived &operator<<(const char *value) {
+      return *this << StringPiece(value);
+    }
+    Derived &operator<<(char *value) {
+      return *this << StringPiece(value);
+    }
+    // Append one byte: reserve a slot, store val, and advance past it.
+    Derived &put(char val) {
+      char *slot = C().Ensure(1);
+      *slot = val;
+      C().AdvanceTo(slot + 1);
+      return C();
+    }
+    // Returns val unchanged.
+    char widen(char val) const { return val; }
+  private:
+    // References to derived class for convenience.
+    // Downcast to the derived class (mutable access).
+    Derived &C() {
+      return static_cast<Derived&>(*this);
+    }
+    // Downcast to the derived class (read-only access).
+    const Derived &C() const {
+      return static_cast<const Derived&>(*this);
+    }
+    // This is separate to prevent an infinite loop if the compiler considers
+    // types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
+    // Reserve ToStringBuf<T>::kBytes bytes, let ToString format value into
+    // them, and advance to wherever ToString stopped.
+    template <class T> Derived &CallToString(const T value) {
+      char *buffer = C().Ensure(ToStringBuf<T>::kBytes);
+      C().AdvanceTo(ToString(value, buffer));
+      return C();
+    }
+};
+} // namespace
+#endif // UTIL_FAKE_OSTREAM_H
--- a/cpp/thirdpart/kenlm/util/file.cc
+++ b/cpp/thirdpart/kenlm/util/file.cc
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+#include "file.hh"
+#include "exception.hh"
+#include <algorithm>
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <limits>
+#include <stdexcept>
+#include <sstream>
+#include <cassert>
+#include <cerrno>
+#include <climits>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdint.h>
+#if defined(__MINGW32__)
+#include <windows.h>
+#include <unistd.h>
+#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1.  Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
+#elif defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+namespace util {
+// Close the owned descriptor, if any.  A failed close aborts the process.
+scoped_fd::~scoped_fd() {
+  // -1 means nothing is owned.
+  if (fd_ == -1) return;
+  if (close(fd_) == 0) return;
+  std::cerr << "Could not close file " << fd_ << std::endl;
+  std::abort();
+}
+// Close a FILE* if it is non-null.  A failed fclose aborts the process.
+void scoped_FILE_closer::Close(std::FILE *file) {
+  if (!file) return;
+  if (std::fclose(file) == 0) return;
+  std::cerr << "Could not close file " << file << std::endl;
+  std::abort();
+}
+// Note that ErrnoException records errno before NameFromFD is called.
+// Remember the descriptor and a best-effort name for it, and start the
+// message with "in <name> ".
+FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
+  *this << "in " << name_guess_ << ' ';
+}
+FDException::~FDException() throw() {}
+// Start the message with "End of file"; throw sites append more detail.
+EndOfFileException::EndOfFileException() throw() {
+  *this << "End of file";
+}
+EndOfFileException::~EndOfFileException() throw() {}
+// Does path denote standard input?  True only for "-" and "/dev/stdin".
+// This is the name declared in file.hh.
+bool InputPathIsStdin(StringPiece path) {
+  return path == "-" || path == "/dev/stdin";
+}
+// Older spelling of InputPathIsStdin, kept so existing references still link.
+bool InputFileIsStdin(StringPiece path) {
+  return InputPathIsStdin(path);
+}
+// Does path denote standard output?  True only for "-" and "/dev/stdout".
+// This is the name declared in file.hh.
+bool OutputPathIsStdout(StringPiece path) {
+  return path == "-" || path == "/dev/stdout";
+}
+// Older spelling of OutputPathIsStdout, kept so existing references still link.
+bool OutputFileIsStdout(StringPiece path) {
+  return OutputPathIsStdout(path);
+}
+// Open name read-only (binary mode on Windows) and return the descriptor.
+// Throws ErrnoException if the open fails.
+int OpenReadOrThrow(const char *name) {
+  int ret;
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW_IF(-1 == (ret = _open(name, _O_BINARY | _O_RDONLY)), ErrnoException, "while opening " << name);
+#else
+  UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name);
+#endif
+  return ret;
+}
+// Create name if needed, truncate it, and open it read/write.  On POSIX the
+// requested mode is user read/write plus group and other read.
+// Throws ErrnoException if the open fails.
+int CreateOrThrow(const char *name) {
+  int ret;
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
+#else
+  UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
+#endif
+  return ret;
+}
+// Size of the file behind fd in bytes, or kBadSize when it cannot be
+// determined.  On the stat-based paths a zero size on something that is not
+// a regular file is also reported as kBadSize.
+uint64_t SizeFile(int fd) {
+#if !defined(__MINGW32__) && (defined(_WIN32) || defined(_WIN64))
+  const __int64 length = _filelengthi64(fd);
+  if (length == -1) return kBadSize;
+  return length;
+#else
+  // MinGW and POSIX go through fstat; Android uses the explicit 64-bit call.
+  // Does the MinGW fstat handle 64-bit?
+#if !defined(__MINGW32__) && defined(OS_ANDROID)
+  struct stat64 info;
+  if (fstat64(fd, &info) == -1) return kBadSize;
+#else
+  struct stat info;
+  if (fstat(fd, &info) == -1) return kBadSize;
+#endif
+  if (info.st_size == 0 && !S_ISREG(info.st_mode)) return kBadSize;
+  return info.st_size;
+#endif
+}
+// Like SizeFile, but throws FDException instead of returning kBadSize.
+uint64_t SizeOrThrow(int fd) {
+  uint64_t ret = SizeFile(fd);
+  UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
+  return ret;
+}
+// Set the file behind fd to exactly `to` bytes using the platform's truncate
+// call.  Throws FDException when that call returns nonzero.
+void ResizeOrThrow(int fd, uint64_t to) {
+  // The preprocessor picks the declaration and function name; the argument
+  // list after #endif is shared by every branch.
+#if defined __MINGW32__
+    // Does this handle 64-bit?
+    int ret = ftruncate
+#elif defined(_WIN32) || defined(_WIN64)
+    errno_t ret = _chsize_s
+#elif defined(OS_ANDROID)
+    int ret = ftruncate64
+#else
+    int ret = ftruncate
+#endif
+    (fd, to);
+  UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
+}
+// Punch a hole of `size` bytes at `offset` in fd, keeping the file size, via
+// fallocate.  Throws FDException if fallocate fails, and
+// UnsupportedOSException when the build lacks the required fallocate flags.
+void HolePunch(int fd, uint64_t offset, uint64_t size) {
+#if defined(__linux__) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
+  UTIL_THROW_IF_ARG(-1 == fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size), FDException, (fd), "in punching a hole at " << offset << " for " << size << " bytes.");
+#else
+  UTIL_THROW(UnsupportedOSException, "fallocate hole punching requires Linux and glibc >= 2.18");
+#endif
+}
+namespace {
+// Limit a single transfer to INT_MAX bytes on platforms whose
+// read/write/pread/pwrite are reported to break above 2^31; elsewhere the
+// size passes through untouched.
+std::size_t GuardLarge(std::size_t size) {
+#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
+  // OS X man pages claim 64-bit support, but Kareem M. Darwish had problems
+  // building with larger files, so APPLE is included as well.
+  return std::min<std::size_t>(size, INT_MAX);
+#else
+  return size;
+#endif
+}
+}
+#if defined(_WIN32) || defined(_WIN64)
+namespace {
+// 2^32 - 1, used below to clamp byte counts before casting them to DWORD.
+const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
+} // namespace
+#endif
+// Perform one read of at most `amount` bytes from fd into `to` and return how
+// many bytes arrived; callers in this file treat 0 as end of file.
+// Throws WindowsException on Windows and FDException elsewhere when the read
+// fails.  On POSIX the read is retried while it is interrupted (EINTR).
+std::size_t PartialRead(int fd, void *to, std::size_t amount) {
+#if defined(_WIN32) || defined(_WIN64)
+    DWORD ret;
+    HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+    // size_t might be 64-bit.  DWORD is always 32.
+    DWORD larger_size = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, amount));
+    // Received reports that 31346 worked but higher values did not. This rounds
+    // down to the nearest multiple of 4096, the page size.  The retry must
+    // never ask for more than the caller's buffer holds, so clamp it to the
+    // original request as well.
+    DWORD smaller_size = std::min<DWORD>(larger_size, 28672);
+    if (!ReadFile(file_handle, to, larger_size, &ret, NULL))
+    {
+        DWORD last_error = GetLastError();
+        // Only an out-of-memory failure is retried, with the smaller size.
+        if (last_error != ERROR_NOT_ENOUGH_MEMORY || !ReadFile(file_handle, to, smaller_size, &ret, NULL)) {
+            UTIL_THROW(WindowsException, "Windows error in ReadFile.");
+        }
+    }
+#else
+  errno = 0;
+  ssize_t ret;
+  do {
+    ret = read(fd, to, GuardLarge(amount));
+  } while (ret == -1 && errno == EINTR);
+  UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
+#endif
+  return static_cast<std::size_t>(ret);
+}
+// Read exactly `amount` bytes into to_void, looping over PartialRead.
+// Throws EndOfFileException if the file ends first.
+void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
+  for (uint8_t *cursor = static_cast<uint8_t*>(to_void); amount != 0; ) {
+    const std::size_t ret = PartialRead(fd, cursor, amount);
+    UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
+    cursor += ret;
+    amount -= ret;
+  }
+}
+// Read up to `amount` bytes into to_void, stopping early at end of file.
+// Returns the number of bytes actually read.
+std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
+  uint8_t *const base = static_cast<uint8_t*>(to_void);
+  std::size_t done = 0;
+  while (done < amount) {
+    const std::size_t got = PartialRead(fd, base + done, amount - done);
+    // Zero bytes means end of file: report what we have so far.
+    if (got == 0) break;
+    done += got;
+  }
+  return done;
+}
+// Write all `size` bytes of data_void to fd, looping over short writes and
+// retrying writes interrupted by EINTR.  Throws FDException when a write
+// returns less than 1.
+void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
+  const uint8_t *data = static_cast<const uint8_t*>(data_void);
+  while (size) {
+    // The return type differs between _write and write.
+#if defined(_WIN32) || defined(_WIN64)
+    int ret;
+#else
+    ssize_t ret;
+#endif
+    errno = 0;
+    do {
+      // The preprocessor only selects the function name; the argument list
+      // below is shared.
+      ret =
+#if defined(_WIN32) || defined(_WIN64)
+        _write
+#else
+        write
+#endif
+        (fd, data, GuardLarge(size));
+    } while (ret == -1 && errno == EINTR);
+    UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
+    data += ret;
+    size -= ret;
+  }
+}
+// Write `size` bytes to a stdio stream as a single fwrite item.  A zero size
+// is a no-op.  Throws ErrnoException if the item was not written.
+void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
+  if (!size) return;
+  UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
+}
+// Read exactly `size` bytes starting at file offset `off` into to_void,
+// looping over partial reads.  Uses pread (pread64 on Android) on POSIX and
+// ReadFile with an OVERLAPPED offset on Windows.
+// Throws EndOfFileException on POSIX when a read returns 0, FDException on
+// other POSIX errors, and WindowsException when ReadFile fails.
+void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
+  uint8_t *to = static_cast<uint8_t*>(to_void);
+  while (size) {
+#if defined(_WIN32) || defined(_WIN64)
+    /* BROKEN: changes file pointer.  Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
+    // size_t might be 64-bit.  DWORD is always 32.
+    DWORD reading = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
+    DWORD ret;
+    OVERLAPPED overlapped;
+    memset(&overlapped, 0, sizeof(OVERLAPPED));
+    // The 64-bit offset is split into low and high 32-bit halves.
+    overlapped.Offset = static_cast<DWORD>(off);
+    overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
+    UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), WindowsException, "ReadFile failed for offset " << off);
+#else
+    ssize_t ret;
+    errno = 0;
+    ret =
+#ifdef OS_ANDROID
+      pread64
+#else
+      pread
+#endif
+      (fd, to, GuardLarge(size), off);
+    if (ret <= 0) {
+      // Interrupted: retry the same request.
+      if (ret == -1 && errno == EINTR) continue;
+      UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
+      UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
+    }
+#endif
+    // Advance past the bytes just read.
+    size -= ret;
+    off += ret;
+    to += ret;
+  }
+}
+// Write exactly `size` bytes from from_void at file offset `off`, looping
+// over partial writes.  Uses pwrite (pwrite64 on Android) on POSIX and
+// WriteFile with an OVERLAPPED offset on Windows.
+// Throws EndOfFileException on POSIX when a write returns 0, FDException on
+// other POSIX errors, and Exception when WriteFile fails.
+void ErsatzPWrite(int fd, const void *from_void, std::size_t size, uint64_t off) {
+  const uint8_t *from = static_cast<const uint8_t*>(from_void);
+  while(size) {
+#if defined(_WIN32) || defined(_WIN64)
+    /* Changes file pointer.  Even if you save it and change it back, it won't be safe to use concurrently with write() or read() */
+    // size_t might be 64-bit.  DWORD is always 32.
+    DWORD writing = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
+    DWORD ret;
+    OVERLAPPED overlapped;
+    memset(&overlapped, 0, sizeof(OVERLAPPED));
+    // The 64-bit offset is split into low and high 32-bit halves.
+    overlapped.Offset = static_cast<DWORD>(off);
+    overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
+    UTIL_THROW_IF(!WriteFile((HANDLE)_get_osfhandle(fd), from, writing, &ret, &overlapped), Exception, "WriteFile failed for offset " << off);
+#else
+    ssize_t ret;
+    errno = 0;
+    ret =
+#ifdef OS_ANDROID
+      pwrite64
+#else
+      pwrite
+#endif
+      (fd, from, GuardLarge(size), off);
+    if (ret <= 0) {
+      // Interrupted: retry the same request.
+      if (ret == -1 && errno == EINTR) continue;
+      UTIL_THROW_IF(ret == 0, EndOfFileException, " for writing " << size << " bytes at " << off << " from " << NameFromFD(fd));
+      UTIL_THROW_ARG(FDException, (fd), "while writing " << size << " bytes at offset " << off);
+    }
+#endif
+    // Advance past the bytes just written.
+    size -= ret;
+    off += ret;
+    from += ret;
+  }
+}
+// fsync the descriptor, throwing FDException on failure.  Compiled to a
+// no-op on Windows.
+void FSyncOrThrow(int fd) {
+// Apparently windows doesn't have fsync?
+#if !defined(_WIN32) && !defined(_WIN64)
+  UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
+#endif
+}
+namespace {
+// Static assert for 64-bit off_t size.
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
+// Only the 8-byte specialization is defined, so naming ::True for any other
+// sizeof(off_t) fails to compile.
+template <unsigned> struct CheckOffT;
+template <> struct CheckOffT<8> {
+  struct True {};
+};
+// If there's a compiler error on the next line, then off_t isn't 64 bit.  And
+// that makes me a sad panda.
+typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
+#endif
+// Can't we all just get along?
+// Seek fd with the platform's lseek variant and return the resulting offset.
+// Throws FDException when the call returns -1.
+uint64_t InternalSeek(int fd, int64_t off, int whence) {
+#if defined __MINGW32__
+  // Does this handle 64-bit?
+  typedef off_t Offset;
+  Offset ret = lseek(fd, off, whence);
+#elif defined(_WIN32) || defined(_WIN64)
+  typedef __int64 Offset;
+  Offset ret = _lseeki64(fd, off, whence);
+#elif defined(OS_ANDROID)
+  typedef off64_t Offset;
+  Offset ret = lseek64(fd, off, whence);
+#else
+  typedef off_t Offset;
+  Offset ret = lseek(fd, off, whence);
+#endif
+  UTIL_THROW_IF_ARG((Offset)-1 == ret, FDException, (fd), "while seeking to " << off << " whence " << whence);
+  return (uint64_t)ret;
+}
+} // namespace
+// Seek to absolute offset `off`; returns the new offset.
+uint64_t SeekOrThrow(int fd, uint64_t off) {
+  return InternalSeek(fd, off, SEEK_SET);
+}
+// Seek relative to the current position; returns the new offset.  An `off`
+// of 0 therefore just reports the current position.
+uint64_t AdvanceOrThrow(int fd, int64_t off) {
+  return InternalSeek(fd, off, SEEK_CUR);
+}
+// Seek to the end of the file; returns the resulting offset.
+uint64_t SeekEnd(int fd) {
+  return InternalSeek(fd, 0, SEEK_END);
+}
+// Wrap the descriptor in a FILE* opened "r+b".  On success `file` releases
+// the descriptor (the FILE* is what remains); on failure FDException is
+// thrown and `file` still holds it.
+std::FILE *FDOpenOrThrow(scoped_fd &file) {
+  std::FILE *ret = fdopen(file.get(), "r+b");
+  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
+  file.release();
+  return ret;
+}
+// Same as FDOpenOrThrow but opens the stream "rb".
+std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
+  std::FILE *ret = fdopen(file.get(), "rb");
+  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
+  file.release();
+  return ret;
+}
+// Sigh.  Windows temporary file creation is full of race conditions.
+#if defined(_WIN32) || defined(_WIN64)
+/* mkstemp extracted from libc/sysdeps/posix/tempname.c.  Copyright
+   (C) 1991-1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.  */
+/* This has been modified from the original version to rename the function and
+ * set the Windows temporary flag. */
+// The 62 characters used to replace the template's trailing X's.
+static const char letters[] =
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+/* Generate a temporary file name based on TMPL.  TMPL must match the
+   rules for mk[s]temp (i.e. end in "XXXXXX").  The name constructed
+   does not exist at the time of the call to mkstemp.  TMPL is
+   overwritten with the result.
+   Returns an open descriptor, or -1 with errno set on failure.  The file
+   is opened with _O_TEMPORARY.  */
+int
+mkstemp_and_unlink(char *tmpl)
+{
+  int len;
+  char *XXXXXX;
+  /* Static, so it carries over between calls.  */
+  static unsigned long long value;
+  unsigned long long random_time_bits;
+  unsigned int count;
+  int fd = -1;
+  /* Restored on success so a successful call leaves errno untouched.  */
+  int save_errno = errno;
+  /* A lower bound on the number of temporary files to attempt to
+     generate.  The maximum total number of temporary file names that
+     can exist for a given template is 62**6.  It should never be
+     necessary to try all these combinations.  Instead if a reasonable
+     number of names is tried (we define reasonable as 62**3) fail to
+     give the system administrator the chance to remove the problems.  */
+#define ATTEMPTS_MIN (62 * 62 * 62)
+  /* The number of times to attempt to generate a temporary file.  To
+     conform to POSIX, this must be no smaller than TMP_MAX.  */
+#if ATTEMPTS_MIN < TMP_MAX
+  unsigned int attempts = TMP_MAX;
+#else
+  unsigned int attempts = ATTEMPTS_MIN;
+#endif
+  len = strlen (tmpl);
+  if (len < 6 || strcmp (&tmpl[len - 6], "XXXXXX"))
+    {
+      errno = EINVAL;
+      return -1;
+    }
+/* This is where the Xs start.  */
+  XXXXXX = &tmpl[len - 6];
+  /* Get some more or less random data.  */
+  {
+    SYSTEMTIME      stNow;
+    FILETIME ftNow;
+    // get system time
+    GetSystemTime(&stNow);
+    stNow.wMilliseconds = 500;
+    if (!SystemTimeToFileTime(&stNow, &ftNow))
+    {
+        /* NOTE(review): -1 is not a standard errno value -- confirm intent.  */
+        errno = -1;
+        return -1;
+    }
+    random_time_bits = (((unsigned long long)ftNow.dwHighDateTime << 32)
+                        | (unsigned long long)ftNow.dwLowDateTime);
+  }
+  value += random_time_bits ^ (unsigned long long)GetCurrentThreadId ();
+  /* Each failed attempt perturbs value by 7777 and tries the next name.  */
+  for (count = 0; count < attempts; value += 7777, ++count)
+  {
+    unsigned long long v = value;
+    /* Fill in the random bits.  */
+    XXXXXX[0] = letters[v % 62];
+    v /= 62;
+    XXXXXX[1] = letters[v % 62];
+    v /= 62;
+    XXXXXX[2] = letters[v % 62];
+    v /= 62;
+    XXXXXX[3] = letters[v % 62];
+    v /= 62;
+    XXXXXX[4] = letters[v % 62];
+    v /= 62;
+    XXXXXX[5] = letters[v % 62];
+    /* Modified for windows and to unlink */
+    //      fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
+    int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
+    flags |= _O_TEMPORARY;
+    fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
+    if (fd >= 0)
+    {
+      errno = save_errno;
+      return fd;
+    }
+    /* Any failure other than "name already exists" is fatal.  */
+    else if (errno != EEXIST)
+      return -1;
+  }
+  /* We got out of the loop because we ran out of combinations to try.  */
+  errno = EEXIST;
+  return -1;
+}
+#else
+// Create a temporary file from the mkstemp template tmpl, then unlink its
+// name so only the returned descriptor refers to it.  tmpl is overwritten
+// with the generated name.
+// Returns the descriptor, or -1 if mkstemp failed (errno is left as mkstemp
+// set it).  Throws ErrnoException if the unlink fails.
+int
+mkstemp_and_unlink(char *tmpl) {
+  int ret = mkstemp(tmpl);
+  if (ret == -1) return ret;
+  // Hold the descriptor in a guard while unlink can still throw, so a failed
+  // unlink no longer leaks it.  The exception object is built before the
+  // guard's destructor runs during unwinding.
+  scoped_fd guard(ret);
+  UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting " << tmpl);
+  return guard.release();
+}
+#endif
+// If it's a directory, add a /.  This lets users say -T /tmp without creating
+// /tmpAAAAAA
+// Append '/' to base when it names an existing directory and does not
+// already end in '/'.  Empty or nonexistent paths are left alone.
+void NormalizeTempPrefix(std::string &base) {
+  if (base.empty() || base[base.size() - 1] == '/') return;
+  struct stat info;
+  // It's fine for it to not exist.
+  if (stat(base.c_str(), &info) == -1) return;
+#if defined(_WIN32) || defined(_WIN64)
+  const bool is_directory = (info.st_mode & _S_IFDIR) != 0;
+#else
+  const bool is_directory = S_ISDIR(info.st_mode);
+#endif
+  if (is_directory) base.push_back('/');
+}
+// Create an unlinked temporary file whose name starts with base and return
+// its descriptor.  Throws ErrnoException on failure.
+int MakeTemp(const StringPiece &base) {
+  std::string name(base.data(), base.size());
+  name += "XXXXXX";
+  // Explicit terminator so &name[0] can be handed over as a writable C string.
+  name.push_back(0);
+  int ret;
+  UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
+  return ret;
+}
+// Like MakeTemp, but returns the temporary file as a FILE*.  The scoped_fd
+// owns the descriptor until FDOpenOrThrow succeeds and releases it.
+std::FILE *FMakeTemp(const StringPiece &base) {
+  util::scoped_fd file(MakeTemp(base));
+  return FDOpenOrThrow(file);
+}
+// Choose the prefix under which temporary files are created.
+// Windows: the directory reported by GetTempPath; throws std::runtime_error
+// if it cannot be obtained.  Elsewhere: the first non-empty value among the
+// environment variables TMPDIR, TMP, TEMPDIR and TEMP, or "/tmp/" when none
+// is set.  The result has been passed through NormalizeTempPrefix.
+std::string DefaultTempDirectory() {
+#if defined(_WIN32) || defined(_WIN64)
+  char dir_buffer[1000];
+  // GetTempPath returns 0 on failure.  When the path does not fit it returns
+  // the required length (larger than the buffer) and the buffer must not be
+  // used, so treat that as a failure too.
+  const DWORD length = GetTempPath(1000, dir_buffer);
+  if (length == 0 || length > 1000)
+    throw std::runtime_error("Could not read temporary directory.");
+  std::string ret(dir_buffer);
+  NormalizeTempPrefix(ret);
+  return ret;
+#else
+  // Try these environment variables, in this order:
+  const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0};
+  for (int i=0; vars[i]; ++i) {
+    // secure_getenv is used when glibc >= 2.17 provides it, plain getenv
+    // otherwise; the preprocessor only selects the function name.
+    char *val =
+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2,17)
+      secure_getenv
+#else // __GLIBC_PREREQ
+      getenv
+#endif // __GLIBC_PREREQ
+#else // _GNU_SOURCE
+      getenv
+#endif
+      (vars[i]);
+    // Environment variable is set and nonempty.  Use it.
+    if (val && *val) {
+      std::string ret(val);
+      NormalizeTempPrefix(ret);
+      return ret;
+    }
+  }
+  // No environment variables set.  Default to /tmp.
+  return "/tmp/";
+#endif
+}
+// Duplicate fd with dup and return the new descriptor.
+// Throws FDException if dup fails.
+int DupOrThrow(int fd) {
+  int ret = dup(fd);
+  UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
+  return ret;
+}
+namespace {
+// Best-effort lookup of the path behind fd by reading the symlink
+// /proc/self/fd/<fd>.  Returns true with the path in `out` on success.
+// Returns false on Windows, when the link cannot be read, or when the link
+// target does not start with '/'; `out` may have been modified in that case.
+bool TryName(int fd, std::string &out) {
+#if defined(_WIN32) || defined(_WIN64)
+  return false;
+#else
+  std::ostringstream link_path;
+  link_path << "/proc/self/fd/" << fd;
+  const std::string name(link_path.str());
+  struct stat sb;
+  if (lstat(name.c_str(), &sb) == -1) return false;
+  // lstat gave us a size, but I've seen it grow, possibly due to symlinks on
+  // top of symlinks, so the buffer is doubled until the target fits.
+  out.resize(sb.st_size + 1);
+  for (;;) {
+    const ssize_t length = readlink(name.c_str(), &out[0], out.size());
+    if (length == -1) return false;
+    if (static_cast<size_t>(length) < out.size()) {
+      // Strictly shorter than the buffer: nothing was truncated.
+      out.resize(length);
+      break;
+    }
+    out.resize(out.size() * 2);
+  }
+  // Don't use the non-file names.
+  return out.empty() || out[0] == '/';
+#endif
+}
+} // namespace
+// Human-readable name for fd: the path found by TryName if any, otherwise
+// "stdin"/"stdout"/"stderr" for 0/1/2, otherwise "fd <number>".
+std::string NameFromFD(int fd) {
+  std::string resolved;
+  if (TryName(fd, resolved)) return resolved;
+  if (fd == 0) return "stdin";
+  if (fd == 1) return "stdout";
+  if (fd == 2) return "stderr";
+  std::ostringstream label;
+  label << "fd " << fd;
+  return label.str();
+}
+} // namespace util
--- a/cpp/thirdpart/kenlm/util/file.hh
+++ b/cpp/thirdpart/kenlm/util/file.hh
+#ifndef UTIL_FILE_H
+#define UTIL_FILE_H
+#include "exception.hh"
+#include "scoped.hh"
+#include "string_piece.hh"
+#include <cstddef>
+#include <cstdio>
+#include <string>
+#include <stdint.h>
+namespace util {
+// Owns a file descriptor; -1 means no descriptor is held.  Copying is
+// disabled; moving is available when compiled as C++11 or later.
+class scoped_fd {
+  public:
+    scoped_fd() : fd_(-1) {}
+    explicit scoped_fd(int fd) : fd_(fd) {}
+#if __cplusplus >= 201103L
+    // Take over from's descriptor and leave from empty.
+    scoped_fd(scoped_fd &&from) noexcept : fd_(from.release()) {}
+#endif
+    ~scoped_fd();
+    // Observe the descriptor without giving it up.
+    int get() const { return fd_; }
+    int operator*() const { return fd_; }
+    // Give up the descriptor without closing it and return it.
+    int release() {
+      const int released = fd_;
+      fd_ = -1;
+      return released;
+    }
+    // Hand the current descriptor to a temporary, whose destructor disposes
+    // of it when this function returns, and adopt `to` instead.
+    void reset(int to = -1) {
+      scoped_fd closer(fd_);
+      fd_ = to;
+    }
+  private:
+    // Declared but not defined: copying is not allowed.
+    scoped_fd(const scoped_fd &);
+    scoped_fd &operator=(const scoped_fd &);
+    int fd_;
+};
+// Closing policy for scoped_FILE; Close is defined in file.cc.
+struct scoped_FILE_closer {
+  static void Close(std::FILE *file);
+};
+// A std::FILE holder that uses scoped_FILE_closer as its closer.
+typedef scoped<std::FILE, scoped_FILE_closer> scoped_FILE;
+/* Thrown for any operation where the fd is known. */
+// ErrnoException that also records the descriptor involved and a
+// best-effort name for it.
+class FDException : public ErrnoException {
+  public:
+    explicit FDException(int fd) throw();
+    virtual ~FDException() throw();
+    // This may no longer be valid if the exception was thrown past open.
+    int FD() const { return fd_; }
+    // Guess from NameFromFD.
+    const std::string &NameGuess() const { return name_guess_; }
+  private:
+    int fd_;
+    std::string name_guess_;
+};
+// End of file reached.
+// Thrown when input ends before the requested data could be read.
+class EndOfFileException : public Exception {
+  public:
+    EndOfFileException() throw();
+    ~EndOfFileException() throw();
+};
+// Thrown by operations that this platform or build does not support
+// (used by HolePunch).
+class UnsupportedOSException : public Exception {};
+// Open for read only.
+int OpenReadOrThrow(const char *name);
+// Create file if it doesn't exist, truncate if it does.  Opened for write.
+int CreateOrThrow(const char *name);
+/** Does the given input file path denote standard input?
+ *
+ * Returns true if, and only if, path is either "-" or "/dev/stdin".
+ *
+ * Opening standard input as a file may need some special treatment for
+ * portability.  There's a convention that a dash ("-") in place of an input
+ * file path denotes standard input, but opening "/dev/stdin" may need to be
+ * special as well.
+ */
+bool InputPathIsStdin(StringPiece path);
+/** Does the given output file path denote standard output?
+ *
+ * Returns true if, and only if, path is either "-" or "/dev/stdout".
+ *
+ * Opening standard output as a file may need some special treatment for
+ * portability.  There's a convention that a dash ("-") in place of an output
+ * file path denotes standard output, but opening "/dev/stdout" may need to be
+ * special as well.
+ */
+bool OutputPathIsStdout(StringPiece path);
+// Return value for SizeFile when it can't size properly.
+const uint64_t kBadSize = (uint64_t)-1;
+// Size of the file in bytes, or kBadSize if it cannot be determined.
+uint64_t SizeFile(int fd);
+// Same as SizeFile but throws FDException instead of returning kBadSize.
+uint64_t SizeOrThrow(int fd);
+// Truncate or extend the file to exactly `to` bytes.
+void ResizeOrThrow(int fd, uint64_t to);
+// It bothers me that fallocate has offset before size while pread has size
+// before offset.  But best to follow the call.
+void HolePunch(int fd, uint64_t offset, uint64_t size);
+// One read of at most `size` bytes; returns the number of bytes read.
+std::size_t PartialRead(int fd, void *to, std::size_t size);
+// Read exactly `size` bytes; throws EndOfFileException if the file ends first.
+void ReadOrThrow(int fd, void *to, std::size_t size);
+// Read up to `size` bytes, stopping at end of file; returns the bytes read.
+std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
+// Write all `size` bytes to the descriptor or throw.
+void WriteOrThrow(int fd, const void *data_void, std::size_t size);
+// Write all `size` bytes to the stdio stream or throw; size 0 is a no-op.
+void WriteOrThrow(FILE *to, const void *data, std::size_t size);
+/* These call pread/pwrite in a loop.  On Windows they instead call ReadFile/
+ * WriteFile with an explicit offset, which also moves the file pointer.  It
+ * is therefore safe to call ErsatzPRead and ErsatzPWrite concurrently with
+ * each other (in any combination), but on Windows it is not safe to call
+ * them concurrently with anything that uses the implicit file pointer, e.g.
+ * the Read/Write functions above.
+ */
+// Read exactly `size` bytes starting at file offset `off`.
+void ErsatzPRead(int fd, void *to, std::size_t size, uint64_t off);
+// Write exactly `size` bytes starting at file offset `off`.
+void ErsatzPWrite(int fd, const void *data_void, std::size_t size, uint64_t off);
+// fsync the descriptor; does nothing on Windows.
+void FSyncOrThrow(int fd);
+// Seeking: returns offset
+// SeekOrThrow is absolute, AdvanceOrThrow is relative to the current
+// position, and SeekEnd moves to the end of the file.
+uint64_t SeekOrThrow(int fd, uint64_t off);
+uint64_t AdvanceOrThrow(int fd, int64_t off);
+uint64_t SeekEnd(int fd);
+// Wrap the descriptor in a FILE* ("r+b" and "rb" respectively).  On success
+// `file` releases the descriptor.
+std::FILE *FDOpenOrThrow(scoped_fd &file);
+std::FILE *FDOpenReadOrThrow(scoped_fd &file);
+// Temporary files
+// Append a / if base is a directory.
+void NormalizeTempPrefix(std::string &base);
+int MakeTemp(const StringPiece &prefix);
+std::FILE *FMakeTemp(const StringPiece &prefix);
+// Where should we put temporary files?  Handles all the windows/POSIX defaults fun.
+std::string DefaultTempDirectory();
+// dup an fd.
+int DupOrThrow(int fd);
+/* Attempt get file name from fd.  This won't always work (i.e. on Windows or
+ * a pipe).  The file might have been renamed.  It's intended for diagnostics
+ * and logging only.
+ */
+std::string NameFromFD(int fd);
+} // namespace util
+#endif // UTIL_FILE_H
--- a/cpp/thirdpart/kenlm/util/file_piece.cc
+++ b/cpp/thirdpart/kenlm/util/file_piece.cc
+#include "file_piece.hh"
+#include "double-conversion/double-conversion.h"
+#include "exception.hh"
+#include "file.hh"
+#include "mmap.hh"
+#if defined(_WIN32) || defined(_WIN64)
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <algorithm>
+#include <cassert>
+#include <cerrno>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if defined(_WIN32) || defined(_WIN64)
+#include <math.h>
+#endif
+namespace util {
+// Value of SizePage(), captured once during static initialization.
+namespace { const uint64_t kPageSize = SizePage(); }
+// Start the message with the offending text; the throw site appends the
+// name of the target type after "into a ".
+ParseNumberException::ParseNumberException(StringPiece value) throw() {
+  *this << "Could not parse \"" << value << "\" into a ";
+}
+// Advance to the next line.  When the input is exhausted the backing pointer
+// is cleared.
+LineIterator &LineIterator::operator++() {
+  const bool got_line = backing_->ReadLineOrEOF(line_, delim_);
+  if (!got_line) {
+    backing_ = NULL;
+  }
+  return *this;
+}
+// Open `name` for reading.  Progress is reported to show_progress only when
+// the file's size could be determined.
+FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
+  file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())),
+  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
+  Initialize(name, show_progress, min_buffer);
+}
+namespace {
+// Use the caller's name when one was given, otherwise guess one from fd.
+std::string NamePossiblyFind(int fd, const char *name) {
+  return name ? std::string(name) : NameFromFD(fd);
+}
+} // namespace
+// Read from an already-open descriptor, which file_ takes over.  `name` may
+// be NULL, in which case a name is guessed from the descriptor.
+FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
+  file_(fd), total_size_(SizeFile(file_.get())),
+  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
+  Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
+}
+// Read from an istream.  There is no descriptor to size or map, so the total
+// size is unknown and the read fallback is used from the start.  The name
+// argument is ignored; the file name is recorded as "istream".
+FilePiece::FilePiece(std::istream &stream, const char * /*name*/, std::size_t min_buffer) :
+  total_size_(kBadSize) {
+  InitializeNoRead("istream", min_buffer);
+  fallback_to_read_ = true;
+  HugeMalloc(default_map_size_, false, data_);
+  // Start with an empty window at the beginning of the buffer.
+  position_ = data_.begin();
+  position_end_ = position_;
+  fell_back_.Reset(stream);
+}
+// Return the next line without its delimiter and move past the delimiter.
+// With strip_cr, a '\r' directly before the delimiter is dropped as well.
+// At end of input the remaining text is returned as the last line; once
+// nothing remains, the Shift call throws EndOfFileException.
+StringPiece FilePiece::ReadLine(char delim, bool strip_cr) {
+  // Bytes at the front of the window already known not to hold delim.
+  std::size_t scanned = 0;
+  for (;;) {
+    const char *hit = std::find(position_ + scanned, position_end_, delim);
+    if (UTIL_LIKELY(hit != position_end_)) {
+      // End of line.  Back up one byte over an unwanted carriage return.
+      const char *line_end = hit;
+      if (strip_cr && hit > position_ && hit[-1] == '\r') --line_end;
+      StringPiece ret(position_, line_end - position_);
+      position_ = hit + 1;
+      return ret;
+    }
+    if (!at_end_) {
+      // Not found yet: remember what was searched and pull in more data.
+      scanned = position_end_ - position_;
+      Shift();
+      continue;
+    }
+    if (position_ == position_end_) {
+      Shift();
+    }
+    return Consume(position_end_);
+  }
+}
+// Non-throwing form of ReadLine: stores the line in `to` and returns true,
+// or returns false when ReadLine hits end of file.
+bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim, bool strip_cr) {
+  try {
+    to = ReadLine(delim, strip_cr);
+    return true;
+  } catch (const util::EndOfFileException &) {
+    return false;
+  }
+}
+// Typed entry points; each forwards to the ReadNumber template.
+float FilePiece::ReadFloat() {
+  return ReadNumber<float>();
+}
+double FilePiece::ReadDouble() {
+  return ReadNumber<double>();
+}
+long int FilePiece::ReadLong() {
+  return ReadNumber<long int>();
+}
+unsigned long int FilePiece::ReadULong() {
+  return ReadNumber<unsigned long int>();
+}
+// Factored out so that istream can call this.
+// Reset the bookkeeping shared by all constructors without touching the
+// input.  The default window is a whole number of pages: at least two, and
+// at least min_buffer / kPageSize + 1.
+void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
+  file_name_ = name;
+  const std::size_t pages = std::max<std::size_t>((min_buffer / kPageSize + 1), 2);
+  default_map_size_ = kPageSize * pages;
+  mapped_offset_ = 0;
+  at_end_ = false;
+  position_ = NULL;
+  position_end_ = NULL;
+}
+// Set up reading from file_: choose between mmap and the read() fallback,
+// load the first window, and switch to the read path if the data starts
+// with a compression magic number.
+void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
+  InitializeNoRead(name, min_buffer);
+  uint64_t current_offset;
+  bool valid_current_offset;
+  // A relative seek by 0 reports the current offset; failure means the
+  // descriptor cannot be seeked.
+  try {
+    current_offset = AdvanceOrThrow(file_.get(), 0);
+    valid_current_offset = true;
+  } catch (const FDException &) {
+    current_offset = 0;
+    valid_current_offset = false;
+  }
+  // So the assertion in TransitionToRead passes
+  fallback_to_read_ = false;
+  if (total_size_ == kBadSize || !valid_current_offset) {
+    if (show_progress)
+      *show_progress << "File " << name << " isn't normal.  Using slower read() instead of mmap().  No progress bar." << std::endl;
+    TransitionToRead();
+  } else {
+    // Start from wherever the descriptor currently points.
+    mapped_offset_ = current_offset;
+  }
+  Shift();
+  // gzip detect.
+  if ((position_end_ >= position_ + ReadCompressed::kMagicSize) && ReadCompressed::DetectCompressedMagic(position_)) {
+    if (!fallback_to_read_) {
+      // The first window may have been marked as the end of the file; clear
+      // that before switching to read.
+      at_end_ = false;
+      TransitionToRead();
+    }
+  }
+}
+namespace {
+// Shared string-to-number converter: leading spaces and trailing junk are
+// allowed, quiet NaN is passed for both value arguments, and "inf" / "NaN"
+// are the infinity and NaN symbols.
+static const double_conversion::StringToDoubleConverter kConverter(
+    double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
+    std::numeric_limits<double>::quiet_NaN(),
+    std::numeric_limits<double>::quiet_NaN(),
+    "inf",
+    "NaN");
+// Prefix of str up to, but not including, its first kSpaces character (all
+// of str when it has none).  Used to quote the offending token in errors.
+StringPiece FirstToken(StringPiece str) {
+  const char *const begin = str.data();
+  const char *const end = begin + str.size();
+  const char *cursor = begin;
+  while (cursor != end && !kSpaces[(unsigned char)*cursor]) {
+    ++cursor;
+  }
+  return StringPiece(begin, cursor - begin);
+}
+// std::isnan is technically C++11 not C++98.  But in practice this is a problem for visual studio.
+// NaN test that compiles on Windows too: uses the unqualified isnan there
+// and std::isnan elsewhere.
+template <class T> inline int CrossPlatformIsNaN(T value) {
+#if defined(_WIN32) || defined(_WIN64)
+  return isnan(value);
+#else
+  return std::isnan(value);
+#endif
+}
+// Parse a float from the start of str into out and return a pointer just
+// past the characters consumed.  Throws ParseNumberException when the result
+// is NaN and str as a whole is not "NaN" or "nan".
+const char *ParseNumber(StringPiece str, float &out) {
+  int count;
+  out = kConverter.StringToFloat(str.data(), str.size(), &count);
+  UTIL_THROW_IF_ARG(CrossPlatformIsNaN(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "float");
+  return str.data() + count;
+}
+// Same as the float overload, for double.
+const char *ParseNumber(StringPiece str, double &out) {
+  int count;
+  out = kConverter.StringToDouble(str.data(), str.size(), &count);
+  UTIL_THROW_IF_ARG(CrossPlatformIsNaN(out) && str != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "double");
+  return str.data() + count;
+}
+// Parse a base-10 long from the start of str into out and return a pointer
+// just past the digits.  Throws ParseNumberException when strtol sets errno
+// or consumes nothing.
+// NOTE(review): strtol is not given str.size(); this relies on a
+// non-numeric character following the digits in memory -- confirm at callers.
+const char *ParseNumber(StringPiece str, long int &out) {
+  char *end;
+  errno = 0;
+  out = strtol(str.data(), &end, 10);
+  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "long int");
+  return end;
+}
+// Same as the long overload, using strtoul for unsigned long.
+const char *ParseNumber(StringPiece str, unsigned long int &out) {
+  char *end;
+  errno = 0;
+  out = strtoul(str.data(), &end, 10);
+  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "unsigned long int");
+  return end;
+}
+} // namespace
+// Skip leading spaces, parse a T at the current position, and advance past
+// the characters consumed.
+template <class T> T FilePiece::ReadNumber() {
+  SkipSpaces();
+  // Keep shifting until the window holds a space at or after position_, so
+  // the number is followed by a space inside the window.
+  while (last_space_ < position_) {
+    if (UTIL_UNLIKELY(at_end_)) {
+      // Hallucinate a null off the end of the file.
+      std::string buffer(position_, position_end_);
+      T ret;
+      // Has to be null-terminated.
+      const char *begin = buffer.c_str();
+      const char *end = ParseNumber(StringPiece(begin, buffer.size()), ret);
+      // Translate the distance consumed in the copy back to the window.
+      position_ += end - begin;
+      return ret;
+    }
+    Shift();
+  }
+  T ret;
+  position_ = ParseNumber(StringPiece(position_, last_space_ - position_), ret);
+  return ret;
+}
+// Scan forward from position_ for the first byte flagged in delim, shifting
+// in more data whenever the current window runs out.  Returns a pointer to
+// that byte, or position_end_ if the input ends first.
+const char *FilePiece::FindDelimiterOrEOF(const bool *delim) {
+  std::size_t already_scanned = 0;
+  for (;;) {
+    const char *cursor = position_ + already_scanned;
+    while (cursor < position_end_) {
+      if (delim[static_cast<unsigned char>(*cursor)]) return cursor;
+      ++cursor;
+    }
+    if (!at_end_) {
+      // Remember how much has been checked so the next pass resumes there.
+      already_scanned = position_end_ - position_;
+      Shift();
+      continue;
+    }
+    // Nothing left to return: Shift() at the end of input throws
+    // EndOfFileException.
+    if (position_ == position_end_) Shift();
+    return position_end_;
+  }
+}
+// Bring more input into view starting at position_, via mmap or, after a
+// fallback, via read.  Throws EndOfFileException if at_end_ is already set.
+// Afterwards last_space_ points at the last whitespace byte in
+// [position_, position_end_), or just before position_ if there is none.
+void FilePiece::Shift() {
+  if (at_end_) {
+    progress_.Finished();
+    throw EndOfFileException();
+  }
+  const uint64_t file_offset = position_ - data_.begin() + mapped_offset_;
+  if (!fallback_to_read_) MMapShift(file_offset);
+  // An mmap failure inside MMapShift may have switched on the fallback.
+  if (fallback_to_read_) ReadShift();
+  last_space_ = position_end_ - 1;
+  while (last_space_ >= position_ && !kSpaces[static_cast<unsigned char>(*last_space_)]) {
+    --last_space_;
+  }
+}
+// Push the current file offset to the progress display.  Does nothing once
+// the reader has fallen back to read().
+void FilePiece::UpdateProgress() {
+  if (fallback_to_read_) return;
+  progress_.Set(position_ - data_.begin() + mapped_offset_);
+}
+// mmap backend for Shift(): map a window of the file so that file offset
+// desired_begin becomes position_.  If the mapping fails, seeks the file to
+// desired_begin and switches to the read() backend via TransitionToRead().
+void FilePiece::MMapShift(uint64_t desired_begin) {
+  // Use mmap.
+  // The mapping starts at a multiple of kPageSize, so `ignore` bytes before
+  // desired_begin are mapped but skipped.
+  uint64_t ignore = desired_begin % kPageSize;
+  // Duplicate request for Shift means give more data.
+  if (position_ == data_.begin() + ignore && position_) {
+    default_map_size_ *= 2;
+  }
+  // Local version so that in case of failure it doesn't overwrite the class variable.
+  uint64_t mapped_offset = desired_begin - ignore;
+  uint64_t mapped_size;
+  // If the window reaches the end of the file, map only what is left and
+  // record that this is the final window.
+  if (default_map_size_ >= static_cast<std::size_t>(total_size_ - mapped_offset)) {
+    at_end_ = true;
+    mapped_size = total_size_ - mapped_offset;
+  } else {
+    mapped_size = default_map_size_;
+  }
+  // Forcibly clear the existing mmap first.
+  data_.reset();
+  try {
+    MapRead(POPULATE_OR_LAZY, *file_, mapped_offset, mapped_size, data_);
+  } catch (const util::ErrnoException &) {
+    // Position the descriptor where reading should continue.
+    if (desired_begin) {
+      SeekOrThrow(*file_, desired_begin);
+    }
+    // The mmap was scheduled to end the file, but now we're going to read it.
+    at_end_ = false;
+    TransitionToRead();
+    return;
+  }
+  // Success: publish the new window.
+  mapped_offset_ = mapped_offset;
+  position_ = data_.begin() + ignore;
+  position_end_ = data_.begin() + mapped_size;
+  progress_.Set(desired_begin);
+}
+// Switch from mmap to the read() backend: allocate an empty buffer of
+// default_map_size_ bytes and hand the file descriptor over to fell_back_.
+// A util::Exception raised by that hand-over is annotated with the file name
+// and rethrown.
+void FilePiece::TransitionToRead() {
+  assert(!fallback_to_read_);
+  fallback_to_read_ = true;
+  data_.reset();
+  HugeMalloc(default_map_size_, false, data_);
+  // Start with an empty window at the front of the fresh buffer.
+  position_end_ = position_ = data_.begin();
+  try {
+    fell_back_.Reset(file_.release());
+  } catch (util::Exception &failure) {
+    failure << " in file " << file_name_;
+    throw;
+  }
+}
+// read() backend for Shift(): append more bytes from fell_back_ after the
+// unconsumed data, first recycling or enlarging the buffer if it is full.
+// Sets at_end_ when the read returns no bytes.
+void FilePiece::ReadShift() {
+  assert(fallback_to_read_);
+  // Bytes [data_.begin(), position_) have been consumed.
+  // Bytes [position_, position_end_) have been read into the buffer.
+  // Start at the beginning of the buffer if there's nothing useful in it.
+  if (position_ == position_end_) {
+    // Account for the discarded bytes so Offset() keeps counting from the
+    // start of the input.
+    mapped_offset_ += (position_end_ - data_.begin());
+    position_ = data_.begin();
+    position_end_ = position_;
+  }
+  std::size_t already_read = position_end_ - data_.begin();
+  if (already_read == default_map_size_) {
+    // The buffer is full up to its end.
+    if (position_ == data_.begin()) {
+      // Buffer too small.
+      // Nothing has been consumed, so the only way to make room is to double
+      // the buffer; the pointers are rebuilt after the reallocation.
+      std::size_t valid_length = position_end_ - position_;
+      default_map_size_ *= 2;
+      HugeRealloc(default_map_size_, false, data_);
+      position_ = data_.begin();
+      position_end_ = position_ + valid_length;
+    } else {
+      // Slide the unconsumed bytes to the front to free space at the back.
+      std::size_t moving = position_end_ - position_;
+      memmove(data_.get(), position_, moving);
+      position_ = data_.begin();
+      position_end_ = position_ + moving;
+      already_read = moving;
+    }
+  }
+  // Fill the free space after the valid bytes.
+  std::size_t read_return = fell_back_.Read(static_cast<uint8_t*>(data_.get()) + already_read, default_map_size_ - already_read);
+  progress_.Set(fell_back_.RawAmount());
+  if (read_return == 0) {
+    at_end_ = true;
+  }
+  position_end_ += read_return;
+}
+} // namespace util
--- a/cpp/thirdpart/kenlm/util/file_piece.hh
+++ b/cpp/thirdpart/kenlm/util/file_piece.hh
+#ifndef UTIL_FILE_PIECE_H
+#define UTIL_FILE_PIECE_H
+#include "ersatz_progress.hh"
+#include "exception.hh"
+#include "file.hh"
+#include "mmap.hh"
+#include "read_compressed.hh"
+#include "spaces.hh"
+#include "string_piece.hh"
+#include <cstddef>
+#include <iosfwd>
+#include <string>
+#include <cassert>
+#include <stdint.h>
+namespace util {
+// Exception type for text that could not be parsed as a number.  It is
+// constructed from the offending text; both members are declared non-throwing.
+class ParseNumberException : public Exception {
+  public:
+    // value is the text that failed to parse.
+    explicit ParseNumberException(StringPiece value) throw();
+    ~ParseNumberException() throw() {}
+};
+class FilePiece;
+// Input iterator over the lines of a FilePiece.  In C++11 this permits
+//   for (StringPiece l : FilePiece("file"))
+// NB: single pass only.
+class LineIterator {
+  public:
+    // Iterator with no backing FilePiece; this is what FilePiece::end() returns.
+    LineIterator() : backing_(NULL) {}
+    // Iterate over f, splitting on delim.  Advances once immediately.
+    explicit LineIterator(FilePiece &f, char delim = '\n') : backing_(&f), delim_(delim) {
+      this->operator++();
+    }
+    LineIterator &operator++();
+    // Iterators are equal when they share the same backing FilePiece.
+    bool operator==(const LineIterator &other) const {
+      return other.backing_ == backing_;
+    }
+    bool operator!=(const LineIterator &other) const {
+      return !(*this == other);
+    }
+    // True while a FilePiece is attached.
+    operator bool() const { return NULL != backing_; }
+    StringPiece operator*() const { return line_; }
+    const StringPiece *operator->() const { return &line_; }
+  private:
+    FilePiece *backing_;
+    StringPiece line_;
+    char delim_;
+};
+// Memory backing the returned StringPiece may vanish on the next call.
+class FilePiece {
+  public:
+    // Open the named file.  min_buffer defaults to 1 MB.
+    explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+    // Takes ownership of fd.  name is used for messages.
+    explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+    /* Read from an istream.  Don't use this if you can avoid it.  Raw fd IO is
+     * much faster.  But sometimes you just have an istream like Boost's HTTP
+     * server and want to parse it the same way.
+     * name is just used for messages and FileName().
+     */
+    explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
+    // Line iteration support; see LineIterator.
+    LineIterator begin() {
+      return LineIterator(*this);
+    }
+    LineIterator end() {
+      return LineIterator();
+    }
+    // Return the next character without consuming it.
+    // Throws EndOfFileException when no characters remain.
+    char peek() {
+      if (position_ == position_end_) {
+        Shift();
+        // Shift() may set at_end_ while still delivering the final bytes
+        // (MMapShift does so when it maps the tail of the file), so only an
+        // empty window means the input is exhausted.
+        if (position_ == position_end_) throw EndOfFileException();
+      }
+      return *position_;
+    }
+    // Return the next character and consume it.
+    // Throws EndOfFileException when no characters remain.
+    char get() {
+      char ret = peek();
+      ++position_;
+      return ret;
+    }
+    // Leaves the delimiter, if any, to be returned by get().  Delimiters defined by isspace().
+    StringPiece ReadDelimited(const bool *delim = kSpaces) {
+      SkipSpaces(delim);
+      return Consume(FindDelimiterOrEOF(delim));
+    }
+    /// Read word until the line or file ends.
+    /// Returns false, leaving `to` untouched, if the line or file ends first.
+    bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
+      assert(delim[static_cast<unsigned char>('\n')]);
+      // Skip non-enter spaces.
+      for (; ; ++position_) {
+        if (position_ == position_end_) {
+          try {
+            Shift();
+          } catch (const util::EndOfFileException &) { return false; }
+          // And break out at end of file.
+          if (position_ == position_end_) return false;
+        }
+        if (!delim[static_cast<unsigned char>(*position_)]) break;
+        if (*position_ == '\n') return false;
+      }
+      // We can't be at the end of file because there's at least one character open.
+      to = Consume(FindDelimiterOrEOF(delim));
+      return true;
+    }
+    /** Read a line of text from the file.
+     *
+     * Unlike ReadDelimited, this includes leading spaces and consumes the
+     * delimiter.   It is similar to getline in that way.
+     *
+     * If strip_cr is true, any trailing carriage return (as would be found on
+     * a file written on Windows) will be left out of the returned line.
+     *
+     * Throws EndOfFileException if the end of the file is encountered.  If the
+     * file does not end in a newline, this could mean that the last line is
+     * never read.
+     */
+    StringPiece ReadLine(char delim = '\n', bool strip_cr = true);
+    /** Read a line of text from the file, or return false on EOF.
+     *
+     * This is like ReadLine, except it returns false where ReadLine throws
+     * EndOfFileException.  Like ReadLine it may not read the last line in the
+     * file if the file does not end in a newline.
+     *
+     * If strip_cr is true, any trailing carriage return (as would be found on
+     * a file written on Windows) will be left out of the returned line.
+     */
+    bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);
+    // Numeric readers, one per supported type.
+    float ReadFloat();
+    double ReadDouble();
+    long int ReadLong();
+    unsigned long int ReadULong();
+    // Skip spaces defined by isspace.  Stops at the first byte not flagged in
+    // delim, or when a Shift() brings in no more data.
+    void SkipSpaces(const bool *delim = kSpaces) {
+      assert(position_ <= position_end_);
+      for (; ; ++position_) {
+        if (position_ == position_end_) {
+          Shift();
+          // And break out at end of file.
+          if (position_ == position_end_) return;
+        }
+        assert(position_ < position_end_);
+        if (!delim[static_cast<unsigned char>(*position_)]) return;
+      }
+    }
+    // Offset of the next unread byte: position within the current window plus
+    // mapped_offset_.
+    uint64_t Offset() const {
+      return position_ - data_.begin() + mapped_offset_;
+    }
+    const std::string &FileName() const { return file_name_; }
+    // Force a progress update.
+    void UpdateProgress();
+  private:
+    void InitializeNoRead(const char *name, std::size_t min_buffer);
+    // Calls InitializeNoRead, so don't call both.
+    void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
+    template <class T> T ReadNumber();
+    // Return [position_, to) and advance position_ to `to`.
+    StringPiece Consume(const char *to) {
+      assert(to >= position_);
+      StringPiece ret(position_, to - position_);
+      position_ = to;
+      return ret;
+    }
+    const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
+    void Shift();
+    // Backends to Shift().
+    void MMapShift(uint64_t desired_begin);
+    void TransitionToRead();
+    void ReadShift();
+    // Unread data is [position_, position_end_).
+    const char *position_, *last_space_, *position_end_;
+    scoped_fd file_;
+    const uint64_t total_size_;
+    std::size_t default_map_size_;
+    uint64_t mapped_offset_;
+    // Order matters: file_ should always be destroyed after this.
+    scoped_memory data_;
+    bool at_end_;
+    bool fallback_to_read_;
+    ErsatzProgress progress_;
+    std::string file_name_;
+    ReadCompressed fell_back_;
+};
+} // namespace util
+#endif // UTIL_FILE_PIECE_H
--- a/cpp/thirdpart/kenlm/util/file_piece_test.cc
+++ b/cpp/thirdpart/kenlm/util/file_piece_test.cc
+// Tests might fail if you have creative characters in your path.  Sue me.
+#include "file_piece.hh"
+#include "file_stream.hh"
+#include "file.hh"
+#include "scoped.hh"
+#define BOOST_TEST_MODULE FilePieceTest
+#include <boost/test/unit_test.hpp>
+#include <fstream>
+#include <iostream>
+#include <cstdio>
+#include <sys/types.h>
+#include <sys/stat.h>
+namespace util {
+namespace {
+// Path of the file the tests read: the first command-line argument given to
+// the test binary, or "file_piece.cc" when none was supplied.
+std::string FileLocation() {
+  const bool has_argument = boost::unit_test::framework::master_test_suite().argc >= 2;
+  if (has_argument) {
+    return std::string(boost::unit_test::framework::master_test_suite().argv[1]);
+  }
+  return "file_piece.cc";
+}
+/* istream backend: every line FilePiece reads from a std::istream must match
+ * what getline reads from the same file, and get() must keep throwing
+ * EndOfFileException once the input is exhausted. */
+BOOST_AUTO_TEST_CASE(IStream) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  std::fstream backing(FileLocation().c_str(), std::ios::in);
+  FilePiece test(backing);
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    BOOST_CHECK_EQUAL(ref_line, test_line);
+  }
+  // Checked twice: the exception has to be repeatable.
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+/* mmap implementation: open the file by name with min_buffer = 1 and compare
+ * every line against getline on the same file. */
+BOOST_AUTO_TEST_CASE(MMapReadLine) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  FilePiece test(FileLocation().c_str(), NULL, 1);
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    // The comparison is skipped when both lines are empty.
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+/* mmap with seek beforehand: a descriptor already positioned at byte 10 must
+ * yield the same lines as an fstream sought to byte 10. */
+BOOST_AUTO_TEST_CASE(MMapSeek) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  ref.seekg(10);
+  scoped_fd file(util::OpenReadOrThrow(FileLocation().c_str()));
+  SeekOrThrow(file.get(), 10);
+  // FilePiece takes ownership of the descriptor.
+  FilePiece test(file.release());
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    // The comparison is skipped when both lines are empty.
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
+/* Apple isn't happy with the popen, fileno, dup.  And I don't want to
+ * reimplement popen.  This is an issue with the test.
+ */
+/* read() implementation */
+// Feed FilePiece from the output of `cat <file>` (a popen stream) and compare
+// every line against getline on the file itself.
+BOOST_AUTO_TEST_CASE(StreamReadLine) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  std::string popen_args = "cat \"";
+  popen_args += FileLocation();
+  popen_args += '"';
+  FILE *catter = popen(popen_args.c_str(), "r");
+  BOOST_REQUIRE(catter);
+  // FilePiece gets its own duplicate of the descriptor; catter is still
+  // closed with pclose below.
+  FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    // The comparison is skipped when both lines are empty.
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_REQUIRE(!pclose(catter));
+}
+#endif
+#ifdef HAVE_ZLIB
+// gzip file: compress the reference file to <location>.gz with the gzip
+// command, open that by name, and compare every line against getline on the
+// uncompressed original.
+BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
+  std::string location(FileLocation());
+  std::fstream ref(location.c_str(), std::ios::in);
+  std::string command("gzip <\"");
+  command += location + "\" >\"" + location + "\".gz";
+  BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
+  FilePiece test((location + ".gz").c_str(), NULL, 1);
+  // The .gz is unlinked as soon as FilePiece has opened it.
+  unlink((location + ".gz").c_str());
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    // The comparison is skipped when both lines are empty.
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+// gzip stream.  Apple doesn't like popen, fileno, dup.  This is an issue with
+// the test.
+#if !defined __APPLE__ && !defined __MINGW32__
+// Feed FilePiece the output of `gzip <file` through a popen stream and
+// compare every line against getline on the uncompressed file.
+BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  std::string command("gzip <\"");
+  command += FileLocation() + "\"";
+  FILE * catter = popen(command.c_str(), "r");
+  BOOST_REQUIRE(catter);
+  // FilePiece gets its own duplicate of the descriptor; catter is still
+  // closed with pclose below.
+  FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
+    // The comparison is skipped when both lines are empty.
+    if (!test_line.empty() || !ref_line.empty()) {
+      BOOST_CHECK_EQUAL(ref_line, test_line);
+    }
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_REQUIRE(!pclose(catter));
+}
+#endif // __APPLE__
+#endif // HAVE_ZLIB
+// Number parsing: write a 23-digit integer, a float and "5" to a temporary
+// file, then read them back through FilePiece.
+BOOST_AUTO_TEST_CASE(Numbers) {
+  scoped_fd file(MakeTemp(FileLocation()));
+  const float floating = 3.2;
+  {
+    // Scoped so the FileStream destructor flushes before the file is re-read.
+    util::FileStream writing(file.get());
+    writing << "94389483984398493890287 " << floating << " 5";
+  }
+  SeekOrThrow(file.get(), 0);
+  util::FilePiece f(file.release());
+  // The first token must be rejected by ReadULong, and the failed parse must
+  // leave it in place for ReadDelimited to return.
+  BOOST_CHECK_THROW(f.ReadULong(), ParseNumberException);
+  BOOST_CHECK_EQUAL("94389483984398493890287", f.ReadDelimited());
+  // Yes, exactly equal.  Isn't double-conversion wonderful?
+  BOOST_CHECK_EQUAL(floating, f.ReadFloat());
+  BOOST_CHECK_EQUAL(5, f.ReadULong());
+}
+} // namespace
+} // namespace util
--- a/cpp/thirdpart/kenlm/util/file_stream.hh
+++ b/cpp/thirdpart/kenlm/util/file_stream.hh
+/* Like std::ofstream but without being incredibly slow.  Backed by a raw fd.
+ * Supports most of the built-in types except for long double.
+ */
+#ifndef UTIL_FILE_STREAM_H
+#define UTIL_FILE_STREAM_H
+#include "fake_ostream.hh"
+#include "file.hh"
+#include "scoped.hh"
+#include <cassert>
+#include <cstring>
+#include <stdint.h>
+namespace util {
+// Buffered output stream backed by a raw file descriptor.
+class FileStream : public FakeOStream<FileStream> {
+  public:
+    // Write to descriptor out through a buffer of buffer_size bytes, raised to
+    // kToStringMaxBytes if buffer_size is smaller.
+    explicit FileStream(int out = -1, std::size_t buffer_size = 8192)
+      : buf_(util::MallocOrThrow(std::max<std::size_t>(buffer_size, kToStringMaxBytes))),
+        current_(static_cast<char*>(buf_.get())),
+        end_(current_ + std::max<std::size_t>(buffer_size, kToStringMaxBytes)),
+        fd_(out) {}
+#if __cplusplus >= 201103L
+    // Take over the buffer, write position and descriptor of another stream.
+    FileStream(FileStream &&from) noexcept : buf_(from.buf_.release()), current_(from.current_), end_(from.end_), fd_(from.fd_) {
+      // Point the source's cursors at whatever buffer it still reports.
+      from.current_ = from.end_ = reinterpret_cast<char*>(from.buf_.get());
+    }
+#endif
+    // Flushes any buffered bytes.
+    ~FileStream() {
+      flush();
+    }
+    // Flush what is pending to the old descriptor, then switch to `to`.
+    void SetFD(int to) {
+      flush();
+      fd_ = to;
+    }
+    // Write out everything buffered so far and rewind the buffer.
+    FileStream &flush() {
+      char *const start = static_cast<char*>(buf_.get());
+      if (current_ == start) return *this;
+      util::WriteOrThrow(fd_, start, current_ - start);
+      current_ = start;
+      return *this;
+    }
+    // For writes of arbitrary size.
+    FileStream &write(const void *data, std::size_t length) {
+      if (UTIL_UNLIKELY(current_ + length > end_)) {
+        flush();
+        if (current_ + length > end_) {
+          // Still too large for the empty buffer: write it straight through.
+          util::WriteOrThrow(fd_, data, length);
+          return *this;
+        }
+      }
+      std::memcpy(current_, data, length);
+      current_ += length;
+      return *this;
+    }
+    // Flush, then reposition the descriptor to offset `to`.
+    FileStream &seekp(uint64_t to) {
+      flush();
+      util::SeekOrThrow(fd_, to);
+      return *this;
+    }
+  protected:
+    friend class FakeOStream<FileStream>;
+    // For writes directly to buffer guaranteed to have amount < buffer size.
+    // Returns where to write, flushing first if the space is not free.
+    char *Ensure(std::size_t amount) {
+      if (UTIL_UNLIKELY(current_ + amount > end_)) {
+        flush();
+        assert(current_ + amount <= end_);
+      }
+      return current_;
+    }
+    // Record that the buffer is now filled up to `to`.
+    void AdvanceTo(char *to) {
+      current_ = to;
+      assert(current_ <= end_);
+    }
+  private:
+    util::scoped_malloc buf_;
+    // Next free byte and one past the end of the buffer.
+    char *current_, *end_;
+    int fd_;
+};
+} // namespace
+#endif
--- a/cpp/thirdpart/kenlm/util/fixed_array.hh
+++ b/cpp/thirdpart/kenlm/util/fixed_array.hh
+#ifndef UTIL_FIXED_ARRAY_H
+#define UTIL_FIXED_ARRAY_H
+#include "scoped.hh"
+#include <cstddef>
+#include <cassert>
+#include <cstdlib>
+namespace util {
+/**
+ * Defines an array with fixed maximum size.
+ *
+ * Ever want an array of things but they don't have a default constructor or
+ * are non-copyable?  FixedArray allows constructing one at a time.
+ */
+template <class T> class FixedArray {
+  public:
+    /** Initialize with a given size bound but do not construct the objects. */
+    explicit FixedArray(std::size_t limit) {
+      Init(limit);
+    }
+    /**
+     * Constructs an instance, but does not initialize it.
+     *
+     * Any objects constructed in this manner must be subsequently @ref FixedArray::Init() "initialized" prior to use.
+     *
+     * @see FixedArray::Init()
+     */
+    FixedArray()
+      : newed_end_(NULL)
+#ifndef NDEBUG
+      , allocated_end_(NULL)
+#endif
+    {}
+    /**
+     * Initialize with a given size bound but do not construct the objects.
+     *
+     * This method is responsible for allocating memory.
+     * Objects stored in this array will be constructed in a location within this allocated memory.
+     *
+     * @param count Maximum number of objects the array may hold.
+     * @throws std::bad_alloc if the memory cannot be obtained.
+     */
+    void Init(std::size_t count) {
+      assert(!block_.get());
+      // Refuse element counts whose byte size would wrap around std::size_t.
+      if (count > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc();
+      block_.reset(malloc(sizeof(T) * count));
+      // malloc(0) is allowed to return NULL, so a NULL result only means
+      // out of memory when some space was actually requested.
+      if (!block_.get() && count) throw std::bad_alloc();
+      newed_end_ = begin();
+#ifndef NDEBUG
+      allocated_end_ = begin() + count;
+#endif
+    }
+    /**
+     * Constructs a copy of the provided array.
+     *
+     * The copy has room for exactly as many objects as from currently holds.
+     *
+     * @param from Array whose elements should be copied into this newly-constructed data structure.
+     */
+    FixedArray(const FixedArray &from) {
+      std::size_t size = from.newed_end_ - static_cast<const T*>(from.block_.get());
+      Init(size);
+      for (std::size_t i = 0; i < size; ++i) {
+        push_back(from[i]);
+      }
+    }
+    /**
+     * Destroys the stored objects; the memory is released along with block_.
+     */
+    ~FixedArray() { clear(); }
+#if __cplusplus >= 201103L
+    /** Takes over the storage of from, leaving from with no end pointers. */
+    FixedArray(FixedArray &&from)
+      : block_(std::move(from.block_)),
+        newed_end_(from.newed_end_)
+#  ifndef NDEBUG
+        , allocated_end_(from.allocated_end_)
+#  endif // NDEBUG
+    {
+      from.newed_end_ = NULL;
+#  ifndef NDEBUG
+      from.allocated_end_ = NULL;
+#  endif // NDEBUG
+    }
+#endif // C++11
+    /** Gets a pointer to the first object currently stored in this data structure. */
+    T *begin() { return static_cast<T*>(block_.get()); }
+    /** Gets a const pointer to the first object currently stored in this data structure. */
+    const T *begin() const { return static_cast<const T*>(block_.get()); }
+    /** Gets a pointer to one past the last object currently stored in this data structure. */
+    T *end() { return newed_end_; }
+    /** Gets a const pointer to one past the last object currently stored in this data structure. */
+    const T *end() const { return newed_end_; }
+    /** Gets a reference to the last object currently stored in this data structure. */
+    T &back() { return *(end() - 1); }
+    /** Gets a const reference to the last object currently stored in this data structure. */
+    const T &back() const { return *(end() - 1); }
+    /** Gets the number of objects currently stored in this data structure. */
+    std::size_t size() const { return end() - begin(); }
+    /** Returns true if there are no objects currently stored in this data structure. */
+    bool empty() const { return begin() == end(); }
+    /**
+     * Gets a reference to the object with index i currently stored in this data structure.
+     *
+     * @param i Index of the object to reference
+     */
+    T &operator[](std::size_t i) {
+      assert(i < size());
+      return begin()[i];
+    }
+    /**
+     * Gets a const reference to the object with index i currently stored in this data structure.
+     *
+     * @param i Index of the object to reference
+     */
+    const T &operator[](std::size_t i) const {
+      assert(i < size());
+      return begin()[i];
+    }
+    /**
+     * Constructs a new object using the provided parameters,
+     * and stores it in this data structure.
+     *
+     * The memory backing the constructed object is managed by this data structure.
+     * In C++11 the arguments are perfectly forwarded to T's constructor, so
+     * rvalues are moved rather than copied; the pre-C++11 overloads accept up
+     * to two arguments.
+     */
+#if __cplusplus >= 201103L
+    template <typename... Construct> T *emplace_back(Construct&&... construct) {
+      T *ret = end();
+      new (end()) T(std::forward<Construct>(construct)...);
+      Constructed();
+      return ret;
+    }
+    template <typename... Construct> T *push_back(Construct&&... construct) {
+      T *ret = end();
+      new (end()) T(std::forward<Construct>(construct)...);
+      Constructed();
+      return ret;
+    }
+#else
+    void push_back() {
+      new (end()) T();
+      Constructed();
+    }
+    template <class C> void push_back(const C &c) {
+      new (end()) T(c);
+      Constructed();
+    }
+    template <class C> void push_back(C &c) {
+      new (end()) T(c);
+      Constructed();
+    }
+    template <class C, class D> void push_back(const C &c, const D &d) {
+      new (end()) T(c, d);
+      Constructed();
+    }
+#endif
+    /** Destroys the last object and removes it from the array. */
+    void pop_back() {
+      back().~T();
+      --newed_end_;
+    }
+    /**
+     * Removes all elements from this array, destroying them last to first.
+     */
+    void clear() {
+      while (newed_end_ != begin())
+        pop_back();
+    }
+  protected:
+    // Always call Constructed after successful completion of new.
+    void Constructed() {
+      ++newed_end_;
+#ifndef NDEBUG
+      assert(newed_end_ <= allocated_end_);
+#endif
+    }
+  private:
+    util::scoped_malloc block_;
+    // One past the last constructed object.
+    T *newed_end_;
+#ifndef NDEBUG
+    // One past the allocated storage; only tracked for the bounds assertion.
+    T *allocated_end_;
+#endif
+};
+} // namespace util
+#endif // UTIL_FIXED_ARRAY_H
--- a/cpp/thirdpart/kenlm/util/float_to_string.cc
+++ b/cpp/thirdpart/kenlm/util/float_to_string.cc
+#include "float_to_string.hh"
+#include "double-conversion/double-conversion.h"
+#include "double-conversion/utils.h"
+namespace util {
+namespace {
+// Converter shared by both ToString overloads: no flags, "inf" and "NaN" as
+// the infinity and NaN symbols, 'e' as the exponent character.  The trailing
+// numbers are presumably decimal_in_shortest_low (-6),
+// decimal_in_shortest_high (21) and the two precision-mode padding limits
+// (6 and 0) -- TODO confirm against the bundled double-conversion.h.
+const double_conversion::DoubleToStringConverter kConverter(double_conversion::DoubleToStringConverter::NO_FLAGS, "inf", "NaN", 'e', -6, 21, 6, 0);
+} // namespace
+// Write the shortest representation of value into `to`, which is treated as
+// holding ToStringBuf<double>::kBytes bytes.  Returns a pointer just past the
+// last character written.
+char *ToString(double value, char *to) {
+  double_conversion::StringBuilder out(to, ToStringBuf<double>::kBytes);
+  kConverter.ToShortest(value, &out);
+  return to + out.position();
+}
+// Write the shortest single-precision representation of value into `to`,
+// which is treated as holding ToStringBuf<float>::kBytes bytes.  Returns a
+// pointer just past the last character written.
+char *ToString(float value, char *to) {
+  double_conversion::StringBuilder out(to, ToStringBuf<float>::kBytes);
+  kConverter.ToShortestSingle(value, &out);
+  return to + out.position();
+}
+} // namespace util
--- a/cpp/thirdpart/kenlm/util/float_to_string.hh
+++ b/cpp/thirdpart/kenlm/util/float_to_string.hh
+#ifndef UTIL_FLOAT_TO_STRING_H
+#define UTIL_FLOAT_TO_STRING_H
+// Just for ToStringBuf
+#include "integer_to_string.hh"
+namespace util {
+template <> struct ToStringBuf<double> {
+  // Upper bound on the characters ToString(double) can write.  With the
+  // converter in float_to_string.cc (decimal range -6 to 21) the longest
+  // shortest-mode output is a sign, "0.00000" and
+  // DoubleToStringConverter::kBase10MaximalLength (17) digits: 25 characters.
+  // One more is added for null paranoia.
+  // NOTE(review): FileStream sizes its minimum buffer from kToStringMaxBytes;
+  // confirm that constant is still >= this value.
+  static const unsigned kBytes = 26;
+};
+// Single precision needs fewer digits, but its plain decimal form still
+// reaches 22 characters (a sign plus 21 digits, e.g. -1e20), so be
+// conservative and say the same as double.
+template <> struct ToStringBuf<float> {
+  static const unsigned kBytes = 26;
+};
+// Format value into the buffer at `to` and return a pointer just past the
+// last character written.  The buffer must provide ToStringBuf<T>::kBytes
+// bytes for the matching type.
+char *ToString(double value, char *to);
+char *ToString(float value, char *to);
+} // namespace util
+#endif // UTIL_FLOAT_TO_STRING_H
--- a/cpp/thirdpart/kenlm/util/getopt.c
+++ b/cpp/thirdpart/kenlm/util/getopt.c
+/*
+POSIX getopt for Windows
+AT&T Public License
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
+#ifndef __GNUC__
+#include "getopt.hh"
+#include <stdio.h>
+#include <string.h>
+#define NULL	0
+#define EOF	(-1)
+#define ERR(s, c)	if(opterr){\
+	char errbuf[2];\
+	errbuf[0] = c; errbuf[1] = '\n';\
+	fputs(argv[0], stderr);\
+	fputs(s, stderr);\
+	fputc(c, stderr);}
+	//(void) write(2, argv[0], (unsigned)strlen(argv[0]));\
+	//(void) write(2, s, (unsigned)strlen(s));\
+	//(void) write(2, errbuf, 2);}
+/* getopt state shared with the caller. */
+int	opterr = 1;	/* non-zero: ERR prints diagnostics to stderr */
+int	optind = 1;	/* index into argv of the element getopt examines next */
+int	optopt;		/* option character getopt looked at most recently */
+char	*optarg;	/* argument of the option just returned, or NULL */
+/*
+ * Return the next option character found in argv.
+ *
+ * opts lists the accepted option characters; a character followed by ':'
+ * takes an argument, which is returned through optarg.  optind is advanced
+ * past everything consumed and optopt always holds the character examined.
+ *
+ * Returns the option character, '?' for an unknown option or a missing
+ * argument (after reporting it through ERR), or EOF once argv[optind] is not
+ * an option or is the "--" terminator, which is skipped.
+ */
+int
+getopt(int argc, char **argv, char *opts)
+{
+	static int sp = 1;	/* index of the next character inside argv[optind] */
+	int c;
+	char *cp;
+	if(sp == 1) {
+		/* Starting a new argv element: decide whether it holds options. */
+		if(optind >= argc ||
+		   argv[optind][0] != '-' || argv[optind][1] == '\0')
+			return(EOF);
+		else if(strcmp(argv[optind], "--") == 0) {
+			optind++;
+			return(EOF);
+		}
+	}
+	optopt = c = argv[optind][sp];
+	if(c == ':' || (cp=strchr(opts, c)) == NULL) {
+		ERR(": illegal option -- ", c);
+		/* Step past the bad character, moving on if the element is done. */
+		if(argv[optind][++sp] == '\0') {
+			optind++;
+			sp = 1;
+		}
+		return('?');
+	}
+	if(*++cp == ':') {
+		/* The option takes an argument: either the rest of this element
+		 * or the whole of the next one. */
+		if(argv[optind][sp+1] != '\0')
+			optarg = &argv[optind++][sp+1];
+		else if(++optind >= argc) {
+			ERR(": option requires an argument -- ", c);
+			sp = 1;
+			return('?');
+		} else
+			optarg = argv[optind++];
+		sp = 1;
+	} else {
+		/* Plain flag: move to the next character or the next element. */
+		if(argv[optind][++sp] == '\0') {
+			sp = 1;
+			optind++;
+		}
+		optarg = NULL;
+	}
+	return(c);
+}
+#endif  /* __GNUC__ */
--- a/cpp/thirdpart/kenlm/util/getopt.hh
+++ b/cpp/thirdpart/kenlm/util/getopt.hh
+/*
+POSIX getopt for Windows
+AT&T Public License
+Code given out at the 1985 UNIFORUM conference in Dallas.
+*/
+#ifdef __GNUC__
+#include <getopt.h>
+#endif
+#ifndef __GNUC__
+#ifndef UTIL_GETOPT_H
+#define UTIL_GETOPT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Globals and entry point defined in getopt.c. */
+extern int opterr;	/* non-zero enables error messages on stderr */
+extern int optind;	/* index into argv of the next element to examine */
+extern int optopt;	/* option character examined most recently */
+extern char *optarg;	/* argument of the option just returned, if any */
+/* Returns the next option character from argv, '?' on an error, or EOF (-1)
+ * when there are no more options.  opts lists the accepted characters; one
+ * followed by ':' takes an argument. */
+extern int getopt(int argc, char **argv, char *opts);
+#ifdef __cplusplus
+}
+#endif
+#endif  /* UTIL_GETOPT_H */
+#endif  /* __GNUC__ */