Commit 688b6eac authored by SWHL's avatar SWHL
Browse files

Update files

parents
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DOUBLE_CONVERSION_DOUBLE_H_
#define DOUBLE_CONVERSION_DOUBLE_H_
#include "diy-fp.h"
namespace double_conversion {
// Raw bit-pattern conversions between floating-point values and unsigned
// integers of the same width.
// We assume that doubles and uint64_t have the same endianness.

// Reinterprets the bits of |d| as a 64-bit unsigned integer.
static uint64_t double_to_uint64(double d) {
  const uint64_t bits = BitCast<uint64_t>(d);
  return bits;
}

// Reinterprets the 64-bit pattern |d64| as a double.
static double uint64_to_double(uint64_t d64) {
  const double value = BitCast<double>(d64);
  return value;
}

// Reinterprets the bits of |f| as a 32-bit unsigned integer.
static uint32_t float_to_uint32(float f) {
  const uint32_t bits = BitCast<uint32_t>(f);
  return bits;
}

// Reinterprets the 32-bit pattern |d32| as a float.
static float uint32_to_float(uint32_t d32) {
  const float value = BitCast<float>(d32);
  return value;
}
// Helper functions for doubles.
class Double {
public:
static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);
static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);
static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);
static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit.
static const int kSignificandSize = 53;
Double() : d64_(0) {}
explicit Double(double d) : d64_(double_to_uint64(d)) {}
explicit Double(uint64_t d64) : d64_(d64) {}
explicit Double(DiyFp diy_fp)
: d64_(DiyFpToUint64(diy_fp)) {}
// The value encoded by this Double must be greater or equal to +0.0.
// It must not be special (infinity, or NaN).
DiyFp AsDiyFp() const {
ASSERT(Sign() > 0);
ASSERT(!IsSpecial());
return DiyFp(Significand(), Exponent());
}
// The value encoded by this Double must be strictly greater than 0.
DiyFp AsNormalizedDiyFp() const {
ASSERT(value() > 0.0);
uint64_t f = Significand();
int e = Exponent();
// The current double could be a denormal.
while ((f & kHiddenBit) == 0) {
f <<= 1;
e--;
}
// Do the final shifts in one go.
f <<= DiyFp::kSignificandSize - kSignificandSize;
e -= DiyFp::kSignificandSize - kSignificandSize;
return DiyFp(f, e);
}
// Returns the double's bit as uint64.
uint64_t AsUint64() const {
return d64_;
}
// Returns the next greater double. Returns +infinity on input +infinity.
double NextDouble() const {
if (d64_ == kInfinity) return Double(kInfinity).value();
if (Sign() < 0 && Significand() == 0) {
// -0.0
return 0.0;
}
if (Sign() < 0) {
return Double(d64_ - 1).value();
} else {
return Double(d64_ + 1).value();
}
}
double PreviousDouble() const {
if (d64_ == (kInfinity | kSignMask)) return -Infinity();
if (Sign() < 0) {
return Double(d64_ + 1).value();
} else {
if (Significand() == 0) return -0.0;
return Double(d64_ - 1).value();
}
}
int Exponent() const {
if (IsDenormal()) return kDenormalExponent;
uint64_t d64 = AsUint64();
int biased_e =
static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
return biased_e - kExponentBias;
}
uint64_t Significand() const {
uint64_t d64 = AsUint64();
uint64_t significand = d64 & kSignificandMask;
if (!IsDenormal()) {
return significand + kHiddenBit;
} else {
return significand;
}
}
// Returns true if the double is a denormal.
bool IsDenormal() const {
uint64_t d64 = AsUint64();
return (d64 & kExponentMask) == 0;
}
// We consider denormals not to be special.
// Hence only Infinity and NaN are special.
bool IsSpecial() const {
uint64_t d64 = AsUint64();
return (d64 & kExponentMask) == kExponentMask;
}
bool IsNan() const {
uint64_t d64 = AsUint64();
return ((d64 & kExponentMask) == kExponentMask) &&
((d64 & kSignificandMask) != 0);
}
bool IsInfinite() const {
uint64_t d64 = AsUint64();
return ((d64 & kExponentMask) == kExponentMask) &&
((d64 & kSignificandMask) == 0);
}
int Sign() const {
uint64_t d64 = AsUint64();
return (d64 & kSignMask) == 0? 1: -1;
}
// Precondition: the value encoded by this Double must be greater or equal
// than +0.0.
DiyFp UpperBoundary() const {
ASSERT(Sign() > 0);
return DiyFp(Significand() * 2 + 1, Exponent() - 1);
}
// Computes the two boundaries of this.
// The bigger boundary (m_plus) is normalized. The lower boundary has the same
// exponent as m_plus.
// Precondition: the value encoded by this Double must be greater than 0.
void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
ASSERT(value() > 0.0);
DiyFp v = this->AsDiyFp();
DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
DiyFp m_minus;
if (LowerBoundaryIsCloser()) {
m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
} else {
m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
}
m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
m_minus.set_e(m_plus.e());
*out_m_plus = m_plus;
*out_m_minus = m_minus;
}
bool LowerBoundaryIsCloser() const {
// The boundary is closer if the significand is of the form f == 2^p-1 then
// the lower boundary is closer.
// Think of v = 1000e10 and v- = 9999e9.
// Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
// at a distance of 1e8.
// The only exception is for the smallest normal: the largest denormal is
// at the same distance as its successor.
// Note: denormals have the same exponent as the smallest normals.
bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
return physical_significand_is_zero && (Exponent() != kDenormalExponent);
}
double value() const { return uint64_to_double(d64_); }
// Returns the significand size for a given order of magnitude.
// If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
// This function returns the number of significant binary digits v will have
// once it's encoded into a double. In almost all cases this is equal to
// kSignificandSize. The only exceptions are denormals. They start with
// leading zeroes and their effective significand-size is hence smaller.
static int SignificandSizeForOrderOfMagnitude(int order) {
if (order >= (kDenormalExponent + kSignificandSize)) {
return kSignificandSize;
}
if (order <= kDenormalExponent) return 0;
return order - kDenormalExponent;
}
static double Infinity() {
return Double(kInfinity).value();
}
static double NaN() {
return Double(kNaN).value();
}
private:
static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
static const int kDenormalExponent = -kExponentBias + 1;
static const int kMaxExponent = 0x7FF - kExponentBias;
static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);
const uint64_t d64_;
static uint64_t DiyFpToUint64(DiyFp diy_fp) {
uint64_t significand = diy_fp.f();
int exponent = diy_fp.e();
while (significand > kHiddenBit + kSignificandMask) {
significand >>= 1;
exponent++;
}
if (exponent >= kMaxExponent) {
return kInfinity;
}
if (exponent < kDenormalExponent) {
return 0;
}
while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
significand <<= 1;
exponent--;
}
uint64_t biased_exponent;
if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
biased_exponent = 0;
} else {
biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
}
return (significand & kSignificandMask) |
(biased_exponent << kPhysicalSignificandSize);
}
DISALLOW_COPY_AND_ASSIGN(Double);
};
class Single {
public:
static const uint32_t kSignMask = 0x80000000;
static const uint32_t kExponentMask = 0x7F800000;
static const uint32_t kSignificandMask = 0x007FFFFF;
static const uint32_t kHiddenBit = 0x00800000;
static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit.
static const int kSignificandSize = 24;
Single() : d32_(0) {}
explicit Single(float f) : d32_(float_to_uint32(f)) {}
explicit Single(uint32_t d32) : d32_(d32) {}
// The value encoded by this Single must be greater or equal to +0.0.
// It must not be special (infinity, or NaN).
DiyFp AsDiyFp() const {
ASSERT(Sign() > 0);
ASSERT(!IsSpecial());
return DiyFp(Significand(), Exponent());
}
// Returns the single's bit as uint64.
uint32_t AsUint32() const {
return d32_;
}
int Exponent() const {
if (IsDenormal()) return kDenormalExponent;
uint32_t d32 = AsUint32();
int biased_e =
static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize);
return biased_e - kExponentBias;
}
uint32_t Significand() const {
uint32_t d32 = AsUint32();
uint32_t significand = d32 & kSignificandMask;
if (!IsDenormal()) {
return significand + kHiddenBit;
} else {
return significand;
}
}
// Returns true if the single is a denormal.
bool IsDenormal() const {
uint32_t d32 = AsUint32();
return (d32 & kExponentMask) == 0;
}
// We consider denormals not to be special.
// Hence only Infinity and NaN are special.
bool IsSpecial() const {
uint32_t d32 = AsUint32();
return (d32 & kExponentMask) == kExponentMask;
}
bool IsNan() const {
uint32_t d32 = AsUint32();
return ((d32 & kExponentMask) == kExponentMask) &&
((d32 & kSignificandMask) != 0);
}
bool IsInfinite() const {
uint32_t d32 = AsUint32();
return ((d32 & kExponentMask) == kExponentMask) &&
((d32 & kSignificandMask) == 0);
}
int Sign() const {
uint32_t d32 = AsUint32();
return (d32 & kSignMask) == 0? 1: -1;
}
// Computes the two boundaries of this.
// The bigger boundary (m_plus) is normalized. The lower boundary has the same
// exponent as m_plus.
// Precondition: the value encoded by this Single must be greater than 0.
void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
ASSERT(value() > 0.0);
DiyFp v = this->AsDiyFp();
DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
DiyFp m_minus;
if (LowerBoundaryIsCloser()) {
m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
} else {
m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
}
m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
m_minus.set_e(m_plus.e());
*out_m_plus = m_plus;
*out_m_minus = m_minus;
}
// Precondition: the value encoded by this Single must be greater or equal
// than +0.0.
DiyFp UpperBoundary() const {
ASSERT(Sign() > 0);
return DiyFp(Significand() * 2 + 1, Exponent() - 1);
}
bool LowerBoundaryIsCloser() const {
// The boundary is closer if the significand is of the form f == 2^p-1 then
// the lower boundary is closer.
// Think of v = 1000e10 and v- = 9999e9.
// Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
// at a distance of 1e8.
// The only exception is for the smallest normal: the largest denormal is
// at the same distance as its successor.
// Note: denormals have the same exponent as the smallest normals.
bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0);
return physical_significand_is_zero && (Exponent() != kDenormalExponent);
}
float value() const { return uint32_to_float(d32_); }
static float Infinity() {
return Single(kInfinity).value();
}
static float NaN() {
return Single(kNaN).value();
}
private:
static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
static const int kDenormalExponent = -kExponentBias + 1;
static const int kMaxExponent = 0xFF - kExponentBias;
static const uint32_t kInfinity = 0x7F800000;
static const uint32_t kNaN = 0x7FC00000;
const uint32_t d32_;
DISALLOW_COPY_AND_ASSIGN(Single);
};
} // namespace double_conversion
#endif // DOUBLE_CONVERSION_DOUBLE_H_
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdarg.h>
#include <limits.h>
#include "strtod.h"
#include "bignum.h"
#include "cached-powers.h"
#include "ieee.h"
namespace double_conversion {
// Largest digit count for which every decimal integer is exactly
// representable as a double:
// 2^53 = 9007199254740992.
// Any integer with at most 15 decimal digits will hence fit into a double
// (which has a 53bit significand) without loss of precision.
static const int kMaxExactDoubleIntegerDecimalDigits = 15;
// Number of decimal digits that always fit into a uint64_t:
// 2^64 = 18446744073709551616 > 10^19
static const int kMaxUint64DecimalDigits = 19;
// Decimal exponent limits used to short-cut obvious overflow and underflow.
// Max double: 1.7976931348623157 x 10^308
// Min non-zero double: 4.9406564584124654 x 10^-324
// Any x >= 10^309 is interpreted as +infinity.
// Any x <= 10^-324 is interpreted as 0.
// Note that 2.5e-324 (despite being smaller than the min double) will be read
// as non-zero (equal to the min non-zero double).
static const int kMaxDecimalPower = 309;
static const int kMinDecimalPower = -324;
// Largest value of a uint64_t: 2^64 - 1 = 18446744073709551615.
static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
// Powers of ten that are exactly representable as a double, indexed by their
// decimal exponent (entry i holds 10^i).
static const double exact_powers_of_ten[] = {
  1e0,
  1e1,
  1e2,
  1e3,
  1e4,
  1e5,
  1e6,
  1e7,
  1e8,
  1e9,
  1e10,
  1e11,
  1e12,
  1e13,
  1e14,
  1e15,
  1e16,
  1e17,
  1e18,
  1e19,
  1e20,
  1e21,
  // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
  1e22
};
// Number of entries in exact_powers_of_ten; valid indices (decimal exponents)
// are 0 .. kExactPowersOfTenSize - 1.
static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
// Maximum number of significant digits in the decimal representation.
// In fact the value is 772 (see conversions.cc), but to give us some margin
// we round up to 780.
// Longer inputs are cut to this length by CutToMaxSignificantDigits.
static const int kMaxSignificantDecimalDigits = 780;
// Returns the part of |buffer| that starts at its first character other than
// '0'. If there is no such character, an empty vector positioned at the start
// of |buffer| is returned.
static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
  const int length = buffer.length();
  int first_kept = 0;
  while (first_kept < length && buffer[first_kept] == '0') {
    ++first_kept;
  }
  if (first_kept == length) {
    return Vector<const char>(buffer.start(), 0);
  }
  return buffer.SubVector(first_kept, length);
}
// Returns the part of |buffer| that ends at its last character other than
// '0'. If there is no such character, an empty vector positioned at the start
// of |buffer| is returned.
static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
  int kept_length = buffer.length();
  while (kept_length > 0 && buffer[kept_length - 1] == '0') {
    --kept_length;
  }
  if (kept_length == 0) {
    return Vector<const char>(buffer.start(), 0);
  }
  return buffer.SubVector(0, kept_length);
}
// Writes a kMaxSignificantDecimalDigits long replacement for |buffer| into
// |significant_buffer|: the leading kMaxSignificantDecimalDigits - 1 digits
// are copied and the final digit is forced to '1'. |significant_exponent|
// receives |exponent| increased by the number of digits that were cut off.
// The caller must pass a trimmed buffer that is longer than
// kMaxSignificantDecimalDigits.
static void CutToMaxSignificantDigits(Vector<const char> buffer,
                                      int exponent,
                                      char* significant_buffer,
                                      int* significant_exponent) {
  const int last_index = kMaxSignificantDecimalDigits - 1;
  int position = 0;
  while (position < last_index) {
    significant_buffer[position] = buffer[position];
    ++position;
  }
  // The input buffer has been trimmed. Therefore the last digit must be
  // different from '0'.
  ASSERT(buffer[buffer.length() - 1] != '0');
  // A non-zero last digit stands in for the dropped (non-zero) tail. This is
  // sufficient to guarantee correct rounding.
  significant_buffer[last_index] = '1';
  const int dropped_digits = buffer.length() - kMaxSignificantDecimalDigits;
  *significant_exponent = exponent + dropped_digits;
}
// Strips leading and trailing zeros from |buffer| and limits the result to
// kMaxSignificantDecimalDigits digits.
// When no cutting is needed, |trimmed| refers into the input buffer itself.
// Otherwise the shortened digits are written to |buffer_copy_space| (which
// must provide at least kMaxSignificantDecimalDigits chars, see |space_size|)
// and |trimmed| refers to that copy.
// |updated_exponent| receives the exponent adjusted for all removed digits.
static void TrimAndCut(Vector<const char> buffer, int exponent,
                       char* buffer_copy_space, int space_size,
                       Vector<const char>* trimmed, int* updated_exponent) {
  Vector<const char> without_leading = TrimLeadingZeros(buffer);
  Vector<const char> digits = TrimTrailingZeros(without_leading);
  // Every removed trailing zero is a factor of ten moved into the exponent.
  const int removed_trailing = without_leading.length() - digits.length();
  const int adjusted_exponent = exponent + removed_trailing;

  if (digits.length() <= kMaxSignificantDecimalDigits) {
    *trimmed = digits;
    *updated_exponent = adjusted_exponent;
    return;
  }

  (void) space_size;  // Mark variable as used.
  ASSERT(space_size >= kMaxSignificantDecimalDigits);
  CutToMaxSignificantDigits(digits, adjusted_exponent,
                            buffer_copy_space, updated_exponent);
  *trimmed = Vector<const char>(buffer_copy_space,
                                kMaxSignificantDecimalDigits);
}
// Converts the leading digits of |buffer| into a uint64 and stores the number
// of consumed digits in |number_of_read_digits|.
// Reading stops at the end of the buffer or as soon as the accumulated value
// exceeds kMaxUint64 / 10 - 1.
// When the string starts with "1844674407370955161" no further digit is read.
// Since 2^64 = 18446744073709551616 it would still be possible read another
// digit if it was less or equal than 6, but this would complicate the code.
static uint64_t ReadUint64(Vector<const char> buffer,
                           int* number_of_read_digits) {
  const uint64_t largest_extendable = kMaxUint64 / 10 - 1;
  uint64_t value = 0;
  int consumed = 0;
  for (; consumed < buffer.length() && value <= largest_extendable;
       ++consumed) {
    const int digit = buffer[consumed] - '0';
    ASSERT(0 <= digit && digit <= 9);
    value = 10 * value + digit;
  }
  *number_of_read_digits = consumed;
  return value;
}
// Reads as many digits of |buffer| as fit into a uint64 and stores them in
// |result| as a DiyFp with binary exponent 0 (not necessarily normalized).
// |remaining_decimals| receives the number of digits that were not read.
// If remaining_decimals is zero then the returned DiyFp is accurate.
// Otherwise it has been rounded and has error of at most 1/2 ulp.
static void ReadDiyFp(Vector<const char> buffer,
                      DiyFp* result,
                      int* remaining_decimals) {
  int read_digits;
  uint64_t significand = ReadUint64(buffer, &read_digits);
  const int unread_digits = buffer.length() - read_digits;
  // Round the significand using the first digit that was left unread.
  if (unread_digits != 0 && buffer[read_digits] >= '5') {
    ++significand;
  }
  *result = DiyFp(significand, 0);
  *remaining_decimals = unread_digits;
}
// Fast path: computes trimmed * 10^exponent with plain double arithmetic when
// both the digits and the needed power(s) of ten are exactly representable.
// Returns true and stores the value in |result| on success; returns false
// when the fast path is not applicable (|result| is then left untouched).
static bool DoubleStrtod(Vector<const char> trimmed,
                         int exponent,
                         double* result) {
#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
  // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
  // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
  // result is not accurate.
  // We know that Windows32 uses 64 bits and is therefore accurate.
  // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
  // the same problem.
  return false;
#endif
  const int digit_count = trimmed.length();
  if (digit_count > kMaxExactDoubleIntegerDecimalDigits) {
    return false;
  }
  // The trimmed input fits into a double.
  // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
  // can compute the result-double simply by multiplying (resp. dividing) the
  // two numbers.
  // This is possible because IEEE guarantees that floating-point operations
  // return the best possible approximation.
  int read_digits;
  if (exponent < 0) {
    const int divisor_power = -exponent;
    if (divisor_power >= kExactPowersOfTenSize) {
      return false;
    }
    // 10^-exponent fits into a double.
    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
    ASSERT(read_digits == trimmed.length());
    *result /= exact_powers_of_ten[divisor_power];
    return true;
  }
  if (exponent < kExactPowersOfTenSize) {
    // 10^exponent fits into a double.
    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
    ASSERT(read_digits == trimmed.length());
    *result *= exact_powers_of_ten[exponent];
    return true;
  }
  const int spare_digits = kMaxExactDoubleIntegerDecimalDigits - digit_count;
  const int leftover_power = exponent - spare_digits;
  if (leftover_power < kExactPowersOfTenSize) {
    // The trimmed string was short and we can multiply it with
    // 10^spare_digits. As a result the remaining exponent now fits
    // into a double too.
    *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
    ASSERT(read_digits == trimmed.length());
    *result *= exact_powers_of_ten[spare_digits];
    *result *= exact_powers_of_ten[leftover_power];
    return true;
  }
  return false;
}
// Returns 10^exponent as an exact DiyFp.
// The given exponent must be in the range [1; kDecimalExponentDistance[.
static DiyFp AdjustmentPowerOfTen(int exponent) {
  ASSERT(0 < exponent);
  ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
  // The table below is hardcoded for a decimal exponent distance of 8, so
  // only the powers 1 to 7 are needed.
  ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
  DiyFp power;
  switch (exponent) {
    case 1:
      power = DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
      break;
    case 2:
      power = DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
      break;
    case 3:
      power = DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
      break;
    case 4:
      power = DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
      break;
    case 5:
      power = DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
      break;
    case 6:
      power = DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
      break;
    case 7:
      power = DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
      break;
    default:
      UNREACHABLE();
  }
  return power;
}
// Approximates buffer * 10^exponent with 64-bit DiyFp arithmetic and stores
// the rounded double in |result|, tracking an upper bound of the accumulated
// error along the way.
// If the function returns true then the result is the correct double.
// Otherwise it is either the correct double or the double that is just below
// the correct double.
static bool DiyFpStrtod(Vector<const char> buffer,
int exponent,
double* result) {
DiyFp input;
int remaining_decimals;
ReadDiyFp(buffer, &input, &remaining_decimals);
// Since we may have dropped some digits the input is not accurate.
// If remaining_decimals is different than 0 then the error is at most
// .5 ulp (unit in the last place).
// We don't want to deal with fractions and therefore keep a common
// denominator: |error| is counted in units of 1/kDenominator ulp.
const int kDenominatorLog = 3;
const int kDenominator = 1 << kDenominatorLog;
// Move the remaining decimals into the exponent.
exponent += remaining_decimals;
uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
// Normalizing changes the exponent; scale the error by the same amount.
int old_e = input.e();
input.Normalize();
error <<= old_e - input.e();
ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
if (exponent < PowersOfTenCache::kMinDecimalExponent) {
// Below the range of the cached powers: the result is reported as zero.
*result = 0.0;
return true;
}
DiyFp cached_power;
int cached_decimal_exponent;
PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
&cached_power,
&cached_decimal_exponent);
if (cached_decimal_exponent != exponent) {
// The cache did not return the requested power; bridge the difference with
// an exact small power of ten.
int adjustment_exponent = exponent - cached_decimal_exponent;
DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
input.Multiply(adjustment_power);
if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
// The product of input with the adjustment power fits into a 64 bit
// integer.
ASSERT(DiyFp::kSignificandSize == 64);
} else {
// The adjustment power is exact. There is hence only an error of 0.5.
error += kDenominator / 2;
}
}
input.Multiply(cached_power);
// The error introduced by a multiplication of a*b equals
// error_a + error_b + error_a*error_b/2^64 + 0.5
// Substituting a with 'input' and b with 'cached_power' we have
// error_b = 0.5 (all cached powers have an error of less than 0.5 ulp),
// error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
int error_b = kDenominator / 2;
int error_ab = (error == 0 ? 0 : 1); // We round up to 1.
int fixed_error = kDenominator / 2;
error += error_b + error_ab + fixed_error;
// Normalize again and rescale the error accordingly.
old_e = input.e();
input.Normalize();
error <<= old_e - input.e();
// See if the double's significand changes if we add/subtract the error.
int order_of_magnitude = DiyFp::kSignificandSize + input.e();
int effective_significand_size =
Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
// Number of low bits of input.f() that do not fit into the double's
// significand and therefore decide the rounding.
int precision_digits_count =
DiyFp::kSignificandSize - effective_significand_size;
if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
// This can only happen for very small denormals. In this case the
// half-way multiplied by the denominator exceeds the range of an uint64.
// Simply shift everything to the right.
int shift_amount = (precision_digits_count + kDenominatorLog) -
DiyFp::kSignificandSize + 1;
input.set_f(input.f() >> shift_amount);
input.set_e(input.e() + shift_amount);
// We add 1 for the lost precision of error, and kDenominator for
// the lost precision of input.f().
error = (error >> shift_amount) + 1 + kDenominator;
precision_digits_count -= shift_amount;
}
// We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
ASSERT(DiyFp::kSignificandSize == 64);
ASSERT(precision_digits_count < 64);
uint64_t one64 = 1;
uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
uint64_t precision_bits = input.f() & precision_bits_mask;
uint64_t half_way = one64 << (precision_digits_count - 1);
// Bring both values to the same 1/kDenominator scale as |error|.
precision_bits *= kDenominator;
half_way *= kDenominator;
DiyFp rounded_input(input.f() >> precision_digits_count,
input.e() + precision_digits_count);
// Round up only if the discarded bits are at or beyond half-way even after
// allowing for the error.
if (precision_bits >= half_way + error) {
rounded_input.set_f(rounded_input.f() + 1);
}
// If the last_bits are too close to the half-way case then we are too
// inaccurate and round down. In this case we return false so that we can
// fall back to a more precise algorithm.
*result = Double(rounded_input).value();
if (half_way - error < precision_bits && precision_bits < half_way + error) {
// Too imprecise. The caller will have to fall back to a slower version.
// However the returned number is guaranteed to be either the correct
// double, or the next-lower double.
return false;
} else {
return true;
}
}
// Compares buffer*10^exponent with diy_fp exactly, using Bignum arithmetic.
// Returns
//   - -1 if buffer*10^exponent < diy_fp.
//   -  0 if buffer*10^exponent == diy_fp.
//   - +1 if buffer*10^exponent > diy_fp.
// Preconditions:
//   buffer.length() + exponent <= kMaxDecimalPower + 1
//   buffer.length() + exponent > kMinDecimalPower
//   buffer.length() <= kMaxSignificantDecimalDigits
static int CompareBufferWithDiyFp(Vector<const char> buffer,
                                  int exponent,
                                  DiyFp diy_fp) {
  ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
  ASSERT(buffer.length() + exponent > kMinDecimalPower);
  ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
  // Make sure that the Bignum will be able to hold all our numbers.
  // Our Bignum implementation has a separate field for exponents. Shifts will
  // consume at most one bigit (< 64 bits).
  // log2(10) == 3.3219...
  ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
  Bignum decimal_side;
  Bignum binary_side;
  decimal_side.AssignDecimalString(buffer);
  binary_side.AssignUInt64(diy_fp.f());
  // Negative exponents are never applied as divisions; the other side is
  // multiplied by the inverse factor instead, which keeps both sides integral.
  if (exponent < 0) {
    binary_side.MultiplyByPowerOfTen(-exponent);
  } else {
    decimal_side.MultiplyByPowerOfTen(exponent);
  }
  const int binary_exponent = diy_fp.e();
  if (binary_exponent <= 0) {
    decimal_side.ShiftLeft(-binary_exponent);
  } else {
    binary_side.ShiftLeft(binary_exponent);
  }
  return Bignum::Compare(decimal_side, binary_side);
}
// Computes an approximation of trimmed * 10^exponent and stores it in |guess|.
// Returns true if the guess is the correct double.
// Returns false, when guess is either correct or the next-lower double.
static bool ComputeGuess(Vector<const char> trimmed, int exponent,
                         double* guess) {
  const int digit_count = trimmed.length();
  // No digits left after trimming: the value is zero.
  if (digit_count == 0) {
    *guess = 0.0;
    return true;
  }
  // Decimal exponent of the value when read as 0.d1d2d3... * 10^magnitude.
  const int magnitude = exponent + digit_count;
  if (magnitude - 1 >= kMaxDecimalPower) {
    *guess = Double::Infinity();
    return true;
  }
  if (magnitude <= kMinDecimalPower) {
    *guess = 0.0;
    return true;
  }
  // Try the exact double fast path first, then the DiyFp approximation.
  if (DoubleStrtod(trimmed, exponent, guess)) {
    return true;
  }
  if (DiyFpStrtod(trimmed, exponent, guess)) {
    return true;
  }
  // An infinite guess has no next-higher candidate, so it is final.
  return *guess == Double::Infinity();
}
// Converts the decimal number buffer * 10^exponent to a double.
// A fast guess is computed first; if it is not known to be correct, the input
// is compared exactly against the guess's upper boundary to choose between
// the guess and its successor (ties go to the even significand).
double Strtod(Vector<const char> buffer, int exponent) {
  char digit_storage[kMaxSignificantDecimalDigits];
  Vector<const char> digits;
  int decimal_exponent;
  TrimAndCut(buffer, exponent, digit_storage, kMaxSignificantDecimalDigits,
             &digits, &decimal_exponent);

  double guess;
  if (ComputeGuess(digits, decimal_exponent, &guess)) {
    return guess;
  }

  // The guess is either correct or one double too low.
  DiyFp upper_boundary = Double(guess).UpperBoundary();
  const int comparison =
      CompareBufferWithDiyFp(digits, decimal_exponent, upper_boundary);
  bool round_up;
  if (comparison != 0) {
    round_up = comparison > 0;
  } else {
    // Exactly half-way: round towards even.
    round_up = (Double(guess).Significand() & 1) != 0;
  }
  if (round_up) {
    return Double(guess).NextDouble();
  }
  return guess;
}
// Converts the decimal number buffer * 10^exponent to a float.
// The value is first approximated as a double. Narrowing that double directly
// could round twice, so the doubles around the guess are narrowed as well and,
// if they disagree, the input is compared exactly against the boundary
// between the two float candidates.
float Strtof(Vector<const char> buffer, int exponent) {
  char digit_storage[kMaxSignificantDecimalDigits];
  Vector<const char> digits;
  int decimal_exponent;
  TrimAndCut(buffer, exponent, digit_storage, kMaxSignificantDecimalDigits,
             &digits, &decimal_exponent);

  double double_guess;
  const bool guess_is_correct =
      ComputeGuess(digits, decimal_exponent, &double_guess);
  const float float_guess = static_cast<float>(double_guess);
  if (float_guess == double_guess) {
    // This shortcut triggers for integer values.
    return float_guess;
  }

  // We must catch double-rounding. Say the double has been rounded up, and is
  // now a boundary of a float, and rounds up again. This is why we have to
  // look at previous too.
  // Example (in decimal numbers):
  //    input: 12349
  //    high-precision (4 digits): 1235
  //    low-precision (3 digits):
  //       when read from input: 123
  //       when rounded from high precision: 124.
  // To do this we simply look at the neighbors of the correct result and see
  // if they would round to the same float. If the guess is not correct we have
  // to look at four values (since two different doubles could be the correct
  // double).
  const double double_above = Double(double_guess).NextDouble();
  const double double_below = Double(double_guess).PreviousDouble();

  const float from_below = static_cast<float>(double_below);
  const float from_guess = float_guess;
  const float from_above = static_cast<float>(double_above);
  float from_top = from_above;
  if (!guess_is_correct) {
    const double double_above2 = Double(double_above).NextDouble();
    from_top = static_cast<float>(double_above2);
  }
  (void) from_guess;  // Mark variable as used.
  ASSERT(from_below <= from_guess && from_guess <= from_above &&
         from_above <= from_top);

  // If the guess doesn't lie near a single-precision boundary we can simply
  // return its float-value.
  if (from_below == from_top) {
    return float_guess;
  }

  ASSERT((from_below != from_guess && from_guess == from_above &&
          from_above == from_top) ||
         (from_below == from_guess && from_guess != from_above &&
          from_above == from_top) ||
         (from_below == from_guess && from_guess == from_above &&
          from_above != from_top));

  // lower and higher are the two possible candidates (in the same way that
  // double_guess was the lower candidate for a double-precision guess).
  const float lower = from_below;
  const float higher = from_top;
  DiyFp upper_boundary;
  if (lower == 0.0f) {
    // Between zero and the smallest float the boundary is half that float.
    const float min_float = 1e-45f;
    upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
  } else {
    upper_boundary = Single(lower).UpperBoundary();
  }

  const int comparison =
      CompareBufferWithDiyFp(digits, decimal_exponent, upper_boundary);
  if (comparison < 0) {
    return lower;
  }
  if (comparison > 0) {
    return higher;
  }
  // Exactly half-way: round towards even.
  if ((Single(lower).Significand() & 1) == 0) {
    return lower;
  }
  return higher;
}
} // namespace double_conversion
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DOUBLE_CONVERSION_STRTOD_H_
#define DOUBLE_CONVERSION_STRTOD_H_
#include "utils.h"
namespace double_conversion {
// Converts the decimal digits in 'buffer' to a double.
// The buffer must only contain digits in the range [0-9]. It must not
// contain a dot or a sign. It must not start with '0', and must not be empty.
// NOTE(review): 'exponent' is presumably the power of ten applied to the
// digits (value = buffer * 10^exponent) -- confirm against the implementation.
double Strtod(Vector<const char> buffer, int exponent);
// Single-precision counterpart of Strtod; returns a float.
// The buffer must only contain digits in the range [0-9]. It must not
// contain a dot or a sign. It must not start with '0', and must not be empty.
// NOTE(review): 'exponent' presumably has the same meaning as for Strtod --
// confirm against the implementation.
float Strtof(Vector<const char> buffer, int exponent);
} // namespace double_conversion
#endif // DOUBLE_CONVERSION_STRTOD_H_
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef DOUBLE_CONVERSION_UTILS_H_
#define DOUBLE_CONVERSION_UTILS_H_
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#ifndef ASSERT
#define ASSERT(condition) \
assert(condition);
#endif
#ifndef UNIMPLEMENTED
#define UNIMPLEMENTED() (abort())
#endif
#ifndef DOUBLE_CONVERSION_NO_RETURN
#ifdef _MSC_VER
#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn)
#else
#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn))
#endif
#endif
#ifndef UNREACHABLE
#ifdef _MSC_VER
void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();
inline void abort_noreturn() { abort(); }
#define UNREACHABLE() (abort_noreturn())
#else
#define UNREACHABLE() (abort())
#endif
#endif
// Double operations detection based on target architecture.
// Linux uses an 80-bit wide floating point stack on x86. This induces double
// rounding, which in turn leads to wrong results.
// An easy way to test if the floating-point operations are correct is to
// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
// the result is equal to 89255e-22.
// The best way to test this, is to create a division-function and to compare
// the output of the division with the expected result. (Inlining must be
// disabled.)
// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
#if defined(_M_X64) || defined(__x86_64__) || \
defined(__ARMEL__) || defined(__avr32__) || \
defined(__hppa__) || defined(__ia64__) || \
defined(__mips__) || \
defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
defined(__SH4__) || defined(__alpha__) || \
defined(_MIPS_ARCH_MIPS32R2) || \
defined(__AARCH64EL__) || defined(__aarch64__) || \
defined(__riscv)
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#elif defined(__mc68000__)
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#if defined(_WIN32)
// Windows uses a 64bit wide floating point stack.
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
#else
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
#endif // _WIN32
#else
#error Target architecture was not detected as supported by Double-Conversion.
#endif
#if defined(__GNUC__)
#define DOUBLE_CONVERSION_UNUSED __attribute__((unused))
#else
#define DOUBLE_CONVERSION_UNUSED
#endif
#if defined(_WIN32) && !defined(__MINGW32__)
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t; // NOLINT
typedef unsigned short uint16_t; // NOLINT
typedef int int32_t;
typedef unsigned int uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
// intptr_t and friends are defined in crtdefs.h through stdio.h.
#else
#include <stdint.h>
#endif
typedef uint16_t uc16;
// The following macro works on both 32 and 64-bit platforms.
// Usage: instead of writing 0x1234567890123456
// write UINT64_2PART_C(0x12345678,90123456);
#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
// The expression ARRAY_SIZE(a) is a compile-time constant of type
// size_t which represents the number of elements of the given
// array. You should only use ARRAY_SIZE on statically allocated
// arrays.
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) \
((sizeof(a) / sizeof(*(a))) / \
static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
#endif
// A macro to disallow the evil copy constructor and operator= functions
// This should be used in the private: declarations for a class
#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
void operator=(const TypeName&)
#endif
// A macro to disallow all the implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
//
// This should be used in the private: declarations for a class
// that wants to prevent anyone from instantiating it. This is
// especially useful for classes containing only static methods.
#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName(); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
#endif
namespace double_conversion {
static const int kCharSize = sizeof(char);
// Yields the larger of the two arguments. When neither argument compares
// less than the other, the first one (a) is returned.
template <typename T>
static T Max(T a, T b) {
  if (a < b) {
    return b;
  }
  return a;
}
// Yields the smaller of the two arguments. When neither argument compares
// less than the other, the second one (b) is returned.
template <typename T>
static T Min(T a, T b) {
  if (a < b) {
    return a;
  }
  return b;
}
// Returns strlen(string) as an int. In builds where ASSERT is active, it
// checks that the length survives the round trip through int unchanged.
inline int StrLength(const char* string) {
  const size_t raw_length = strlen(string);
  const int narrowed = static_cast<int>(raw_length);
  ASSERT(raw_length == static_cast<size_t>(narrowed));
  return narrowed;
}
// A simplified version of V8's Vector class: a (pointer, length) pair that
// refers to storage supplied by the caller. The class itself never allocates
// or frees the elements it points at.
template <typename T>
class Vector {
 public:
  // Creates an empty vector that refers to no storage.
  Vector() : start_(NULL), length_(0) {}

  // Wraps 'len' elements beginning at 'data'. A NULL 'data' is only
  // acceptable together with a zero length.
  Vector(T* data, int len) : start_(data), length_(len) {
    ASSERT(len == 0 || (len > 0 && data != NULL));
  }

  // Returns a vector sharing this one's backing storage and covering the
  // half-open index range ['from', 'to').
  Vector<T> SubVector(int from, int to) {
    ASSERT(to <= length_);
    ASSERT(from < to);
    ASSERT(0 <= from);
    T* const sub_start = start() + from;
    const int sub_length = to - from;
    return Vector<T>(sub_start, sub_length);
  }

  // Number of elements.
  int length() const { return length_; }

  // True when the vector holds no elements.
  bool is_empty() const { return 0 == length_; }

  // Pointer to the first element of the underlying storage.
  T* start() const { return start_; }

  // Element access; the index is bounds-checked by ASSERT.
  T& operator[](int index) const {
    ASSERT(0 <= index && index < length_);
    return *(start_ + index);
  }

  // First and last element (no bounds checking is performed here).
  T& first() { return *start_; }
  T& last() { return *(start_ + (length_ - 1)); }

 private:
  T* start_;
  int length_;
};
// Helper class for building result strings in a character buffer. The
// purpose of the class is to use safe operations that checks the
// buffer bounds on all operations in debug mode.
class StringBuilder {
public:
StringBuilder(char* buffer, int buffer_size)
: buffer_(buffer, buffer_size), position_(0) { }
~StringBuilder() { if (!is_finalized()) Finalize(); }
int size() const { return buffer_.length(); }
// Get the current position in the builder.
int position() const {
ASSERT(!is_finalized());
return position_;
}
// Reset the position.
void Reset() { position_ = 0; }
// Add a single character to the builder. It is not allowed to add
// 0-characters; use the Finalize() method to terminate the string
// instead.
void AddCharacter(char c) {
ASSERT(c != '\0');
ASSERT(!is_finalized() && position_ < buffer_.length());
buffer_[position_++] = c;
}
// Add an entire string to the builder. Uses strlen() internally to
// compute the length of the input string.
void AddString(const char* s) {
AddSubstring(s, StrLength(s));
}
// Add the first 'n' characters of the given string 's' to the
// builder. The input string must have enough characters.
void AddSubstring(const char* s, int n) {
ASSERT(!is_finalized() && position_ + n < buffer_.length());
ASSERT(static_cast<size_t>(n) <= strlen(s));
memmove(&buffer_[position_], s, n * kCharSize);
position_ += n;
}
// Add character padding to the builder. If count is non-positive,
// nothing is added to the builder.
void AddPadding(char c, int count) {
for (int i = 0; i < count; i++) {
AddCharacter(c);
}
}
// Finalize the string by 0-terminating it and returning the buffer.
char* Finalize() {
ASSERT(!is_finalized() && position_ < buffer_.length());
buffer_[position_] = '\0';
// Make sure nobody managed to add a 0-character to the
// buffer while building the string.
ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
position_ = -1;
ASSERT(is_finalized());
return buffer_.start();
}
private:
Vector<char> buffer_;
int position_;
bool is_finalized() const { return position_ < 0; }
DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
};
// The type-based aliasing rule allows the compiler to assume that pointers of
// different types (for some definition of different) never alias each other.
// Thus the following code does not work:
//
// float f = foo();
// int fbits = *(int*)(&f);
//
// The compiler 'knows' that the int pointer can't refer to f since the types
// don't match, so the compiler may cache f in a register, leaving random data
// in fbits. Using C++ style casts makes no difference, however a pointer to
// char data is assumed to alias any other pointer. This is the 'memcpy
// exception'.
//
// BitCast uses the memcpy exception to move the bits from a variable of one
// type to a variable of another type. Of course the end result is likely to
// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005)
// will completely optimize BitCast away.
//
// There is an additional use for BitCast.
// Recent gccs will warn when they see casts that may result in breakage due to
// the type-based aliasing rule. If you have checked that there is no breakage
// you can use BitCast to cast one pointer type to another. This confuses gcc
// enough that it can no longer see that you have cast one pointer type to
// another thus avoiding the warning.
// Reinterprets the object representation of 'source' as a value of type
// Dest by copying its bytes into a fresh Dest.
template <class Dest, class Source>
inline Dest BitCast(const Source& source) {
  // Compile-time size check: the array bound becomes -1, and compilation
  // fails, whenever Dest and Source differ in size.
  DOUBLE_CONVERSION_UNUSED
  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];

  Dest result;
  memmove(&result, &source, sizeof(result));
  return result;
}
// Pointer overload: converts the pointer to its uintptr_t value and bit-casts
// that integer to Dest through the by-reference overload.
template <class Dest, class Source>
inline Dest BitCast(Source* source) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(source);
  return BitCast<Dest>(address);
}
} // namespace double_conversion
#endif // DOUBLE_CONVERSION_UTILS_H_
#include "ersatz_progress.hh"
#include <algorithm>
#include <ostream>
#include <limits>
#include <string>
namespace util {
namespace { const unsigned char kWidth = 100; }
const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
// Constructs a progress object that prints nothing: out_ is NULL and both
// next_ and complete_ are the largest uint64_t, so ordinary increments do not
// reach the milestone threshold.
// stones_written_ is initialised as well so that no member is left
// indeterminate (the move constructor copies every member).
ErsatzProgress::ErsatzProgress()
    : current_(0),
      next_(std::numeric_limits<uint64_t>::max()),
      complete_(next_),
      stones_written_(0),
      out_(NULL) {}
// Completes the progress display on destruction, but only while an output
// stream is still attached.
ErsatzProgress::~ErsatzProgress() {
  if (out_ != NULL) {
    Finished();
  }
}
// Starts a progress display for 'complete' units of work written to 'to'.
// With a stream, the optional message and the percentage banner are printed
// immediately. Without one, next_ is pushed to the maximum uint64_t.
ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
    : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
  if (out_) {
    if (!message.empty()) {
      *out_ << message << '\n';
    }
    *out_ << kProgressBanner;
  } else {
    next_ = std::numeric_limits<uint64_t>::max();
  }
}
// Called when current_ has reached next_. Prints as many '*' marks as are
// still owed for the current position and then either schedules the next
// threshold or, once all kWidth marks are out, ends the line and detaches
// the stream.
void ErsatzProgress::Milestone() {
  if (!out_) {
    // Silent mode: just restart the counter.
    current_ = 0;
    return;
  }
  if (!complete_) return;

  // Number of marks that should be visible now, capped at the bar width.
  const uint64_t scaled = (current_ * kWidth) / complete_;
  unsigned char stone = std::min(static_cast<uint64_t>(kWidth), scaled);
  while (stones_written_ < stone) {
    (*out_) << '*';
    ++stones_written_;
  }

  if (stone != kWidth) {
    // Smallest position at which the next mark becomes due (rounded up).
    next_ = std::max(next_, ((stone + 1) * complete_ + kWidth - 1) / kWidth);
    return;
  }

  // Bar is full: finish the line and stop producing output.
  (*out_) << std::endl;
  next_ = std::numeric_limits<uint64_t>::max();
  out_ = NULL;
}
} // namespace util
#ifndef UTIL_ERSATZ_PROGRESS_H
#define UTIL_ERSATZ_PROGRESS_H
#include <iostream>
#include <string>
#include <stdint.h>
// Ersatz version of boost::progress so core language model doesn't depend on
// boost. Also adds option to print nothing.
namespace util {
extern const char kProgressBanner[];
// Text progress bar. The counter is advanced with ++, += or Set(); whenever
// it reaches the next threshold, Milestone() is invoked to update the output.
class ErsatzProgress {
  public:
    // No output.
    ErsatzProgress();

    // Null means no output. The null value is useful for passing along the
    // ostream pointer from another caller.
    explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");

#if __cplusplus >= 201103L
    // Takes over the state of 'from' and leaves it without a stream and with
    // its threshold at the maximum uint64_t.
    ErsatzProgress(ErsatzProgress &&from) noexcept
        : current_(from.current_),
          next_(from.next_),
          complete_(from.complete_),
          stones_written_(from.stones_written_),
          out_(from.out_) {
      from.next_ = static_cast<uint64_t>(-1);
      from.out_ = nullptr;
    }
#endif

    ~ErsatzProgress();

    // Advances the counter by one.
    ErsatzProgress &operator++() {
      ++current_;
      if (current_ >= next_) Milestone();
      return *this;
    }

    // Advances the counter by 'amount'.
    ErsatzProgress &operator+=(uint64_t amount) {
      current_ += amount;
      if (current_ >= next_) Milestone();
      return *this;
    }

    // Moves the counter to an absolute position.
    void Set(uint64_t to) {
      current_ = to;
      if (current_ >= next_) Milestone();
    }

    // Jumps the counter to the completion value.
    void Finished() { Set(complete_); }

  private:
    void Milestone();

    uint64_t current_, next_, complete_;
    unsigned char stones_written_;
    std::ostream *out_;

    // noncopyable
    ErsatzProgress(const ErsatzProgress &other);
    ErsatzProgress &operator=(const ErsatzProgress &other);
};
} // namespace util
#endif // UTIL_ERSATZ_PROGRESS_H
#include "exception.hh"
#ifdef __GXX_RTTI
#include <typeinfo>
#endif
#include <cerrno>
#include <cstring>
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#endif
namespace util {
// Default constructor: the body is empty, so what_ is default-constructed.
Exception::Exception() throw() {}
// Out-of-line definition of the virtual destructor; nothing to release here.
Exception::~Exception() throw() {}
// Prefixes the message with where and why the exception was thrown.
//
// file, line   source location; always written as "file:line".
// func         enclosing function name, or NULL to omit " in <func> threw ".
// child_name   textual name of the exception type, or NULL to fall back to
//              the RTTI name (or "an exception" when RTTI is unavailable).
// condition    text of the failed condition, or NULL to omit it.
//
// Any text already accumulated in what_ is kept and appended after the
// location line.
void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
  /* The child class might have set some text, but we want this to come first.
   * Another option would be passing this information to the constructor, but
   * then child classes would have to accept constructor arguments and pass
   * them down.
   */
  std::string old_text;
  what_.swap(old_text);
  what_ << file << ':' << line;
  if (func) what_ << " in " << func << " threw ";
  if (child_name) {
    what_ << child_name;
  } else {
#ifdef __GXX_RTTI
    // Dereference this so typeid reports the dynamic (most-derived) type;
    // typeid(this) would always name the static pointer type Exception*.
    what_ << typeid(*this).name();
#else
    what_ << "an exception";
#endif
  }
  if (condition) {
    what_ << " because `" << condition << '\'';
  }
  what_ << ".\n";
  what_ << old_text;
}
namespace {
// Two overloads so that whichever flavour of strerror_r the platform
// provides (int-returning or char*-returning) resolves to the right handler.
#ifdef __GNUC__
const char *HandleStrerror(int ret, const char *buf) __attribute__ ((unused));
const char *HandleStrerror(const char *ret, const char * /*buf*/) __attribute__ ((unused));
#endif
// At least one of these functions will not be called.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-function"
#endif
// The XOPEN version: a zero return means the text is in buf; any other
// return yields NULL.
const char *HandleStrerror(int ret, const char *buf) {
  if (ret != 0) {
    return NULL;
  }
  return buf;
}
// The GNU version: the returned pointer is the message; buf is ignored.
const char *HandleStrerror(const char *ret, const char * /*buf*/) {
  return ret;
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif
} // namespace
// Records errno at construction and, when a textual description is
// available, starts the message with it followed by a space.
ErrnoException::ErrnoException() throw() : errno_(errno) {
  char buf[200];
  buf[0] = '\0';
#if defined(sun) || defined(_WIN32) || defined(_WIN64)
  const char *description = strerror(errno);
#else
  // HandleStrerror copes with both the XOPEN and the GNU strerror_r.
  const char *description = HandleStrerror(strerror_r(errno, buf, sizeof(buf)), buf);
#endif
  if (description != NULL) {
    *this << description << ' ';
  }
}
// Out-of-line destructor with an empty body.
ErrnoException::~ErrnoException() throw() {}
// Out-of-line constructor and destructor with empty bodies; the exception
// adds no text of its own at construction.
OverflowException::OverflowException() throw() {}
OverflowException::~OverflowException() throw() {}
#if defined(_WIN32) || defined(_WIN64)
// Captures GetLastError() at construction and starts the message with the
// numeric code. If the system can format a description it is appended;
// otherwise the error raised by the formatting attempt is reported as well.
WindowsException::WindowsException() throw() {
  unsigned int last_error = GetLastError();
  char error_msg[256] = "";
  const bool formatted = 0 != FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error, LANG_NEUTRAL, error_msg, sizeof(error_msg), NULL);
  if (formatted) {
    *this << "Windows error " << last_error << ": " << error_msg;
  } else {
    *this << "Windows error " << GetLastError() << " while formatting Windows error " << last_error << ". ";
  }
}
// Out-of-line destructor with an empty body.
WindowsException::~WindowsException() throw() {}
#endif
} // namespace util
#ifndef UTIL_EXCEPTION_H
#define UTIL_EXCEPTION_H
#include "string_stream.hh"
#include <exception>
#include <limits>
#include <string>
#include <stdint.h>
namespace util {
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
// Base class for the exceptions in this namespace. The message text is
// accumulated in what_ (a StringStream) through the free operator<< template
// declared as a friend below, and exposed through what().
class Exception : public std::exception {
public:
Exception() throw();
virtual ~Exception() throw();
// Returns the message accumulated so far.
// NOTE(review): the pointer is taken from what_.str().c_str(); this is only
// safe if StringStream::str() returns a reference to storage owned by what_
// rather than a temporary -- confirm in string_stream.hh.
const char *what() const throw() { return what_.str().c_str(); }
// For use by the UTIL_THROW macros.
void SetLocation(
const char *file,
unsigned int line,
const char *func,
const char *child_name,
const char *condition);
private:
// The streaming operator needs access to what_.
template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
// This helps restrict operator<< defined below.
template <class T> struct ExceptionTag {
typedef T Identity;
};
// Accumulated message text.
StringStream what_;
};
/* This implements the normal operator<< for Exception and all its children.
 * SFINAE means it only applies to Exception. Think of this as an ersatz
 * boost::enable_if.
 *
 * The return type is spelled through Except::ExceptionTag, so the overload
 * is only viable when Except provides that member template. The data is
 * appended to e.what_ and e itself is returned, which allows chaining.
 */
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
e.what_ << data;
return e;
}
#ifdef __GNUC__
#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
#else
#ifdef _WIN32
#define UTIL_FUNC_NAME __FUNCTION__
#else
#define UTIL_FUNC_NAME NULL
#endif
#endif
/* Create an instance of Exception, add the message Modify, and throw it.
* Modify is appended to the what() message and can contain << for ostream
* operations.
*
* do .. while kludge to swallow trailing ; character
* http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
* Arg can be a constructor argument to the exception.
*/
#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
Exception UTIL_e Arg; \
UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
UTIL_e << Modify; \
throw UTIL_e; \
} while (0)
#define UTIL_THROW_ARG(Exception, Arg, Modify) \
UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
#define UTIL_THROW(Exception, Modify) \
UTIL_THROW_BACKEND(NULL, Exception, , Modify);
#define UTIL_THROW2(Modify) \
UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
#if __GNUC__ >= 3
#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
#else
#define UTIL_UNLIKELY(x) (x)
#endif
#if __GNUC__ >= 3
#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1)
#else
#define UTIL_LIKELY(x) (x)
#endif
#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
if (UTIL_UNLIKELY(Condition)) { \
UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
} \
} while (0)
#define UTIL_THROW_IF(Condition, Exception, Modify) \
UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
#define UTIL_THROW_IF2(Condition, Modify) \
UTIL_THROW_IF_ARG(Condition, util::Exception, , Modify)
// Exception that records errno and adds it to the message.
class ErrnoException : public Exception {
public:
// Captures the value of errno at the time of construction.
ErrnoException() throw();
virtual ~ErrnoException() throw();
// Returns the errno value captured by the constructor.
int Error() const throw() { return errno_; }
private:
// errno as it was when the exception was constructed.
int errno_;
};
// Thrown when a file was not there or could not be opened for some reason.
// Adds no state or message text of its own; both special members are empty.
class FileOpenException : public Exception {
public:
FileOpenException() throw() {}
~FileOpenException() throw() {}
};
// Utilities for overflow checking.
// OverflowException is the type thrown by CheckOverflowInternal when a
// 64-bit value does not fit in std::size_t.
class OverflowException : public Exception {
public:
OverflowException() throw();
~OverflowException() throw();
};
// Narrows value to std::size_t, throwing OverflowException when it exceeds
// the largest std::size_t. The len parameter is not used in the body; it
// exists so that the 8-byte case can be specialized separately.
// The condition text is stringized into the exception message by
// UTIL_THROW_IF, so it is part of the runtime output.
template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
return static_cast<std::size_t>(value);
}
// Specialization for an 8-byte std::size_t: every uint64_t value fits, so
// the value is passed through without a range check.
template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
  return static_cast<std::size_t>(value);
}
// Converts a 64-bit value to std::size_t, selecting the checking or the
// pass-through implementation according to sizeof(std::size_t).
inline std::size_t CheckOverflow(uint64_t value) {
  const std::size_t checked = CheckOverflowInternal<sizeof(std::size_t)>(value);
  return checked;
}
#if defined(_WIN32) || defined(_WIN64)
/* Thrown for Windows specific operations.
 * Only compiled when _WIN32 or _WIN64 is defined. The constructor and
 * destructor are declared here and defined out of line.
 */
class WindowsException : public Exception {
public:
WindowsException() throw();
~WindowsException() throw();
};
#endif
} // namespace util
#endif // UTIL_EXCEPTION_H
#ifndef UTIL_FAKE_OSTREAM_H
#define UTIL_FAKE_OSTREAM_H
#include "float_to_string.hh"
#include "integer_to_string.hh"
#include "string_piece.hh"
#include <cassert>
#include <limits>
#include <stdint.h>
namespace util {
/* Like std::ostream but without being incredibly slow.
* Supports most of the built-in types except for long double.
*
* The FakeOStream class is intended to be inherited from. The inheriting class
* should provide:
* public:
* Derived &flush();
* Derived &write(const void *data, std::size_t length);
*
* private: or protected:
* friend class FakeOStream;
* char *Ensure(std::size_t amount);
* void AdvanceTo(char *to);
*
* The Ensure function makes enough space for an in-place write and returns
* where to write. The AdvanceTo function happens after the write, saying how
* much was actually written.
*
* Precondition:
* amount <= kToStringMaxBytes for in-place writes.
*/
template <class Derived> class FakeOStream {
public:
FakeOStream() {}
// This also covers std::string and char*
Derived &operator<<(StringPiece str) {
return C().write(str.data(), str.size());
}
// Handle integers by size and signedness.
private:
// Always yields Derived; naming Arg in the return type of the integer
// operator<< makes that overload drop out when Coerce<From>::To is missing.
template <class Arg> struct EnableIfKludge {
typedef Derived type;
};
// Maps an integer type to the fixed-width type of the same size and
// signedness. The primary template has no To member, so types that are not
// 2-, 4- or 8-byte integers are not matched by the template operator<<.
template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed, bool IsInteger = std::numeric_limits<From>::is_integer> struct Coerce {};
template <class From> struct Coerce<From, 2, false, true> { typedef uint16_t To; };
template <class From> struct Coerce<From, 4, false, true> { typedef uint32_t To; };
template <class From> struct Coerce<From, 8, false, true> { typedef uint64_t To; };
template <class From> struct Coerce<From, 2, true, true> { typedef int16_t To; };
template <class From> struct Coerce<From, 4, true, true> { typedef int32_t To; };
template <class From> struct Coerce<From, 8, true, true> { typedef int64_t To; };
public:
// Integer overload: converts to the matching fixed-width type and formats it.
template <class From> typename EnableIfKludge<typename Coerce<From>::To>::type &operator<<(const From value) {
return CallToString(static_cast<typename Coerce<From>::To>(value));
}
// Character types that get copied as bytes instead of displayed as integers.
Derived &operator<<(char val) { return put(val); }
Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
// Booleans are written as the single character '0' or '1'.
Derived &operator<<(bool val) { return put(val + '0'); }
// enums will fall back to int but are not caught by the template.
Derived &operator<<(int val) { return CallToString(static_cast<typename Coerce<int>::To>(val)); }
Derived &operator<<(float val) { return CallToString(val); }
Derived &operator<<(double val) { return CallToString(val); }
// This is here to catch all the other pointer types.
Derived &operator<<(const void *value) { return CallToString(value); }
// This is here because the above line also catches const char*.
Derived &operator<<(const char *value) { return *this << StringPiece(value); }
Derived &operator<<(char *value) { return *this << StringPiece(value); }
// Writes a single byte in place: reserves one byte, stores it, then advances.
Derived &put(char val) {
char *c = C().Ensure(1);
*c = val;
C().AdvanceTo(++c);
return C();
}
// Returns val unchanged.
char widen(char val) const { return val; }
private:
// References to derived class for convenience.
Derived &C() {
return *static_cast<Derived*>(this);
}
const Derived &C() const {
return *static_cast<const Derived*>(this);
}
// This is separate to prevent an infinite loop if the compiler considers
// types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
// Reserves ToStringBuf<T>::kBytes, lets ToString write into that space and
// advances to the pointer ToString returns.
template <class T> Derived &CallToString(const T value) {
C().AdvanceTo(ToString(value, C().Ensure(ToStringBuf<T>::kBytes)));
return C();
}
};
} // namespace
#endif // UTIL_FAKE_OSTREAM_H
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64
#include "file.hh"
#include "exception.hh"
#include <algorithm>
#include <cstdlib>
#include <cstdio>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <sstream>
#include <cassert>
#include <cerrno>
#include <climits>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
#if defined(__MINGW32__)
#include <windows.h>
#include <unistd.h>
#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1. Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
#elif defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#else
#include <unistd.h>
#endif
namespace util {
// Closes the descriptor unless it is the sentinel -1. A failing close() is
// treated as fatal: it is reported on stderr and the process aborts.
scoped_fd::~scoped_fd() {
  if (fd_ == -1) return;
  if (close(fd_) != 0) {
    std::cerr << "Could not close file " << fd_ << std::endl;
    std::abort();
  }
}
// Closes 'file' when it is non-NULL. A failing fclose() is treated as fatal:
// it is reported on stderr and the process aborts.
void scoped_FILE_closer::Close(std::FILE *file) {
  if (!file) return;
  if (std::fclose(file) != 0) {
    std::cerr << "Could not close file " << file << std::endl;
    std::abort();
  }
}
// Remembers the descriptor and a guessed file name for it, and adds
// "in <name> " to the message.
// Note that ErrnoException records errno before NameFromFD is called.
FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
  *this << "in ";
  *this << name_guess_;
  *this << ' ';
}
// Out-of-line destructor with an empty body.
FDException::~FDException() throw() {}
// Starts the message with the fixed text "End of file"; throw sites append
// further detail with operator<<.
EndOfFileException::EndOfFileException() throw() {
*this << "End of file";
}
// Out-of-line destructor with an empty body.
EndOfFileException::~EndOfFileException() throw() {}
// True when 'path' is one of the two spellings that denote standard input:
// "-" or "/dev/stdin".
bool InputFileIsStdin(StringPiece path) {
  if (path == "-") {
    return true;
  }
  return path == "/dev/stdin";
}
// True when 'path' is one of the two spellings that denote standard output:
// "-" or "/dev/stdout".
bool OutputFileIsStdout(StringPiece path) {
  if (path == "-") {
    return true;
  }
  return path == "/dev/stdout";
}
// Opens 'name' read-only and returns the file descriptor.
// On Windows the file is opened in binary mode through _open.
// Throws ErrnoException (message "while opening <name>") when the open call
// returns -1. The condition text is stringized into the exception message by
// UTIL_THROW_IF, so it is part of the runtime output.
int OpenReadOrThrow(const char *name) {
int ret;
#if defined(_WIN32) || defined(_WIN64)
UTIL_THROW_IF(-1 == (ret = _open(name, _O_BINARY | _O_RDONLY)), ErrnoException, "while opening " << name);
#else
UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name);
#endif
return ret;
}
// Creates 'name' (truncating it if it already exists), opens it for reading
// and writing, and returns the file descriptor.
// The POSIX branch requests mode rw-r--r-- (S_IRUSR | S_IWUSR | S_IRGRP |
// S_IROTH); the Windows branch uses binary mode with _S_IREAD | _S_IWRITE.
// Throws ErrnoException (message "while creating <name>") when the open call
// returns -1.
int CreateOrThrow(const char *name) {
int ret;
#if defined(_WIN32) || defined(_WIN64)
UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
#else
UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
#endif
return ret;
}
// Returns the size in bytes of the file behind fd, or kBadSize when it
// cannot be determined.
// - MinGW and POSIX branches: use fstat (fstat64 when OS_ANDROID is defined);
//   kBadSize is returned when the call fails, or when the reported size is
//   zero and fd is not a regular file.
// - Other Windows builds: use _filelengthi64; kBadSize when it returns -1.
uint64_t SizeFile(int fd) {
#if defined __MINGW32__
struct stat sb;
// Does this handle 64-bit?
int ret = fstat(fd, &sb);
if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
return sb.st_size;
#elif defined(_WIN32) || defined(_WIN64)
__int64 ret = _filelengthi64(fd);
return (ret == -1) ? kBadSize : ret;
#else // Not windows.
#ifdef OS_ANDROID
struct stat64 sb;
int ret = fstat64(fd, &sb);
#else
struct stat sb;
int ret = fstat(fd, &sb);
#endif
if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
return sb.st_size;
#endif
}
// Like SizeFile, but throws FDException (constructed with fd, message
// "Failed to size") instead of returning kBadSize.
uint64_t SizeOrThrow(int fd) {
uint64_t ret = SizeFile(fd);
UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
return ret;
}
// Resizes the file behind fd to 'to' bytes.
// The preprocessor branches only select the function name and the type of
// ret (ftruncate, _chsize_s or ftruncate64); the shared argument list
// "(fd, to);" follows the #endif and completes the statement.
// Throws FDException (constructed with fd) when the call returns non-zero.
void ResizeOrThrow(int fd, uint64_t to) {
#if defined __MINGW32__
// Does this handle 64-bit?
int ret = ftruncate
#elif defined(_WIN32) || defined(_WIN64)
errno_t ret = _chsize_s
#elif defined(OS_ANDROID)
int ret = ftruncate64
#else
int ret = ftruncate
#endif
(fd, to);
UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
}
// Punches a hole of 'size' bytes at 'offset' in the file behind fd while
// keeping the file size (fallocate with FALLOC_FL_PUNCH_HOLE |
// FALLOC_FL_KEEP_SIZE).
// Only available when building for Linux with both flags defined; throws
// FDException when fallocate returns -1. On every other configuration the
// function unconditionally throws UnsupportedOSException.
void HolePunch(int fd, uint64_t offset, uint64_t size) {
#if defined(__linux__) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
UTIL_THROW_IF_ARG(-1 == fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, size), FDException, (fd), "in punching a hole at " << offset << " for " << size << " bytes.");
#else
UTIL_THROW(UnsupportedOSException, "fallocate hole punching requires Linux and glibc >= 2.18");
#endif
}
namespace {
// Caps the size of a single read/write request on platforms whose
// read/write/pread/pwrite only support up to 2^31; elsewhere the size is
// returned unchanged.
std::size_t GuardLarge(std::size_t size) {
  // The following operating systems have broken read/write/pread/pwrite that
  // only supports up to 2^31.
  // OS X man pages claim to support 64-bit, but Kareem M. Darwish had problems
  // building with larger files, so APPLE is also here.
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
  const std::size_t limit = static_cast<std::size_t>(INT_MAX);
  if (size < limit) {
    return size;
  }
  return limit;
#else
  return size;
#endif
}
}
#if defined(_WIN32) || defined(_WIN64)
namespace {
const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
} // namespace
#endif
// Performs one read of at most 'amount' bytes from fd into 'to' and returns
// the number of bytes actually read (0 at end of file).
//
// POSIX: calls read(), retrying while it is interrupted (EINTR); throws
// FDException when read() fails.
// Windows: calls ReadFile(); if that fails with ERROR_NOT_ENOUGH_MEMORY the
// read is retried once with a small request. Throws WindowsException on any
// other failure or if the retry fails too.
std::size_t PartialRead(int fd, void *to, std::size_t amount) {
#if defined(_WIN32) || defined(_WIN64)
  DWORD ret;
  HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
  DWORD larger_size = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, amount));
  // Received reports that 31346 worked but higher values did not. This rounds
  // down to the nearest multiple of 4096, the page size.
  // The retry size is additionally capped at the original request so the
  // fallback can never write past the end of the caller's buffer.
  DWORD smaller_size = std::min<DWORD>(28672, larger_size);
  if (!ReadFile(file_handle, to, larger_size, &ret, NULL))
  {
    DWORD last_error = GetLastError();
    if (last_error != ERROR_NOT_ENOUGH_MEMORY || !ReadFile(file_handle, to, smaller_size, &ret, NULL)) {
      UTIL_THROW(WindowsException, "Windows error in ReadFile.");
    }
  }
#else
  errno = 0;
  ssize_t ret;
  do {
    ret = read(fd, to, GuardLarge(amount));
  } while (ret == -1 && errno == EINTR);
  UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
#endif
  return static_cast<std::size_t>(ret);
}
// Reads exactly 'amount' bytes from fd into to_void, issuing as many partial
// reads as necessary. Throws EndOfFileException if the file ends first.
void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
  uint8_t *cursor = static_cast<uint8_t*>(to_void);
  while (amount != 0) {
    const std::size_t ret = PartialRead(fd, cursor, amount);
    UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
    cursor += ret;
    amount -= ret;
  }
}
// Reads up to 'amount' bytes from fd into to_void, stopping early only at
// end of file. Returns the number of bytes read, which is 'amount' unless
// the file ended first.
std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
  uint8_t *cursor = static_cast<uint8_t*>(to_void);
  std::size_t left = amount;
  while (left != 0) {
    const std::size_t got = PartialRead(fd, cursor, left);
    if (got == 0) {
      // End of file: report what was read before it.
      return amount - left;
    }
    cursor += got;
    left -= got;
  }
  return amount;
}
void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
const uint8_t *data = static_cast<const uint8_t*>(data_void);
while (size) {
#if defined(_WIN32) || defined(_WIN64)
int ret;
#else
ssize_t ret;
#endif
errno = 0;
do {
ret =
#if defined(_WIN32) || defined(_WIN64)
_write
#else
write
#endif
(fd, data, GuardLarge(size));
} while (ret == -1 && errno == EINTR);
UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
data += ret;
size -= ret;
}
}
// Write size bytes of data to the stdio stream as a single item.  A zero-byte
// request is a no-op; a short write throws ErrnoException.
void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
  if (size != 0) {
    UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
  }
}
/* Read exactly size bytes from fd starting at absolute offset off into
 * to_void, looping until the whole range has been read.
 *
 * POSIX builds use pread (pread64 on Android); Windows builds use ReadFile
 * with the offset supplied through an OVERLAPPED structure.
 * The POSIX path throws EndOfFileException if the file ends early and
 * FDException on other errors; the Windows path throws WindowsException when
 * ReadFile fails.
 */
void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
uint8_t *to = static_cast<uint8_t*>(to_void);
while (size) {
#if defined(_WIN32) || defined(_WIN64)
/* BROKEN: changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
// size_t might be 64-bit. DWORD is always 32.
DWORD reading = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
DWORD ret;
// The 64-bit offset is passed as two 32-bit halves of OVERLAPPED.
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(OVERLAPPED));
overlapped.Offset = static_cast<DWORD>(off);
overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), WindowsException, "ReadFile failed for offset " << off);
#else
ssize_t ret;
errno = 0;
ret =
#ifdef OS_ANDROID
pread64
#else
pread
#endif
(fd, to, GuardLarge(size), off);
if (ret <= 0) {
// A read interrupted by a signal is simply retried.
if (ret == -1 && errno == EINTR) continue;
UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
}
#endif
// Advance past the bytes just read; both platform branches set ret.
size -= ret;
off += ret;
to += ret;
}
}
/* Write exactly size bytes from from_void to fd starting at absolute offset
 * off, looping until everything has been written.
 *
 * POSIX builds use pwrite (pwrite64 on Android); Windows builds use WriteFile
 * with the offset supplied through an OVERLAPPED structure.
 * The POSIX path throws EndOfFileException if a write returns 0 and
 * FDException on other errors; the Windows path throws Exception when
 * WriteFile fails.
 * NOTE(review): ErsatzPRead throws WindowsException for the equivalent
 * Windows failure; confirm whether plain Exception is intended here.
 */
void ErsatzPWrite(int fd, const void *from_void, std::size_t size, uint64_t off) {
const uint8_t *from = static_cast<const uint8_t*>(from_void);
while(size) {
#if defined(_WIN32) || defined(_WIN64)
/* Changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() */
// size_t might be 64-bit. DWORD is always 32.
DWORD writing = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
DWORD ret;
// The 64-bit offset is passed as two 32-bit halves of OVERLAPPED.
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(OVERLAPPED));
overlapped.Offset = static_cast<DWORD>(off);
overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
UTIL_THROW_IF(!WriteFile((HANDLE)_get_osfhandle(fd), from, writing, &ret, &overlapped), Exception, "WriteFile failed for offset " << off);
#else
ssize_t ret;
errno = 0;
ret =
#ifdef OS_ANDROID
pwrite64
#else
pwrite
#endif
(fd, from, GuardLarge(size), off);
if (ret <= 0) {
// A write interrupted by a signal is simply retried.
if (ret == -1 && errno == EINTR) continue;
UTIL_THROW_IF(ret == 0, EndOfFileException, " for writing " << size << " bytes at " << off << " from " << NameFromFD(fd));
UTIL_THROW_ARG(FDException, (fd), "while writing " << size << " bytes at offset " << off);
}
#endif
// Advance past the bytes just written; both platform branches set ret.
size -= ret;
off += ret;
from += ret;
}
}
// Flush fd to stable storage with fsync, throwing FDException on failure.
// Compiles to a no-op on Windows.
void FSyncOrThrow(int fd) {
#if defined(_WIN32) || defined(_WIN64)
  // Apparently windows doesn't have fsync?
  (void)fd;
#else
  UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
#endif
}
namespace {
#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
// Static assert for 64-bit off_t size.  Only the 8-byte specialization is
// defined, so any other sizeof(off_t) makes the typedef below ill-formed.
template <unsigned> struct CheckOffT;
template <> struct CheckOffT<8> {
  struct True {};
};
// If there's a compiler error on the next line, then off_t isn't 64 bit.  And
// that makes me a sad panda.
typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
#endif

// Can't we all just get along?
// Seek fd by off relative to whence using whichever lseek variant the
// platform provides.  Returns the resulting offset; throws FDException if the
// seek fails.
uint64_t InternalSeek(int fd, int64_t off, int whence) {
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
  typedef __int64 Offset;
  const Offset ret = _lseeki64(fd, off, whence);
#elif defined(OS_ANDROID) && !defined(__MINGW32__)
  typedef off64_t Offset;
  const Offset ret = lseek64(fd, off, whence);
#else
  // Plain POSIX, and also MinGW.  Does MinGW's lseek handle 64-bit?
  typedef off_t Offset;
  const Offset ret = lseek(fd, off, whence);
#endif
  UTIL_THROW_IF_ARG((Offset)-1 == ret, FDException, (fd), "while seeking to " << off << " whence " << whence);
  return static_cast<uint64_t>(ret);
}
} // namespace
// Move fd's file position to the absolute offset off.  Returns the resulting
// offset; InternalSeek throws FDException on failure.
uint64_t SeekOrThrow(int fd, uint64_t off) {
  const int64_t target = static_cast<int64_t>(off);
  return InternalSeek(fd, target, SEEK_SET);
}
// Move fd's file position by off relative to where it currently is.  Returns
// the resulting offset; InternalSeek throws FDException on failure.
uint64_t AdvanceOrThrow(int fd, int64_t off) {
  const uint64_t landed = InternalSeek(fd, off, SEEK_CUR);
  return landed;
}
// Move fd's file position to the end of the file and return that offset.
// InternalSeek throws FDException on failure.
uint64_t SeekEnd(int fd) {
  const int64_t no_offset = 0;
  return InternalSeek(fd, no_offset, SEEK_END);
}
// Wrap the descriptor held by file in a stdio stream opened "r+b".  On
// success file gives up the descriptor; on failure FDException is thrown and
// file still holds it.
std::FILE *FDOpenOrThrow(scoped_fd &file) {
  std::FILE *const ret = fdopen(file.get(), "r+b");
  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
  // The stream was created, so the scoped_fd must no longer manage the fd.
  file.release();
  return ret;
}
// Wrap the descriptor held by file in a stdio stream opened "rb".  On success
// file gives up the descriptor; on failure FDException is thrown and file
// still holds it.
std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
  std::FILE *const ret = fdopen(file.get(), "rb");
  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
  // The stream was created, so the scoped_fd must no longer manage the fd.
  file.release();
  return ret;
}
// Sigh. Windows temporary file creation is full of race conditions.
#if defined(_WIN32) || defined(_WIN64)
/* mkstemp extracted from libc/sysdeps/posix/tempname.c. Copyright
(C) 1991-1999, 2000, 2001, 2006 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. */
/* This has been modified from the original version to rename the function and
* set the Windows temporary flag. */
// Alphabet the six generated characters are drawn from (62 symbols).
static const char letters[] =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/* Generate a temporary file name based on TMPL. TMPL must match the
rules for mk[s]temp (i.e. end in "XXXXXX"). The name constructed
does not exist at the time of the call to mkstemp. TMPL is
overwritten with the result.
Returns an open descriptor on success. Returns -1 on failure with errno
set to EINVAL for a bad template, EEXIST when every attempted name was
taken, or whatever _open reported otherwise. */
int
mkstemp_and_unlink(char *tmpl)
{
int len;
char *XXXXXX;
// Persists across calls so successive invocations start from different names.
static unsigned long long value;
unsigned long long random_time_bits;
unsigned int count;
int fd = -1;
// errno is restored to this on success so probing taken names leaves no trace.
int save_errno = errno;
/* A lower bound on the number of temporary files to attempt to
generate. The maximum total number of temporary file names that
can exist for a given template is 62**6. It should never be
necessary to try all these combinations. Instead if a reasonable
number of names is tried (we define reasonable as 62**3) fail to
give the system administrator the chance to remove the problems. */
#define ATTEMPTS_MIN (62 * 62 * 62)
/* The number of times to attempt to generate a temporary file. To
conform to POSIX, this must be no smaller than TMP_MAX. */
#if ATTEMPTS_MIN < TMP_MAX
unsigned int attempts = TMP_MAX;
#else
unsigned int attempts = ATTEMPTS_MIN;
#endif
len = strlen (tmpl);
if (len < 6 || strcmp (&tmpl[len - 6], "XXXXXX"))
{
errno = EINVAL;
return -1;
}
/* This is where the Xs start. */
XXXXXX = &tmpl[len - 6];
/* Get some more or less random data. */
{
SYSTEMTIME stNow;
FILETIME ftNow;
// get system time
GetSystemTime(&stNow);
stNow.wMilliseconds = 500;
if (!SystemTimeToFileTime(&stNow, &ftNow))
{
errno = -1;
return -1;
}
random_time_bits = (((unsigned long long)ftNow.dwHighDateTime << 32)
| (unsigned long long)ftNow.dwLowDateTime);
}
// Mix in the thread id so that threads starting at the same time diverge.
value += random_time_bits ^ (unsigned long long)GetCurrentThreadId ();
for (count = 0; count < attempts; value += 7777, ++count)
{
unsigned long long v = value;
/* Fill in the random bits. */
// Successive base-62 digits of v pick the six characters.
XXXXXX[0] = letters[v % 62];
v /= 62;
XXXXXX[1] = letters[v % 62];
v /= 62;
XXXXXX[2] = letters[v % 62];
v /= 62;
XXXXXX[3] = letters[v % 62];
v /= 62;
XXXXXX[4] = letters[v % 62];
v /= 62;
XXXXXX[5] = letters[v % 62];
/* Modified for windows and to unlink */
// fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
// _O_EXCL makes the open fail with EEXIST when the name is already taken.
int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
// _O_TEMPORARY takes the place of the unlink() in the POSIX version.
flags |= _O_TEMPORARY;
fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
if (fd >= 0)
{
errno = save_errno;
return fd;
}
// Any failure other than a name collision is final.
else if (errno != EEXIST)
return -1;
}
/* We got out of the loop because we ran out of combinations to try. */
errno = EEXIST;
return -1;
}
#else
// POSIX flavour: create a unique file from tmpl with mkstemp, then unlink its
// name straight away.  Returns the open descriptor, or -1 if mkstemp failed.
// Throws ErrnoException if the unlink fails.
int
mkstemp_and_unlink(char *tmpl) {
  const int ret = mkstemp(tmpl);
  if (ret == -1) return ret;
  UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting " << tmpl);
  return ret;
}
#endif
// If base names an existing directory, append a '/'.  This lets users say
// -T /tmp without creating /tmpAAAAAA.  Empty strings, strings already ending
// in '/', and paths that cannot be stat()ed are left untouched.
void NormalizeTempPrefix(std::string &base) {
  if (base.empty() || base[base.size() - 1] == '/') return;
  struct stat sb;
  // It's fine for it to not exist.
  if (stat(base.c_str(), &sb) == -1) return;
#if defined(_WIN32) || defined(_WIN64)
  const bool is_directory = (sb.st_mode & _S_IFDIR) != 0;
#else
  const bool is_directory = S_ISDIR(sb.st_mode);
#endif
  if (is_directory) base += '/';
}
// Create an unnamed temporary file whose name began with base followed by six
// generated characters.  Returns the open descriptor; throws ErrnoException
// if creation fails.
int MakeTemp(const StringPiece &base) {
  std::string name(base.data(), base.size());
  name.append("XXXXXX");
  // mkstemp_and_unlink edits the template in place as a C string.
  name.push_back('\0');
  int ret;
  UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
  return ret;
}
// Same as MakeTemp, but hands the temporary back as a stdio stream.  The
// scoped_fd keeps the descriptor managed if FDOpenOrThrow throws.
std::FILE *FMakeTemp(const StringPiece &base) {
  util::scoped_fd file(MakeTemp(base));
  std::FILE *const opened = FDOpenOrThrow(file);
  return opened;
}
// Choose the directory prefix for temporary files.  Windows asks GetTempPath;
// other platforms take the first set and non-empty variable among TMPDIR, TMP,
// TEMPDIR and TEMP, falling back to "/tmp/".  The result is passed through
// NormalizeTempPrefix.
std::string DefaultTempDirectory() {
#if defined(_WIN32) || defined(_WIN64)
  char dir_buffer[1000];
  if (!GetTempPath(1000, dir_buffer))
    throw std::runtime_error("Could not read temporary directory.");
  std::string ret(dir_buffer);
  NormalizeTempPrefix(ret);
  return ret;
#else
  // POSIX says to try these environment variables, in this order:
  const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0};
  for (const char *const *var = vars; *var; ++var) {
    const char *val =
#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2,17)
      secure_getenv
#else // __GLIBC_PREREQ
      getenv
#endif // __GLIBC_PREREQ
#else // _GNU_SOURCE
      getenv
#endif
      (*var);
    // Skip variables that are unset or empty.
    if (!val || !*val) continue;
    std::string ret(val);
    NormalizeTempPrefix(ret);
    return ret;
  }
  // No environment variables set.  Default to /tmp.
  return "/tmp/";
#endif
}
// Duplicate fd with dup and return the new descriptor.  Throws FDException if
// dup fails.
int DupOrThrow(int fd) {
  const int ret = dup(fd);
  UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
  return ret;
}
namespace {
// Try to name things but be willing to fail too.
// Resolves the /proc/self/fd/<fd> symlink into out.  Returns false on Windows,
// when the link cannot be read, or when the target is non-empty and does not
// start with '/'.  out may be modified even when false is returned.
bool TryName(int fd, std::string &out) {
#if defined(_WIN32) || defined(_WIN64)
  return false;
#else
  std::ostringstream path;
  path << "/proc/self/fd/" << fd;
  const std::string name(path.str());
  struct stat sb;
  if (lstat(name.c_str(), &sb) == -1)
    return false;
  out.resize(sb.st_size + 1);
  // lstat gave us a size, but I've seen it grow, possibly due to symlinks on top of symlinks.
  for (;;) {
    const ssize_t ret = readlink(name.c_str(), &out[0], out.size());
    if (ret == -1)
      return false;
    // A result shorter than the buffer means the whole target fit.
    if (static_cast<size_t>(ret) < out.size()) {
      out.resize(ret);
      break;
    }
    // Exponential growth.
    out.resize(out.size() * 2);
  }
  // Don't use the non-file names.
  return out.empty() || out[0] == '/';
#endif
}
} // namespace
// Produce a printable name for fd: the path found by TryName when available,
// otherwise "stdin"/"stdout"/"stderr" for descriptors 0-2, otherwise
// "fd <number>".
std::string NameFromFD(int fd) {
  std::string ret;
  if (TryName(fd, ret)) return ret;
  if (fd == 0) return "stdin";
  if (fd == 1) return "stdout";
  if (fd == 2) return "stderr";
  std::ostringstream convert;
  convert << "fd " << fd;
  return convert.str();
}
} // namespace util
#ifndef UTIL_FILE_H
#define UTIL_FILE_H
#include "exception.hh"
#include "scoped.hh"
#include "string_piece.hh"
#include <cstddef>
#include <cstdio>
#include <string>
#include <stdint.h>
namespace util {
// Holder for a file descriptor.  -1 means "no descriptor".  The destructor is
// defined out of line.  Copying is disabled; moving is available when compiled
// as C++11 or later.
class scoped_fd {
  public:
    scoped_fd() : fd_(-1) {}

    explicit scoped_fd(int fd) : fd_(fd) {}

    ~scoped_fd();

#if __cplusplus >= 201103L
    // Take over from's descriptor and leave from empty.
    scoped_fd(scoped_fd &&from) noexcept : fd_(from.fd_) {
      from.fd_ = -1;
    }
#endif

    // Start holding `to`.  The previously held descriptor is handed to a
    // temporary scoped_fd whose destructor runs when reset() returns.
    void reset(int to = -1) {
      scoped_fd previous(fd_);
      fd_ = to;
    }

    int get() const { return fd_; }

    int operator*() const { return fd_; }

    // Give up the descriptor without disposing of it and return it.
    int release() {
      const int released = fd_;
      fd_ = -1;
      return released;
    }

  private:
    int fd_;

    // Declared but not defined here: not copyable.
    scoped_fd(const scoped_fd &);
    scoped_fd &operator=(const scoped_fd &);
};
// Closer policy plugged into util::scoped to form scoped_FILE below.
// Close is defined out of line.
struct scoped_FILE_closer {
static void Close(std::FILE *file);
};
// Scoped holder for a std::FILE that uses scoped_FILE_closer::Close.
typedef scoped<std::FILE, scoped_FILE_closer> scoped_FILE;
/* Thrown for any operation where the fd is known. */
class FDException : public ErrnoException {
public:
// fd is the descriptor the failed operation was using.
explicit FDException(int fd) throw();
virtual ~FDException() throw();
// This may no longer be valid if the exception was thrown past open.
int FD() const { return fd_; }
// Guess from NameFromFD.
const std::string &NameGuess() const { return name_guess_; }
private:
// Descriptor involved in the failure.
int fd_;
// Best-effort name for fd_, intended for messages.
std::string name_guess_;
};
// End of file reached.
// The read helpers in this library throw it when input ends before a request
// could be satisfied.
class EndOfFileException : public Exception {
public:
EndOfFileException() throw();
~EndOfFileException() throw();
};
// Exception type for operations not supported on the current operating
// system (judging by the name; its throw sites are outside this header).
class UnsupportedOSException : public Exception {};
// Open for read only.
int OpenReadOrThrow(const char *name);
// Create file if it doesn't exist, truncate if it does. Opened for write.
int CreateOrThrow(const char *name);
/** Does the given input file path denote standard input?
*
* Returns true if, and only if, path is either "-" or "/dev/stdin".
*
* Opening standard input as a file may need some special treatment for
* portability. There's a convention that a dash ("-") in place of an input
* file path denotes standard input, but opening "/dev/stdin" may need to be
* special as well.
*/
bool InputPathIsStdin(StringPiece path);
/** Does the given output file path denote standard output?
*
* Returns true if, and only if, path is either "-" or "/dev/stdout".
*
* Opening standard output as a file may need some special treatment for
* portability. There's a convention that a dash ("-") in place of an output
* file path denotes standard output, but opening "/dev/stdout" may need to be
* special as well.
*/
bool OutputPathIsStdout(StringPiece path);
// Return value for SizeFile when it can't size properly.
const uint64_t kBadSize = (uint64_t)-1;
uint64_t SizeFile(int fd);
uint64_t SizeOrThrow(int fd);
void ResizeOrThrow(int fd, uint64_t to);
// It bothers me that fallocate has offset before size while pread has size
// before offset. But best to follow the call.
void HolePunch(int fd, uint64_t offset, uint64_t size);
// Perform one read of at most size bytes; returns the number of bytes read.
std::size_t PartialRead(int fd, void *to, std::size_t size);
// Read exactly size bytes; throws EndOfFileException if the input ends first.
void ReadOrThrow(int fd, void *to, std::size_t size);
// Read until size bytes have arrived or the input ends; returns the number of
// bytes read.
std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
// Write all size bytes to fd or throw.
void WriteOrThrow(int fd, const void *data_void, std::size_t size);
// Write all size bytes to the stdio stream or throw.
void WriteOrThrow(FILE *to, const void *data, std::size_t size);
/* These call pread/pwrite in a loop, which does not use the implicit file
* pointer, so it's safe to call ErsatzPRead and ErsatzPWrite concurrently (or
* any combination thereof). On Windows, however, they call ReadFile/WriteFile,
* which changes the file pointer, so there it's not safe to call them
* concurrently with anything that uses the implicit file pointer e.g. the
* Read/Write functions above.
*/
void ErsatzPRead(int fd, void *to, std::size_t size, uint64_t off);
void ErsatzPWrite(int fd, const void *data_void, std::size_t size, uint64_t off);
// fsync the descriptor or throw. Does nothing on Windows.
void FSyncOrThrow(int fd);
// Seeking: returns offset
// SeekOrThrow seeks to an absolute offset, AdvanceOrThrow moves relative to
// the current position and SeekEnd moves to the end of the file.
uint64_t SeekOrThrow(int fd, uint64_t off);
uint64_t AdvanceOrThrow(int fd, int64_t off);
uint64_t SeekEnd(int fd);
// Wrap the descriptor in a stdio stream ("r+b" and "rb" respectively). On
// success the scoped_fd is released, so the caller is left with the FILE.
std::FILE *FDOpenOrThrow(scoped_fd &file);
std::FILE *FDOpenReadOrThrow(scoped_fd &file);
// Temporary files
// Append a / if base is a directory.
void NormalizeTempPrefix(std::string &base);
// Create a temporary file whose name starts with prefix; returns its
// descriptor (MakeTemp) or a stdio stream over it (FMakeTemp).
int MakeTemp(const StringPiece &prefix);
std::FILE *FMakeTemp(const StringPiece &prefix);
// Where should we put temporary files? Handles all the windows/POSIX defaults fun.
std::string DefaultTempDirectory();
// dup an fd.
int DupOrThrow(int fd);
/* Attempt to get the file name from an fd. This won't always work (e.g. on
* Windows or for a pipe). The file might have been renamed. It's intended for
* diagnostics and logging only.
*/
std::string NameFromFD(int fd);
} // namespace util
#endif // UTIL_FILE_H
#include "file_piece.hh"
#include "double-conversion/double-conversion.h"
#include "exception.hh"
#include "file.hh"
#include "mmap.hh"
#if defined(_WIN32) || defined(_WIN64)
#include <io.h>
#else
#include <unistd.h>
#endif
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <string>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#if defined(_WIN32) || defined(_WIN64)
#include <math.h>
#endif
namespace util {
// Value of SizePage(), fetched once during static initialization. Buffer
// sizes and mmap offsets in this file are computed as multiples of it.
namespace { const uint64_t kPageSize = SizePage(); }
// Starts the message with the offending text. The sentence is left open so
// that the thrower can append the name of the type that was expected.
ParseNumberException::ParseNumberException(StringPiece value) throw() {
*this << "Could not parse \"" << value << "\" into a ";
}
// Fetch the next line into line_.  Once the backing FilePiece reports end of
// file the iterator drops its backing pointer, which makes it compare equal to
// a default-constructed (end) iterator.
LineIterator &LineIterator::operator++() {
  const bool got_line = backing_->ReadLineOrEOF(line_, delim_);
  if (!got_line) backing_ = NULL;
  return *this;
}
// Open the named file for reading. The progress display is disabled when the
// file's size cannot be determined (SizeFile returned kBadSize).
FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())),
progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
Initialize(name, show_progress, min_buffer);
}
namespace {
// Use the caller-supplied name when there is one; otherwise ask NameFromFD
// for a best-effort name of the descriptor.
std::string NamePossiblyFind(int fd, const char *name) {
  return name ? std::string(name) : NameFromFD(fd);
}
} // namespace
// Read from an already-open descriptor, which this object takes over. When
// name is NULL a name is guessed from the descriptor for messages. The
// progress display is disabled when the size cannot be determined.
FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
file_(fd), total_size_(SizeFile(file_.get())),
progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
}
// Read from an istream. There is no descriptor to mmap, so the object starts
// out in read-fallback mode with an empty buffer of default_map_size_ bytes.
// The name argument is currently unused; the file name is recorded as
// "istream".
FilePiece::FilePiece(std::istream &stream, const char * /*name*/, std::size_t min_buffer) :
total_size_(kBadSize) {
InitializeNoRead("istream", min_buffer);
fallback_to_read_ = true;
HugeMalloc(default_map_size_, false, data_);
// Empty buffer: the first access will call Shift() to pull data.
position_ = data_.begin();
position_end_ = position_;
fell_back_.Reset(stream);
}
// Return the text up to (not including) the next delim and consume the delim.
// With strip_cr, a '\r' directly before the delim is dropped from the result.
// When the input ends without a delim, the remaining text is returned; when
// nothing remains, Shift() throws EndOfFileException.
StringPiece FilePiece::ReadLine(char delim, bool strip_cr) {
  // Bytes at the front of the buffer already known not to contain delim.
  for (std::size_t skip = 0; ; ) {
    const char *const found = std::find(position_ + skip, position_end_, delim);
    if (UTIL_LIKELY(found != position_end_)) {
      // End of line.
      const char *line_end = found;
      // Take 1 byte off the end if it's an unwanted carriage return.
      if (strip_cr && found > position_ && found[-1] == '\r') --line_end;
      const StringPiece ret(position_, line_end - position_);
      position_ = found + 1;
      return ret;
    }
    if (at_end_) {
      if (position_ == position_end_) Shift();
      return Consume(position_end_);
    }
    skip = position_end_ - position_;
    Shift();
  }
}
// Non-throwing front end to ReadLine: stores the line in `to` and returns
// true, or returns false when ReadLine signals end of file.
bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim, bool strip_cr) {
  try {
    to = ReadLine(delim, strip_cr);
    return true;
  } catch (const util::EndOfFileException &) {
    return false;
  }
}
// Parse the next token as a float; forwards to ReadNumber<float>.
float FilePiece::ReadFloat() {
return ReadNumber<float>();
}
// Parse the next token as a double; forwards to ReadNumber<double>.
double FilePiece::ReadDouble() {
return ReadNumber<double>();
}
// Parse the next token as a long int; forwards to ReadNumber<long int>.
long int FilePiece::ReadLong() {
return ReadNumber<long int>();
}
// Parse the next token as an unsigned long int; forwards to
// ReadNumber<unsigned long int>.
unsigned long int FilePiece::ReadULong() {
return ReadNumber<unsigned long int>();
}
// Factored out so that istream can call this.
// Records the name, sizes the buffer as a whole number of pages (at least
// two, and more than min_buffer) and clears the read state.  Performs no I/O.
void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
  file_name_ = name;
  const std::size_t pages = std::max<std::size_t>((min_buffer / kPageSize + 1), 2);
  default_map_size_ = kPageSize * pages;
  position_ = position_end_ = NULL;
  mapped_offset_ = 0;
  at_end_ = false;
}
// Shared setup for the file-backed constructors: pick mmap or read() mode,
// load the first chunk, and switch to the read path if the data starts with a
// compressed-file magic number.
void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
  InitializeNoRead(name, min_buffer);

  // Ask where the descriptor currently is; failure means it is not seekable.
  uint64_t current_offset = 0;
  bool valid_current_offset = true;
  try {
    current_offset = AdvanceOrThrow(file_.get(), 0);
  } catch (const FDException &) {
    valid_current_offset = false;
  }

  // So the assertion in TransitionToRead passes
  fallback_to_read_ = false;
  if (total_size_ != kBadSize && valid_current_offset) {
    mapped_offset_ = current_offset;
  } else {
    if (show_progress)
      *show_progress << "File " << name << " isn't normal. Using slower read() instead of mmap(). No progress bar." << std::endl;
    TransitionToRead();
  }
  Shift();

  // gzip detect.
  const bool enough_for_magic = position_end_ >= position_ + ReadCompressed::kMagicSize;
  if (enough_for_magic && ReadCompressed::DetectCompressedMagic(position_) && !fallback_to_read_) {
    at_end_ = false;
    TransitionToRead();
  }
}
namespace {
// Converter shared by the floating-point ParseNumber overloads.  It accepts
// leading spaces and trailing junk, yields quiet NaN both for empty input and
// for unparseable input, and recognises the spellings "inf" and "NaN".
static const double_conversion::StringToDoubleConverter kConverter(
    double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
    std::numeric_limits<double>::quiet_NaN(),
    std::numeric_limits<double>::quiet_NaN(),
    "inf",
    "NaN");

// Returns the prefix of str up to, but not including, its first kSpaces
// character (all of str when it contains none).
StringPiece FirstToken(StringPiece str) {
  const char *i;
  for (i = str.data(); i != str.data() + str.size(); ++i) {
    if (kSpaces[(unsigned char)*i]) break;
  }
  return StringPiece(str.data(), i - str.data());
}

// std::isnan is technically C++11 not C++98.  But in practice this is a problem for visual studio.
template <class T> inline int CrossPlatformIsNaN(T value) {
#if defined(_WIN32) || defined(_WIN64)
  return isnan(value);
#else
  return std::isnan(value);
#endif
}

// The ParseNumber overloads parse one number from the front of str into out
// and return a pointer just past the characters they consumed.  They throw
// ParseNumberException, quoting the first token, when no number is found.
//
// For the floating-point overloads a NaN result signals failure unless the
// input really spelled NaN.  str normally holds everything up to the last
// space in the buffer, not a single token, so the "NaN" spelling is matched
// against the first token; comparing the whole of str rejected a valid NaN
// whenever anything, even a newline, followed it.  The lowercase "nan" test
// is left on the whole of str as before: kConverter is only given the
// spelling "NaN", so presumably it would not consume a lowercase token.
const char *ParseNumber(StringPiece str, float &out) {
  int count;
  out = kConverter.StringToFloat(str.data(), str.size(), &count);
  UTIL_THROW_IF_ARG(CrossPlatformIsNaN(out) && FirstToken(str) != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "float");
  return str.data() + count;
}

const char *ParseNumber(StringPiece str, double &out) {
  int count;
  out = kConverter.StringToDouble(str.data(), str.size(), &count);
  UTIL_THROW_IF_ARG(CrossPlatformIsNaN(out) && FirstToken(str) != "NaN" && str != "nan", ParseNumberException, (FirstToken(str)), "double");
  return str.data() + count;
}

// The integer overloads rely on strtol/strtoul stopping at the first
// character that cannot be part of a number; failure is a set errno or no
// characters consumed.
const char *ParseNumber(StringPiece str, long int &out) {
  char *end;
  errno = 0;
  out = strtol(str.data(), &end, 10);
  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "long int");
  return end;
}

const char *ParseNumber(StringPiece str, unsigned long int &out) {
  char *end;
  errno = 0;
  out = strtoul(str.data(), &end, 10);
  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "unsigned long int");
  return end;
}
} // namespace
// Skip leading spaces, then parse one T from the current position and advance
// past it.  Parsing normally runs on the buffer up to last_space_, which
// guarantees the number is followed by a space; at end of file the tail is
// copied into a std::string so the parser sees a terminating null instead.
template <class T> T FilePiece::ReadNumber() {
  SkipSpaces();
  T ret;
  for (; last_space_ < position_; Shift()) {
    if (UTIL_UNLIKELY(at_end_)) {
      // Hallucinate a null off the end of the file.
      const std::string buffer(position_, position_end_);
      // Has to be null-terminated.
      const char *const begin = buffer.c_str();
      position_ += ParseNumber(StringPiece(begin, buffer.size()), ret) - begin;
      return ret;
    }
  }
  position_ = ParseNumber(StringPiece(position_, last_space_ - position_), ret);
  return ret;
}
// Locate the first byte at or after position_ that delim marks as a
// delimiter, pulling in more data as needed.  Returns position_end_ when the
// input ends first; Shift() throws EndOfFileException if nothing is left.
const char *FilePiece::FindDelimiterOrEOF(const bool *delim) {
  // Bytes at the front of the buffer already known to hold no delimiter.
  std::size_t already_scanned = 0;
  for (;;) {
    const char *i = position_ + already_scanned;
    while (i < position_end_ && !delim[static_cast<unsigned char>(*i)]) ++i;
    if (i < position_end_) return i;
    if (at_end_) {
      if (position_ == position_end_) Shift();
      return position_end_;
    }
    already_scanned = position_end_ - position_;
    Shift();
  }
}
// Bring more data into the buffer, through mmap or read() depending on the
// current mode, then recompute last_space_.  Throws EndOfFileException when
// the end of the input had already been reached.
void FilePiece::Shift() {
  if (at_end_) {
    progress_.Finished();
    throw EndOfFileException();
  }
  const uint64_t desired_begin = position_ - data_.begin() + mapped_offset_;

  if (!fallback_to_read_) MMapShift(desired_begin);
  // Notice an mmap failure might set the fallback.
  if (fallback_to_read_) ReadShift();

  // Walk backwards to the last space in the buffer; ends up before position_
  // when there is none.
  last_space_ = position_end_ - 1;
  while (last_space_ >= position_ && !kSpaces[static_cast<unsigned char>(*last_space_)]) --last_space_;
}
// Report the current absolute offset to the progress display.  Does nothing
// in read-fallback mode.
void FilePiece::UpdateProgress() {
  if (fallback_to_read_) return;
  progress_.Set(position_ - data_.begin() + mapped_offset_);
}
// mmap backend for Shift(): map a window of the file that contains
// desired_begin.  If the mapping fails, seek the descriptor to desired_begin
// and switch to the read() path instead.
void FilePiece::MMapShift(uint64_t desired_begin) {
  // Use mmap.
  // The mapping starts on a multiple of kPageSize; `ignore` is the number of
  // bytes between that start and desired_begin.
  uint64_t ignore = desired_begin % kPageSize;
  // Duplicate request for Shift means give more data.
  if (position_ == data_.begin() + ignore && position_) {
    default_map_size_ *= 2;
  }
  // Local version so that in case of failure it doesn't overwrite the class variable.
  uint64_t mapped_offset = desired_begin - ignore;

  uint64_t mapped_size;
  // Compare in 64 bits.  Narrowing the remaining size to std::size_t first
  // would truncate it where size_t is 32 bits and could wrongly conclude that
  // this mapping reaches the end of a large file.
  if (static_cast<uint64_t>(default_map_size_) >= total_size_ - mapped_offset) {
    at_end_ = true;
    mapped_size = total_size_ - mapped_offset;
  } else {
    mapped_size = default_map_size_;
  }

  // Forcibly clear the existing mmap first.
  data_.reset();
  try {
    MapRead(POPULATE_OR_LAZY, *file_, mapped_offset, mapped_size, data_);
  } catch (const util::ErrnoException &) {
    if (desired_begin) {
      SeekOrThrow(*file_, desired_begin);
    }
    // The mmap was scheduled to end the file, but now we're going to read it.
    at_end_ = false;
    TransitionToRead();
    return;
  }
  mapped_offset_ = mapped_offset;
  position_ = data_.begin() + ignore;
  position_end_ = data_.begin() + mapped_size;

  progress_.Set(desired_begin);
}
// Switch from mmap mode to the read() path: replace the mapping with an empty
// buffer of default_map_size_ bytes and hand the descriptor over to
// fell_back_. Must not be called when already in read mode (asserted).
void FilePiece::TransitionToRead() {
assert(!fallback_to_read_);
fallback_to_read_ = true;
// Drop whatever data_ currently holds before allocating the read buffer.
data_.reset();
HugeMalloc(default_map_size_, false, data_);
// Empty buffer: nothing has been read into it yet.
position_ = data_.begin();
position_end_ = position_;
try {
// fell_back_ receives the descriptor; file_ no longer holds it.
fell_back_.Reset(file_.release());
} catch (util::Exception &e) {
// Add the file name to the message and rethrow.
e << " in file " << file_name_;
throw;
}
}
// read() backend for Shift(): make room in the buffer if it is full, then
// append more data from fell_back_. Sets at_end_ when the read returns no
// bytes.
void FilePiece::ReadShift() {
assert(fallback_to_read_);
// Bytes [data_.begin(), position_) have been consumed.
// Bytes [position_, position_end_) have been read into the buffer.
// Start at the beginning of the buffer if there's nothing useful in it.
if (position_ == position_end_) {
// Account for the discarded bytes so offsets stay absolute.
mapped_offset_ += (position_end_ - data_.begin());
position_ = data_.begin();
position_end_ = position_;
}
std::size_t already_read = position_end_ - data_.begin();
// The buffer is full up to its end: either grow it or compact it.
if (already_read == default_map_size_) {
if (position_ == data_.begin()) {
// Buffer too small.
// Nothing has been consumed, so double the buffer and keep its contents.
std::size_t valid_length = position_end_ - position_;
default_map_size_ *= 2;
HugeRealloc(default_map_size_, false, data_);
position_ = data_.begin();
position_end_ = position_ + valid_length;
} else {
// Slide the unconsumed bytes down to the start of the buffer.
std::size_t moving = position_end_ - position_;
memmove(data_.get(), position_, moving);
position_ = data_.begin();
position_end_ = position_ + moving;
already_read = moving;
}
}
// Fill the free space that follows the valid data.
std::size_t read_return = fell_back_.Read(static_cast<uint8_t*>(data_.get()) + already_read, default_map_size_ - already_read);
progress_.Set(fell_back_.RawAmount());
if (read_return == 0) {
at_end_ = true;
}
position_end_ += read_return;
}
} // namespace util
#ifndef UTIL_FILE_PIECE_H
#define UTIL_FILE_PIECE_H
#include "ersatz_progress.hh"
#include "exception.hh"
#include "file.hh"
#include "mmap.hh"
#include "read_compressed.hh"
#include "spaces.hh"
#include "string_piece.hh"
#include <cstddef>
#include <iosfwd>
#include <string>
#include <cassert>
#include <stdint.h>
namespace util {
// Thrown when text could not be parsed as a number. value is the offending
// text that gets quoted in the message.
class ParseNumberException : public Exception {
public:
explicit ParseNumberException(StringPiece value) throw();
~ParseNumberException() throw() {}
};
class FilePiece;
// Input Iterator over lines.  This allows
//   for (StringPiece l : FilePiece("file"))
// in C++11.
// NB: not multipass.
class LineIterator {
  public:
    // End-of-input iterator.  delim_ is not used in this state, but it is
    // still given a value so that copying an end iterator never reads an
    // uninitialized member.
    LineIterator() : backing_(NULL), delim_('\n') {}

    // Iterate over the lines of f separated by delim.  The first line is read
    // immediately.
    explicit LineIterator(FilePiece &f, char delim = '\n') : backing_(&f), delim_(delim) {
      ++*this;
    }

    // Read the next line; becomes an end iterator when the input is exhausted.
    LineIterator &operator++();

    // Iterators are equal when they refer to the same FilePiece, so every
    // exhausted iterator equals the default-constructed one.
    bool operator==(const LineIterator &other) const {
      return backing_ == other.backing_;
    }

    bool operator!=(const LineIterator &other) const {
      return backing_ != other.backing_;
    }

    // True while there is a current line.
    operator bool() const { return backing_ != NULL; }

    StringPiece operator*() const { return line_; }
    const StringPiece *operator->() const { return &line_; }

  private:
    // Source of lines; NULL once the end has been reached.
    FilePiece *backing_;
    // The current line.
    StringPiece line_;
    // Line delimiter passed to ReadLineOrEOF.
    char delim_;
};
// Buffered reader over a file, file descriptor or istream that hands out
// StringPieces pointing into its own buffer.
// Memory backing the returned StringPiece may vanish on the next call.
class FilePiece {
  public:
    // 1 MB default.
    explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
    // Takes ownership of fd.  name is used for messages.
    explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);

    /* Read from an istream.  Don't use this if you can avoid it.  Raw fd IO is
     * much faster.  But sometimes you just have an istream like Boost's HTTP
     * server and want to parse it the same way.
     * name is just used for messages and FileName().
     */
    explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);

    // Line iteration support; see LineIterator.
    LineIterator begin() {
      return LineIterator(*this);
    }

    LineIterator end() {
      return LineIterator();
    }

    // Return the next character without consuming it.
    // Throws EndOfFileException when no input remains.
    char peek() {
      if (position_ == position_end_) {
        Shift();
        // Shift() may set at_end_ while still delivering the final chunk of
        // data (the mmap path does so when its window reaches the end of the
        // file), so test for available bytes rather than at_end_, exactly as
        // SkipSpaces() and ReadWordSameLine() do.
        if (position_ == position_end_) throw EndOfFileException();
      }
      return *position_;
    }

    // Return the next character and consume it.
    // Throws EndOfFileException when no input remains.
    char get() {
      char ret = peek();
      ++position_;
      return ret;
    }

    // Leaves the delimiter, if any, to be returned by get().  Delimiters defined by isspace().
    StringPiece ReadDelimited(const bool *delim = kSpaces) {
      SkipSpaces(delim);
      return Consume(FindDelimiterOrEOF(delim));
    }

    /// Read word until the line or file ends.
    /// Returns false, leaving `to` untouched, when the rest of the line holds
    /// no word or the input has ended.  delim must treat '\n' as a delimiter.
    bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
      assert(delim[static_cast<unsigned char>('\n')]);
      // Skip non-enter spaces.
      for (; ; ++position_) {
        if (position_ == position_end_) {
          try {
            Shift();
          } catch (const util::EndOfFileException &) { return false; }
          // And break out at end of file.
          if (position_ == position_end_) return false;
        }
        if (!delim[static_cast<unsigned char>(*position_)]) break;
        if (*position_ == '\n') return false;
      }
      // We can't be at the end of file because there's at least one character open.
      to = Consume(FindDelimiterOrEOF(delim));
      return true;
    }

    /** Read a line of text from the file.
     *
     * Unlike ReadDelimited, this includes leading spaces and consumes the
     * delimiter.  It is similar to getline in that way.
     *
     * If strip_cr is true, any trailing carriage return (as would be found on
     * a file written on Windows) will be left out of the returned line.
     *
     * Throws EndOfFileException if the end of the file is encountered with
     * nothing left to read.  If the file does not end in the delimiter, the
     * remaining text is returned as the final line.
     */
    StringPiece ReadLine(char delim = '\n', bool strip_cr = true);

    /** Read a line of text from the file, or return false on EOF.
     *
     * This is like ReadLine, except it returns false where ReadLine throws
     * EndOfFileException.  Like ReadLine, it returns the remaining text as the
     * final line if the file does not end in the delimiter.
     *
     * If strip_cr is true, any trailing carriage return (as would be found on
     * a file written on Windows) will be left out of the returned line.
     */
    bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);

    // Skip leading spaces, then parse and consume one number.
    float ReadFloat();
    double ReadDouble();
    long int ReadLong();
    unsigned long int ReadULong();

    // Skip spaces defined by isspace.
    void SkipSpaces(const bool *delim = kSpaces) {
      assert(position_ <= position_end_);
      for (; ; ++position_) {
        if (position_ == position_end_) {
          Shift();
          // And break out at end of file.
          if (position_ == position_end_) return;
        }
        assert(position_ < position_end_);
        if (!delim[static_cast<unsigned char>(*position_)]) return;
      }
    }

    // Absolute offset of the current position.
    uint64_t Offset() const {
      return position_ - data_.begin() + mapped_offset_;
    }

    const std::string &FileName() const { return file_name_; }

    // Force a progress update.
    void UpdateProgress();

  private:
    void InitializeNoRead(const char *name, std::size_t min_buffer);
    // Calls InitializeNoRead, so don't call both.
    void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);

    template <class T> T ReadNumber();

    // Return [position_, to) and advance position_ to `to`.
    StringPiece Consume(const char *to) {
      assert(to >= position_);
      StringPiece ret(position_, to - position_);
      position_ = to;
      return ret;
    }

    const char *FindDelimiterOrEOF(const bool *delim = kSpaces);

    // Bring more data into the buffer; throws EndOfFileException at the end.
    void Shift();
    // Backends to Shift().
    void MMapShift(uint64_t desired_begin);

    void TransitionToRead();
    void ReadShift();

    // Unconsumed data is [position_, position_end_).  last_space_ points at
    // the last space in that range, or before position_ when there is none.
    const char *position_, *last_space_, *position_end_;

    scoped_fd file_;
    // Size of the input, or kBadSize when it could not be determined.
    const uint64_t total_size_;

    // Current size of the mmap window or read buffer.
    std::size_t default_map_size_;
    // Absolute offset corresponding to data_.begin().
    uint64_t mapped_offset_;

    // Order matters: file_ should always be destroyed after this.
    scoped_memory data_;

    // Set once the buffer holds the last of the input.
    bool at_end_;
    // True when using read() through fell_back_ rather than mmap.
    bool fallback_to_read_;

    ErsatzProgress progress_;

    std::string file_name_;

    ReadCompressed fell_back_;
};
} // namespace util
#endif // UTIL_FILE_PIECE_H
// Tests might fail if you have creative characters in your path. Sue me.
#include "file_piece.hh"
#include "file_stream.hh"
#include "file.hh"
#include "scoped.hh"
#define BOOST_TEST_MODULE FilePieceTest
#include <boost/test/unit_test.hpp>
#include <fstream>
#include <iostream>
#include <cstdio>
#include <sys/types.h>
#include <sys/stat.h>
namespace util {
namespace {
std::string FileLocation() {
if (boost::unit_test::framework::master_test_suite().argc < 2) {
return "file_piece.cc";
}
std::string ret(boost::unit_test::framework::master_test_suite().argv[1]);
return ret;
}
/* istream: FilePiece wrapped around a std::istream must yield the same lines
 * as std::getline on the same file, then throw at end of file (repeatedly). */
BOOST_AUTO_TEST_CASE(IStream) {
  const std::string path(FileLocation());
  std::fstream ref(path.c_str(), std::ios::in);
  std::fstream backing(path.c_str(), std::ios::in);
  FilePiece test(backing);
  for (std::string ref_line; getline(ref, ref_line); ) {
    StringPiece test_line(test.ReadLine());
    BOOST_CHECK_EQUAL(ref_line, test_line);
  }
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
}
/* mmap implementation: open by name with a minimum buffer of 1 and compare
 * line by line against std::getline. */
BOOST_AUTO_TEST_CASE(MMapReadLine) {
  const std::string path(FileLocation());
  std::fstream ref(path.c_str(), std::ios::in);
  FilePiece test(path.c_str(), NULL, 1);
  for (std::string ref_line; getline(ref, ref_line); ) {
    StringPiece test_line(test.ReadLine());
    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
    // Two empty lines are not compared.
    if (test_line.empty() && ref_line.empty()) continue;
    BOOST_CHECK_EQUAL(ref_line, test_line);
  }
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
}
/* mmap with seek beforehand: both the reference stream and the descriptor
 * given to FilePiece start at byte 10. */
BOOST_AUTO_TEST_CASE(MMapSeek) {
  const std::string path(FileLocation());
  std::fstream ref(path.c_str(), std::ios::in);
  ref.seekg(10);
  scoped_fd file(util::OpenReadOrThrow(path.c_str()));
  SeekOrThrow(file.get(), 10);
  // The descriptor is released from the scoped_fd and passed to FilePiece.
  FilePiece test(file.release());
  for (std::string ref_line; getline(ref, ref_line); ) {
    StringPiece test_line(test.ReadLine());
    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
    // Two empty lines are not compared.
    if (test_line.empty() && ref_line.empty()) continue;
    BOOST_CHECK_EQUAL(ref_line, test_line);
  }
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
}
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
/* Apple isn't happy with the popen, fileno, dup.  And I don't want to
 * reimplement popen.  This is an issue with the test.
 */
/* read() implementation: feed FilePiece from a pipe produced by `cat`. */
BOOST_AUTO_TEST_CASE(StreamReadLine) {
  std::fstream ref(FileLocation().c_str(), std::ios::in);
  const std::string popen_args = "cat \"" + FileLocation() + '"';
  FILE *catter = popen(popen_args.c_str(), "r");
  BOOST_REQUIRE(catter);
  // FilePiece gets its own duplicate of the pipe descriptor; the FILE* is
  // closed separately with pclose below.
  FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
  for (std::string ref_line; getline(ref, ref_line); ) {
    StringPiece test_line(test.ReadLine());
    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
    // Two empty lines are not compared.
    if (test_line.empty() && ref_line.empty()) continue;
    BOOST_CHECK_EQUAL(ref_line, test_line);
  }
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
  BOOST_REQUIRE(!pclose(catter));
}
#endif
#ifdef HAVE_ZLIB
// gzip file: compress the reference file next to itself, open the .gz by
// name, and compare the decompressed lines with the original.
BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
  std::string location(FileLocation());
  std::fstream ref(location.c_str(), std::ios::in);
  const std::string zipped(location + ".gz");
  const std::string command("gzip <\"" + location + "\" >\"" + location + "\".gz");
  BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
  FilePiece test(zipped.c_str(), NULL, 1);
  // Remove the temporary archive's name right after it has been opened.
  unlink(zipped.c_str());
  for (std::string ref_line; getline(ref, ref_line); ) {
    StringPiece test_line(test.ReadLine());
    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
    // Two empty lines are not compared.
    if (test_line.empty() && ref_line.empty()) continue;
    BOOST_CHECK_EQUAL(ref_line, test_line);
  }
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
}
// gzip stream.  Apple doesn't like popen, fileno, dup.  This is an issue with
// the test.
#if !defined __APPLE__ && !defined __MINGW32__
// Reads gzip output through a pipe and compares the decompressed lines with
// the original file.
BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
  std::fstream ref(FileLocation().c_str(), std::ios::in);
  const std::string command("gzip <\"" + FileLocation() + "\"");
  FILE * catter = popen(command.c_str(), "r");
  BOOST_REQUIRE(catter);
  // FilePiece gets its own duplicate of the pipe descriptor; the FILE* is
  // closed separately with pclose below.
  FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
  for (std::string ref_line; getline(ref, ref_line); ) {
    StringPiece test_line(test.ReadLine());
    // I submitted a bug report to ICU: http://bugs.icu-project.org/trac/ticket/7924
    // Two empty lines are not compared.
    if (test_line.empty() && ref_line.empty()) continue;
    BOOST_CHECK_EQUAL(ref_line, test_line);
  }
  BOOST_CHECK_THROW(test.get(), EndOfFileException);
  BOOST_REQUIRE(!pclose(catter));
}
#endif // __APPLE__
#endif // HAVE_ZLIB
// Number parsing: an integer too large for ReadULong must raise
// ParseNumberException and still be readable as a token; a float written by
// FileStream must read back exactly; a small integer must parse.
BOOST_AUTO_TEST_CASE(Numbers) {
  scoped_fd file(MakeTemp(FileLocation()));
  const float floating = 3.2;
  {
    // Scoped so the stream is destroyed (and therefore flushed) before the
    // descriptor is rewound.
    util::FileStream writing(file.get());
    writing << "94389483984398493890287 ";
    writing << floating;
    writing << " 5";
  }
  SeekOrThrow(file.get(), 0);
  util::FilePiece f(file.release());
  BOOST_CHECK_THROW(f.ReadULong(), ParseNumberException);
  BOOST_CHECK_EQUAL("94389483984398493890287", f.ReadDelimited());
  // Yes, exactly equal.  Isn't double-conversion wonderful?
  BOOST_CHECK_EQUAL(floating, f.ReadFloat());
  BOOST_CHECK_EQUAL(5, f.ReadULong());
}
} // namespace
} // namespace util
/* Like std::ofstream but without being incredibly slow. Backed by a raw fd.
* Supports most of the built-in types except for long double.
*/
#ifndef UTIL_FILE_STREAM_H
#define UTIL_FILE_STREAM_H
#include "fake_ostream.hh"
#include "file.hh"
#include "scoped.hh"
#include <cassert>
#include <cstring>
#include <stdint.h>
namespace util {
class FileStream : public FakeOStream<FileStream> {
  public:
    /**
     * Create a stream that buffers output destined for file descriptor out.
     *
     * @param out File descriptor to write to (default -1).
     * @param buffer_size Requested buffer size in bytes; the buffer is never
     *   smaller than kToStringMaxBytes.
     */
    explicit FileStream(int out = -1, std::size_t buffer_size = 8192)
      : buf_(util::MallocOrThrow(std::max<std::size_t>(buffer_size, kToStringMaxBytes))),
        current_(static_cast<char*>(buf_.get())),
        end_(current_ + std::max<std::size_t>(buffer_size, kToStringMaxBytes)),
        fd_(out) {}

#if __cplusplus >= 201103L
    /**
     * Take over the buffer of from.  Afterwards from has current_ == end_ ==
     * from.buf_.get() (presumably NULL once released), so it reports zero
     * remaining capacity and its flush() has nothing to write.
     */
    FileStream(FileStream &&from) noexcept : buf_(from.buf_.release()), current_(from.current_), end_(from.end_), fd_(from.fd_) {
      from.end_ = static_cast<char*>(from.buf_.get());
      from.current_ = from.end_;
    }
#endif

    /** Flushes pending bytes.  NOTE(review): flush() calls WriteOrThrow, so a
     *  write failure here would escape from a destructor. */
    ~FileStream() {
      flush();
    }

    /** Flush what is buffered to the current descriptor, then switch to `to`. */
    void SetFD(int to) {
      flush();
      fd_ = to;
    }

    /** Write any buffered bytes to fd_ and reset the buffer to empty. */
    FileStream &flush() {
      char *const begin = static_cast<char*>(buf_.get());
      if (current_ != begin) {
        util::WriteOrThrow(fd_, begin, current_ - begin);
        current_ = begin;
      }
      return *this;
    }

    /**
     * Write length bytes starting at data; for writes of arbitrary size.
     *
     * Data that fits in the remaining buffer space is copied.  Otherwise the
     * buffer is flushed first; data that still does not fit goes straight to
     * the descriptor without being copied.
     */
    FileStream &write(const void *data, std::size_t length) {
      // Compare against the remaining capacity instead of forming
      // current_ + length, which is undefined once it passes end_.
      if (UTIL_LIKELY(length <= Available())) {
        std::memcpy(current_, data, length);
        current_ += length;
        return *this;
      }
      flush();
      if (length <= Available()) {
        std::memcpy(current_, data, length);
        current_ += length;
      } else {
        util::WriteOrThrow(fd_, data, length);
      }
      return *this;
    }

    /** Flush, then seek the underlying descriptor to offset `to`. */
    FileStream &seekp(uint64_t to) {
      flush();
      util::SeekOrThrow(fd_, to);
      return *this;
    }

  protected:
    friend class FakeOStream<FileStream>;
    // For writes directly to buffer guaranteed to have amount < buffer size.
    // Returns where the caller may write at least amount bytes.
    char *Ensure(std::size_t amount) {
      if (UTIL_UNLIKELY(amount > Available())) {
        flush();
        assert(amount <= Available());
      }
      return current_;
    }

    // Record that the caller wrote up to (not including) to.
    void AdvanceTo(char *to) {
      current_ = to;
      assert(current_ <= end_);
    }

  private:
    // Bytes that can still be buffered before a flush is needed.
    std::size_t Available() const {
      return static_cast<std::size_t>(end_ - current_);
    }

    util::scoped_malloc buf_;
    char *current_, *end_;
    int fd_;
};
} // namespace
#endif
#ifndef UTIL_FIXED_ARRAY_H
#define UTIL_FIXED_ARRAY_H
#include "scoped.hh"
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <new>
#include <utility>
namespace util {
/**
* Defines an array with fixed maximum size.
*
* Ever want an array of things but they don't have a default constructor or
* are non-copyable? FixedArray allows constructing one at a time.
*/
template <class T> class FixedArray {
  public:
    /** Initialize with a given size bound but do not construct the objects. */
    explicit FixedArray(std::size_t limit) {
      Init(limit);
    }

    /**
     * Constructs an instance, but does not initialize it.
     *
     * Any objects constructed in this manner must be subsequently @ref FixedArray::Init() "initialized" prior to use.
     *
     * @see FixedArray::Init()
     */
    FixedArray()
      : newed_end_(NULL)
#ifndef NDEBUG
      , allocated_end_(NULL)
#endif
    {}

    /**
     * Initialize with a given size bound but do not construct the objects.
     *
     * This method is responsible for allocating memory.
     * Objects stored in this array will be constructed in a location within this allocated memory.
     *
     * @param count Maximum number of objects the array may hold; 0 is allowed.
     * @throws std::bad_alloc if the memory cannot be obtained or the byte
     *   size does not fit in std::size_t.
     */
    void Init(std::size_t count) {
      assert(!block_.get());
      // Refuse counts whose byte size would wrap around.
      if (count > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc();
      block_.reset(malloc(sizeof(T) * count));
      // malloc(0) is allowed to return NULL; that is not a failure.
      if (!block_.get() && count) throw std::bad_alloc();
      newed_end_ = begin();
#ifndef NDEBUG
      allocated_end_ = begin() + count;
#endif
    }

    /**
     * Constructs a copy of the provided array.
     *
     * The copy's capacity equals the number of elements currently in from.
     *
     * @param from Array whose elements should be copied into this newly-constructed data structure.
     */
    FixedArray(const FixedArray &from) {
      const std::size_t size = from.size();
      Init(size);
      try {
        for (std::size_t i = 0; i < size; ++i) {
          push_back(from[i]);
        }
      } catch (...) {
        // The destructor will not run for a half-built object, so destroy
        // the elements copied so far before propagating.
        clear();
        throw;
      }
    }

    /**
     * Destroys the stored objects; the memory block is released by block_.
     */
    ~FixedArray() { clear(); }

#if __cplusplus >= 201103L
    /** Takes over the storage and elements of from, leaving from empty. */
    FixedArray(FixedArray &&from) noexcept
      : block_(std::move(from.block_)),
      newed_end_(from.newed_end_)
#  ifndef NDEBUG
      , allocated_end_(from.allocated_end_)
#  endif // NDEBUG
    {
      from.newed_end_ = NULL;
#  ifndef NDEBUG
      from.allocated_end_ = NULL;
#  endif // NDEBUG
    }
#endif // C++11

    /** Gets a pointer to the first object currently stored in this data structure. */
    T *begin() { return static_cast<T*>(block_.get()); }

    /** Gets a const pointer to the first object currently stored in this data structure. */
    const T *begin() const { return static_cast<const T*>(block_.get()); }

    /** Gets a pointer to one past the last object currently stored in this data structure. */
    T *end() { return newed_end_; }

    /** Gets a const pointer to one past the last object currently stored in this data structure. */
    const T *end() const { return newed_end_; }

    /** Gets a reference to the last object currently stored in this data structure. */
    T &back() { return *(end() - 1); }

    /** Gets a const reference to the last object currently stored in this data structure. */
    const T &back() const { return *(end() - 1); }

    /** Gets the number of objects currently stored in this data structure. */
    std::size_t size() const { return end() - begin(); }

    /** Returns true if there are no objects currently stored in this data structure. */
    bool empty() const { return begin() == end(); }

    /**
     * Gets a reference to the object with index i currently stored in this data structure.
     *
     * @param i Index of the object to reference
     */
    T &operator[](std::size_t i) {
      assert(i < size());
      return begin()[i];
    }

    /**
     * Gets a const reference to the object with index i currently stored in this data structure.
     *
     * @param i Index of the object to reference
     */
    const T &operator[](std::size_t i) const {
      assert(i < size());
      return begin()[i];
    }

    /**
     * Constructs a new object using the provided parameter,
     * and stores it in this data structure.
     *
     * The memory backing the constructed object is managed by this data structure.
     * I miss C++11 variadic templates.
     */
#if __cplusplus >= 201103L
    /**
     * Constructs a T in place at the end of the array.
     *
     * Arguments are perfectly forwarded, so rvalues are moved rather than
     * copied and move-only arguments are accepted.
     *
     * @return Pointer to the newly constructed object.
     */
    template <typename... Construct> T *emplace_back(Construct&&... construct) {
      T *ret = end();
      new (end()) T(std::forward<Construct>(construct)...);
      Constructed();
      return ret;
    }
    /** Same as emplace_back. */
    template <typename... Construct> T *push_back(Construct&&... construct) {
      T *ret = end();
      new (end()) T(std::forward<Construct>(construct)...);
      Constructed();
      return ret;
    }
#else
    void push_back() {
      new (end()) T();
      Constructed();
    }
    template <class C> void push_back(const C &c) {
      new (end()) T(c);
      Constructed();
    }
    template <class C> void push_back(C &c) {
      new (end()) T(c);
      Constructed();
    }
    template <class C, class D> void push_back(const C &c, const D &d) {
      new (end()) T(c, d);
      Constructed();
    }
#endif

    /** Destroys the last object; the array must not be empty. */
    void pop_back() {
      back().~T();
      --newed_end_;
    }

    /**
     * Removes all elements from this array.
     */
    void clear() {
      while (newed_end_ != begin())
        pop_back();
    }

  protected:
    // Always call Constructed after successful completion of new.
    void Constructed() {
      ++newed_end_;
#ifndef NDEBUG
      assert(newed_end_ <= allocated_end_);
#endif
    }

  private:
    // Raw storage for the elements.
    util::scoped_malloc block_;

    // One past the last constructed element.
    T *newed_end_;

#ifndef NDEBUG
    // One past the end of the allocated storage; debug-only bounds check.
    T *allocated_end_;
#endif
};
} // namespace util
#endif // UTIL_FIXED_ARRAY_H
#include "float_to_string.hh"
#include "double-conversion/double-conversion.h"
#include "double-conversion/utils.h"
namespace util {
namespace {
// Shared converter used by both ToString overloads below.  Arguments follow
// the DoubleToStringConverter constructor's parameter order.
const double_conversion::DoubleToStringConverter kConverter(
    double_conversion::DoubleToStringConverter::NO_FLAGS,
    "inf",   // text for infinity
    "NaN",   // text for not-a-number
    'e',     // exponent character
    -6,      // decimal_in_shortest_low
    21,      // decimal_in_shortest_high
    6,       // max leading padding zeroes (precision mode)
    0);      // max trailing padding zeroes (precision mode)
} // namespace
char *ToString(double value, char *to) {
double_conversion::StringBuilder builder(to, ToStringBuf<double>::kBytes);
kConverter.ToShortest(value, &builder);
return &to[builder.position()];
}
char *ToString(float value, char *to) {
double_conversion::StringBuilder builder(to, ToStringBuf<float>::kBytes);
kConverter.ToShortestSingle(value, &builder);
return &to[builder.position()];
}
} // namespace util
#ifndef UTIL_FLOAT_TO_STRING_H
#define UTIL_FLOAT_TO_STRING_H
// Just for ToStringBuf
#include "integer_to_string.hh"
namespace util {
template <> struct ToStringBuf<double> {
  // Bytes ToString(double, ...) may touch.  kBase10MaximalLength (17) only
  // bounds the significant digits.  With decimal_in_shortest_low = -6 the
  // shortest form can be as long as "-0.0000033333333333333333": sign, "0.",
  // five zeroes and 17 digits, 25 characters in all.  StringBuilder also
  // writes a terminating NUL when it is destroyed, hence 26.
  // NOTE(review): confirm kToStringMaxBytes (integer_to_string.hh) is at
  // least this large, since stream buffers are sized from it.
  static const unsigned kBytes = 26;
};

// Single wasn't documented in double conversion, so be conservative.  The
// longest shortest-single form needs sign, "0.", five zeroes and 9 digits
// (17 characters) plus the NUL, which fits in 19.
template <> struct ToStringBuf<float> {
  static const unsigned kBytes = 19;
};
char *ToString(double value, char *to);
char *ToString(float value, char *to);
} // namespace util
#endif // UTIL_FLOAT_TO_STRING_H
/*
POSIX getopt for Windows
AT&T Public License
Code given out at the 1985 UNIFORUM conference in Dallas.
*/
#ifndef __GNUC__
#include "getopt.hh"
#include <stdio.h>
#include <string.h>
#define NULL 0
#define EOF (-1)
#define ERR(s, c) if(opterr){\
char errbuf[2];\
errbuf[0] = c; errbuf[1] = '\n';\
fputs(argv[0], stderr);\
fputs(s, stderr);\
fputc(c, stderr);}
//(void) write(2, argv[0], (unsigned)strlen(argv[0]));\
//(void) write(2, s, (unsigned)strlen(s));\
//(void) write(2, errbuf, 2);}
int opterr = 1;
int optind = 1;
int optopt;
char *optarg;
int
getopt(argc, argv, opts)
int argc;
char **argv, *opts;
{
static int sp = 1;
register int c;
register char *cp;
if(sp == 1)
if(optind >= argc ||
argv[optind][0] != '-' || argv[optind][1] == '\0')
return(EOF);
else if(strcmp(argv[optind], "--") == NULL) {
optind++;
return(EOF);
}
optopt = c = argv[optind][sp];
if(c == ':' || (cp=strchr(opts, c)) == NULL) {
ERR(": illegal option -- ", c);
if(argv[optind][++sp] == '\0') {
optind++;
sp = 1;
}
return('?');
}
if(*++cp == ':') {
if(argv[optind][sp+1] != '\0')
optarg = &argv[optind++][sp+1];
else if(++optind >= argc) {
ERR(": option requires an argument -- ", c);
sp = 1;
return('?');
} else
optarg = argv[optind++];
sp = 1;
} else {
if(argv[optind][++sp] == '\0') {
sp = 1;
optind++;
}
optarg = NULL;
}
return(c);
}
#endif /* __GNUC__ */
/*
POSIX getopt for Windows
AT&T Public License
Code given out at the 1985 UNIFORUM conference in Dallas.
*/
#ifdef __GNUC__
#include <getopt.h>
#endif
#ifndef __GNUC__
#ifndef UTIL_GETOPT_H
#define UTIL_GETOPT_H
/* Declarations for the getopt replacement; C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* Nonzero (the default) makes getopt print error messages to stderr. */
extern int opterr;
/* Index in argv of the next element getopt will examine; starts at 1. */
extern int optind;
/* The option character getopt examined most recently. */
extern int optopt;
/* Argument of the last option that takes one; NULL for options without. */
extern char *optarg;
/* Returns the next option character from argv, '?' on an unknown option or
 * missing argument, or -1 when there are no more options. */
extern int getopt(int argc, char **argv, char *opts);
#ifdef __cplusplus
}
#endif
#endif /* UTIL_GETOPT_H */
#endif /* __GNUC__ */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment