gaoqiong / RapidASR · Commits

Commit 688b6eac, authored Apr 07, 2023 by SWHL
Update files

Showing 20 changed files with 3173 additions and 0 deletions (+3173, -0).
cpp/thirdpart/kenlm/util/have.hh (+13, -0)
cpp/thirdpart/kenlm/util/integer_to_string.cc (+667, -0)
cpp/thirdpart/kenlm/util/integer_to_string.hh (+66, -0)
cpp/thirdpart/kenlm/util/integer_to_string_test.cc (+81, -0)
cpp/thirdpart/kenlm/util/joint_sort.hh (+146, -0)
cpp/thirdpart/kenlm/util/joint_sort_test.cc (+62, -0)
cpp/thirdpart/kenlm/util/mmap.cc (+405, -0)
cpp/thirdpart/kenlm/util/mmap.hh (+239, -0)
cpp/thirdpart/kenlm/util/multi_intersection.hh (+80, -0)
cpp/thirdpart/kenlm/util/multi_intersection_test.cc (+63, -0)
cpp/thirdpart/kenlm/util/murmur_hash.cc (+175, -0)
cpp/thirdpart/kenlm/util/murmur_hash.hh (+18, -0)
cpp/thirdpart/kenlm/util/parallel_read.cc (+69, -0)
cpp/thirdpart/kenlm/util/parallel_read.hh (+16, -0)
cpp/thirdpart/kenlm/util/pcqueue.hh (+156, -0)
cpp/thirdpart/kenlm/util/pcqueue_test.cc (+20, -0)
cpp/thirdpart/kenlm/util/pool.cc (+38, -0)
cpp/thirdpart/kenlm/util/pool.hh (+122, -0)
cpp/thirdpart/kenlm/util/probing_hash_table.hh (+421, -0)
cpp/thirdpart/kenlm/util/probing_hash_table_benchmark_main.cc (+316, -0)
Too many changes to show: to preserve performance, only 364 of 364+ files are displayed.
cpp/thirdpart/kenlm/util/have.hh (new file, mode 100644)

/* Optional packages. You might want to integrate this with your build system e.g. config.h from ./configure. */
#ifndef UTIL_HAVE_H
#define UTIL_HAVE_H

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifndef HAVE_ICU
//#define HAVE_ICU
#endif

#endif // UTIL_HAVE_H
cpp/thirdpart/kenlm/util/integer_to_string.cc (new file, mode 100644)

#include <iostream>

/* Fast integer to string conversion.
Source: https://github.com/miloyip/itoa-benchmark
Local modifications:
1. Return end of buffer instead of null terminating
2. Collapse to single file
3. Namespace
4. Remove test hook
5. Non-x86 support from the branch_lut code
6. Rename functions
7. Require __SSE2__ on i386

Copyright (C) 2014 Milo Yip

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

Which is based on: http://0x80.pl/snippets/asm/sse-utoa.c

SSE: conversion integers to decimal representation

Author: Wojciech Muła
e-mail: wojciech_mula@poczta.onet.pl
www: http://0x80.pl/

License: BSD

initial release 2011-10-21
$Id$
*/

#include "integer_to_string.hh"
#include <cassert>
#include <stdint.h>

namespace util {

namespace {
const char gDigitsLut[200] = {
  '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
  '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
  '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
  '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
  '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
  '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
  '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
  '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
  '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
  '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
};
} // namespace

// SSE2 implementation according to http://0x80.pl/articles/sse-itoa.html
// Modifications: (1) fix incorrect digits (2) accept all ranges (3) write to user provided buffer.

#if defined(__amd64) || defined(_M_X64) || (defined(__SSE2__) && (defined(_M_IX86) || defined(i386)))

#include <emmintrin.h>

#ifdef _MSC_VER
#include "intrin.h"
#endif

#ifdef _MSC_VER
#define ALIGN_PRE __declspec(align(16))
#define ALIGN_SUF
#else
#define ALIGN_PRE
#define ALIGN_SUF __attribute__ ((aligned(16)))
#endif

namespace {

static const uint32_t kDiv10000 = 0xd1b71759;
ALIGN_PRE static const uint32_t kDiv10000Vector[4] ALIGN_SUF = { kDiv10000, kDiv10000, kDiv10000, kDiv10000 };
ALIGN_PRE static const uint32_t k10000Vector[4] ALIGN_SUF = { 10000, 10000, 10000, 10000 };
ALIGN_PRE static const uint16_t kDivPowersVector[8] ALIGN_SUF = { 8389, 5243, 13108, 32768, 8389, 5243, 13108, 32768 }; // 10^3, 10^2, 10^1, 10^0
ALIGN_PRE static const uint16_t kShiftPowersVector[8] ALIGN_SUF = {
  1 << (16 - (23 + 2 - 16)),
  1 << (16 - (19 + 2 - 16)),
  1 << (16 - 1 - 2),
  1 << (15),
  1 << (16 - (23 + 2 - 16)),
  1 << (16 - (19 + 2 - 16)),
  1 << (16 - 1 - 2),
  1 << (15)
};
ALIGN_PRE static const uint16_t k10Vector[8] ALIGN_SUF = { 10, 10, 10, 10, 10, 10, 10, 10 };
ALIGN_PRE static const char kAsciiZero[16] ALIGN_SUF = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0' };

inline __m128i Convert8DigitsSSE2(uint32_t value) {
  assert(value <= 99999999);

  // abcd, efgh = abcdefgh divmod 10000
  const __m128i abcdefgh = _mm_cvtsi32_si128(value);
  const __m128i abcd = _mm_srli_epi64(_mm_mul_epu32(abcdefgh, reinterpret_cast<const __m128i*>(kDiv10000Vector)[0]), 45);
  const __m128i efgh = _mm_sub_epi32(abcdefgh, _mm_mul_epu32(abcd, reinterpret_cast<const __m128i*>(k10000Vector)[0]));

  // v1 = [ abcd, efgh, 0, 0, 0, 0, 0, 0 ]
  const __m128i v1 = _mm_unpacklo_epi16(abcd, efgh);

  // v1a = v1 * 4 = [ abcd * 4, efgh * 4, 0, 0, 0, 0, 0, 0 ]
  const __m128i v1a = _mm_slli_epi64(v1, 2);

  // v2 = [ abcd * 4, abcd * 4, abcd * 4, abcd * 4, efgh * 4, efgh * 4, efgh * 4, efgh * 4 ]
  const __m128i v2a = _mm_unpacklo_epi16(v1a, v1a);
  const __m128i v2 = _mm_unpacklo_epi32(v2a, v2a);

  // v4 = v2 div 10^3, 10^2, 10^1, 10^0 = [ a, ab, abc, abcd, e, ef, efg, efgh ]
  const __m128i v3 = _mm_mulhi_epu16(v2, reinterpret_cast<const __m128i*>(kDivPowersVector)[0]);
  const __m128i v4 = _mm_mulhi_epu16(v3, reinterpret_cast<const __m128i*>(kShiftPowersVector)[0]);

  // v5 = v4 * 10 = [ a0, ab0, abc0, abcd0, e0, ef0, efg0, efgh0 ]
  const __m128i v5 = _mm_mullo_epi16(v4, reinterpret_cast<const __m128i*>(k10Vector)[0]);

  // v6 = v5 << 16 = [ 0, a0, ab0, abc0, 0, e0, ef0, efg0 ]
  const __m128i v6 = _mm_slli_epi64(v5, 16);

  // v7 = v4 - v6 = { a, b, c, d, e, f, g, h }
  const __m128i v7 = _mm_sub_epi16(v4, v6);

  return v7;
}

inline __m128i ShiftDigits_SSE2(__m128i a, unsigned digit) {
  assert(digit <= 8);
  switch (digit) {
    case 0: return a;
    case 1: return _mm_srli_si128(a, 1);
    case 2: return _mm_srli_si128(a, 2);
    case 3: return _mm_srli_si128(a, 3);
    case 4: return _mm_srli_si128(a, 4);
    case 5: return _mm_srli_si128(a, 5);
    case 6: return _mm_srli_si128(a, 6);
    case 7: return _mm_srli_si128(a, 7);
    case 8: return _mm_srli_si128(a, 8);
  }
  return a; // should not execute here.
}

} // namespace

// Original name: u32toa_sse2
char *ToString(uint32_t value, char *buffer) {
  if (value < 10000) {
    const uint32_t d1 = (value / 100) << 1;
    const uint32_t d2 = (value % 100) << 1;

    if (value >= 1000) *buffer++ = gDigitsLut[d1];
    if (value >= 100) *buffer++ = gDigitsLut[d1 + 1];
    if (value >= 10) *buffer++ = gDigitsLut[d2];
    *buffer++ = gDigitsLut[d2 + 1];
    //*buffer++ = '\0';
    return buffer;
  } else if (value < 100000000) {
    // Experiment shows that this case SSE2 is slower
#if 0
    const __m128i a = Convert8DigitsSSE2(value);

    // Convert to bytes, add '0'
    const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);

    // Count number of digit
    const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
    unsigned long digit;
#ifdef _MSC_VER
    _BitScanForward(&digit, ~mask | 0x8000);
#else
    digit = __builtin_ctz(~mask | 0x8000);
#endif

    // Shift digits to the beginning
    __m128i result = ShiftDigits_SSE2(va, digit);
    //__m128i result = _mm_srl_epi64(va, _mm_cvtsi32_si128(digit * 8));
    _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
    buffer[8 - digit] = '\0';
#else
    // value = bbbbcccc
    const uint32_t b = value / 10000;
    const uint32_t c = value % 10000;

    const uint32_t d1 = (b / 100) << 1;
    const uint32_t d2 = (b % 100) << 1;
    const uint32_t d3 = (c / 100) << 1;
    const uint32_t d4 = (c % 100) << 1;

    if (value >= 10000000) *buffer++ = gDigitsLut[d1];
    if (value >= 1000000) *buffer++ = gDigitsLut[d1 + 1];
    if (value >= 100000) *buffer++ = gDigitsLut[d2];
    *buffer++ = gDigitsLut[d2 + 1];

    *buffer++ = gDigitsLut[d3];
    *buffer++ = gDigitsLut[d3 + 1];
    *buffer++ = gDigitsLut[d4];
    *buffer++ = gDigitsLut[d4 + 1];
    // *buffer++ = '\0';
    return buffer;
#endif
  } else {
    // value = aabbbbbbbb in decimal
    const uint32_t a = value / 100000000; // 1 to 42
    value %= 100000000;

    if (a >= 10) {
      const unsigned i = a << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
    } else
      *buffer++ = '0' + static_cast<char>(a);

    const __m128i b = Convert8DigitsSSE2(value);
    const __m128i ba = _mm_add_epi8(_mm_packus_epi16(_mm_setzero_si128(), b), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
    const __m128i result = _mm_srli_si128(ba, 8);
    _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
    // buffer[8] = '\0';
    return buffer + 8;
  }
}

// Original name: u64toa_sse2
char *ToString(uint64_t value, char *buffer) {
  if (value < 100000000) {
    uint32_t v = static_cast<uint32_t>(value);
    if (v < 10000) {
      const uint32_t d1 = (v / 100) << 1;
      const uint32_t d2 = (v % 100) << 1;

      if (v >= 1000) *buffer++ = gDigitsLut[d1];
      if (v >= 100) *buffer++ = gDigitsLut[d1 + 1];
      if (v >= 10) *buffer++ = gDigitsLut[d2];
      *buffer++ = gDigitsLut[d2 + 1];
      //*buffer++ = '\0';
      return buffer;
    } else {
      // Experiment shows that this case SSE2 is slower
#if 0
      const __m128i a = Convert8DigitsSSE2(v);

      // Convert to bytes, add '0'
      const __m128i va = _mm_add_epi8(_mm_packus_epi16(a, _mm_setzero_si128()), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);

      // Count number of digit
      const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
      unsigned long digit;
#ifdef _MSC_VER
      _BitScanForward(&digit, ~mask | 0x8000);
#else
      digit = __builtin_ctz(~mask | 0x8000);
#endif

      // Shift digits to the beginning
      __m128i result = ShiftDigits_SSE2(va, digit);
      _mm_storel_epi64(reinterpret_cast<__m128i*>(buffer), result);
      buffer[8 - digit] = '\0';
#else
      // value = bbbbcccc
      const uint32_t b = v / 10000;
      const uint32_t c = v % 10000;

      const uint32_t d1 = (b / 100) << 1;
      const uint32_t d2 = (b % 100) << 1;
      const uint32_t d3 = (c / 100) << 1;
      const uint32_t d4 = (c % 100) << 1;

      if (value >= 10000000) *buffer++ = gDigitsLut[d1];
      if (value >= 1000000) *buffer++ = gDigitsLut[d1 + 1];
      if (value >= 100000) *buffer++ = gDigitsLut[d2];
      *buffer++ = gDigitsLut[d2 + 1];

      *buffer++ = gDigitsLut[d3];
      *buffer++ = gDigitsLut[d3 + 1];
      *buffer++ = gDigitsLut[d4];
      *buffer++ = gDigitsLut[d4 + 1];
      //*buffer++ = '\0';
      return buffer;
#endif
    }
  } else if (value < 10000000000000000) {
    const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
    const uint32_t v1 = static_cast<uint32_t>(value % 100000000);

    const __m128i a0 = Convert8DigitsSSE2(v0);
    const __m128i a1 = Convert8DigitsSSE2(v1);

    // Convert to bytes, add '0'
    const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);

    // Count number of digit
    const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, reinterpret_cast<const __m128i*>(kAsciiZero)[0]));
#ifdef _MSC_VER
    unsigned long digit;
    _BitScanForward(&digit, ~mask | 0x8000);
#else
    unsigned digit = __builtin_ctz(~mask | 0x8000);
#endif

    // Shift digits to the beginning
    __m128i result = ShiftDigits_SSE2(va, digit);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
    // buffer[16 - digit] = '\0';
    return &buffer[16 - digit];
  } else {
    const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844
    value %= 10000000000000000;

    if (a < 10)
      *buffer++ = '0' + static_cast<char>(a);
    else if (a < 100) {
      const uint32_t i = a << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
    } else if (a < 1000) {
      *buffer++ = '0' + static_cast<char>(a / 100);

      const uint32_t i = (a % 100) << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
    } else {
      const uint32_t i = (a / 100) << 1;
      const uint32_t j = (a % 100) << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
      *buffer++ = gDigitsLut[j];
      *buffer++ = gDigitsLut[j + 1];
    }

    const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
    const uint32_t v1 = static_cast<uint32_t>(value % 100000000);

    const __m128i a0 = Convert8DigitsSSE2(v0);
    const __m128i a1 = Convert8DigitsSSE2(v1);

    // Convert to bytes, add '0'
    const __m128i va = _mm_add_epi8(_mm_packus_epi16(a0, a1), reinterpret_cast<const __m128i*>(kAsciiZero)[0]);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), va);
    // buffer[16] = '\0';
    return &buffer[16];
  }
}

#else // Generic Non-x86 case

// Original name: u32toa_branchlut
char *ToString(uint32_t value, char *buffer) {
  if (value < 10000) {
    const uint32_t d1 = (value / 100) << 1;
    const uint32_t d2 = (value % 100) << 1;

    if (value >= 1000) *buffer++ = gDigitsLut[d1];
    if (value >= 100) *buffer++ = gDigitsLut[d1 + 1];
    if (value >= 10) *buffer++ = gDigitsLut[d2];
    *buffer++ = gDigitsLut[d2 + 1];
  } else if (value < 100000000) {
    // value = bbbbcccc
    const uint32_t b = value / 10000;
    const uint32_t c = value % 10000;

    const uint32_t d1 = (b / 100) << 1;
    const uint32_t d2 = (b % 100) << 1;
    const uint32_t d3 = (c / 100) << 1;
    const uint32_t d4 = (c % 100) << 1;

    if (value >= 10000000) *buffer++ = gDigitsLut[d1];
    if (value >= 1000000) *buffer++ = gDigitsLut[d1 + 1];
    if (value >= 100000) *buffer++ = gDigitsLut[d2];
    *buffer++ = gDigitsLut[d2 + 1];

    *buffer++ = gDigitsLut[d3];
    *buffer++ = gDigitsLut[d3 + 1];
    *buffer++ = gDigitsLut[d4];
    *buffer++ = gDigitsLut[d4 + 1];
  } else {
    // value = aabbbbcccc in decimal
    const uint32_t a = value / 100000000; // 1 to 42
    value %= 100000000;

    if (a >= 10) {
      const unsigned i = a << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
    } else
      *buffer++ = '0' + static_cast<char>(a);

    const uint32_t b = value / 10000; // 0 to 9999
    const uint32_t c = value % 10000; // 0 to 9999

    const uint32_t d1 = (b / 100) << 1;
    const uint32_t d2 = (b % 100) << 1;
    const uint32_t d3 = (c / 100) << 1;
    const uint32_t d4 = (c % 100) << 1;

    *buffer++ = gDigitsLut[d1];
    *buffer++ = gDigitsLut[d1 + 1];
    *buffer++ = gDigitsLut[d2];
    *buffer++ = gDigitsLut[d2 + 1];
    *buffer++ = gDigitsLut[d3];
    *buffer++ = gDigitsLut[d3 + 1];
    *buffer++ = gDigitsLut[d4];
    *buffer++ = gDigitsLut[d4 + 1];
  }
  return buffer; //*buffer++ = '\0';
}

// Original name: u64toa_branchlut
char *ToString(uint64_t value, char *buffer) {
  if (value < 100000000) {
    uint32_t v = static_cast<uint32_t>(value);
    if (v < 10000) {
      const uint32_t d1 = (v / 100) << 1;
      const uint32_t d2 = (v % 100) << 1;

      if (v >= 1000) *buffer++ = gDigitsLut[d1];
      if (v >= 100) *buffer++ = gDigitsLut[d1 + 1];
      if (v >= 10) *buffer++ = gDigitsLut[d2];
      *buffer++ = gDigitsLut[d2 + 1];
    } else {
      // value = bbbbcccc
      const uint32_t b = v / 10000;
      const uint32_t c = v % 10000;

      const uint32_t d1 = (b / 100) << 1;
      const uint32_t d2 = (b % 100) << 1;
      const uint32_t d3 = (c / 100) << 1;
      const uint32_t d4 = (c % 100) << 1;

      if (value >= 10000000) *buffer++ = gDigitsLut[d1];
      if (value >= 1000000) *buffer++ = gDigitsLut[d1 + 1];
      if (value >= 100000) *buffer++ = gDigitsLut[d2];
      *buffer++ = gDigitsLut[d2 + 1];

      *buffer++ = gDigitsLut[d3];
      *buffer++ = gDigitsLut[d3 + 1];
      *buffer++ = gDigitsLut[d4];
      *buffer++ = gDigitsLut[d4 + 1];
    }
  } else if (value < 10000000000000000) {
    const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
    const uint32_t v1 = static_cast<uint32_t>(value % 100000000);

    const uint32_t b0 = v0 / 10000;
    const uint32_t c0 = v0 % 10000;

    const uint32_t d1 = (b0 / 100) << 1;
    const uint32_t d2 = (b0 % 100) << 1;
    const uint32_t d3 = (c0 / 100) << 1;
    const uint32_t d4 = (c0 % 100) << 1;

    const uint32_t b1 = v1 / 10000;
    const uint32_t c1 = v1 % 10000;

    const uint32_t d5 = (b1 / 100) << 1;
    const uint32_t d6 = (b1 % 100) << 1;
    const uint32_t d7 = (c1 / 100) << 1;
    const uint32_t d8 = (c1 % 100) << 1;

    if (value >= 1000000000000000) *buffer++ = gDigitsLut[d1];
    if (value >= 100000000000000) *buffer++ = gDigitsLut[d1 + 1];
    if (value >= 10000000000000) *buffer++ = gDigitsLut[d2];
    if (value >= 1000000000000) *buffer++ = gDigitsLut[d2 + 1];
    if (value >= 100000000000) *buffer++ = gDigitsLut[d3];
    if (value >= 10000000000) *buffer++ = gDigitsLut[d3 + 1];
    if (value >= 1000000000) *buffer++ = gDigitsLut[d4];
    if (value >= 100000000) *buffer++ = gDigitsLut[d4 + 1];

    *buffer++ = gDigitsLut[d5];
    *buffer++ = gDigitsLut[d5 + 1];
    *buffer++ = gDigitsLut[d6];
    *buffer++ = gDigitsLut[d6 + 1];
    *buffer++ = gDigitsLut[d7];
    *buffer++ = gDigitsLut[d7 + 1];
    *buffer++ = gDigitsLut[d8];
    *buffer++ = gDigitsLut[d8 + 1];
  } else {
    const uint32_t a = static_cast<uint32_t>(value / 10000000000000000); // 1 to 1844
    value %= 10000000000000000;

    if (a < 10)
      *buffer++ = '0' + static_cast<char>(a);
    else if (a < 100) {
      const uint32_t i = a << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
    } else if (a < 1000) {
      *buffer++ = '0' + static_cast<char>(a / 100);

      const uint32_t i = (a % 100) << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
    } else {
      const uint32_t i = (a / 100) << 1;
      const uint32_t j = (a % 100) << 1;
      *buffer++ = gDigitsLut[i];
      *buffer++ = gDigitsLut[i + 1];
      *buffer++ = gDigitsLut[j];
      *buffer++ = gDigitsLut[j + 1];
    }

    const uint32_t v0 = static_cast<uint32_t>(value / 100000000);
    const uint32_t v1 = static_cast<uint32_t>(value % 100000000);

    const uint32_t b0 = v0 / 10000;
    const uint32_t c0 = v0 % 10000;

    const uint32_t d1 = (b0 / 100) << 1;
    const uint32_t d2 = (b0 % 100) << 1;
    const uint32_t d3 = (c0 / 100) << 1;
    const uint32_t d4 = (c0 % 100) << 1;

    const uint32_t b1 = v1 / 10000;
    const uint32_t c1 = v1 % 10000;

    const uint32_t d5 = (b1 / 100) << 1;
    const uint32_t d6 = (b1 % 100) << 1;
    const uint32_t d7 = (c1 / 100) << 1;
    const uint32_t d8 = (c1 % 100) << 1;

    *buffer++ = gDigitsLut[d1];
    *buffer++ = gDigitsLut[d1 + 1];
    *buffer++ = gDigitsLut[d2];
    *buffer++ = gDigitsLut[d2 + 1];
    *buffer++ = gDigitsLut[d3];
    *buffer++ = gDigitsLut[d3 + 1];
    *buffer++ = gDigitsLut[d4];
    *buffer++ = gDigitsLut[d4 + 1];
    *buffer++ = gDigitsLut[d5];
    *buffer++ = gDigitsLut[d5 + 1];
    *buffer++ = gDigitsLut[d6];
    *buffer++ = gDigitsLut[d6 + 1];
    *buffer++ = gDigitsLut[d7];
    *buffer++ = gDigitsLut[d7 + 1];
    *buffer++ = gDigitsLut[d8];
    *buffer++ = gDigitsLut[d8 + 1];
  }
  return buffer;
}

#endif // End of architecture if statement.

// Signed wrappers. The negation is done on the unsigned version because
// doing so has defined behavior for INT_MIN.
char *ToString(int32_t value, char *to) {
  uint32_t un = static_cast<uint32_t>(value);
  if (value < 0) {
    *to++ = '-';
    un = -un;
  }
  return ToString(un, to);
}
char *ToString(int64_t value, char *to) {
  uint64_t un = static_cast<uint64_t>(value);
  if (value < 0) {
    *to++ = '-';
    un = -un;
  }
  return ToString(un, to);
}

// No optimization for this case yet.
char *ToString(int16_t value, char *to) {
  return ToString((int32_t)value, to);
}
char *ToString(uint16_t value, char *to) {
  return ToString((uint32_t)value, to);
}

// void * to string. This hasn't been optimized at all really.
namespace {
const char kHexDigits[] = "0123456789abcdef";
} // namespace

char *ToString(const void *v, char *to) {
  *to++ = '0';
  *to++ = 'x';
  // Fun fact: gcc/clang boost::lexical_cast on Linux do just "0" while clang on OS X does "0x0"
  // I happen to prefer 0x0.
  if (!v) {
    *to++ = '0';
    return to;
  }
  uintptr_t value = reinterpret_cast<uintptr_t>(v);
  uint8_t shift = sizeof(void*) * 8 - 4;
  for (; !(value >> shift); shift -= 4) {}
  for (; ; shift -= 4) {
    *to++ = kHexDigits[(value >> shift) & 0xf];
    if (!shift) break;
  }
  return to;
}

} // namespace util
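As an aside (not part of the commit), a scalar sketch of the two-digits-at-a-time trick that gDigitsLut enables: an index of (n % 100) << 1 selects a pre-rendered pair of ASCII digits, halving the number of divisions compared to emitting one digit at a time.

#include <cstdint>

// Hypothetical helper mirroring how the functions above use gDigitsLut:
// write the two decimal digits of n (0..99) from a 200-entry pair table.
inline char *AppendTwoDigits(uint32_t n, const char *pair_table, char *out) {
  const uint32_t idx = (n % 100) << 1;  // each table entry is two chars wide
  *out++ = pair_table[idx];
  *out++ = pair_table[idx + 1];
  return out;
}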
cpp/thirdpart/kenlm/util/integer_to_string.hh (new file, mode 100644)

#ifndef UTIL_INTEGER_TO_STRING_H
#define UTIL_INTEGER_TO_STRING_H
#include <cstddef>
#include <stdint.h>

namespace util {

/* These functions convert integers to strings and return the end pointer.
 */
char *ToString(uint32_t value, char *to);
char *ToString(uint64_t value, char *to);

// Implemented as wrappers to above
char *ToString(int32_t value, char *to);
char *ToString(int64_t value, char *to);

// Calls the 32-bit versions for now.
char *ToString(uint16_t value, char *to);
char *ToString(int16_t value, char *to);

char *ToString(const void *value, char *to);

inline char *ToString(bool value, char *to) {
  *to++ = '0' + value;
  return to;
}

// How many bytes to reserve in the buffer for these strings:
// g++ 4.9.1 doesn't work with this:
//   static const std::size_t kBytes = 5;
// So use enum.
template <class T> struct ToStringBuf;
template <> struct ToStringBuf<bool> {
  enum { kBytes = 1 };
};
template <> struct ToStringBuf<uint16_t> {
  enum { kBytes = 5 };
};
template <> struct ToStringBuf<int16_t> {
  enum { kBytes = 6 };
};
template <> struct ToStringBuf<uint32_t> {
  enum { kBytes = 10 };
};
template <> struct ToStringBuf<int32_t> {
  enum { kBytes = 11 };
};
template <> struct ToStringBuf<uint64_t> {
  enum { kBytes = 20 };
};
template <> struct ToStringBuf<int64_t> {
  // Not a typo.  2^63 has 19 digits.
  enum { kBytes = 20 };
};
template <> struct ToStringBuf<const void*> {
  // Either 18 on 64-bit or 10 on 32-bit.
  enum { kBytes = sizeof(const void*) * 2 + 2 };
};

// Maximum over this and float.
enum { kToStringMaxBytes = 20 };

} // namespace util

#endif // UTIL_INTEGER_TO_STRING_H
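A minimal usage sketch (mine, not part of the commit): ToStringBuf<T>::kBytes sizes a worst-case buffer and ToString returns the end pointer without writing a terminator, so the length is the pointer difference.

#include "integer_to_string.hh"
#include <string>

// Assumed example: format a uint64_t using the declarations above.
std::string FormatU64(uint64_t value) {
  char buf[util::ToStringBuf<uint64_t>::kBytes];
  char *end = util::ToString(value, buf);  // no '\0' is appended
  return std::string(buf, end - buf);
}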
cpp/thirdpart/kenlm/util/integer_to_string_test.cc (new file, mode 100644)

#define BOOST_LEXICAL_CAST_ASSUME_C_LOCALE
#include "integer_to_string.hh"
#include "string_piece.hh"

#define BOOST_TEST_MODULE IntegerToStringTest
#include <boost/test/unit_test.hpp>
#include <boost/lexical_cast.hpp>

#include <limits>

namespace util {
namespace {

template <class T> void TestValue(const T value) {
  char buf[ToStringBuf<T>::kBytes];
  StringPiece result(buf, ToString(value, buf) - buf);
  BOOST_REQUIRE_GE(static_cast<std::size_t>(ToStringBuf<T>::kBytes), result.size());
  if (value) {
    BOOST_CHECK_EQUAL(boost::lexical_cast<std::string>(value), result);
  } else {
    // Platforms can do void * as 0x0 or 0.
    BOOST_CHECK(result == "0x0" || result == "0");
  }
}

template <class T> void TestCorners() {
  TestValue(std::numeric_limits<T>::min());
  TestValue(std::numeric_limits<T>::max());
  TestValue((T)0);
  TestValue((T)-1);
  TestValue((T)1);
}

BOOST_AUTO_TEST_CASE(Corners) {
  TestCorners<uint16_t>();
  TestCorners<uint32_t>();
  TestCorners<uint64_t>();
  TestCorners<int16_t>();
  TestCorners<int32_t>();
  TestCorners<int64_t>();
  TestCorners<const void*>();
}

template <class T> void TestAll() {
  for (T i = std::numeric_limits<T>::min(); i < std::numeric_limits<T>::max(); ++i) {
    TestValue(i);
  }
  TestValue(std::numeric_limits<T>::max());
}

BOOST_AUTO_TEST_CASE(Short) {
  TestAll<uint16_t>();
  TestAll<int16_t>();
}

template <class T> void Test10s() {
  for (T i = 1; i < std::numeric_limits<T>::max() / 10; i *= 10) {
    TestValue(i);
    TestValue(i - 1);
    TestValue(i + 1);
  }
}

BOOST_AUTO_TEST_CASE(Tens) {
  Test10s<uint64_t>();
  Test10s<int64_t>();
  Test10s<uint32_t>();
  Test10s<int32_t>();
}

BOOST_AUTO_TEST_CASE(Pointers) {
  for (uintptr_t i = 1; i < std::numeric_limits<uintptr_t>::max() / 10; i *= 10) {
    TestValue((const void*)i);
  }
  for (uintptr_t i = 0; i < 256; ++i) {
    TestValue((const void*)i);
    TestValue((const void*)(i + 0xf00));
  }
}

}} // namespaces
cpp/thirdpart/kenlm/util/joint_sort.hh (new file, mode 100644)

#ifndef UTIL_JOINT_SORT_H
#define UTIL_JOINT_SORT_H

/* A terrifying amount of C++ to coax std::sort into sorting one range while
 * also permuting another range the same way.
 */

#include "proxy_iterator.hh"

#include <algorithm>
#include <functional>

namespace util {

namespace detail {

template <class KeyIter, class ValueIter> class JointProxy;

template <class KeyIter, class ValueIter> class JointIter {
  public:
    JointIter() {}

    JointIter(const KeyIter &key_iter, const ValueIter &value_iter) : key_(key_iter), value_(value_iter) {}

    bool operator==(const JointIter<KeyIter, ValueIter> &other) const { return key_ == other.key_; }

    bool operator<(const JointIter<KeyIter, ValueIter> &other) const { return (key_ < other.key_); }

    std::ptrdiff_t operator-(const JointIter<KeyIter, ValueIter> &other) const { return key_ - other.key_; }

    JointIter<KeyIter, ValueIter> &operator+=(std::ptrdiff_t amount) {
      key_ += amount;
      value_ += amount;
      return *this;
    }

    friend void swap(JointIter &first, JointIter &second) {
      using std::swap;
      swap(first.key_, second.key_);
      swap(first.value_, second.value_);
    }

    void DeepSwap(JointIter &other) {
      using std::swap;
      swap(*key_, *other.key_);
      swap(*value_, *other.value_);
    }

  private:
    friend class JointProxy<KeyIter, ValueIter>;
    KeyIter key_;
    ValueIter value_;
};

template <class KeyIter, class ValueIter> class JointProxy {
  private:
    typedef JointIter<KeyIter, ValueIter> InnerIterator;

  public:
    typedef struct {
      typename std::iterator_traits<KeyIter>::value_type key;
      typename std::iterator_traits<ValueIter>::value_type value;
      const typename std::iterator_traits<KeyIter>::value_type &GetKey() const { return key; }
    } value_type;

    JointProxy(const KeyIter &key_iter, const ValueIter &value_iter) : inner_(key_iter, value_iter) {}
    JointProxy(const JointProxy<KeyIter, ValueIter> &other) : inner_(other.inner_) {}

    operator value_type() const {
      value_type ret;
      ret.key = *inner_.key_;
      ret.value = *inner_.value_;
      return ret;
    }

    JointProxy &operator=(const JointProxy &other) {
      *inner_.key_ = *other.inner_.key_;
      *inner_.value_ = *other.inner_.value_;
      return *this;
    }

    JointProxy &operator=(const value_type &other) {
      *inner_.key_ = other.key;
      *inner_.value_ = other.value;
      return *this;
    }

    typename std::iterator_traits<KeyIter>::reference GetKey() const {
      return *(inner_.key_);
    }

    friend void swap(JointProxy<KeyIter, ValueIter> first, JointProxy<KeyIter, ValueIter> second) {
      first.Inner().DeepSwap(second.Inner());
    }

  private:
    friend class ProxyIterator<JointProxy<KeyIter, ValueIter> >;

    InnerIterator &Inner() { return inner_; }
    const InnerIterator &Inner() const { return inner_; }
    InnerIterator inner_;
};

template <class Proxy, class Less> class LessWrapper : public std::binary_function<const typename Proxy::value_type &, const typename Proxy::value_type &, bool> {
  public:
    explicit LessWrapper(const Less &less) : less_(less) {}

    bool operator()(const Proxy &left, const Proxy &right) const {
      return less_(left.GetKey(), right.GetKey());
    }
    bool operator()(const Proxy &left, const typename Proxy::value_type &right) const {
      return less_(left.GetKey(), right.GetKey());
    }
    bool operator()(const typename Proxy::value_type &left, const Proxy &right) const {
      return less_(left.GetKey(), right.GetKey());
    }
    bool operator()(const typename Proxy::value_type &left, const typename Proxy::value_type &right) const {
      return less_(left.GetKey(), right.GetKey());
    }

  private:
    const Less less_;
};

} // namespace detail

template <class KeyIter, class ValueIter> class PairedIterator : public ProxyIterator<detail::JointProxy<KeyIter, ValueIter> > {
  public:
    PairedIterator(const KeyIter &key, const ValueIter &value) :
      ProxyIterator<detail::JointProxy<KeyIter, ValueIter> >(detail::JointProxy<KeyIter, ValueIter>(key, value)) {}
};

template <class KeyIter, class ValueIter, class Less> void JointSort(const KeyIter &key_begin, const KeyIter &key_end, const ValueIter &value_begin, const Less &less) {
  ProxyIterator<detail::JointProxy<KeyIter, ValueIter> > full_begin(detail::JointProxy<KeyIter, ValueIter>(key_begin, value_begin));
  detail::LessWrapper<detail::JointProxy<KeyIter, ValueIter>, Less> less_wrap(less);
  std::sort(full_begin, full_begin + (key_end - key_begin), less_wrap);
}

template <class KeyIter, class ValueIter> void JointSort(const KeyIter &key_begin, const KeyIter &key_end, const ValueIter &value_begin) {
  JointSort(key_begin, key_end, value_begin, std::less<typename std::iterator_traits<KeyIter>::value_type>());
}

} // namespace util

#endif // UTIL_JOINT_SORT_H
cpp/thirdpart/kenlm/util/joint_sort_test.cc (new file, mode 100644)

#include "joint_sort.hh"

#define BOOST_TEST_MODULE JointSortTest
#include <boost/test/unit_test.hpp>

namespace util { namespace {

BOOST_AUTO_TEST_CASE(just_flip) {
  char keys[2];
  int values[2];
  keys[0] = 1; values[0] = 327;
  keys[1] = 0; values[1] = 87897;
  JointSort<char *, int *>(keys + 0, keys + 2, values + 0);
  BOOST_CHECK_EQUAL(0, keys[0]);
  BOOST_CHECK_EQUAL(87897, values[0]);
  BOOST_CHECK_EQUAL(1, keys[1]);
  BOOST_CHECK_EQUAL(327, values[1]);
}

BOOST_AUTO_TEST_CASE(three) {
  char keys[3];
  int values[3];
  keys[0] = 1; values[0] = 327;
  keys[1] = 2; values[1] = 87897;
  keys[2] = 0; values[2] = 10;
  JointSort<char *, int *>(keys + 0, keys + 3, values + 0);
  BOOST_CHECK_EQUAL(0, keys[0]);
  BOOST_CHECK_EQUAL(1, keys[1]);
  BOOST_CHECK_EQUAL(2, keys[2]);
}

BOOST_AUTO_TEST_CASE(char_int) {
  char keys[4];
  int values[4];
  keys[0] = 3; values[0] = 327;
  keys[1] = 1; values[1] = 87897;
  keys[2] = 2; values[2] = 10;
  keys[3] = 0; values[3] = 24347;
  JointSort<char *, int *>(keys + 0, keys + 4, values + 0);
  BOOST_CHECK_EQUAL(0, keys[0]);
  BOOST_CHECK_EQUAL(24347, values[0]);
  BOOST_CHECK_EQUAL(1, keys[1]);
  BOOST_CHECK_EQUAL(87897, values[1]);
  BOOST_CHECK_EQUAL(2, keys[2]);
  BOOST_CHECK_EQUAL(10, values[2]);
  BOOST_CHECK_EQUAL(3, keys[3]);
  BOOST_CHECK_EQUAL(327, values[3]);
}

BOOST_AUTO_TEST_CASE(swap_proxy) {
  char keys[2] = {0, 1};
  int values[2] = {2, 3};
  detail::JointProxy<char *, int *> first(keys, values);
  detail::JointProxy<char *, int *> second(keys + 1, values + 1);
  swap(first, second);
  BOOST_CHECK_EQUAL(1, keys[0]);
  BOOST_CHECK_EQUAL(0, keys[1]);
  BOOST_CHECK_EQUAL(3, values[0]);
  BOOST_CHECK_EQUAL(2, values[1]);
}

}} // namespace anonymous util
cpp/thirdpart/kenlm/util/mmap.cc (new file, mode 100644)

/* Memory mapping wrappers.
 * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at
 * NICT.
 */
#include "mmap.hh"

#include "exception.hh"
#include "file.hh"
#include "scoped.hh"

#include <iostream>

#include <cassert>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <cstdlib>

#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif

namespace util {

std::size_t SizePage() {
#if defined(_WIN32) || defined(_WIN64)
  SYSTEM_INFO si;
  GetSystemInfo(&si);
  return si.dwAllocationGranularity;
#else
  return sysconf(_SC_PAGE_SIZE);
#endif
}

scoped_mmap::~scoped_mmap() {
  if (data_ != (void*)-1) {
    try {
      // Thanks Denis Filimonov for pointing out NFS likes msync first.
      SyncOrThrow(data_, size_);
      UnmapOrThrow(data_, size_);
    } catch (const util::ErrnoException &e) {
      std::cerr << e.what();
      abort();
    }
  }
}

namespace {
template <class T> T RoundUpPow2(T value, T mult) {
  return ((value - 1) & ~(mult - 1)) + mult;
}

std::size_t RoundUpSize(const scoped_memory &mem) {
  switch (mem.source()) {
    case scoped_memory::MMAP_ROUND_1G_ALLOCATED:
      return RoundUpPow2<std::size_t>(mem.size(), 1ULL << 30);
    case scoped_memory::MMAP_ROUND_2M_ALLOCATED:
      return RoundUpPow2<std::size_t>(mem.size(), 1ULL << 21);
    case scoped_memory::MMAP_ROUND_PAGE_ALLOCATED:
      return RoundUpPow2<std::size_t>(mem.size(), static_cast<std::size_t>(SizePage()));
    default:
      return mem.size();
  }
}
} // namespace

scoped_memory::scoped_memory(std::size_t size, bool zeroed) : data_(NULL), size_(0), source_(NONE_ALLOCATED) {
  HugeMalloc(size, zeroed, *this);
}

void scoped_memory::reset(void *data, std::size_t size, Alloc source) {
  switch (source_) {
    case MMAP_ROUND_1G_ALLOCATED:
    case MMAP_ROUND_2M_ALLOCATED:
    case MMAP_ROUND_PAGE_ALLOCATED:
    case MMAP_ALLOCATED:
      scoped_mmap(data_, RoundUpSize(*this));
      break;
    case MALLOC_ALLOCATED:
      free(data_);
      break;
    case NONE_ALLOCATED:
      break;
  }
  data_ = data;
  size_ = size;
  source_ = source;
}

const int kFileFlags =
#if defined(_WIN32) || defined(_WIN64)
  0 // MapOrThrow ignores flags on windows
#elif defined(MAP_FILE)
  MAP_FILE | MAP_SHARED
#else
  MAP_SHARED
#endif
  ;

void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) {
#ifdef MAP_POPULATE // Linux specific
  if (prefault) {
    flags |= MAP_POPULATE;
  }
#endif
#if defined(_WIN32) || defined(_WIN64)
  int protectC = for_write ? PAGE_READWRITE : PAGE_READONLY;
  int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ;
  uint64_t total_size = size + offset;
  HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, total_size >> 32, static_cast<DWORD>(total_size), NULL);
  UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed");
  LPVOID ret = MapViewOfFile(hMapping, protectM, offset >> 32, offset, size);
  CloseHandle(hMapping);
  UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
#else
  int protect = for_write ? (PROT_READ | PROT_WRITE) : PROT_READ;
  void *ret;
  UTIL_THROW_IF((ret = mmap(NULL, size, protect, flags, fd, offset)) == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset);
#  ifdef MADV_HUGEPAGE
  /* We like huge pages but it's fine if we can't have them.  Note that huge
   * pages are not supported for file-backed mmap on linux.
   */
  madvise(ret, size, MADV_HUGEPAGE);
#  endif
#endif
  return ret;
}

void SyncOrThrow(void *start, size_t length) {
#if defined(_WIN32) || defined(_WIN64)
  UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap");
#else
  UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap");
#endif
}

void UnmapOrThrow(void *start, size_t length) {
#if defined(_WIN32) || defined(_WIN64)
  UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
#else
  UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed with " << start << " for length " << length);
#endif
}

// Linux huge pages.
#ifdef __linux__

namespace {

bool TryHuge(std::size_t size, bool populate, uint8_t alignment_bits, scoped_memory::Alloc huge_scheme, scoped_memory &to) {
  // Don't bother with these cases.
  if (size < (1ULL << alignment_bits) || (1ULL << alignment_bits) < SizePage())
    return false;

  // First try: Linux >= 3.8 with manually configured hugetlb pages available.
  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | (alignment_bits << 26 /* This is MAP_HUGE_SHIFT but some headers are too old. */);
  if (populate) flags |= MAP_POPULATE;
  void *ret = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (ret != MAP_FAILED) {
    to.reset(ret, size, huge_scheme);
    return true;
  }

  // There weren't pages in a sysadmin-created pool.  Let's get aligned memory
  // and hope transparent huge pages kicks in.  Align to a multiple of the huge
  // page size by overallocating.  I feel bad about doing this, but it's also how
  // posix_memalign is implemented.  And the memory is virtual.

  // Round up requested size to multiple of page size.  This will allow the pages after to be munmapped.
  std::size_t size_up = RoundUpPow2(size, SizePage());
  std::size_t ask = size_up + (1 << alignment_bits) - SizePage();
  // Don't populate because this is asking for more than we will use.
  scoped_mmap larger(mmap(NULL, ask, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), ask);
  if (larger.get() == MAP_FAILED) return false;

  // Throw out pages before the alignment point.
  uintptr_t base = reinterpret_cast<uintptr_t>(larger.get());
  // Round up to next multiple of alignment.
  uintptr_t rounded_up = RoundUpPow2(base, static_cast<uintptr_t>(1) << alignment_bits);
  if (base != rounded_up) {
    // If this throws an exception (which it shouldn't) then we want to unmap the whole thing by keeping it in larger.
    UnmapOrThrow(larger.get(), rounded_up - base);
    larger.steal();
    larger.reset(reinterpret_cast<void*>(rounded_up), ask - (rounded_up - base));
  }

  // Throw out pages after the requested size.
  assert(larger.size() >= size_up);
  if (larger.size() > size_up) {
    // This is where we assume size_up is a multiple of page size.
    UnmapOrThrow(static_cast<uint8_t*>(larger.get()) + size_up, larger.size() - size_up);
    larger.reset(larger.steal(), size_up);
  }
#ifdef MADV_HUGEPAGE
  madvise(larger.get(), size_up, MADV_HUGEPAGE);
#endif
  to.reset(larger.steal(), size, scoped_memory::MMAP_ROUND_PAGE_ALLOCATED);
  return true;
}

} // namespace
#endif

void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to) {
  to.reset();
#ifdef __linux__
  // TODO: architectures/page sizes other than 2^21 and 2^30.
  // Attempt 1 GB pages.
  // If the user asked for zeroed memory, assume they want it populated.
  if (size >= (1ULL << 30) && TryHuge(size, zeroed, 30, scoped_memory::MMAP_ROUND_1G_ALLOCATED, to))
    return;
  // Attempt 2 MB pages.
  if (size >= (1ULL << 21) && TryHuge(size, zeroed, 21, scoped_memory::MMAP_ROUND_2M_ALLOCATED, to))
    return;
#endif // __linux__
  // Non-linux will always do this, as will small allocations on Linux.
  to.reset(zeroed ? calloc(1, size) : malloc(size), size, scoped_memory::MALLOC_ALLOCATED);
  UTIL_THROW_IF(!to.get(), ErrnoException, "Failed to allocate " << size << " bytes");
}

namespace {
#ifdef __linux__
const std::size_t kTransitionHuge = std::max<std::size_t>(1ULL << 21, SizePage());
#endif // __linux__

void ReplaceAndCopy(std::size_t to, bool zero_new, scoped_memory &mem) {
  scoped_memory replacement;
  HugeMalloc(to, zero_new, replacement);
  memcpy(replacement.get(), mem.get(), mem.size());
  // This can't throw.
  mem.reset(replacement.get(), replacement.size(), replacement.source());
  replacement.steal();
}
} // namespace

void HugeRealloc(std::size_t to, bool zero_new, scoped_memory &mem) {
  if (!to) {
    mem.reset();
    return;
  }
  switch (mem.source()) {
    case scoped_memory::NONE_ALLOCATED:
      HugeMalloc(to, zero_new, mem);
      return;
#ifdef __linux__
    // TODO really need to collapse these cases with a number.
    case scoped_memory::MMAP_ROUND_1G_ALLOCATED:
    case scoped_memory::MMAP_ROUND_2M_ALLOCATED:
    case scoped_memory::MMAP_ROUND_PAGE_ALLOCATED:
    case scoped_memory::MMAP_ALLOCATED:
      // Downsizing below barrier?
      if (to <= SizePage()) {
        scoped_malloc replacement(malloc(to));
        memcpy(replacement.get(), mem.get(), std::min(to, mem.size()));
        if (zero_new && to > mem.size())
          memset(static_cast<uint8_t*>(replacement.get()) + mem.size(), 0, to - mem.size());
        mem.reset(replacement.release(), to, scoped_memory::MALLOC_ALLOCATED);
      } else {
        // main path: try to mremap.
        void *new_addr = mremap(mem.get(), RoundUpSize(mem), to, MREMAP_MAYMOVE);
        if (new_addr != MAP_FAILED) {
          scoped_memory::Alloc source(mem.source()); // steal resets mem.source()
          mem.steal(); // let go otherwise reset() will free it first
          mem.reset(new_addr, to, source);
        } else {
          // Reallocating huge pages can fail with EINVAL.
          // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/mremap.c?id=refs/tags/v3.19#n346
          ReplaceAndCopy(to, zero_new, mem);
        }
      }
      return;
#endif // __linux__
    case scoped_memory::MALLOC_ALLOCATED:
#ifdef __linux__
      // Transition larger allocations to huge pages, but don't keep trying if we're still malloc allocated.
      if (to >= kTransitionHuge && mem.size() < kTransitionHuge) {
        ReplaceAndCopy(to, zero_new, mem);
        return;
      }
#endif // __linux__
      {
        void *new_addr = std::realloc(mem.get(), to);
        UTIL_THROW_IF(!new_addr, ErrnoException, "realloc to " << to << " bytes failed.");
        if (zero_new && to > mem.size())
          memset(static_cast<uint8_t*>(new_addr) + mem.size(), 0, to - mem.size());
        mem.steal();
        mem.reset(new_addr, to, scoped_memory::MALLOC_ALLOCATED);
      }
      return;
    default:
      UTIL_THROW(Exception, "HugeRealloc called with type " << mem.source());
  }
}

void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) {
  switch (method) {
    case LAZY:
      out.reset(MapOrThrow(size, false, kFileFlags, false, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
      break;
    case POPULATE_OR_LAZY:
#ifdef MAP_POPULATE
    case POPULATE_OR_READ:
#endif
      out.reset(MapOrThrow(size, false, kFileFlags, true, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
      break;
#ifndef MAP_POPULATE
    case POPULATE_OR_READ:
#endif
    case READ:
      HugeMalloc(size, false, out);
      SeekOrThrow(fd, offset);
      ReadOrThrow(fd, out.get(), size);
      break;
    case PARALLEL_READ:
      UTIL_THROW(Exception, "Parallel read was removed from this repo.");
      break;
  }
}

void *MapZeroedWrite(int fd, std::size_t size) {
  ResizeOrThrow(fd, 0);
  ResizeOrThrow(fd, size);
  return MapOrThrow(size, true, kFileFlags, false, fd, 0);
}

void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
  file.reset(CreateOrThrow(name));
  try {
    return MapZeroedWrite(file.get(), size);
  } catch (ErrnoException &e) {
    e << " in file " << name;
    throw;
  }
}

Rolling::Rolling(const Rolling &copy_from, uint64_t increase) {
  *this = copy_from;
  IncreaseBase(increase);
}

Rolling &Rolling::operator=(const Rolling &copy_from) {
  fd_ = copy_from.fd_;
  file_begin_ = copy_from.file_begin_;
  file_end_ = copy_from.file_end_;
  for_write_ = copy_from.for_write_;
  block_ = copy_from.block_;
  read_bound_ = copy_from.read_bound_;

  current_begin_ = 0;
  if (copy_from.IsPassthrough()) {
    current_end_ = copy_from.current_end_;
    ptr_ = copy_from.ptr_;
  } else {
    // Force call on next mmap.
    current_end_ = 0;
    ptr_ = NULL;
  }
  return *this;
}

Rolling::Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount) {
  current_begin_ = 0;
  current_end_ = 0;
  fd_ = fd;
  file_begin_ = offset;
  file_end_ = offset + amount;
  for_write_ = for_write;
  block_ = block;
  read_bound_ = read_bound;
}

void *Rolling::ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size) {
  out.reset();
  if (IsPassthrough()) return static_cast<uint8_t*>(get()) + index;
  uint64_t offset = index + file_begin_;
  // Round down to multiple of page size.
  uint64_t cruft = offset % static_cast<uint64_t>(SizePage());
  std::size_t map_size = static_cast<std::size_t>(size + cruft);
  out.reset(MapOrThrow(map_size, for_write_, kFileFlags, true, fd_, offset - cruft), map_size, scoped_memory::MMAP_ALLOCATED);
  return static_cast<uint8_t*>(out.get()) + static_cast<std::size_t>(cruft);
}

void Rolling::Roll(uint64_t index) {
  assert(!IsPassthrough());
  std::size_t amount;
  if (file_end_ - (index + file_begin_) > static_cast<uint64_t>(block_)) {
    amount = block_;
    current_end_ = index + amount - read_bound_;
  } else {
    amount = file_end_ - (index + file_begin_);
    current_end_ = index + amount;
  }
  ptr_ = static_cast<uint8_t*>(ExtractNonRolling(mem_, index, amount)) - index;
  current_begin_ = index;
}

} // namespace util
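A small worked check (not from the commit) of the RoundUpPow2 formula used above: ((value - 1) & ~(mult - 1)) + mult rounds a nonzero value up to the next multiple of the power-of-two mult, leaving exact multiples unchanged.

#include <cassert>
#include <cstddef>

// Same arithmetic as RoundUpPow2 in mmap.cc; mult must be a power of two.
static std::size_t RoundUpPow2Demo(std::size_t value, std::size_t mult) {
  return ((value - 1) & ~(mult - 1)) + mult;
}

int main() {
  assert(RoundUpPow2Demo(4096, 4096) == 4096);  // already aligned: unchanged
  assert(RoundUpPow2Demo(4097, 4096) == 8192);  // otherwise round up to the next multiple
  assert(RoundUpPow2Demo(1, 4096) == 4096);
  return 0;
}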
cpp/thirdpart/kenlm/util/mmap.hh (new file, mode 100644)

#ifndef UTIL_MMAP_H
#define UTIL_MMAP_H
// Utilities for mmaped files.

#include <cstddef>
#include <limits>

#include <stdint.h>
#include <sys/types.h>

namespace util {

class scoped_fd;

std::size_t SizePage();

// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
class scoped_mmap {
  public:
    scoped_mmap() : data_((void*)-1), size_(0) {}
    scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
    ~scoped_mmap();

    void *get() const { return data_; }

    const char *begin() const { return reinterpret_cast<char*>(data_); }
    char *begin() { return reinterpret_cast<char*>(data_); }
    const char *end() const { return reinterpret_cast<char*>(data_) + size_; }
    char *end() { return reinterpret_cast<char*>(data_) + size_; }
    std::size_t size() const { return size_; }

    void reset(void *data, std::size_t size) {
      scoped_mmap other(data_, size_);
      data_ = data;
      size_ = size;
    }

    void reset() { reset((void*)-1, 0); }

    void *steal() {
      void *ret = data_;
      data_ = (void*)-1;
      size_ = 0;
      return ret;
    }

  private:
    void *data_;
    std::size_t size_;

    scoped_mmap(const scoped_mmap &);
    scoped_mmap &operator=(const scoped_mmap &);
};

/* For when the memory might come from mmap or malloc.  Uses NULL and 0 for
 * blanks even though mmap signals errors with (void*)-1.
 */
class scoped_memory {
  public:
    typedef enum {
      // TODO: store rounded up size instead?
      MMAP_ROUND_1G_ALLOCATED, // The size was rounded up for a 1GB page.  Do the same before munmap.
      MMAP_ROUND_2M_ALLOCATED, // The size was rounded up for a 2MB page.  Do the same before munmap.
      MMAP_ROUND_PAGE_ALLOCATED, // The size was rounded up to a multiple of the default page size.  Do the same before munmap.
      MMAP_ALLOCATED, // munmap
      MALLOC_ALLOCATED, // free
      NONE_ALLOCATED // nothing to free (though there can be something here if it's owned by somebody else).
    } Alloc;

    scoped_memory(void *data, std::size_t size, Alloc source)
      : data_(data), size_(size), source_(source) {}

    scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}

    // Calls HugeMalloc
    scoped_memory(std::size_t to, bool zero_new);

#if __cplusplus >= 201103L
    scoped_memory(scoped_memory &&from) noexcept
      : data_(from.data_), size_(from.size_), source_(from.source_) {
      from.steal();
    }
#endif

    ~scoped_memory() { reset(); }

    void *get() const { return data_; }

    const char *begin() const { return reinterpret_cast<char*>(data_); }
    char *begin() { return reinterpret_cast<char*>(data_); }
    const char *end() const { return reinterpret_cast<char*>(data_) + size_; }
    char *end() { return reinterpret_cast<char*>(data_) + size_; }
    std::size_t size() const { return size_; }

    Alloc source() const { return source_; }

    void reset() { reset(NULL, 0, NONE_ALLOCATED); }

    void reset(void *data, std::size_t size, Alloc from);

    void *steal() {
      void *ret = data_;
      data_ = NULL;
      size_ = 0;
      source_ = NONE_ALLOCATED;
      return ret;
    }

  private:
    void *data_;
    std::size_t size_;

    Alloc source_;

    scoped_memory(const scoped_memory &);
    scoped_memory &operator=(const scoped_memory &);
};

extern const int kFileFlags;

// Cross-platform, error-checking wrapper for mmap().
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset = 0);

// msync wrapper
void SyncOrThrow(void *start, size_t length);

// Cross-platform, error-checking wrapper for munmap().
void UnmapOrThrow(void *start, size_t length);

// Allocate memory, promising that all/vast majority of it will be used.  Tries
// hard to use huge pages on Linux.
// If you want zeroed memory, pass zeroed = true.
void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to);

// Reallocates memory ala realloc but with option to zero the new memory.
// On Linux, the memory can come from anonymous mmap or malloc/calloc.
// On non-Linux, only malloc/calloc is supported.
//
// To summarize, any memory from HugeMalloc or HugeRealloc can be resized with
// this.
void HugeRealloc(std::size_t size, bool new_zeroed, scoped_memory &mem);

enum LoadMethod {
  // mmap with no prepopulate
  LAZY,
  // On linux, pass MAP_POPULATE to mmap.
  POPULATE_OR_LAZY,
  // Populate on Linux.  malloc and read on non-Linux.
  POPULATE_OR_READ,
  // malloc and read.
  READ,
  // malloc and read in parallel (recommended for Lustre)
  PARALLEL_READ,
};

void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out);

// Open file name with mmap of size bytes, all of which are initially zero.
void *MapZeroedWrite(int fd, std::size_t size);
void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);

// Forward rolling memory map with no overlap.
class Rolling {
  public:
    Rolling() {}

    explicit Rolling(void *data) { Init(data); }

    Rolling(const Rolling &copy_from, uint64_t increase = 0);
    Rolling &operator=(const Rolling &copy_from);

    // For an actual rolling mmap.
    explicit Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount);

    // For a static mapping
    void Init(void *data) {
      ptr_ = data;
      current_end_ = std::numeric_limits<uint64_t>::max();
      current_begin_ = 0;
      // Mark as a pass-through.
      fd_ = -1;
    }

    void IncreaseBase(uint64_t by) {
      file_begin_ += by;
      ptr_ = static_cast<uint8_t*>(ptr_) + by;
      if (!IsPassthrough()) current_end_ = 0;
    }

    void DecreaseBase(uint64_t by) {
      file_begin_ -= by;
      ptr_ = static_cast<uint8_t*>(ptr_) - by;
      if (!IsPassthrough()) current_end_ = 0;
    }

    void *ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size);

    // Returns base pointer
    void *get() const { return ptr_; }

    // Returns base pointer.
    void *CheckedBase(uint64_t index) {
      if (index >= current_end_ || index < current_begin_) {
        Roll(index);
      }
      return ptr_;
    }

    // Returns indexed pointer.
    void *CheckedIndex(uint64_t index) {
      return static_cast<uint8_t*>(CheckedBase(index)) + index;
    }

  private:
    void Roll(uint64_t index);

    // True if this is just a thin wrapper on a pointer.
    bool IsPassthrough() const { return fd_ == -1; }

    void *ptr_;
    uint64_t current_begin_;
    uint64_t current_end_;

    scoped_memory mem_;

    int fd_;
    uint64_t file_begin_;
    uint64_t file_end_;
    bool for_write_;
    std::size_t block_;
    std::size_t read_bound_;
};

} // namespace util

#endif // UTIL_MMAP_H
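A usage sketch under my own assumptions (not part of the commit): scoped_memory owns whatever HugeMalloc hands it, and HugeRealloc can grow that allocation later, preferring huge pages on Linux.

#include "mmap.hh"

void GrowExample() {
  util::scoped_memory mem;
  util::HugeMalloc(1 << 22, /*zeroed=*/true, mem);     // 4 MB, zero-filled
  // ... use mem.get() and mem.size() ...
  util::HugeRealloc(1 << 23, /*zero_new=*/true, mem);  // grow to 8 MB, new tail zeroed
}  // mem's destructor unmaps or frees according to how it was allocated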
cpp/thirdpart/kenlm/util/multi_intersection.hh
0 → 100644
View file @
688b6eac
#ifndef UTIL_MULTI_INTERSECTION_H
#define UTIL_MULTI_INTERSECTION_H

#include <boost/optional.hpp>
#include <boost/range/iterator_range.hpp>

#include <algorithm>
#include <functional>
#include <vector>

namespace util {

namespace detail {
template <class Range> struct RangeLessBySize : public std::binary_function<const Range &, const Range &, bool> {
  bool operator()(const Range &left, const Range &right) const {
    return left.size() < right.size();
  }
};

/* Takes sets specified by their iterators and a boost::optional containing
 * the lowest intersection if any.  Each set must be sorted in increasing
 * order.  sets is changed to truncate the beginning of each sequence to the
 * location of the match or an empty set.  Precondition: sets is not empty
 * since the intersection over null is the universe and this function does not
 * know the universe.
 */
template <class Iterator, class Less>
boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
  typedef std::vector<boost::iterator_range<Iterator> > Sets;
  typedef typename std::iterator_traits<Iterator>::value_type Value;

  assert(!sets.empty());

  if (sets.front().empty()) return boost::optional<Value>();
  // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
  Value highest(sets.front().front());
  for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
    i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
    if (i->empty()) return boost::optional<Value>();
    if (less(highest, i->front())) {
      highest = i->front();
      // start over
      i = sets.begin();
    } else {
      ++i;
    }
  }
  return boost::optional<Value>(highest);
}

} // namespace detail

template <class Iterator, class Less>
boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
  assert(!sets.empty());

  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
  return detail::FirstIntersectionSorted(sets, less);
}

template <class Iterator>
boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
  return FirstIntersection(sets, std::less<typename std::iterator_traits<Iterator>::value_type>());
}

template <class Iterator, class Output, class Less>
void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
  typedef typename std::iterator_traits<Iterator>::value_type Value;
  assert(!sets.empty());

  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
  boost::optional<Value> ret;
  for (boost::optional<Value> ret; (ret = detail::FirstIntersectionSorted(sets, less)); sets.front().advance_begin(1)) {
    out(*ret);
  }
}

template <class Iterator, class Output>
void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
  AllIntersection(sets, out, std::less<typename std::iterator_traits<Iterator>::value_type>());
}

} // namespace util

#endif // UTIL_MULTI_INTERSECTION_H
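A minimal usage sketch of the helpers above, not part of the header: the Collect functor and the sample arrays are illustrative assumptions. FirstIntersection returns the lowest common value (and truncates the input ranges as documented), while AllIntersection reports every common value through an output functor.

// Illustrative only; assumes multi_intersection.hh is on the include path.
#include "multi_intersection.hh"
#include <iostream>
#include <vector>

struct Collect {  // hypothetical output functor
  void operator()(unsigned int v) const { std::cout << v << '\n'; }
};

int main() {
  unsigned int a[] = {1, 3, 4, 17, 22};
  unsigned int b[] = {2, 4, 12, 17};
  std::vector<boost::iterator_range<const unsigned int*> > sets;
  sets.push_back(boost::iterator_range<const unsigned int*>(a, a + 5));
  sets.push_back(boost::iterator_range<const unsigned int*>(b, b + 4));

  // Lowest common value; note the call truncates the ranges held in sets.
  boost::optional<unsigned int> first = util::FirstIntersection(sets);
  if (first) std::cout << "lowest: " << *first << '\n';  // 4

  // Rebuild the ranges, then report every common value (4 then 17).
  sets.clear();
  sets.push_back(boost::iterator_range<const unsigned int*>(a, a + 5));
  sets.push_back(boost::iterator_range<const unsigned int*>(b, b + 4));
  Collect out;
  util::AllIntersection(sets, out);
  return 0;
}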
cpp/thirdpart/kenlm/util/multi_intersection_test.cc
0 → 100644
View file @
688b6eac
#include "multi_intersection.hh"
#define BOOST_TEST_MODULE MultiIntersectionTest
#include <boost/test/unit_test.hpp>
namespace
util
{
namespace
{
BOOST_AUTO_TEST_CASE
(
Empty
)
{
std
::
vector
<
boost
::
iterator_range
<
const
unsigned
int
*>
>
sets
;
sets
.
push_back
(
boost
::
iterator_range
<
const
unsigned
int
*>
(
static_cast
<
const
unsigned
int
*>
(
NULL
),
static_cast
<
const
unsigned
int
*>
(
NULL
)));
BOOST_CHECK
(
!
FirstIntersection
(
sets
));
}
BOOST_AUTO_TEST_CASE
(
Single
)
{
std
::
vector
<
unsigned
int
>
nums
;
nums
.
push_back
(
1
);
nums
.
push_back
(
4
);
nums
.
push_back
(
100
);
std
::
vector
<
boost
::
iterator_range
<
std
::
vector
<
unsigned
int
>::
const_iterator
>
>
sets
;
sets
.
push_back
(
nums
);
boost
::
optional
<
unsigned
int
>
ret
(
FirstIntersection
(
sets
));
BOOST_REQUIRE
(
ret
);
BOOST_CHECK_EQUAL
(
static_cast
<
unsigned
int
>
(
1
),
*
ret
);
}
template
<
class
T
,
unsigned
int
len
>
boost
::
iterator_range
<
const
T
*>
RangeFromArray
(
const
T
(
&
arr
)[
len
])
{
return
boost
::
iterator_range
<
const
T
*>
(
arr
,
arr
+
len
);
}
BOOST_AUTO_TEST_CASE
(
MultiNone
)
{
unsigned
int
nums0
[]
=
{
1
,
3
,
4
,
22
};
unsigned
int
nums1
[]
=
{
2
,
5
,
12
};
unsigned
int
nums2
[]
=
{
4
,
17
};
std
::
vector
<
boost
::
iterator_range
<
const
unsigned
int
*>
>
sets
;
sets
.
push_back
(
RangeFromArray
(
nums0
));
sets
.
push_back
(
RangeFromArray
(
nums1
));
sets
.
push_back
(
RangeFromArray
(
nums2
));
BOOST_CHECK
(
!
FirstIntersection
(
sets
));
}
BOOST_AUTO_TEST_CASE
(
MultiOne
)
{
unsigned
int
nums0
[]
=
{
1
,
3
,
4
,
17
,
22
};
unsigned
int
nums1
[]
=
{
2
,
5
,
12
,
17
};
unsigned
int
nums2
[]
=
{
4
,
17
};
std
::
vector
<
boost
::
iterator_range
<
const
unsigned
int
*>
>
sets
;
sets
.
push_back
(
RangeFromArray
(
nums0
));
sets
.
push_back
(
RangeFromArray
(
nums1
));
sets
.
push_back
(
RangeFromArray
(
nums2
));
boost
::
optional
<
unsigned
int
>
ret
(
FirstIntersection
(
sets
));
BOOST_REQUIRE
(
ret
);
BOOST_CHECK_EQUAL
(
static_cast
<
unsigned
int
>
(
17
),
*
ret
);
}
}
// namespace
}
// namespace util
cpp/thirdpart/kenlm/util/murmur_hash.cc
0 → 100644
View file @
688b6eac
/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
 * code is released to the public domain. For business purposes, Murmurhash is
 * under the MIT license."
 * This is modified from the original:
 * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
 * length changed to unsigned int.
 * placed in namespace util
 * add MurmurHashNative
 * default option = 0 for seed
 * ARM port from NICT
 */
#include "murmur_hash.hh"
#include <cstring>

namespace util {

//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby

// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.

// 64-bit hash for 64-bit platforms

uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed) {
  const uint64_t m = 0xc6a4a7935bd1e995ULL;
  const int r = 47;

  uint64_t h = seed ^ (len * m);

#if defined(__arm) || defined(__arm__)
  const size_t ksize = sizeof(uint64_t);
  const unsigned char * data = (const unsigned char *)key;
  const unsigned char * end = data + (std::size_t)(len/8) * ksize;
#else
  const uint64_t * data = (const uint64_t *)key;
  const uint64_t * end = data + (len/8);
#endif

  while (data != end) {
#if defined(__arm) || defined(__arm__)
    uint64_t k;
    memcpy(&k, data, ksize);
    data += ksize;
#else
    uint64_t k = *data++;
#endif

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  const unsigned char * data2 = (const unsigned char*)data;

  switch (len & 7) {
  case 7: h ^= uint64_t(data2[6]) << 48;
  case 6: h ^= uint64_t(data2[5]) << 40;
  case 5: h ^= uint64_t(data2[4]) << 32;
  case 4: h ^= uint64_t(data2[3]) << 24;
  case 3: h ^= uint64_t(data2[2]) << 16;
  case 2: h ^= uint64_t(data2[1]) << 8;
  case 1: h ^= uint64_t(data2[0]);
          h *= m;
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}

// 64-bit hash for 32-bit platforms

uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed) {
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  unsigned int h1 = seed ^ len;
  unsigned int h2 = 0;

#if defined(__arm) || defined(__arm__)
  size_t ksize = sizeof(unsigned int);
  const unsigned char * data = (const unsigned char *)key;
#else
  const unsigned int * data = (const unsigned int *)key;
#endif

  unsigned int k1, k2;
  while (len >= 8) {
#if defined(__arm) || defined(__arm__)
    memcpy(&k1, data, ksize);
    data += ksize;
    memcpy(&k2, data, ksize);
    data += ksize;
#else
    k1 = *data++;
    k2 = *data++;
#endif

    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;

    k2 *= m; k2 ^= k2 >> r; k2 *= m;
    h2 *= m; h2 ^= k2;
    len -= 4;
  }

  if (len >= 4) {
#if defined(__arm) || defined(__arm__)
    memcpy(&k1, data, ksize);
    data += ksize;
#else
    k1 = *data++;
#endif
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;
    len -= 4;
  }

  switch (len) {
  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
  case 1: h2 ^= ((unsigned char*)data)[0];
      h2 *= m;
  };

  h1 ^= h2 >> 18; h1 *= m;
  h2 ^= h1 >> 22; h2 *= m;
  h1 ^= h2 >> 17; h1 *= m;
  h2 ^= h1 >> 19; h2 *= m;

  uint64_t h = h1;

  h = (h << 32) | h2;

  return h;
}

// Trick to test for 64-bit architecture at compile time.
namespace {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-function"
#endif
template <unsigned L> inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, uint64_t seed) {
  return MurmurHash64A(key, len, seed);
}
template <> inline uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, uint64_t seed) {
  return MurmurHash64B(key, len, seed);
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif
} // namespace

uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed) {
  return MurmurHashNativeBackend<sizeof(void*)>(key, len, seed);
}

} // namespace util
cpp/thirdpart/kenlm/util/murmur_hash.hh
0 → 100644
View file @
688b6eac
#ifndef UTIL_MURMUR_HASH_H
#define UTIL_MURMUR_HASH_H

#include <cstddef>
#include <stdint.h>

namespace util {

// 64-bit machine version
uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0);
// 32-bit machine version (not the same function as above)
uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0);
// Use the version for this arch.  Because the values differ across
// architectures, really only use it for in-memory structures.
uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0);

} // namespace util

#endif // UTIL_MURMUR_HASH_H
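A minimal sketch of calling the header above; the sample string is an illustrative assumption. MurmurHashNative dispatches to the 64-bit or 32-bit variant at compile time, so the value is only stable within one architecture.

// Illustrative only; hashes a byte buffer with the architecture-native variant.
#include "murmur_hash.hh"
#include <cstring>
#include <iostream>

int main() {
  const char text[] = "language model";
  uint64_t h = util::MurmurHashNative(text, std::strlen(text));  // seed defaults to 0
  std::cout << std::hex << h << std::endl;
  return 0;
}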
cpp/thirdpart/kenlm/util/parallel_read.cc
0 → 100644
View file @
688b6eac
#include "parallel_read.hh"
#include "file.hh"
#ifdef WITH_THREADS
#include "thread_pool.hh"
namespace
util
{
namespace
{
class
Reader
{
public:
explicit
Reader
(
int
fd
)
:
fd_
(
fd
)
{}
struct
Request
{
void
*
to
;
std
::
size_t
size
;
uint64_t
offset
;
bool
operator
==
(
const
Request
&
other
)
const
{
return
(
to
==
other
.
to
)
&&
(
size
==
other
.
size
)
&&
(
offset
==
other
.
offset
);
}
};
void
operator
()(
const
Request
&
request
)
{
util
::
ErsatzPRead
(
fd_
,
request
.
to
,
request
.
size
,
request
.
offset
);
}
private:
int
fd_
;
};
}
// namespace
void
ParallelRead
(
int
fd
,
void
*
to
,
std
::
size_t
amount
,
uint64_t
offset
)
{
Reader
::
Request
poison
;
poison
.
to
=
NULL
;
poison
.
size
=
0
;
poison
.
offset
=
0
;
unsigned
threads
=
boost
::
thread
::
hardware_concurrency
();
if
(
!
threads
)
threads
=
2
;
ThreadPool
<
Reader
>
pool
(
2
/* don't need much of a queue */
,
threads
,
fd
,
poison
);
const
std
::
size_t
kBatch
=
1ULL
<<
25
;
// 32 MB
Reader
::
Request
request
;
request
.
to
=
to
;
request
.
size
=
kBatch
;
request
.
offset
=
offset
;
for
(;
amount
>
kBatch
;
amount
-=
kBatch
)
{
pool
.
Produce
(
request
);
request
.
to
=
reinterpret_cast
<
uint8_t
*>
(
request
.
to
)
+
kBatch
;
request
.
offset
+=
kBatch
;
}
request
.
size
=
amount
;
if
(
request
.
size
)
{
pool
.
Produce
(
request
);
}
}
}
// namespace util
#else // WITH_THREADS
namespace
util
{
void
ParallelRead
(
int
fd
,
void
*
to
,
std
::
size_t
amount
,
uint64_t
offset
)
{
util
::
ErsatzPRead
(
fd
,
to
,
amount
,
offset
);
}
}
// namespace util
#endif
cpp/thirdpart/kenlm/util/parallel_read.hh
0 → 100644
View file @
688b6eac
#ifndef UTIL_PARALLEL_READ__
#define UTIL_PARALLEL_READ__

/* Read pieces of a file in parallel.  This has a very specific use case:
 * reading files from Lustre is CPU bound so multiple threads actually
 * increases throughput.  Speed matters when an LM takes a terabyte.
 */

#include <cstddef>
#include <stdint.h>

namespace util {
void ParallelRead(int fd, void *to, std::size_t amount, uint64_t offset);
} // namespace util

#endif // UTIL_PARALLEL_READ__
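A minimal sketch of driving ParallelRead, assuming the OpenReadOrThrow and SizeOrThrow helpers from util/file.hh and a hypothetical file name; when built without WITH_THREADS the call simply falls back to a single ErsatzPRead.

// Illustrative only; loads an entire file into a buffer.
#include "parallel_read.hh"
#include "file.hh"
#include <vector>

int main() {
  util::scoped_fd fd(util::OpenReadOrThrow("model.bin"));   // hypothetical file
  std::vector<char> buffer(util::SizeOrThrow(fd.get()));
  if (!buffer.empty())
    util::ParallelRead(fd.get(), &buffer[0], buffer.size(), 0 /* offset */);
  return 0;
}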
cpp/thirdpart/kenlm/util/pcqueue.hh
0 → 100644
View file @
688b6eac
#ifndef UTIL_PCQUEUE_H
#define UTIL_PCQUEUE_H

#include "exception.hh"

#include <boost/interprocess/sync/interprocess_semaphore.hpp>
#include <boost/scoped_array.hpp>
#include <boost/thread/mutex.hpp>
#include <boost/utility.hpp>

#include <cerrno>

#ifdef __APPLE__
#include <mach/semaphore.h>
#include <mach/task.h>
#include <mach/mach_traps.h>
#include <mach/mach.h>
#endif // __APPLE__

namespace util {

/* OS X Maverick and Boost interprocess were doing "Function not implemented."
 * So this is my own wrapper around the mach kernel APIs.
 */
#ifdef __APPLE__

#define MACH_CALL(call) UTIL_THROW_IF(KERN_SUCCESS != (call), Exception, "Mach call failure")

class Semaphore {
  public:
    explicit Semaphore(int value) : task_(mach_task_self()) {
      MACH_CALL(semaphore_create(task_, &back_, SYNC_POLICY_FIFO, value));
    }

    ~Semaphore() {
      MACH_CALL(semaphore_destroy(task_, back_));
    }

    void wait() {
      MACH_CALL(semaphore_wait(back_));
    }

    void post() {
      MACH_CALL(semaphore_signal(back_));
    }

  private:
    semaphore_t back_;
    task_t task_;
};

inline void WaitSemaphore(Semaphore &semaphore) {
  semaphore.wait();
}

#else
typedef boost::interprocess::interprocess_semaphore Semaphore;

inline void WaitSemaphore(Semaphore &on) {
  while (1) {
    try {
      on.wait();
      break;
    }
    catch (boost::interprocess::interprocess_exception &e) {
      if (e.get_native_error() != EINTR) {
        throw;
      }
    }
  }
}

#endif // __APPLE__

/**
 * Producer consumer queue safe for multiple producers and multiple consumers.
 * T must be default constructable and have operator=.
 * The value is copied twice for Consume(T &out) or three times for Consume(),
 * so larger objects should be passed via pointer.
 * Strong exception guarantee if operator= throws.  Undefined if semaphores throw.
 */
template <class T> class PCQueue : boost::noncopyable {
  public:
    explicit PCQueue(size_t size)
      : empty_(size), used_(0),
        storage_(new T[size]),
        end_(storage_.get() + size),
        produce_at_(storage_.get()),
        consume_at_(storage_.get()) {}

    // Add a value to the queue.
    void Produce(const T &val) {
      WaitSemaphore(empty_);
      {
        boost::unique_lock<boost::mutex> produce_lock(produce_at_mutex_);
        try {
          *produce_at_ = val;
        }
        catch (...) {
          empty_.post();
          throw;
        }
        if (++produce_at_ == end_) produce_at_ = storage_.get();
      }
      used_.post();
    }

    // Consume a value, assigning it to out.
    T& Consume(T &out) {
      WaitSemaphore(used_);
      {
        boost::unique_lock<boost::mutex> consume_lock(consume_at_mutex_);
        try {
          out = *consume_at_;
        }
        catch (...) {
          used_.post();
          throw;
        }
        if (++consume_at_ == end_) consume_at_ = storage_.get();
      }
      empty_.post();
      return out;
    }

    // Convenience version of Consume that copies the value to return.
    // The other version is faster.
    T Consume() {
      T ret;
      Consume(ret);
      return ret;
    }

  private:
    // Number of empty spaces in storage_.
    Semaphore empty_;
    // Number of occupied spaces in storage_.
    Semaphore used_;

    boost::scoped_array<T> storage_;

    T *const end_;

    // Index for next write in storage_.
    T *produce_at_;
    boost::mutex produce_at_mutex_;

    // Index for next read from storage_.
    T *consume_at_;
    boost::mutex consume_at_mutex_;
};

} // namespace util

#endif // UTIL_PCQUEUE_H
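A minimal sketch of the queue used across two threads; the Produce helper, the queue capacity, and the -1 poison value are illustrative assumptions, not part of the header. Produce blocks when the ring is full and Consume blocks when it is empty.

// Illustrative only; one producer thread feeding the main thread.
#include "pcqueue.hh"
#include <boost/thread/thread.hpp>
#include <iostream>

namespace {
void Produce(util::PCQueue<int> *queue) {
  for (int i = 0; i < 100; ++i) queue->Produce(i);
  queue->Produce(-1);  // poison value signalling the end
}
} // namespace

int main() {
  util::PCQueue<int> queue(16);            // 16 slots in the ring buffer
  boost::thread producer(Produce, &queue);
  for (int got; (got = queue.Consume()) != -1; ) {
    std::cout << got << '\n';
  }
  producer.join();
  return 0;
}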
cpp/thirdpart/kenlm/util/pcqueue_test.cc
0 → 100644
View file @
688b6eac
#include "pcqueue.hh"
#define BOOST_TEST_MODULE PCQueueTest
#include <boost/test/unit_test.hpp>
namespace
util
{
namespace
{
BOOST_AUTO_TEST_CASE
(
SingleThread
)
{
PCQueue
<
int
>
queue
(
10
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
queue
.
Produce
(
i
);
}
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
BOOST_CHECK_EQUAL
(
i
,
queue
.
Consume
());
}
}
}
}
// namespace util
cpp/thirdpart/kenlm/util/pool.cc
0 → 100644
View file @
688b6eac
#include "pool.hh"
#include "scoped.hh"
#include <cstdlib>
#include <algorithm>
namespace
util
{
Pool
::
Pool
()
{
current_
=
NULL
;
current_end_
=
NULL
;
}
Pool
::~
Pool
()
{
FreeAll
();
}
void
Pool
::
FreeAll
()
{
for
(
std
::
vector
<
void
*>::
const_iterator
i
(
free_list_
.
begin
());
i
!=
free_list_
.
end
();
++
i
)
{
free
(
*
i
);
}
free_list_
.
clear
();
current_
=
NULL
;
current_end_
=
NULL
;
}
void
*
Pool
::
More
(
std
::
size_t
size
)
{
std
::
size_t
amount
=
std
::
max
(
static_cast
<
size_t
>
(
32
)
<<
free_list_
.
size
(),
size
);
uint8_t
*
ret
=
static_cast
<
uint8_t
*>
(
MallocOrThrow
(
amount
));
free_list_
.
push_back
(
ret
);
current_
=
ret
+
size
;
current_end_
=
ret
+
amount
;
return
ret
;
}
}
// namespace util
cpp/thirdpart/kenlm/util/pool.hh
0 → 100644
View file @
688b6eac
#ifndef UTIL_POOL_H
#define UTIL_POOL_H

#include <cassert>
#include <cstring>
#include <vector>
#include <stdint.h>

namespace util {

/* Very simple pool.  It can only allocate memory.  And all of the memory it
 * allocates must be freed at the same time.
 */
class Pool {
  public:
    Pool();

    ~Pool();

    void *Allocate(std::size_t size) {
      void *ret = current_;
      current_ += size;
      if (current_ > current_end_) {
        ret = More(size);
      }
#ifdef DEBUG
      base_check_ = ret;
#endif
      return ret;
    }

    /** Extend (or contract) the most recent allocation.
     * @param base The base pointer of the allocation.  This must have been
     * returned by the MOST RECENT call to Allocate or Continue.
     * @param additional Change in the size.
     *
     * In most cases, more memory from the same page is used, in which case
     * base is unchanged and the function returns false.
     * If the page runs out, a new page is created and the memory (from base)
     * is copied.  The function returns true.
     *
     * @return Whether the base had to be changed due to allocating a page.
     */
    bool Continue(void *&base, std::ptrdiff_t additional) {
#ifdef DEBUG
      assert(base == base_check_);
#endif
      current_ += additional;
      if (current_ > current_end_) {
        std::size_t new_total = current_ - static_cast<uint8_t*>(base);
        void *new_base = More(new_total);
        std::memcpy(new_base, base, new_total - additional);
        base = new_base;
#ifdef DEBUG
        base_check_ = base;
#endif
        return true;
      }
      return false;
    }

    void FreeAll();

  private:
    void *More(std::size_t size);

    std::vector<void *> free_list_;

    uint8_t *current_, *current_end_;

#ifdef DEBUG
    // For debugging, check that Continue came from the most recent call.
    void *base_check_;
#endif // DEBUG

    // no copying
    Pool(const Pool &);
    Pool &operator=(const Pool &);
};

/**
 * Pool designed to allow limited freeing.
 * Keeps a linked list of free elements in the free spaces.
 * Will not reduce in size until FreeAll is called.
 */
class FreePool {
  public:
    explicit FreePool(std::size_t element_size)
      : free_list_(NULL),
        element_size_(element_size),
        padded_size_(std::max(element_size_, sizeof(void*))) {}

    void *Allocate() {
      if (free_list_) {
        void *ret = free_list_;
        free_list_ = *reinterpret_cast<void**>(free_list_);
        return ret;
      } else {
        return backing_.Allocate(padded_size_);
      }
    }

    void Free(void *ptr) {
      *reinterpret_cast<void**>(ptr) = free_list_;
      free_list_ = ptr;
    }

    std::size_t ElementSize() const { return element_size_; }

  private:
    void *free_list_;

    Pool backing_;

    const std::size_t element_size_;
    const std::size_t padded_size_;
};

} // namespace util

#endif // UTIL_POOL_H
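A minimal sketch of both allocators above; the sizes and string are illustrative assumptions. Pool is a bump allocator whose memory is released all at once by FreeAll, while FreePool recycles fixed-size slots through an intrusive free list.

// Illustrative only; bump allocation with Pool, recycling with FreePool.
#include "pool.hh"
#include <cstring>

int main() {
  util::Pool pool;
  char *word = static_cast<char*>(pool.Allocate(6));
  std::memcpy(word, "hello", 6);
  // Grow the most recent allocation; returns true if it had to move to a new page.
  void *base = word;
  pool.Continue(base, 4);
  pool.FreeAll();                        // releases every allocation at once

  util::FreePool recycler(sizeof(double));
  void *a = recycler.Allocate();
  recycler.Free(a);                      // slot goes back on the free list
  void *b = recycler.Allocate();         // typically reuses the same slot
  (void)b;
  return 0;
}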
cpp/thirdpart/kenlm/util/probing_hash_table.hh
0 → 100644
View file @
688b6eac
#ifndef UTIL_PROBING_HASH_TABLE_H
#define UTIL_PROBING_HASH_TABLE_H

#include "exception.hh"
#include "mmap.hh"

#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

#include <cassert>
#include <stdint.h>

namespace util {

/* Thrown when table grows too large */
class ProbingSizeException : public Exception {
  public:
    ProbingSizeException() throw() {}
    ~ProbingSizeException() throw() {}
};

// std::identity is an SGI extension :-(
struct IdentityHash {
  template <class T> T operator()(T arg) const { return arg; }
};

class DivMod {
  public:
    explicit DivMod(std::size_t buckets) : buckets_(buckets) {}

    static uint64_t RoundBuckets(uint64_t from) {
      return from;
    }

    template <class It> It Ideal(It begin, uint64_t hash) const {
      return begin + (hash % buckets_);
    }

    template <class BaseIt, class OutIt> void Next(BaseIt begin, BaseIt end, OutIt &it) const {
      if (++it == end) it = begin;
    }

    void Double() {
      buckets_ *= 2;
    }

  private:
    std::size_t buckets_;
};

class Power2Mod {
  public:
    explicit Power2Mod(std::size_t buckets) {
      UTIL_THROW_IF(!buckets || (((buckets - 1) & buckets)), ProbingSizeException, "Size " << buckets << " is not a power of 2.");
      mask_ = buckets - 1;
    }

    // Round up to next power of 2.
    static uint64_t RoundBuckets(uint64_t from) {
      --from;
      from |= from >> 1;
      from |= from >> 2;
      from |= from >> 4;
      from |= from >> 8;
      from |= from >> 16;
      from |= from >> 32;
      return from + 1;
    }

    template <class It> It Ideal(It begin, uint64_t hash) const {
      return begin + (hash & mask_);
    }

    template <class BaseIt, class OutIt> void Next(BaseIt begin, BaseIt /*end*/, OutIt &it) const {
      it = begin + ((it - begin + 1) & mask_);
    }

    void Double() {
      mask_ = (mask_ << 1) | 1;
    }

  private:
    std::size_t mask_;
};

template <class EntryT, class HashT, class EqualT> class AutoProbing;

/* Non-standard hash table
 * Buckets must be set at the beginning and must be greater than maximum number
 * of elements, else it throws ProbingSizeException.
 * Memory management and initialization is externalized to make it easier to
 * serialize these to disk and load them quickly.
 * Uses linear probing to find value.
 * Only insert and lookup operations.
 */
template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key>, class ModT = DivMod> class ProbingHashTable {
  public:
    typedef EntryT Entry;
    typedef typename Entry::Key Key;
    typedef const Entry *ConstIterator;
    typedef Entry *MutableIterator;
    typedef HashT Hash;
    typedef EqualT Equal;
    typedef ModT Mod;

    static uint64_t Size(uint64_t entries, float multiplier) {
      uint64_t buckets = Mod::RoundBuckets(std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries))));
      return buckets * sizeof(Entry);
    }

    // Must be assigned to later.
    ProbingHashTable() : mod_(1), entries_(0)
#ifdef DEBUG
      , initialized_(false)
#endif
    {}

    ProbingHashTable(void *start, std::size_t allocated, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal())
      : begin_(reinterpret_cast<MutableIterator>(start)),
        end_(begin_ + allocated / sizeof(Entry)),
        buckets_(end_ - begin_),
        invalid_(invalid),
        hash_(hash_func),
        equal_(equal_func),
        mod_(end_ - begin_),
        entries_(0)
#ifdef DEBUG
        , initialized_(true)
#endif
    {}

    void Relocate(void *new_base) {
      begin_ = reinterpret_cast<MutableIterator>(new_base);
      end_ = begin_ + buckets_;
    }

    MutableIterator Ideal(const Key key) {
      return mod_.Ideal(begin_, hash_(key));
    }
    ConstIterator Ideal(const Key key) const {
      return mod_.Ideal(begin_, hash_(key));
    }

    template <class T> MutableIterator Insert(const T &t) {
#ifdef DEBUG
      assert(initialized_);
#endif
      UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
      return UncheckedInsert(t);
    }

    // Return true if the value was found (and not inserted).  This is consistent with Find but the opposite of hash_map!
    template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
#ifdef DEBUG
      assert(initialized_);
#endif
      for (MutableIterator i = Ideal(t.GetKey());; mod_.Next(begin_, end_, i)) {
        Key got(i->GetKey());
        if (equal_(got, t.GetKey())) { out = i; return true; }
        if (equal_(got, invalid_)) {
          UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
          *i = t;
          out = i;
          return false;
        }
      }
    }

    void FinishedInserting() {}

    // Don't change anything related to GetKey,
    template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
#ifdef DEBUG
      assert(initialized_);
#endif
      for (MutableIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) {
        Key got(i->GetKey());
        if (equal_(got, key)) { out = i; return true; }
        if (equal_(got, invalid_)) return false;
      }
    }

    // Like UnsafeMutableFind, but the key must be there.
    template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
      for (MutableIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) {
        Key got(i->GetKey());
        if (equal_(got, key)) { return i; }
        assert(!equal_(got, invalid_));
      }
    }

    // Iterator is both input and output.
    template <class Key> bool FindFromIdeal(const Key key, ConstIterator &i) const {
#ifdef DEBUG
      assert(initialized_);
#endif
      for (;; mod_.Next(begin_, end_, i)) {
        Key got(i->GetKey());
        if (equal_(got, key)) return true;
        if (equal_(got, invalid_)) return false;
      }
    }

    template <class Key> bool Find(const Key key, ConstIterator &out) const {
      out = Ideal(key);
      return FindFromIdeal(key, out);
    }

    // Like Find but we're sure it must be there.
    template <class Key> ConstIterator MustFind(const Key key) const {
      for (ConstIterator i(Ideal(key));; mod_.Next(begin_, end_, i)) {
        Key got(i->GetKey());
        if (equal_(got, key)) { return i; }
        assert(!equal_(got, invalid_));
      }
    }

    void Clear() {
      Entry invalid;
      invalid.SetKey(invalid_);
      std::fill(begin_, end_, invalid);
      entries_ = 0;
    }

    // Return number of entries assuming no serialization went on.
    std::size_t SizeNoSerialization() const {
      return entries_;
    }

    // Return memory size expected by Double.
    std::size_t DoubleTo() const {
      return buckets_ * 2 * sizeof(Entry);
    }

    // Inform the table that it has double the amount of memory.
    // Pass clear_new = false if you are sure the new memory is initialized
    // properly (to invalid_) i.e. by mremap.
    void Double(void *new_base, bool clear_new = true) {
      begin_ = static_cast<MutableIterator>(new_base);
      MutableIterator old_end = begin_ + buckets_;
      buckets_ *= 2;
      end_ = begin_ + buckets_;
      mod_.Double();
      if (clear_new) {
        Entry invalid;
        invalid.SetKey(invalid_);
        std::fill(old_end, end_, invalid);
      }
      std::vector<Entry> rolled_over;
      // Move roll-over entries to a buffer because they might not roll over anymore.  This should be small.
      for (MutableIterator i = begin_; i != old_end && !equal_(i->GetKey(), invalid_); ++i) {
        rolled_over.push_back(*i);
        i->SetKey(invalid_);
      }
      /* Re-insert everything.  Entries might go backwards to take over a
       * recently opened gap, stay, move to new territory, or wrap around.  If
       * an entry wraps around, it might go to a pointer greater than i (which
       * can happen at the beginning) and it will be revisited to possibly fill
       * in a gap created later.
       */
      Entry temp;
      for (MutableIterator i = begin_; i != old_end; ++i) {
        if (!equal_(i->GetKey(), invalid_)) {
          temp = *i;
          i->SetKey(invalid_);
          UncheckedInsert(temp);
        }
      }
      // Put the roll-over entries back in.
      for (typename std::vector<Entry>::const_iterator i(rolled_over.begin()); i != rolled_over.end(); ++i) {
        UncheckedInsert(*i);
      }
    }

    // Mostly for tests, check consistency of every entry.
    void CheckConsistency() {
      MutableIterator last;
      for (last = end_ - 1; last >= begin_ && !equal_(last->GetKey(), invalid_); --last) {}
      UTIL_THROW_IF(last == begin_, ProbingSizeException, "Completely full");
      MutableIterator i;
      // Beginning can be wrap-arounds.
      for (i = begin_; !equal_(i->GetKey(), invalid_); ++i) {
        MutableIterator ideal = Ideal(i->GetKey());
        UTIL_THROW_IF(ideal > i && ideal <= last, Exception, "Inconsistency at position " << (i - begin_) << " should be at " << (ideal - begin_));
      }
      MutableIterator pre_gap = i;
      for (; i != end_; ++i) {
        if (equal_(i->GetKey(), invalid_)) {
          pre_gap = i;
          continue;
        }
        MutableIterator ideal = Ideal(i->GetKey());
        UTIL_THROW_IF(ideal > i || ideal <= pre_gap, Exception, "Inconsistency at position " << (i - begin_) << " with ideal " << (ideal - begin_));
      }
    }

    ConstIterator RawBegin() const {
      return begin_;
    }
    ConstIterator RawEnd() const {
      return end_;
    }

  private:
    friend class AutoProbing<Entry, Hash, Equal>;

    template <class T> MutableIterator UncheckedInsert(const T &t) {
      for (MutableIterator i(Ideal(t.GetKey()));; mod_.Next(begin_, end_, i)) {
        if (equal_(i->GetKey(), invalid_)) { *i = t; return i; }
      }
    }

    MutableIterator begin_;
    MutableIterator end_;
    std::size_t buckets_;
    Key invalid_;
    Hash hash_;
    Equal equal_;
    Mod mod_;

    std::size_t entries_;
#ifdef DEBUG
    bool initialized_;
#endif
};

// Resizable linear probing hash table.  This owns the memory.
template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class AutoProbing {
  private:
    typedef ProbingHashTable<EntryT, HashT, EqualT, Power2Mod> Backend;
  public:
    static std::size_t MemUsage(std::size_t size, float multiplier = 1.5) {
      return Backend::Size(size, multiplier);
    }

    typedef EntryT Entry;
    typedef typename Entry::Key Key;
    typedef const Entry *ConstIterator;
    typedef Entry *MutableIterator;
    typedef HashT Hash;
    typedef EqualT Equal;

    AutoProbing(std::size_t initial_size = 5, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) :
      allocated_(Backend::Size(initial_size, 1.2)), mem_(allocated_, KeyIsRawZero(invalid)), backend_(mem_.get(), allocated_, invalid, hash_func, equal_func) {
      threshold_ = std::min<std::size_t>(backend_.buckets_ - 1, backend_.buckets_ * 0.9);
      if (!KeyIsRawZero(invalid)) {
        Clear();
      }
    }

    // Assumes that the key is unique.  Multiple insertions won't cause a failure, just inconsistent lookup.
    template <class T> MutableIterator Insert(const T &t) {
      ++backend_.entries_;
      DoubleIfNeeded();
      return backend_.UncheckedInsert(t);
    }

    template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
      DoubleIfNeeded();
      return backend_.FindOrInsert(t, out);
    }

    template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
      return backend_.UnsafeMutableFind(key, out);
    }

    template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
      return backend_.UnsafeMutableMustFind(key);
    }

    template <class Key> bool Find(const Key key, ConstIterator &out) const {
      return backend_.Find(key, out);
    }

    template <class Key> ConstIterator MustFind(const Key key) const {
      return backend_.MustFind(key);
    }

    std::size_t Size() const {
      return backend_.SizeNoSerialization();
    }

    void Clear() {
      backend_.Clear();
    }

    ConstIterator RawBegin() const {
      return backend_.RawBegin();
    }
    ConstIterator RawEnd() const {
      return backend_.RawEnd();
    }

  private:
    void DoubleIfNeeded() {
      if (UTIL_LIKELY(Size() < threshold_))
        return;
      HugeRealloc(backend_.DoubleTo(), KeyIsRawZero(backend_.invalid_), mem_);
      allocated_ = backend_.DoubleTo();
      backend_.Double(mem_.get(), !KeyIsRawZero(backend_.invalid_));
      threshold_ = std::min<std::size_t>(backend_.buckets_ - 1, backend_.buckets_ * 0.9);
    }

    bool KeyIsRawZero(const Key &key) {
      for (const uint8_t *i = reinterpret_cast<const uint8_t*>(&key); i < reinterpret_cast<const uint8_t*>(&key) + sizeof(Key); ++i) {
        if (*i) return false;
      }
      return true;
    }

    std::size_t allocated_;
    util::scoped_memory mem_;
    Backend backend_;
    std::size_t threshold_;
};

} // namespace util

#endif // UTIL_PROBING_HASH_TABLE_H
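A minimal sketch of the resizable AutoProbing wrapper above; the CountEntry type and the sample string are illustrative assumptions. The entry type must provide Key, GetKey(), and SetKey(), and the default invalid key of 0 means real keys must be non-zero.

// Illustrative only; counts keys in a resizable linear probing table.
#include "probing_hash_table.hh"
#include "murmur_hash.hh"
#include <iostream>

struct CountEntry {
  typedef uint64_t Key;
  uint64_t key;
  unsigned count;
  uint64_t GetKey() const { return key; }
  void SetKey(uint64_t to) { key = to; }
};

int main() {
  util::AutoProbing<CountEntry, util::IdentityHash> table;
  CountEntry entry;
  entry.key = util::MurmurHashNative("example", 7);  // assumed non-zero in practice
  entry.count = 1;
  util::AutoProbing<CountEntry, util::IdentityHash>::MutableIterator it;
  if (table.FindOrInsert(entry, it)) {
    ++it->count;                                     // key was already present
  }
  std::cout << table.Size() << std::endl;
  return 0;
}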
cpp/thirdpart/kenlm/util/probing_hash_table_benchmark_main.cc
0 → 100644
View file @
688b6eac
#include "file.hh"
#include "probing_hash_table.hh"
#include "mmap.hh"
#include "usage.hh"
#include "thread_pool.hh"
#include <boost/thread/mutex.hpp>
#include <boost/thread/locks.hpp>
#ifdef WIN32
#include <windows.h>
#include <processthreadsapi.h>
#else
#include <sys/resource.h>
#include <sys/time.h>
#endif
#include <iostream>
namespace
util
{
namespace
{
struct
Entry
{
typedef
uint64_t
Key
;
Key
key
;
Key
GetKey
()
const
{
return
key
;
}
};
// I don't care if this doesn't run on Windows. Empirically /dev/urandom was faster than boost::random's Mersenne Twister.
class
URandom
{
public:
URandom
()
:
it_
(
buf_
+
1024
),
end_
(
buf_
+
1024
),
file_
(
util
::
OpenReadOrThrow
(
"/dev/urandom"
))
{}
uint64_t
Get
()
{
if
(
it_
==
end_
)
{
it_
=
buf_
;
util
::
ReadOrThrow
(
file_
.
get
(),
buf_
,
sizeof
(
buf_
));
it_
=
buf_
;
}
return
*
it_
++
;
}
void
Batch
(
uint64_t
*
begin
,
uint64_t
*
end
)
{
util
::
ReadOrThrow
(
file_
.
get
(),
begin
,
(
end
-
begin
)
*
sizeof
(
uint64_t
));
}
private:
uint64_t
buf_
[
1024
];
uint64_t
*
it_
,
*
end_
;
util
::
scoped_fd
file_
;
};
struct
PrefetchEntry
{
uint64_t
key
;
const
Entry
*
pointer
;
};
template
<
class
TableT
,
unsigned
PrefetchSize
>
class
PrefetchQueue
{
public:
typedef
TableT
Table
;
explicit
PrefetchQueue
(
Table
&
table
)
:
table_
(
table
),
cur_
(
0
),
twiddle_
(
false
)
{
for
(
PrefetchEntry
*
i
=
entries_
;
i
!=
entries_
+
PrefetchSize
;
++
i
)
i
->
pointer
=
NULL
;
}
void
Add
(
uint64_t
key
)
{
if
(
Cur
().
pointer
)
{
twiddle_
^=
table_
.
FindFromIdeal
(
Cur
().
key
,
Cur
().
pointer
);
}
Cur
().
key
=
key
;
Cur
().
pointer
=
table_
.
Ideal
(
key
);
__builtin_prefetch
(
Cur
().
pointer
,
0
,
0
);
Next
();
}
bool
Drain
()
{
if
(
Cur
().
pointer
)
{
for
(
PrefetchEntry
*
i
=
&
Cur
();
i
<
entries_
+
PrefetchSize
;
++
i
)
{
twiddle_
^=
table_
.
FindFromIdeal
(
i
->
key
,
i
->
pointer
);
}
}
for
(
PrefetchEntry
*
i
=
entries_
;
i
<
&
Cur
();
++
i
)
{
twiddle_
^=
table_
.
FindFromIdeal
(
i
->
key
,
i
->
pointer
);
}
return
twiddle_
;
}
private:
PrefetchEntry
&
Cur
()
{
return
entries_
[
cur_
];
}
void
Next
()
{
++
cur_
;
cur_
=
cur_
%
PrefetchSize
;
}
Table
&
table_
;
PrefetchEntry
entries_
[
PrefetchSize
];
std
::
size_t
cur_
;
bool
twiddle_
;
PrefetchQueue
(
const
PrefetchQueue
&
);
void
operator
=
(
const
PrefetchQueue
&
);
};
template
<
class
TableT
>
class
Immediate
{
public:
typedef
TableT
Table
;
explicit
Immediate
(
Table
&
table
)
:
table_
(
table
),
twiddle_
(
false
)
{}
void
Add
(
uint64_t
key
)
{
typename
Table
::
ConstIterator
it
;
twiddle_
^=
table_
.
Find
(
key
,
it
);
}
bool
Drain
()
const
{
return
twiddle_
;
}
private:
Table
&
table_
;
bool
twiddle_
;
};
std
::
size_t
Size
(
uint64_t
entries
,
float
multiplier
=
1.5
)
{
typedef
util
::
ProbingHashTable
<
Entry
,
util
::
IdentityHash
,
std
::
equal_to
<
Entry
::
Key
>
,
Power2Mod
>
Table
;
// Always round up to power of 2 for fair comparison.
return
Power2Mod
::
RoundBuckets
(
Table
::
Size
(
entries
,
multiplier
)
/
sizeof
(
Entry
))
*
sizeof
(
Entry
);
}
template
<
class
Queue
>
bool
Test
(
URandom
&
rn
,
uint64_t
entries
,
const
uint64_t
*
const
queries_begin
,
const
uint64_t
*
const
queries_end
,
bool
ordinary_malloc
,
float
multiplier
=
1.5
)
{
std
::
size_t
size
=
Size
(
entries
,
multiplier
);
scoped_memory
backing
;
if
(
ordinary_malloc
)
{
backing
.
reset
(
util
::
CallocOrThrow
(
size
),
size
,
scoped_memory
::
MALLOC_ALLOCATED
);
}
else
{
util
::
HugeMalloc
(
size
,
true
,
backing
);
}
typename
Queue
::
Table
table
(
backing
.
get
(),
size
);
double
start
=
CPUTime
();
for
(
uint64_t
i
=
0
;
i
<
entries
;
++
i
)
{
Entry
entry
;
entry
.
key
=
rn
.
Get
();
table
.
Insert
(
entry
);
}
double
inserted
=
CPUTime
()
-
start
;
double
before_lookup
=
CPUTime
();
Queue
queue
(
table
);
for
(
const
uint64_t
*
i
=
queries_begin
;
i
!=
queries_end
;
++
i
)
{
queue
.
Add
(
*
i
);
}
bool
meaningless
=
queue
.
Drain
();
std
::
cout
<<
' '
<<
(
inserted
/
static_cast
<
double
>
(
entries
))
<<
' '
<<
(
CPUTime
()
-
before_lookup
)
/
static_cast
<
double
>
(
queries_end
-
queries_begin
)
<<
std
::
flush
;
return
meaningless
;
}
bool
TestRun
(
uint64_t
lookups
=
20000000
,
float
multiplier
=
1.5
)
{
URandom
rn
;
util
::
scoped_memory
queries
;
HugeMalloc
(
lookups
*
sizeof
(
uint64_t
),
true
,
queries
);
rn
.
Batch
(
static_cast
<
uint64_t
*>
(
queries
.
get
()),
static_cast
<
uint64_t
*>
(
queries
.
get
())
+
lookups
);
uint64_t
physical_mem_limit
=
util
::
GuessPhysicalMemory
()
/
2
;
bool
meaningless
=
true
;
for
(
uint64_t
i
=
4
;
Size
(
i
/
multiplier
)
<
physical_mem_limit
;
i
*=
4
)
{
std
::
cout
<<
static_cast
<
std
::
size_t
>
(
i
/
multiplier
)
<<
' '
<<
Size
(
i
/
multiplier
);
typedef
util
::
ProbingHashTable
<
Entry
,
util
::
IdentityHash
,
std
::
equal_to
<
Entry
::
Key
>
,
Power2Mod
>
Table
;
typedef
util
::
ProbingHashTable
<
Entry
,
util
::
IdentityHash
,
std
::
equal_to
<
Entry
::
Key
>
,
DivMod
>
TableDiv
;
const
uint64_t
*
const
queries_begin
=
static_cast
<
const
uint64_t
*>
(
queries
.
get
());
meaningless
^=
util
::
Test
<
Immediate
<
TableDiv
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
true
,
multiplier
);
meaningless
^=
util
::
Test
<
Immediate
<
Table
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
true
,
multiplier
);
meaningless
^=
util
::
Test
<
PrefetchQueue
<
Table
,
4
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
true
,
multiplier
);
meaningless
^=
util
::
Test
<
Immediate
<
Table
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
false
,
multiplier
);
meaningless
^=
util
::
Test
<
PrefetchQueue
<
Table
,
2
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
false
,
multiplier
);
meaningless
^=
util
::
Test
<
PrefetchQueue
<
Table
,
4
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
false
,
multiplier
);
meaningless
^=
util
::
Test
<
PrefetchQueue
<
Table
,
8
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
false
,
multiplier
);
meaningless
^=
util
::
Test
<
PrefetchQueue
<
Table
,
16
>
>
(
rn
,
i
/
multiplier
,
queries_begin
,
queries_begin
+
lookups
,
false
,
multiplier
);
std
::
cout
<<
std
::
endl
;
}
return
meaningless
;
}
template
<
class
Table
>
struct
ParallelTestRequest
{
ParallelTestRequest
()
:
queries_begin_
(
NULL
),
queries_end_
(
NULL
),
table_
(
NULL
)
{}
ParallelTestRequest
(
const
uint64_t
*
queries_begin
,
const
uint64_t
*
queries_end
,
Table
*
table
)
:
queries_begin_
(
queries_begin
),
queries_end_
(
queries_end
),
table_
(
table
)
{}
bool
operator
==
(
const
ParallelTestRequest
&
rhs
)
const
{
return
this
->
queries_begin_
==
rhs
.
queries_begin_
&&
this
->
queries_end_
==
rhs
.
queries_end_
;
}
const
uint64_t
*
queries_begin_
;
const
uint64_t
*
queries_end_
;
Table
*
table_
;
};
template
<
class
TableT
>
struct
ParallelTestConstruct
{
ParallelTestConstruct
(
boost
::
mutex
&
lock
,
const
uint64_t
*
const
burn_begin
,
const
uint64_t
*
const
burn_end
,
TableT
*
table
)
:
lock_
(
lock
),
burn_begin_
(
burn_begin
),
burn_end_
(
burn_end
),
table_
(
table
){}
boost
::
mutex
&
lock_
;
const
uint64_t
*
const
burn_begin_
;
const
uint64_t
*
const
burn_end_
;
TableT
*
table_
;
};
template
<
class
Queue
>
struct
ParallelTestHandler
{
typedef
ParallelTestRequest
<
typename
Queue
::
Table
>
Request
;
explicit
ParallelTestHandler
(
const
ParallelTestConstruct
<
typename
Queue
::
Table
>&
construct
)
:
lock_
(
construct
.
lock_
),
totalTime_
(
0.0
),
nRequests_
(
0
),
nQueries_
(
0
),
error_
(
false
),
twiddle_
(
false
){
//perform initial burn
for
(
const
uint64_t
*
i
=
construct
.
burn_begin_
;
i
<
construct
.
burn_end_
;
i
++
){
typename
Queue
::
Table
::
ConstIterator
it
;
twiddle_
^=
construct
.
table_
->
Find
(
*
i
,
it
);
}
}
void
operator
()(
Request
request
){
if
(
error_
)
return
;
Queue
queue
(
*
request
.
table_
);
double
start
=
ThreadTime
();
if
(
start
<
0.0
){
error_
=
true
;
return
;
}
for
(
const
uint64_t
*
i
=
request
.
queries_begin_
;
i
!=
request
.
queries_end_
;
++
i
){
queue
.
Add
(
*
i
);
}
twiddle_
^=
queue
.
Drain
();
double
end
=
ThreadTime
();
if
(
end
<
0.0
){
error_
=
true
;
return
;
}
totalTime_
+=
end
-
start
;
nQueries_
+=
request
.
queries_end_
-
request
.
queries_begin_
;
++
nRequests_
;
}
virtual
~
ParallelTestHandler
()
{
boost
::
unique_lock
<
boost
::
mutex
>
produce_lock
(
lock_
);
if
(
error_
){
std
::
cout
<<
"Error "
;
}
else
{
std
::
cout
<<
nRequests_
<<
' '
<<
' '
<<
nQueries_
<<
' '
<<
totalTime_
<<
std
::
endl
;
}
std
::
cerr
<<
"Meaningless "
<<
twiddle_
<<
std
::
endl
;
}
private:
boost
::
mutex
&
lock_
;
double
totalTime_
;
std
::
size_t
nRequests_
;
std
::
size_t
nQueries_
;
bool
error_
;
bool
twiddle_
;
};
template
<
class
Queue
>
void
ParallelTest
(
typename
Queue
::
Table
*
table
,
const
uint64_t
*
const
queries_begin
,
const
uint64_t
*
const
queries_end
,
std
::
size_t
num_threads
,
std
::
size_t
tasks_per_thread
,
std
::
size_t
burn
){
boost
::
mutex
lock
;
ParallelTestConstruct
<
typename
Queue
::
Table
>
construct
(
lock
,
queries_begin
,
queries_begin
+
burn
,
table
);
ParallelTestRequest
<
typename
Queue
::
Table
>
poison
(
NULL
,
NULL
,
NULL
);
{
util
::
ThreadPool
<
ParallelTestHandler
<
Queue
>
>
pool
(
num_threads
,
num_threads
,
construct
,
poison
);
const
uint64_t
queries_per_thread
=
(
static_cast
<
uint64_t
>
(
queries_end
-
queries_begin
-
burn
)
/
num_threads
)
/
tasks_per_thread
;
for
(
const
uint64_t
*
i
=
queries_begin
+
burn
;
i
+
queries_per_thread
<=
queries_end
;
i
+=
queries_per_thread
){
ParallelTestRequest
<
typename
Queue
::
Table
>
request
(
i
,
i
+
queries_per_thread
,
table
);
pool
.
Produce
(
request
);
}
}
// pool gets deallocated and all jobs finish
std
::
cout
<<
std
::
endl
;
}
void
ParallelTestRun
(
std
::
size_t
tasks_per_thread
=
1
,
std
::
size_t
burn
=
4000
,
uint64_t
lookups
=
20000000
,
float
multiplier
=
1.5
)
{
URandom
rn
;
util
::
scoped_memory
queries
;
HugeMalloc
((
lookups
+
burn
)
*
sizeof
(
uint64_t
),
true
,
queries
);
rn
.
Batch
(
static_cast
<
uint64_t
*>
(
queries
.
get
()),
static_cast
<
uint64_t
*>
(
queries
.
get
())
+
lookups
+
burn
);
const
uint64_t
*
const
queries_begin
=
static_cast
<
const
uint64_t
*>
(
queries
.
get
());
const
uint64_t
*
const
queries_end
=
queries_begin
+
lookups
+
burn
;
typedef
util
::
ProbingHashTable
<
Entry
,
util
::
IdentityHash
,
std
::
equal_to
<
Entry
::
Key
>
,
Power2Mod
>
Table
;
uint64_t
physical_mem_limit
=
util
::
GuessPhysicalMemory
()
/
2
;
for
(
uint64_t
i
=
4
;
Size
(
i
/
multiplier
,
multiplier
)
<
physical_mem_limit
;
i
*=
4
)
{
std
::
size_t
entries
=
static_cast
<
std
::
size_t
>
(
i
/
multiplier
);
std
::
size_t
size
=
Size
(
i
/
multiplier
,
multiplier
);
scoped_memory
backing
;
util
::
HugeMalloc
(
size
,
true
,
backing
);
Table
table
(
backing
.
get
(),
size
);
for
(
uint64_t
j
=
0
;
j
<
entries
;
++
j
)
{
Entry
entry
;
entry
.
key
=
rn
.
Get
();
table
.
Insert
(
entry
);
}
for
(
std
::
size_t
num_threads
=
1
;
num_threads
<=
16
;
num_threads
*=
2
){
std
::
cout
<<
entries
<<
' '
<<
size
<<
' '
<<
num_threads
<<
' '
<<
std
::
endl
;
util
::
ParallelTest
<
Immediate
<
Table
>
>
(
&
table
,
queries_begin
,
queries_end
,
num_threads
,
tasks_per_thread
,
burn
);
util
::
ParallelTest
<
PrefetchQueue
<
Table
,
2
>
>
(
&
table
,
queries_begin
,
queries_end
,
num_threads
,
tasks_per_thread
,
burn
);
util
::
ParallelTest
<
PrefetchQueue
<
Table
,
4
>
>
(
&
table
,
queries_begin
,
queries_end
,
num_threads
,
tasks_per_thread
,
burn
);
util
::
ParallelTest
<
PrefetchQueue
<
Table
,
8
>
>
(
&
table
,
queries_begin
,
queries_end
,
num_threads
,
tasks_per_thread
,
burn
);
util
::
ParallelTest
<
PrefetchQueue
<
Table
,
16
>
>
(
&
table
,
queries_begin
,
queries_end
,
num_threads
,
tasks_per_thread
,
burn
);
}
}
}
}
// namespace
}
// namespace util
int
main
()
{
//bool meaningless = false;
std
::
cout
<<
"#CPU time
\n
"
;
//meaningless ^= util::TestRun();
util
::
ParallelTestRun
(
10
,
4000
);
//std::cerr << "Meaningless: " << meaningless << '\n';
}