Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
googletest
Commits
0c5a6624
Commit
0c5a6624
authored
Aug 25, 2008
by
vladlosev
Browse files
Implement wide->UTF-8 string conversion more correctly
parent
c6e674db
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
301 additions
and
66 deletions
+301
-66
include/gtest/internal/gtest-port.h
include/gtest/internal/gtest-port.h
+6
-0
src/gtest-internal-inl.h
src/gtest-internal-inl.h
+24
-2
src/gtest.cc
src/gtest.cc
+125
-38
test/gtest_unittest.cc
test/gtest_unittest.cc
+146
-26
No files found.
include/gtest/internal/gtest-port.h
View file @
0c5a6624
...
...
@@ -225,6 +225,12 @@
#include <sys/mman.h>
#endif // GTEST_HAS_STD_STRING && defined(GTEST_OS_LINUX)
// Determines whether the system compiler uses UTF-16 for encoding wide strings.
#if defined(GTEST_OS_WINDOWS) || defined(GTEST_OS_CYGWIN) || \
defined(__SYMBIAN32__)
#define GTEST_WIDE_STRING_USES_UTF16_ 1
#endif
// Defines some utility macros.
// The GNU compiler emits a warning if nested "if" statements are followed by
...
...
src/gtest-internal-inl.h
View file @
0c5a6624
...
...
@@ -133,8 +133,30 @@ class GTestFlagSaver {
internal
::
Int32
repeat_
;
}
GTEST_ATTRIBUTE_UNUSED
;
// Converts a Unicode code-point to its UTF-8 encoding.
String
ToUtf8String
(
wchar_t
wchar
);
// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// The output buffer str must contain at least 32 characters.
// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
// as '(Invalid Unicode 0xXXXXXXXX)'.
char
*
CodePointToUtf8
(
UInt32
code_point
,
char
*
str
);
// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
// UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Multilingual Plane.
String
WideStringToUtf8
(
const
wchar_t
*
str
,
int
num_chars
);
// Returns the number of active threads, or 0 when there is an error.
size_t
GetThreadCount
();
...
...
src/gtest.cc
View file @
0c5a6624
...
...
@@ -784,16 +784,19 @@ bool String::CStringEquals(const char * lhs, const char * rhs) {
// encoding, and streams the result to the given Message object.
static
void
StreamWideCharsToMessage
(
const
wchar_t
*
wstr
,
size_t
len
,
Message
*
msg
)
{
for
(
size_t
i
=
0
;
i
!=
len
;
i
++
)
{
// TODO(wan): consider allowing a testing::String object to
// contain '\0'. This will make it behave more like std::string,
// and will allow ToUtf8String() to return the correct encoding
// for '\0' s.t. we can get rid of the conditional here (and in
// several other places).
if
(
wstr
[
i
])
{
*
msg
<<
internal
::
ToUtf8String
(
wstr
[
i
]);
// TODO(wan): consider allowing a testing::String object to
// contain '\0'. This will make it behave more like std::string,
// and will allow ToUtf8String() to return the correct encoding
// for '\0' s.t. we can get rid of the conditional here (and in
// several other places).
for
(
size_t
i
=
0
;
i
!=
len
;
)
{
// NOLINT
if
(
wstr
[
i
]
!=
L'\0'
)
{
*
msg
<<
WideStringToUtf8
(
wstr
+
i
,
len
-
i
);
while
(
i
!=
len
&&
wstr
[
i
]
!=
L'\0'
)
i
++
;
}
else
{
*
msg
<<
'\0'
;
i
++
;
}
}
}
...
...
@@ -852,8 +855,10 @@ String FormatForFailureMessage(wchar_t wchar) {
Message
msg
;
// A String object cannot contain '\0', so we print "\\0" when wchar is
// L'\0'.
msg
<<
"L'"
<<
(
wchar
?
ToUtf8String
(
wchar
).
c_str
()
:
"
\\
0"
)
<<
"' ("
<<
wchar_as_uint64
<<
", 0x"
<<
::
std
::
setbase
(
16
)
char
buffer
[
32
];
// CodePointToUtf8 requires a buffer that big.
msg
<<
"L'"
<<
(
wchar
?
CodePointToUtf8
(
static_cast
<
UInt32
>
(
wchar
),
buffer
)
:
"
\\
0"
)
<<
"' ("
<<
wchar_as_uint64
<<
", 0x"
<<
::
std
::
setbase
(
16
)
<<
wchar_as_uint64
<<
")"
;
return
msg
.
GetString
();
}
...
...
@@ -1317,31 +1322,118 @@ inline UInt32 ChopLowBits(UInt32* bits, int n) {
return
low_bits
;
}
// Converts a Unicode code-point to its UTF-8 encoding.
String
ToUtf8String
(
wchar_t
wchar
)
{
char
str
[
5
]
=
{};
// Initializes str to all '\0' characters.
UInt32
code
=
static_cast
<
UInt32
>
(
wchar
);
if
(
code
<=
kMaxCodePoint1
)
{
str
[
0
]
=
static_cast
<
char
>
(
code
);
// 0xxxxxxx
}
else
if
(
code
<=
kMaxCodePoint2
)
{
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xC0
|
code
);
// 110xxxxx
}
else
if
(
code
<=
kMaxCodePoint3
)
{
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xE0
|
code
);
// 1110xxxx
}
else
if
(
code
<=
kMaxCodePoint4
)
{
str
[
3
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xF0
|
code
);
// 11110xxx
// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// The output buffer str must contain at least 32 characters.
// The function returns the address of the output buffer.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be output
// as '(Invalid Unicode 0xXXXXXXXX)'.
char
*
CodePointToUtf8
(
UInt32
code_point
,
char
*
str
)
{
if
(
code_point
<=
kMaxCodePoint1
)
{
str
[
1
]
=
'\0'
;
str
[
0
]
=
static_cast
<
char
>
(
code_point
);
// 0xxxxxxx
}
else
if
(
code_point
<=
kMaxCodePoint2
)
{
str
[
2
]
=
'\0'
;
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xC0
|
code_point
);
// 110xxxxx
}
else
if
(
code_point
<=
kMaxCodePoint3
)
{
str
[
3
]
=
'\0'
;
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xE0
|
code_point
);
// 1110xxxx
}
else
if
(
code_point
<=
kMaxCodePoint4
)
{
str
[
4
]
=
'\0'
;
str
[
3
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
2
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
1
]
=
static_cast
<
char
>
(
0x80
|
ChopLowBits
(
&
code_point
,
6
));
// 10xxxxxx
str
[
0
]
=
static_cast
<
char
>
(
0xF0
|
code_point
);
// 11110xxx
}
else
{
return
String
::
Format
(
"(Invalid Unicode 0x%llX)"
,
static_cast
<
UInt64
>
(
wchar
));
// The longest string String::Format can produce when invoked
// with these parameters is 28 character long (not including
// the terminating nul character). We are asking for 32 character
// buffer just in case. This is also enough for strncpy to
// null-terminate the destination string.
// MSVC 8 deprecates strncpy(), so we want to suppress warning
// 4996 (deprecated function) there.
#ifdef GTEST_OS_WINDOWS // We are on Windows.
#pragma warning(push) // Saves the current warning state.
#pragma warning(disable:4996) // Temporarily disables warning 4996.
#endif
strncpy
(
str
,
String
::
Format
(
"(Invalid Unicode 0x%X)"
,
code_point
).
c_str
(),
32
);
#ifdef GTEST_OS_WINDOWS // We are on Windows.
#pragma warning(pop) // Restores the warning state.
#endif
str
[
31
]
=
'\0'
;
// Makes sure no change in the format to strncpy leaves
// the result unterminated.
}
return
str
;
}
// The following two functions only make sense if the system
// uses UTF-16 for wide string encoding. All supported systems
// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
return
String
(
str
);
// Reports whether (first, second) form a UTF-16 surrogate pair, i.e. a
// value in 0xD800-0xDBFF followed by a value in 0xDC00-0xDFFF.  Such a
// pair should be merged into a single Unicode code point with
// CreateCodePointFromUtf16SurrogatePair.  Always returns false when
// wchar_t is not 2 bytes wide.
inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
  if (sizeof(wchar_t) != 2)
    return false;

  // Masking with 0xFC00 keeps the top six bits of a 16-bit unit; those
  // bits distinguish the two halves of a surrogate pair.
  const bool first_is_lead = (first & 0xFC00) == 0xD800;
  const bool second_is_trail = (second & 0xFC00) == 0xDC00;
  return first_is_lead && second_is_trail;
}
// Combines a UTF-16 surrogate pair into the Unicode code point it encodes.
// When wchar_t is not 2 bytes wide the pair is not interpreted and the
// value of 'first' is returned unchanged.
inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
                                                    wchar_t second) {
  if (sizeof(wchar_t) != 2) {
    // This should not be called on such systems, but a sensible default
    // is provided in case it is.
    return static_cast<UInt32>(first);
  }

  // Each half of the pair contributes its low 10 bits; the combined
  // 20-bit value is offset by 0x10000.
  const UInt32 mask = (1 << 10) - 1;
  const UInt32 high_bits = (first & mask) << 10;
  const UInt32 low_bits = second & mask;
  return (high_bits | low_bits) + 0x10000;
}
// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// Conversion stops at the first L'\0' even if num_chars is larger.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Multilingual
// Plane.
String WideStringToUtf8(const wchar_t* str, int num_chars) {
  // wcslen() returns size_t; convert explicitly rather than relying on an
  // implicit narrowing conversion to int.
  if (num_chars == -1)
    num_chars = static_cast<int>(wcslen(str));

  StrStream stream;
  for (int i = 0; i < num_chars; ++i) {
    UInt32 unicode_code_point;

    if (str[i] == L'\0') {
      break;
    } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
      unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
                                                                 str[i + 1]);
      i++;  // The pair consumed two wchar_t units.
    } else {
      unicode_code_point = static_cast<UInt32>(str[i]);
    }

    char buffer[32];  // CodePointToUtf8 requires a buffer this big.
    stream << CodePointToUtf8(unicode_code_point, buffer);
  }
  return StrStreamToString(&stream);
}
// Converts a wide C string to a String using the UTF-8 encoding.
...
...
@@ -1349,12 +1441,7 @@ String ToUtf8String(wchar_t wchar) {
String
String
::
ShowWideCString
(
const
wchar_t
*
wide_c_str
)
{
if
(
wide_c_str
==
NULL
)
return
String
(
"(null)"
);
StrStream
ss
;
while
(
*
wide_c_str
)
{
ss
<<
internal
::
ToUtf8String
(
*
wide_c_str
++
);
}
return
internal
::
StrStreamToString
(
&
ss
);
return
String
(
internal
::
WideStringToUtf8
(
wide_c_str
,
-
1
).
c_str
());
}
// Similar to ShowWideCString(), except that this function encloses
...
...
test/gtest_unittest.cc
View file @
0c5a6624
...
...
@@ -101,6 +101,7 @@ using testing::TPRT_NONFATAL_FAILURE;
using
testing
::
TPRT_SUCCESS
;
using
testing
::
UnitTest
;
using
testing
::
internal
::
AppendUserMessage
;
using
testing
::
internal
::
CodePointToUtf8
;
using
testing
::
internal
::
EqFailure
;
using
testing
::
internal
::
FloatingPoint
;
using
testing
::
internal
::
GTestFlagSaver
;
...
...
@@ -111,8 +112,8 @@ using testing::internal::StreamableToString;
using
testing
::
internal
::
String
;
using
testing
::
internal
::
TestProperty
;
using
testing
::
internal
::
TestResult
;
using
testing
::
internal
::
ToUtf8String
;
using
testing
::
internal
::
UnitTestImpl
;
using
testing
::
internal
::
WideStringToUtf8
;
// This line tests that we can define tests in an unnamed namespace.
namespace
{
...
...
@@ -142,65 +143,184 @@ TEST(NullLiteralTest, IsFalseForNonNullLiterals) {
}
#endif // __SYMBIAN32__
// Tests ToUtf8String().
//
// Tests CodePointToUtf8().
// Tests that the NUL character L'\0' is encoded correctly.
TEST
(
ToUtf8StringTest
,
CanEncodeNul
)
{
EXPECT_STREQ
(
""
,
ToUtf8String
(
L'\0'
).
c_str
());
TEST
(
CodePointToUtf8Test
,
CanEncodeNul
)
{
char
buffer
[
32
];
EXPECT_STREQ
(
""
,
CodePointToUtf8
(
L'\0'
,
buffer
));
}
// Tests that ASCII characters are encoded correctly.
TEST
(
ToUtf8StringTest
,
CanEncodeAscii
)
{
EXPECT_STREQ
(
"a"
,
ToUtf8String
(
L'a'
).
c_str
());
EXPECT_STREQ
(
"Z"
,
ToUtf8String
(
L'Z'
).
c_str
());
EXPECT_STREQ
(
"&"
,
ToUtf8String
(
L'&'
).
c_str
());
EXPECT_STREQ
(
"
\x7F
"
,
ToUtf8String
(
L'\x7F'
).
c_str
());
TEST
(
CodePointToUtf8Test
,
CanEncodeAscii
)
{
char
buffer
[
32
];
EXPECT_STREQ
(
"a"
,
CodePointToUtf8
(
L'a'
,
buffer
));
EXPECT_STREQ
(
"Z"
,
CodePointToUtf8
(
L'Z'
,
buffer
));
EXPECT_STREQ
(
"&"
,
CodePointToUtf8
(
L'&'
,
buffer
));
EXPECT_STREQ
(
"
\x7F
"
,
CodePointToUtf8
(
L'\x7F'
,
buffer
));
}
// Tests that Unicode code-points that have 8 to 11 bits are encoded
// as 110xxxxx 10xxxxxx.
TEST
(
ToUtf8StringTest
,
CanEncode8To11Bits
)
{
TEST
(
CodePointToUtf8Test
,
CanEncode8To11Bits
)
{
char
buffer
[
32
];
// 000 1101 0011 => 110-00011 10-010011
EXPECT_STREQ
(
"
\xC3\x93
"
,
ToUtf8String
(
L'\xD3'
).
c_str
(
));
EXPECT_STREQ
(
"
\xC3\x93
"
,
CodePointToUtf8
(
L'\xD3'
,
buffer
));
// 101 0111 0110 => 110-10101 10-110110
EXPECT_STREQ
(
"
\xD5\xB6
"
,
ToUtf8
String
(
L
'\
x576
'
).
c_str
(
));
EXPECT_STREQ
(
"
\xD5\xB6
"
,
CodePoint
ToUtf8
(
L
'\
x576
'
,
buffer
));
}
// Tests that Unicode code-points that have 12 to 16 bits are encoded
// as 1110xxxx 10xxxxxx 10xxxxxx.
TEST
(
ToUtf8StringTest
,
CanEncode12To16Bits
)
{
TEST
(
CodePointToUtf8Test
,
CanEncode12To16Bits
)
{
char
buffer
[
32
];
// 0000 1000 1101 0011 => 1110-0000 10-100011 10-010011
EXPECT_STREQ
(
"
\xE0\xA3\x93
"
,
ToUtf8
String
(
L
'\
x8D3
'
).
c_str
(
));
EXPECT_STREQ
(
"
\xE0\xA3\x93
"
,
CodePoint
ToUtf8
(
L
'\
x8D3
'
,
buffer
));
// 1100 0111 0100 1101 => 1110-1100 10-011101 10-001101
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
,
ToUtf8
String
(
L
'\
xC74D
'
).
c_str
(
));
EXPECT_STREQ
(
"
\xEC\x9D\x8D
"
,
CodePoint
ToUtf8
(
L
'\
xC74D
'
,
buffer
));
}
#if !defined(GTEST_OS_WINDOWS) && !defined(GTEST_OS_CYGWIN) && \
!defined(__SYMBIAN32__)
#ifndef GTEST_WIDE_STRING_USES_UTF16_
// Tests in this group require a wchar_t to hold > 16 bits, and thus
// are skipped on Windows, Cygwin, and Symbian, where a wchar_t is
// 16-bit wide.
// 16-bit wide.
This code may not compile on those systems.
// Tests that Unicode code-points that have 17 to 21 bits are encoded
// as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
TEST
(
ToUtf8StringTest
,
CanEncode17To21Bits
)
{
TEST
(
CodePointToUtf8Test
,
CanEncode17To21Bits
)
{
char
buffer
[
32
];
// 0 0001 0000 1000 1101 0011 => 11110-000 10-010000 10-100011 10-010011
EXPECT_STREQ
(
"
\xF0\x90\xA3\x93
"
,
ToUtf8String
(
L
'\
x108D3
'
).
c_str
());
EXPECT_STREQ
(
"
\xF0\x90\xA3\x93
"
,
CodePointToUtf8
(
L
'\
x108D3
'
,
buffer
));
// 0 0001 0000 0100 0000 0000 => 11110-000 10-010000 10-010000 10-000000
EXPECT_STREQ
(
"
\xF0\x90\x90\x80
"
,
CodePointToUtf8
(
L
'\
x10400
'
,
buffer
));
// 1 0
111
1000 0110 0011 0100 => 11110-10
1
10-
11
1000 10-011000 10-110100
EXPECT_STREQ
(
"
\xF
5
\x
B
8\x98\xB4
"
,
ToUtf8
String
(
L
'\
x1
7
8634
'
).
c_str
(
));
// 1 0
000
1000 0110 0011 0100 => 11110-10
0
10-
00
1000 10-011000 10-110100
EXPECT_STREQ
(
"
\xF
4
\x
8
8\x98\xB4
"
,
CodePoint
ToUtf8
(
L
'\
x1
0
8634
'
,
buffer
));
}
// Tests that encoding an invalid code-point generates the expected result.
TEST
(
ToUtf8StringTest
,
CanEncodeInvalidCodePoint
)
{
TEST
(
CodePointToUtf8Test
,
CanEncodeInvalidCodePoint
)
{
char
buffer
[
32
];
EXPECT_STREQ
(
"(Invalid Unicode 0x1234ABCD)"
,
ToUtf8String
(
L
'\
x1234ABCD
'
).
c_str
());
CodePointToUtf8
(
L
'\
x1234ABCD
'
,
buffer
));
}
#endif // GTEST_WIDE_STRING_USES_UTF16_
// Tests WideStringToUtf8().
// Tests that the NUL character L'\0' is encoded correctly.
TEST(WideStringToUtf8Test, CanEncodeNul) {
  // An empty wide string converts to an empty narrow string, both with an
  // explicit length of 0 and with -1 (process the entire string).
  EXPECT_STREQ("", WideStringToUtf8(L"", 0).c_str());
  EXPECT_STREQ("", WideStringToUtf8(L"", -1).c_str());
}
// Tests that ASCII strings are encoded correctly.
TEST(WideStringToUtf8Test, CanEncodeAscii) {
  // Explicit character counts.
  EXPECT_STREQ("a", WideStringToUtf8(L"a", 1).c_str());
  EXPECT_STREQ("ab", WideStringToUtf8(L"ab", 2).c_str());

  // -1 means "convert the entire string".
  EXPECT_STREQ("a", WideStringToUtf8(L"a", -1).c_str());
  EXPECT_STREQ("ab", WideStringToUtf8(L"ab", -1).c_str());
}
// Tests that Unicode code-points that have 8 to 11 bits are encoded
// as 110xxxxx 10xxxxxx.
TEST(WideStringToUtf8Test, CanEncode8To11Bits) {
  // U+00D3: 000 1101 0011 => 110-00011 10-010011
  EXPECT_STREQ("\xC3\x93", WideStringToUtf8(L"\xD3", 1).c_str());
  EXPECT_STREQ("\xC3\x93", WideStringToUtf8(L"\xD3", -1).c_str());

  // U+0576: 101 0111 0110 => 110-10101 10-110110
  EXPECT_STREQ("\xD5\xB6", WideStringToUtf8(L"\x576", 1).c_str());
  EXPECT_STREQ("\xD5\xB6", WideStringToUtf8(L"\x576", -1).c_str());
}
// Tests that Unicode code-points that have 12 to 16 bits are encoded
// as 1110xxxx 10xxxxxx 10xxxxxx.
TEST(WideStringToUtf8Test, CanEncode12To16Bits) {
  // U+08D3: 0000 1000 1101 0011 => 1110-0000 10-100011 10-010011
  EXPECT_STREQ("\xE0\xA3\x93", WideStringToUtf8(L"\x8D3", 1).c_str());
  EXPECT_STREQ("\xE0\xA3\x93", WideStringToUtf8(L"\x8D3", -1).c_str());

  // U+C74D: 1100 0111 0100 1101 => 1110-1100 10-011101 10-001101
  EXPECT_STREQ("\xEC\x9D\x8D", WideStringToUtf8(L"\xC74D", 1).c_str());
  EXPECT_STREQ("\xEC\x9D\x8D", WideStringToUtf8(L"\xC74D", -1).c_str());
}
#endif // Windows, Cygwin, or Symbian
// Tests that the conversion stops when the function encounters \0 character.
TEST(WideStringToUtf8Test, StopsOnNulCharacter) {
  // The requested length (100) exceeds the literal; output must end at the
  // embedded L'\0', so "XYZ" is never converted.
  EXPECT_STREQ("ABC", WideStringToUtf8(L"ABC\0XYZ", 100).c_str());
}
// Tests that the conversion stops when the function reaches the limit
// specified by the 'length' parameter.
TEST(WideStringToUtf8Test, StopsWhenLengthLimitReached) {
  // Only the first 3 of the 6 wide characters may be converted.
  EXPECT_STREQ("ABC", WideStringToUtf8(L"ABCDEF", 3).c_str());
}
#ifndef GTEST_WIDE_STRING_USES_UTF16_
// Tests that Unicode code-points that have 17 to 21 bits are encoded
// as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. This code may not compile
// on the systems using UTF-16 encoding.
TEST(WideStringToUtf8Test, CanEncode17To21Bits) {
  // U+108D3:
  // 0 0001 0000 1000 1101 0011 => 11110-000 10-010000 10-100011 10-010011
  EXPECT_STREQ("\xF0\x90\xA3\x93", WideStringToUtf8(L"\x108D3", 1).c_str());
  EXPECT_STREQ("\xF0\x90\xA3\x93", WideStringToUtf8(L"\x108D3", -1).c_str());

  // U+108634:
  // 1 0000 1000 0110 0011 0100 => 11110-100 10-001000 10-011000 10-110100
  EXPECT_STREQ("\xF4\x88\x98\xB4", WideStringToUtf8(L"\x108634", 1).c_str());
  EXPECT_STREQ("\xF4\x88\x98\xB4", WideStringToUtf8(L"\x108634", -1).c_str());
}
// Tests that encoding an invalid code-point generates the expected result.
TEST(WideStringToUtf8Test, CanEncodeInvalidCodePoint) {
  // 0xABCDFF lies above U+10FFFF, so a diagnostic placeholder is expected
  // in place of a UTF-8 sequence.
  EXPECT_STREQ("(Invalid Unicode 0xABCDFF)",
               WideStringToUtf8(L"\xABCDFF", -1).c_str());
}
#else
// Tests that surrogate pairs are encoded correctly on the systems using
// UTF-16 encoding in the wide strings.
TEST(WideStringToUtf8Test, CanEncodeValidUtf16SUrrogatePairs) {
  // The pair 0xD801 0xDC00 must be combined into one code point and
  // emitted as a single four-byte UTF-8 sequence.
  EXPECT_STREQ("\xF0\x90\x90\x80",
               WideStringToUtf8(L"\xD801\xDC00", -1).c_str());
}
// Tests that encoding an invalid UTF-16 surrogate pair
// generates the expected result.
TEST(WideStringToUtf8Test, CanEncodeInvalidUtf16SurrogatePair) {
  // In each case the unpaired surrogate is expected to be encoded on its
  // own as a three-byte sequence.

  // Leading surrogate is at the end of the string.
  EXPECT_STREQ("\xED\xA0\x80", WideStringToUtf8(L"\xD800", -1).c_str());

  // Leading surrogate is not followed by the trailing surrogate.
  EXPECT_STREQ("\xED\xA0\x80$", WideStringToUtf8(L"\xD800$", -1).c_str());

  // Trailing surrogate appears without a leading surrogate.
  EXPECT_STREQ("\xED\xB0\x80PQR", WideStringToUtf8(L"\xDC00PQR", -1).c_str());
}
#endif // GTEST_WIDE_STRING_USES_UTF16_
// Tests that codepoint concatenation works correctly.
#ifndef GTEST_WIDE_STRING_USES_UTF16_
TEST(WideStringToUtf8Test, ConcatenatesCodepointsCorrectly) {
  // Variant for systems where wide strings are not UTF-16: the input mixes
  // code points needing 4, 3, 1, 2, 3 and 4 UTF-8 bytes, and the encodings
  // must appear back to back in the same order.
  EXPECT_STREQ(
      "\xF4\x88\x98\xB4"
      "\xEC\x9D\x8D"
      "\n"
      "\xD5\xB6"
      "\xE0\xA3\x93"
      "\xF4\x88\x98\xB4",
      WideStringToUtf8(L"\x108634\xC74D\n\x576\x8D3\x108634", -1).c_str());
}
#else
TEST(WideStringToUtf8Test, ConcatenatesCodepointsCorrectly) {
  // Variant for systems where wide strings are UTF-16: only code points
  // that fit in a single 16-bit unit are used.  The encodings must appear
  // back to back in the same order as the input.
  EXPECT_STREQ(
      "\xEC\x9D\x8D"
      "\n"
      "\xD5\xB6"
      "\xE0\xA3\x93",
      WideStringToUtf8(L"\xC74D\n\x576\x8D3", -1).c_str());
}
#endif // GTEST_WIDE_STRING_USES_UTF16_
// Tests the List template class.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment