Commit 114db223 authored by Jesse Beder
Browse files

Fix UTF16 parsing for multi-byte characters

parent 06bf012d
......@@ -365,13 +365,13 @@ void Stream::StreamInUtf16() const {
}
unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
if (chLow < 0xDC00 || ch >= 0xE000) {
if (chLow < 0xDC00 || chLow >= 0xE000) {
// Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the
// stream.
QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
// Deal with the next UTF-16 unit
if (chLow < 0xD800 || ch >= 0xE000) {
if (chLow < 0xD800 || chLow >= 0xE000) {
// Easiest case: queue the codepoint and return
QueueUnicodeCodepoint(m_readahead, ch);
return;
......@@ -391,6 +391,7 @@ void Stream::StreamInUtf16() const {
// Add the surrogacy offset
ch += 0x10000;
break;
}
}
......
......@@ -139,22 +139,22 @@ TEST_F(EncodingTest, UTF8_BOM) {
Run();
}
TEST_F(EncodingTest, DISABLED_UTF16LE_noBOM) {
TEST_F(EncodingTest, UTF16LE_noBOM) {
SetUpEncoding(&EncodeToUtf16LE, false);
Run();
}
TEST_F(EncodingTest, DISABLED_UTF16LE_BOM) {
TEST_F(EncodingTest, UTF16LE_BOM) {
SetUpEncoding(&EncodeToUtf16LE, true);
Run();
}
TEST_F(EncodingTest, DISABLED_UTF16BE_noBOM) {
TEST_F(EncodingTest, UTF16BE_noBOM) {
SetUpEncoding(&EncodeToUtf16BE, false);
Run();
}
TEST_F(EncodingTest, DISABLED_UTF16BE_BOM) {
TEST_F(EncodingTest, UTF16BE_BOM) {
SetUpEncoding(&EncodeToUtf16BE, true);
Run();
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment