Commit 114db223 authored by Jesse Beder
Browse files

Fix UTF16 parsing for multi-byte characters

parent 06bf012d
...@@ -365,13 +365,13 @@ void Stream::StreamInUtf16() const { ...@@ -365,13 +365,13 @@ void Stream::StreamInUtf16() const {
} }
unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
static_cast<unsigned long>(bytes[1 ^ nBigEnd]); static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
if (chLow < 0xDC00 || ch >= 0xE000) { if (chLow < 0xDC00 || chLow >= 0xE000) {
// Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the
// stream. // stream.
QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
// Deal with the next UTF-16 unit // Deal with the next UTF-16 unit
if (chLow < 0xD800 || ch >= 0xE000) { if (chLow < 0xD800 || chLow >= 0xE000) {
// Easiest case: queue the codepoint and return // Easiest case: queue the codepoint and return
QueueUnicodeCodepoint(m_readahead, ch); QueueUnicodeCodepoint(m_readahead, ch);
return; return;
...@@ -391,6 +391,7 @@ void Stream::StreamInUtf16() const { ...@@ -391,6 +391,7 @@ void Stream::StreamInUtf16() const {
// Add the surrogacy offset // Add the surrogacy offset
ch += 0x10000; ch += 0x10000;
break;
} }
} }
......
...@@ -139,22 +139,22 @@ TEST_F(EncodingTest, UTF8_BOM) { ...@@ -139,22 +139,22 @@ TEST_F(EncodingTest, UTF8_BOM) {
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16LE_noBOM) { TEST_F(EncodingTest, UTF16LE_noBOM) {
SetUpEncoding(&EncodeToUtf16LE, false); SetUpEncoding(&EncodeToUtf16LE, false);
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16LE_BOM) { TEST_F(EncodingTest, UTF16LE_BOM) {
SetUpEncoding(&EncodeToUtf16LE, true); SetUpEncoding(&EncodeToUtf16LE, true);
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16BE_noBOM) { TEST_F(EncodingTest, UTF16BE_noBOM) {
SetUpEncoding(&EncodeToUtf16BE, false); SetUpEncoding(&EncodeToUtf16BE, false);
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16BE_BOM) { TEST_F(EncodingTest, UTF16BE_BOM) {
SetUpEncoding(&EncodeToUtf16BE, true); SetUpEncoding(&EncodeToUtf16BE, true);
Run(); Run();
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment