Fix UTF-16 parsing for multi-unit characters (surrogate pairs)

This commit is contained in:
Jesse Beder
2014-03-23 20:24:36 -05:00
parent 06bf012d81
commit 114db22335
2 changed files with 7 additions and 6 deletions

View File

@@ -365,13 +365,13 @@ void Stream::StreamInUtf16() const {
} }
unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
static_cast<unsigned long>(bytes[1 ^ nBigEnd]); static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
if (chLow < 0xDC00 || ch >= 0xE000) { if (chLow < 0xDC00 || chLow >= 0xE000) {
// Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the
// stream. // stream.
QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
// Deal with the next UTF-16 unit // Deal with the next UTF-16 unit
if (chLow < 0xD800 || ch >= 0xE000) { if (chLow < 0xD800 || chLow >= 0xE000) {
// Easiest case: queue the codepoint and return // Easiest case: queue the codepoint and return
QueueUnicodeCodepoint(m_readahead, ch); QueueUnicodeCodepoint(m_readahead, ch);
return; return;
@@ -391,6 +391,7 @@ void Stream::StreamInUtf16() const {
// Add the surrogacy offset // Add the surrogacy offset
ch += 0x10000; ch += 0x10000;
break;
} }
} }

View File

@@ -139,22 +139,22 @@ TEST_F(EncodingTest, UTF8_BOM) {
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16LE_noBOM) { TEST_F(EncodingTest, UTF16LE_noBOM) {
SetUpEncoding(&EncodeToUtf16LE, false); SetUpEncoding(&EncodeToUtf16LE, false);
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16LE_BOM) { TEST_F(EncodingTest, UTF16LE_BOM) {
SetUpEncoding(&EncodeToUtf16LE, true); SetUpEncoding(&EncodeToUtf16LE, true);
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16BE_noBOM) { TEST_F(EncodingTest, UTF16BE_noBOM) {
SetUpEncoding(&EncodeToUtf16BE, false); SetUpEncoding(&EncodeToUtf16BE, false);
Run(); Run();
} }
TEST_F(EncodingTest, DISABLED_UTF16BE_BOM) { TEST_F(EncodingTest, UTF16BE_BOM) {
SetUpEncoding(&EncodeToUtf16BE, true); SetUpEncoding(&EncodeToUtf16BE, true);
Run(); Run();
} }