fix: parse files with '\r' symbols as line ending correctly

This commit is contained in:
Simon Gene Gottlieb
2024-08-22 10:29:29 +02:00
committed by Jesse Beder
parent b38ac5b55f
commit ee9c4d19be
3 changed files with 35 additions and 1 deletions

View File

@@ -262,7 +262,24 @@ char Stream::get() {
AdvanceCurrent();
m_mark.column++;
if (ch == '\n') {
// If the line-ending symbol is not known yet, determine it from the
// first line ending encountered:
// a lone '\r' selects '\r' as the line-ending symbol;
// otherwise ('\n' or '\r\n') it is set to '\n'.
if (!m_lineEndingSymbol) {
if (ch == '\n') { // line ending is '\n'
m_lineEndingSymbol = '\n';
} else if (ch == '\r') {
auto ch2 = peek();
if (ch2 == '\n') { // line ending is '\r\n'
m_lineEndingSymbol = '\n';
} else { // line ending is '\r'
m_lineEndingSymbol = '\r';
}
}
}
if (ch == m_lineEndingSymbol) {
m_mark.column = 0;
m_mark.line++;
}

View File

@@ -53,6 +53,7 @@ class Stream {
Mark m_mark;
CharacterSet m_charSet;
char m_lineEndingSymbol{}; // 0 means it is not determined yet, must be '\n' or '\r'
mutable std::deque<char> m_readahead;
unsigned char* const m_pPrefetched;
mutable size_t m_nPrefetchedAvailable;

View File

@@ -360,5 +360,21 @@ TEST(LoadNodeTest, BlockCRNLEncoded) {
EXPECT_EQ(1, node["followup"].as<int>());
}
// Loads a document whose lines are terminated by a bare carriage return and
// checks two things: the literal block scalar is expected to come back with
// every line break reported as '\n', and the key following the block is
// expected to still be readable as an integer.
TEST(LoadNodeTest, BlockCREncoded) {
  // Expected content of the block scalar, one source line per output line.
  const std::string expectedBlock =
      "some arbitrary text \n"
      "spanning some \n"
      "lines, that are split \n"
      "by CR and NL\n";

  // Input uses '\r' alone as the line terminator throughout.
  Node doc = Load(
      "blockText: |\r"
      " some arbitrary text \r"
      " spanning some \r"
      " lines, that are split \r"
      " by CR and NL\r"
      "followup: 1");

  EXPECT_EQ(expectedBlock, doc["blockText"].as<std::string>());
  EXPECT_EQ(1, doc["followup"].as<int>());
}
} // namespace
} // namespace YAML