Compare commits

...

9 Commits

Author SHA1 Message Date
Simon Gene Gottlieb
97d150e98b Feat/fix faulty string (#1406) 2026-02-18 14:45:23 -06:00
Simon Gene Gottlieb
44d54544b1 fix: handle escaped underscore \_ correctly 2026-02-17 16:19:43 -06:00
Simon Gene Gottlieb
3e53605a31 test: fixes test for NBSP (non-break space)
This fixes the example "Example 5.13 Escaped Characters" from the YAML
specification.

The example requires that `\_` be translated to the Unicode code point `\u00A0` (NBSP).
Encoded as UTF-8, the code point `\u00A0` is the byte sequence `\xC2\xA0`.

Correcting this test case causes the unit test to fail, because the
code point `\u00A0` is not yet handled correctly.
(The failing unit test is expected.)
2026-02-17 16:19:43 -06:00
Simon Gene Gottlieb
f25f110e33 fix: change abort when no parsing progress is made
Previously, a lack of parsing progress (the infinite-loop check) was inferred from an empty Node.
This falsely triggered an abort whenever an empty YAML document was parsed.

Instead, we detect whether progress is being made in the parsing stream by
comparing token positions. As long as new tokens are being
parsed (detected by a change in position), we assume we are not in an
infinite loop.
2026-02-17 08:58:37 -06:00
Simon Gene Gottlieb
f3f123cea0 test: add case demonstrating bug 2026-02-17 08:58:37 -06:00
Alexey Sokolov
495cde635d Support newer CMake in test too 2026-02-11 12:22:05 -06:00
Simon Gene Gottlieb
ebb85dd462 fix: floating point conversion on x86 (32bit) 2026-02-11 11:14:57 -06:00
Jeremy Nimmer
80ea0028c7 Fix DecodeBase64 to reject truncated input
If the input doesn't have the proper number of encoding characters
(a multiple of 4), return an empty result.

Co-Authored-By: Sean Curtis <sean.curtis@tri.global>
2026-02-10 13:49:45 -06:00
Kamil
2e6383d272 Update README.md 2026-02-07 21:23:53 -06:00
13 changed files with 68 additions and 15 deletions

View File

@@ -68,7 +68,7 @@ target_link_libraries(YOUR_LIBRARY PUBLIC yaml-cpp::yaml-cpp) # The library or e
## Recent Releases ## Recent Releases
[yaml-cpp 0.8.0](https://github.com/jbeder/yaml-cpp/releases/tag/0.8.0) released! [yaml-cpp 0.9.0](https://github.com/jbeder/yaml-cpp/releases/tag/yaml-cpp-0.9.0) released!
[yaml-cpp 0.3.0](https://github.com/jbeder/yaml-cpp/releases/tag/release-0.3.0) is still available if you want the old API. [yaml-cpp 0.3.0](https://github.com/jbeder/yaml-cpp/releases/tag/release-0.3.0) is still available if you want the old API.

View File

@@ -89,6 +89,7 @@ const char* const INVALID_ANCHOR = "invalid anchor";
const char* const INVALID_ALIAS = "invalid alias"; const char* const INVALID_ALIAS = "invalid alias";
const char* const INVALID_TAG = "invalid tag"; const char* const INVALID_TAG = "invalid tag";
const char* const BAD_FILE = "bad file"; const char* const BAD_FILE = "bad file";
const char* const UNEXPECTED_TOKEN_AFTER_DOC = "unexpected token after end of document";
template <typename T> template <typename T>
inline const std::string KEY_NOT_FOUND_WITH_KEY( inline const std::string KEY_NOT_FOUND_WITH_KEY(

View File

@@ -74,7 +74,8 @@ std::vector<unsigned char> DecodeBase64(const std::string &input) {
unsigned char *out = &ret[0]; unsigned char *out = &ret[0];
unsigned value = 0; unsigned value = 0;
for (std::size_t i = 0, cnt = 0; i < input.size(); i++) { std::size_t cnt = 0;
for (std::size_t i = 0; i < input.size(); i++) {
if (std::isspace(static_cast<unsigned char>(input[i]))) { if (std::isspace(static_cast<unsigned char>(input[i]))) {
// skip newlines // skip newlines
continue; continue;
@@ -84,14 +85,20 @@ std::vector<unsigned char> DecodeBase64(const std::string &input) {
return ret_type(); return ret_type();
value = (value << 6) | d; value = (value << 6) | d;
if (cnt % 4 == 3) { if (cnt == 3) {
*out++ = value >> 16; *out++ = value >> 16;
if (i > 0 && input[i - 1] != '=') if (i > 0 && input[i - 1] != '=')
*out++ = value >> 8; *out++ = value >> 8;
if (input[i] != '=') if (input[i] != '=')
*out++ = value; *out++ = value;
cnt = 0;
} else {
++cnt;
} }
++cnt; }
if (cnt != 0) {
// An invalid number of characters were encountered.
return ret_type();
} }
ret.resize(out - &ret[0]); ret.resize(out - &ret[0]);

View File

@@ -117,11 +117,11 @@ std::string Escape(Stream& in) {
case 'N': case 'N':
return "\x85"; return "\x85";
case '_': case '_':
return "\xA0"; return "\xC2\xA0"; // NBSP (U+00A0)
case 'L': case 'L':
return "\xE2\x80\xA8"; // LS (#x2028) return "\xE2\x80\xA8"; // LS (U+2028)
case 'P': case 'P':
return "\xE2\x80\xA9"; // PS (#x2029) return "\xE2\x80\xA9"; // PS (U+2029)
case 'x': case 'x':
return Escape(in, 2); return Escape(in, 2);
case 'u': case 'u':

View File

@@ -28,7 +28,7 @@ namespace fp_formatting {
* assert(buffer[1] == '2'); * assert(buffer[1] == '2');
* assert(buffer[2] == '3'); * assert(buffer[2] == '3');
*/ */
int ConvertToChars(char* begin, char* end, size_t value, int width=1) { int ConvertToChars(char* begin, char* end, uint64_t value, int width=1) {
// precondition of this function (will trigger in debug build) // precondition of this function (will trigger in debug build)
assert(width >= 1); assert(width >= 1);
assert(end >= begin); // end must be after begin assert(end >= begin); // end must be after begin

View File

@@ -53,7 +53,7 @@ std::vector<Node> LoadAll(std::istream& input) {
Parser parser(input); Parser parser(input);
while (true) { while (true) {
NodeBuilder builder; NodeBuilder builder;
if (!parser.HandleNextDocument(builder) || builder.Root().IsNull()) { if (!parser.HandleNextDocument(builder)) {
break; break;
} }
docs.push_back(builder.Root()); docs.push_back(builder.Root());

View File

@@ -33,9 +33,24 @@ bool Parser::HandleNextDocument(EventHandler& eventHandler) {
return false; return false;
} }
auto oldPos = m_pScanner->peek().mark.pos;
SingleDocParser sdp(*m_pScanner, *m_pDirectives); SingleDocParser sdp(*m_pScanner, *m_pDirectives);
sdp.HandleDocument(eventHandler); sdp.HandleDocument(eventHandler);
return true;
// checks if progress was made
// 1. if scanner has no more tokens, progress was made
if (m_pScanner->empty()) {
return true;
}
// 2. if token position has changed, progress was made
auto newPos = m_pScanner->peek().mark.pos;
if (newPos != oldPos) {
return true;
}
// No progress was made, no further processing
return false;
} }
void Parser::ParseDirectives() { void Parser::ParseDirectives() {

View File

@@ -41,8 +41,13 @@ void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
eventHandler.OnDocumentEnd(); eventHandler.OnDocumentEnd();
// check if any tokens left after the text
if (!m_scanner.empty() && m_scanner.peek().type != Token::DOC_END
&& m_scanner.peek().type != Token::DOC_START)
throw ParserException(m_scanner.mark(), ErrorMsg::UNEXPECTED_TOKEN_AFTER_DOC);
// and finally eat any doc ends we see // and finally eat any doc ends we see
while (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END) if (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END)
m_scanner.pop(); m_scanner.pop();
} }

View File

@@ -12,3 +12,9 @@ TEST(BinaryTest, DecodingNoCrashOnNegative) {
const std::vector<unsigned char> &result = YAML::DecodeBase64(input); const std::vector<unsigned char> &result = YAML::DecodeBase64(input);
EXPECT_TRUE(result.empty()); EXPECT_TRUE(result.empty());
} }
// Regression test: Base64 input whose number of encoding characters is not a
// multiple of 4 must be rejected, i.e. DecodeBase64 returns an empty result.
TEST(BinaryTest, DecodingTooShort) {
// ASCII codes spelling "ZGVhZGJlZWY": 11 encoding characters, one short of
// completing the third 4-character group.
std::string input{90, 71, 86, 104, 90, 71, 74, 108, 90, 87, 89};
const std::vector<unsigned char> &result = YAML::DecodeBase64(input);
EXPECT_TRUE(result.empty());
}

View File

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.5) cmake_minimum_required(VERSION 3.5...3.30)
project(yaml-cpp-consumer LANGUAGES CXX) project(yaml-cpp-consumer LANGUAGES CXX)
find_package(yaml-cpp CONFIG REQUIRED) find_package(yaml-cpp CONFIG REQUIRED)

View File

@@ -751,7 +751,7 @@ TEST_F(HandlerSpecTest, Ex5_13_EscapedCharacters) {
OnScalar(_, "!", 0, OnScalar(_, "!", 0,
"Fun with \x5C \x22 \x07 \x08 \x1B \x0C \x0A \x0D \x09 \x0B " + "Fun with \x5C \x22 \x07 \x08 \x1B \x0C \x0A \x0D \x09 \x0B " +
std::string("\x00", 1) + std::string("\x00", 1) +
" \x20 \xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9 A A A")); " \x20 \xC2\xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9 A A A"));
EXPECT_CALL(handler, OnDocumentEnd()); EXPECT_CALL(handler, OnDocumentEnd());
Parse(ex5_13); Parse(ex5_13);
} }

View File

@@ -368,13 +368,28 @@ TEST(NodeTest, LoadCommaSeparatedStrings) {
EXPECT_THROW(Load(R"(,foo)"), ParserException); EXPECT_THROW(Load(R"(,foo)"), ParserException);
} }
TEST(NodeSpecTest, InfiniteLoopNodes) { TEST(NodeTest, InfiniteLoopNodes) {
// Until yaml-cpp <= 0.8.0 this caused an infinite loop; // Until yaml-cpp <= 0.8.0 this caused an infinite loop;
// After, it triggers an exception (but LoadAll is smart enough to avoid // After, it triggers an exception (but LoadAll is smart enough to avoid
// the infinite loop in any case). // the infinite loop in any case).
EXPECT_THROW(LoadAll(R"(,)"), ParserException); EXPECT_THROW(LoadAll(R"(,)"), ParserException);
} }
// The stream opens with two consecutive `---` markers before the scalar `A`;
// LoadAll is expected to return two documents for it.
// NOTE(review): presumably an empty document followed by `A` - confirm.
TEST(NodeTest, MultipleDocumentsBeginning) {
std::vector<Node> docs = LoadAll("\n---\n---\nA\n");
EXPECT_EQ(docs.size(), 2);
}
// The stream starts with a `...` marker, followed by the scalar `A` and a
// second `...` marker; LoadAll is expected to return two documents for it.
// NOTE(review): presumably an empty document followed by `A` - confirm.
TEST(NodeTest, MultipleDocumentsEnds) {
std::vector<Node> docs = LoadAll("\n...\nA\n...\n");
EXPECT_EQ(docs.size(), 2);
}
// Four `...` markers interleaved with the scalars `A` and `B`; LoadAll is
// expected to return four documents for this stream.
// NOTE(review): presumably two of the four are empty documents - confirm.
TEST(NodeTest, MultipleDocumentsEndsWithEmptyDocs) {
std::vector<Node> docs = LoadAll("\n...\nA\n...\n...\nB\n...");
EXPECT_EQ(docs.size(), 4);
}
struct NewLineStringsTestCase { struct NewLineStringsTestCase {
std::string input; std::string input;
std::string expected_content; std::string expected_content;
@@ -446,6 +461,10 @@ TEST(LoadNodeTest, BlockCREncoded) {
EXPECT_EQ(1, node["followup"].as<int>()); EXPECT_EQ(1, node["followup"].as<int>());
} }
// Text that directly follows the closing `]` of a flow sequence must make
// Load throw a ParserException rather than be accepted.
TEST(LoadNodeTest, IncorrectSeqEnd) {
EXPECT_THROW(Load("[foo]_bar"), ParserException);
}
} // namespace } // namespace
} // namespace YAML } // namespace YAML

View File

@@ -467,7 +467,7 @@ TEST(NodeSpecTest, Ex5_13_EscapedCharacters) {
EXPECT_TRUE(doc.as<std::string>() == EXPECT_TRUE(doc.as<std::string>() ==
"Fun with \x5C \x22 \x07 \x08 \x1B \x0C \x0A \x0D \x09 \x0B " + "Fun with \x5C \x22 \x07 \x08 \x1B \x0C \x0A \x0D \x09 \x0B " +
std::string("\x00", 1) + std::string("\x00", 1) +
" \x20 \xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9 A A A"); " \x20 \xC2\xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9 A A A");
} }
TEST(NodeSpecTest, Ex5_14_InvalidEscapedCharacters) { TEST(NodeSpecTest, Ex5_14_InvalidEscapedCharacters) {