mirror of
https://github.com/jbeder/yaml-cpp.git
synced 2025-09-09 20:51:16 +00:00
Add support for JSON-compatible string escapes (#485)
For completeness I've implemented escaping for characters outside the basic multilingual plane, but it doesn't get used (as there's no EscapeAsAsciiJson emitter option implemented).
This commit is contained in:
@@ -19,6 +19,7 @@ enum EMITTER_MANIP {
|
|||||||
// output character set
|
// output character set
|
||||||
EmitNonAscii,
|
EmitNonAscii,
|
||||||
EscapeNonAscii,
|
EscapeNonAscii,
|
||||||
|
EscapeAsJson,
|
||||||
|
|
||||||
// string manipulators
|
// string manipulators
|
||||||
// Auto, // duplicate
|
// Auto, // duplicate
|
||||||
|
@@ -686,14 +686,27 @@ void Emitter::StartedScalar() { m_pState->StartedScalar(); }
|
|||||||
// *******************************************************************************************
|
// *******************************************************************************************
|
||||||
// overloads of Write
|
// overloads of Write
|
||||||
|
|
||||||
|
// Translates the emitter's output-charset manipulator into the escaping style
// handed to the string-writing utilities. Only EscapeNonAscii and EscapeAsJson
// select a dedicated style; every other manipulator yields
// StringEscaping::None.
StringEscaping::value GetStringEscapingStyle(const EMITTER_MANIP emitterManip) {
  if (emitterManip == EscapeNonAscii) {
    return StringEscaping::NonAscii;
  }
  if (emitterManip == EscapeAsJson) {
    return StringEscaping::JSON;
  }
  return StringEscaping::None;
}
|
||||||
|
|
||||||
Emitter& Emitter::Write(const std::string& str) {
|
Emitter& Emitter::Write(const std::string& str) {
|
||||||
if (!good())
|
if (!good())
|
||||||
return *this;
|
return *this;
|
||||||
|
|
||||||
const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
|
StringEscaping::value stringEscaping = GetStringEscapingStyle(m_pState->GetOutputCharset());
|
||||||
|
|
||||||
const StringFormat::value strFormat =
|
const StringFormat::value strFormat =
|
||||||
Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
|
Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
|
||||||
m_pState->CurGroupFlowType(), escapeNonAscii);
|
m_pState->CurGroupFlowType(), stringEscaping == StringEscaping::NonAscii);
|
||||||
|
|
||||||
if (strFormat == StringFormat::Literal)
|
if (strFormat == StringFormat::Literal)
|
||||||
m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
|
m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
|
||||||
@@ -708,7 +721,7 @@ Emitter& Emitter::Write(const std::string& str) {
|
|||||||
Utils::WriteSingleQuotedString(m_stream, str);
|
Utils::WriteSingleQuotedString(m_stream, str);
|
||||||
break;
|
break;
|
||||||
case StringFormat::DoubleQuoted:
|
case StringFormat::DoubleQuoted:
|
||||||
Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
|
Utils::WriteDoubleQuotedString(m_stream, str, stringEscaping);
|
||||||
break;
|
break;
|
||||||
case StringFormat::Literal:
|
case StringFormat::Literal:
|
||||||
Utils::WriteLiteralString(m_stream, str,
|
Utils::WriteLiteralString(m_stream, str,
|
||||||
@@ -814,8 +827,10 @@ Emitter& Emitter::Write(char ch) {
|
|||||||
if (!good())
|
if (!good())
|
||||||
return *this;
|
return *this;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
PrepareNode(EmitterNodeType::Scalar);
|
PrepareNode(EmitterNodeType::Scalar);
|
||||||
Utils::WriteChar(m_stream, ch);
|
Utils::WriteChar(m_stream, ch, GetStringEscapingStyle(m_pState->GetOutputCharset()));
|
||||||
StartedScalar();
|
StartedScalar();
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
|
@@ -231,6 +231,7 @@ bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
|
|||||||
switch (value) {
|
switch (value) {
|
||||||
case EmitNonAscii:
|
case EmitNonAscii:
|
||||||
case EscapeNonAscii:
|
case EscapeNonAscii:
|
||||||
|
case EscapeAsJson:
|
||||||
_Set(m_charset, value, scope);
|
_Set(m_charset, value, scope);
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
|
@@ -218,20 +218,34 @@ bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
|
// Splits a supplementary-plane code point (U+10000..U+10FFFF) into its UTF-16
// surrogate pair.
//
// Returns {lead (high) surrogate, trail (low) surrogate}. The result is only
// meaningful for code points above U+FFFF; the caller is expected to handle
// BMP code points itself.
std::pair<uint16_t, uint16_t> EncodeUTF16SurrogatePair(int codePoint) {
  // The 0x10000 bias is folded into the lead base so one addition both removes
  // the bias and applies the 0xD800 base. This has to be an addition: the
  // offset (0xD7C0) shares bits with (codePoint >> 10), so OR-ing the two
  // loses carries and yields a wrong lead surrogate.
  const uint32_t leadOffset = 0xD800 - (0x10000 >> 10);
  const uint32_t scalar = static_cast<uint32_t>(codePoint);

  return {
      static_cast<uint16_t>(leadOffset + (scalar >> 10)),
      // The low ten bits never overlap 0xDC00, so OR is exact here.
      static_cast<uint16_t>(0xDC00 | (scalar & 0x3FF)),
  };
}
|
||||||
|
|
||||||
|
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint, StringEscaping::value stringEscapingStyle) {
|
||||||
static const char hexDigits[] = "0123456789abcdef";
|
static const char hexDigits[] = "0123456789abcdef";
|
||||||
|
|
||||||
out << "\\";
|
out << "\\";
|
||||||
int digits = 8;
|
int digits = 8;
|
||||||
if (codePoint < 0xFF) {
|
if (codePoint < 0xFF && stringEscapingStyle != StringEscaping::JSON) {
|
||||||
out << "x";
|
out << "x";
|
||||||
digits = 2;
|
digits = 2;
|
||||||
} else if (codePoint < 0xFFFF) {
|
} else if (codePoint < 0xFFFF) {
|
||||||
out << "u";
|
out << "u";
|
||||||
digits = 4;
|
digits = 4;
|
||||||
} else {
|
} else if (stringEscapingStyle != StringEscaping::JSON) {
|
||||||
out << "U";
|
out << "U";
|
||||||
digits = 8;
|
digits = 8;
|
||||||
|
} else {
|
||||||
|
auto surrogatePair = EncodeUTF16SurrogatePair(codePoint);
|
||||||
|
WriteDoubleQuoteEscapeSequence(out, surrogatePair.first, stringEscapingStyle);
|
||||||
|
WriteDoubleQuoteEscapeSequence(out, surrogatePair.second, stringEscapingStyle);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write digits into the escape sequence
|
// Write digits into the escape sequence
|
||||||
@@ -303,7 +317,7 @@ bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
||||||
bool escapeNonAscii) {
|
StringEscaping::value stringEscaping) {
|
||||||
out << "\"";
|
out << "\"";
|
||||||
int codePoint;
|
int codePoint;
|
||||||
for (std::string::const_iterator i = str.begin();
|
for (std::string::const_iterator i = str.begin();
|
||||||
@@ -327,16 +341,19 @@ bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
|||||||
case '\b':
|
case '\b':
|
||||||
out << "\\b";
|
out << "\\b";
|
||||||
break;
|
break;
|
||||||
|
case '\f':
|
||||||
|
out << "\\f";
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
if (codePoint < 0x20 ||
|
if (codePoint < 0x20 ||
|
||||||
(codePoint >= 0x80 &&
|
(codePoint >= 0x80 &&
|
||||||
codePoint <= 0xA0)) { // Control characters and non-breaking space
|
codePoint <= 0xA0)) { // Control characters and non-breaking space
|
||||||
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
|
||||||
} else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
|
} else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
|
||||||
// escaped (YAML 1.2, sec. 5.2)
|
// escaped (YAML 1.2, sec. 5.2)
|
||||||
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
|
||||||
} else if (escapeNonAscii && codePoint > 0x7E) {
|
} else if (stringEscaping == StringEscaping::NonAscii && codePoint > 0x7E) {
|
||||||
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
|
||||||
} else {
|
} else {
|
||||||
WriteCodePoint(out, codePoint);
|
WriteCodePoint(out, codePoint);
|
||||||
}
|
}
|
||||||
@@ -362,7 +379,7 @@ bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WriteChar(ostream_wrapper& out, char ch) {
|
bool WriteChar(ostream_wrapper& out, char ch, StringEscaping::value stringEscapingStyle) {
|
||||||
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
|
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
|
||||||
out << ch;
|
out << ch;
|
||||||
} else if (ch == '\"') {
|
} else if (ch == '\"') {
|
||||||
@@ -373,13 +390,17 @@ bool WriteChar(ostream_wrapper& out, char ch) {
|
|||||||
out << R"("\n")";
|
out << R"("\n")";
|
||||||
} else if (ch == '\b') {
|
} else if (ch == '\b') {
|
||||||
out << R"("\b")";
|
out << R"("\b")";
|
||||||
|
} else if (ch == '\r') {
|
||||||
|
out << R"("\r")";
|
||||||
|
} else if (ch == '\f') {
|
||||||
|
out << R"("\f")";
|
||||||
} else if (ch == '\\') {
|
} else if (ch == '\\') {
|
||||||
out << R"("\\")";
|
out << R"("\\")";
|
||||||
} else if (0x20 <= ch && ch <= 0x7e) {
|
} else if (0x20 <= ch && ch <= 0x7e) {
|
||||||
out << "\"" << ch << "\"";
|
out << "\"" << ch << "\"";
|
||||||
} else {
|
} else {
|
||||||
out << "\"";
|
out << "\"";
|
||||||
WriteDoubleQuoteEscapeSequence(out, ch);
|
WriteDoubleQuoteEscapeSequence(out, ch, stringEscapingStyle);
|
||||||
out << "\"";
|
out << "\"";
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@@ -469,7 +490,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
|
|||||||
|
|
||||||
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
|
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
|
||||||
WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
|
WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
|
||||||
false);
|
StringEscaping::None);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} // namespace Utils
|
} // namespace Utils
|
||||||
|
@@ -24,6 +24,10 @@ struct StringFormat {
|
|||||||
enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
|
enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Escaping policy passed to the double-quoted string and character writers.
struct StringEscaping {
  enum value {
    // Result for any output charset other than EscapeNonAscii / EscapeAsJson;
    // also what WriteBinary passes for base64 payloads.
    None,
    // Code points above 0x7E are written as escape sequences.
    NonAscii,
    // Escape sequences avoid the \x and \U forms: \u is used, with a UTF-16
    // surrogate pair for code points that do not fit in four hex digits.
    JSON
  };
};
|
||||||
|
|
||||||
namespace Utils {
|
namespace Utils {
|
||||||
StringFormat::value ComputeStringFormat(const std::string& str,
|
StringFormat::value ComputeStringFormat(const std::string& str,
|
||||||
EMITTER_MANIP strFormat,
|
EMITTER_MANIP strFormat,
|
||||||
@@ -32,10 +36,11 @@ StringFormat::value ComputeStringFormat(const std::string& str,
|
|||||||
|
|
||||||
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
|
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
|
||||||
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
||||||
bool escapeNonAscii);
|
StringEscaping::value stringEscaping);
|
||||||
bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
|
bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
|
||||||
std::size_t indent);
|
std::size_t indent);
|
||||||
bool WriteChar(ostream_wrapper& out, char ch);
|
bool WriteChar(ostream_wrapper& out, char ch,
|
||||||
|
StringEscaping::value stringEscapingStyle);
|
||||||
bool WriteComment(ostream_wrapper& out, const std::string& str,
|
bool WriteComment(ostream_wrapper& out, const std::string& str,
|
||||||
std::size_t postCommentIndent);
|
std::size_t postCommentIndent);
|
||||||
bool WriteAlias(ostream_wrapper& out, const std::string& str);
|
bool WriteAlias(ostream_wrapper& out, const std::string& str);
|
||||||
|
@@ -816,6 +816,42 @@ TEST_F(EmitterTest, DoubleQuotedUnicode) {
|
|||||||
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
|
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With EscapeAsJson selected, a double-quoted string must render the quote,
// the backslash and every control character 0x01..0x1F in the expected form:
// the short escapes \b \t \n \f \r where shown, \uXXXX otherwise. The
// multi-byte UTF-8 characters at the end are expected to pass through
// unescaped.
TEST_F(EmitterTest, EscapedJsonString) {
  const char* const input =
      "\" \\ "
      "\x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0A \x0B \x0C \x0D \x0E \x0F "
      "\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1A \x1B \x1C \x1D \x1E \x1F "
      "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";

  const char* const expected =
      R"("\" \\ \u0001 \u0002 \u0003 \u0004 \u0005 \u0006 \u0007 \b \t )"
      R"(\n \u000b \f \r \u000e \u000f \u0010 \u0011 \u0012 \u0013 )"
      R"(\u0014 \u0015 \u0016 \u0017 \u0018 \u0019 \u001a \u001b )"
      R"(\u001c \u001d \u001e \u001f )"
      "$ \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"";

  out.SetStringFormat(DoubleQuoted);
  out.SetOutputCharset(EscapeAsJson);
  out << input;

  ExpectEmit(expected);
}
|
||||||
|
|
||||||
|
// Single characters emitted with the default output charset: NUL is expected
// as the two-digit \x00 form, while form feed and carriage return are expected
// as their short escapes.
TEST_F(EmitterTest, EscapedCharacters) {
  const char controlChars[] = {'\x00', '\x0C', '\x0D'};

  out << BeginSeq;
  for (const char ch : controlChars) {
    out << ch;
  }
  out << EndSeq;

  ExpectEmit("- \"\\x00\"\n- \"\\f\"\n- \"\\r\"");
}
|
||||||
|
|
||||||
|
// Same characters as the default-charset case, but with EscapeAsJson active:
// NUL is now expected as \u0000 instead of \x00, while \f and \r are expected
// to keep their short escapes.
TEST_F(EmitterTest, CharactersEscapedAsJson) {
  const char controlChars[] = {'\x00', '\x0C', '\x0D'};

  out.SetOutputCharset(EscapeAsJson);

  out << BeginSeq;
  for (const char ch : controlChars) {
    out << ch;
  }
  out << EndSeq;

  ExpectEmit("- \"\\u0000\"\n- \"\\f\"\n- \"\\r\"");
}
|
||||||
|
|
||||||
TEST_F(EmitterTest, DoubleQuotedString) {
|
TEST_F(EmitterTest, DoubleQuotedString) {
|
||||||
out << DoubleQuoted << "\" \\ \n \t \r \b \x15 \xEF\xBB\xBF \x24";
|
out << DoubleQuoted << "\" \\ \n \t \r \b \x15 \xEF\xBB\xBF \x24";
|
||||||
ExpectEmit("\"\\\" \\\\ \\n \\t \\r \\b \\x15 \\ufeff $\"");
|
ExpectEmit("\"\\\" \\\\ \\n \\t \\r \\b \\x15 \\ufeff $\"");
|
||||||
|
Reference in New Issue
Block a user