mirror of
https://github.com/jbeder/yaml-cpp.git
synced 2025-09-08 12:21:17 +00:00
Add support for JSON-compatible string escapes (#485)
For completeness I've implemented escaping for characters outside the basic multilingual plane, but it doesn't get used (as there's no EscapeAsAsciiJson emitter option implemented).
This commit is contained in:
@@ -19,6 +19,7 @@ enum EMITTER_MANIP {
|
||||
// output character set
|
||||
EmitNonAscii,
|
||||
EscapeNonAscii,
|
||||
EscapeAsJson,
|
||||
|
||||
// string manipulators
|
||||
// Auto, // duplicate
|
||||
|
@@ -686,14 +686,27 @@ void Emitter::StartedScalar() { m_pState->StartedScalar(); }
|
||||
// *******************************************************************************************
|
||||
// overloads of Write
|
||||
|
||||
StringEscaping::value GetStringEscapingStyle(const EMITTER_MANIP emitterManip) {
|
||||
switch (emitterManip) {
|
||||
case EscapeNonAscii:
|
||||
return StringEscaping::NonAscii;
|
||||
case EscapeAsJson:
|
||||
return StringEscaping::JSON;
|
||||
default:
|
||||
return StringEscaping::None;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Emitter& Emitter::Write(const std::string& str) {
|
||||
if (!good())
|
||||
return *this;
|
||||
|
||||
const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
|
||||
StringEscaping::value stringEscaping = GetStringEscapingStyle(m_pState->GetOutputCharset());
|
||||
|
||||
const StringFormat::value strFormat =
|
||||
Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
|
||||
m_pState->CurGroupFlowType(), escapeNonAscii);
|
||||
m_pState->CurGroupFlowType(), stringEscaping == StringEscaping::NonAscii);
|
||||
|
||||
if (strFormat == StringFormat::Literal)
|
||||
m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
|
||||
@@ -708,7 +721,7 @@ Emitter& Emitter::Write(const std::string& str) {
|
||||
Utils::WriteSingleQuotedString(m_stream, str);
|
||||
break;
|
||||
case StringFormat::DoubleQuoted:
|
||||
Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
|
||||
Utils::WriteDoubleQuotedString(m_stream, str, stringEscaping);
|
||||
break;
|
||||
case StringFormat::Literal:
|
||||
Utils::WriteLiteralString(m_stream, str,
|
||||
@@ -814,8 +827,10 @@ Emitter& Emitter::Write(char ch) {
|
||||
if (!good())
|
||||
return *this;
|
||||
|
||||
|
||||
|
||||
PrepareNode(EmitterNodeType::Scalar);
|
||||
Utils::WriteChar(m_stream, ch);
|
||||
Utils::WriteChar(m_stream, ch, GetStringEscapingStyle(m_pState->GetOutputCharset()));
|
||||
StartedScalar();
|
||||
|
||||
return *this;
|
||||
|
@@ -231,6 +231,7 @@ bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
|
||||
switch (value) {
|
||||
case EmitNonAscii:
|
||||
case EscapeNonAscii:
|
||||
case EscapeAsJson:
|
||||
_Set(m_charset, value, scope);
|
||||
return true;
|
||||
default:
|
||||
|
@@ -218,20 +218,34 @@ bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
|
||||
});
|
||||
}
|
||||
|
||||
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
|
||||
std::pair<uint16_t, uint16_t> EncodeUTF16SurrogatePair(int codePoint) {
|
||||
const uint32_t leadOffset = 0xD800 - (0x10000 >> 10);
|
||||
|
||||
return {
|
||||
leadOffset | (codePoint >> 10),
|
||||
0xDC00 | (codePoint & 0x3FF),
|
||||
};
|
||||
}
|
||||
|
||||
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint, StringEscaping::value stringEscapingStyle) {
|
||||
static const char hexDigits[] = "0123456789abcdef";
|
||||
|
||||
out << "\\";
|
||||
int digits = 8;
|
||||
if (codePoint < 0xFF) {
|
||||
if (codePoint < 0xFF && stringEscapingStyle != StringEscaping::JSON) {
|
||||
out << "x";
|
||||
digits = 2;
|
||||
} else if (codePoint < 0xFFFF) {
|
||||
out << "u";
|
||||
digits = 4;
|
||||
} else {
|
||||
} else if (stringEscapingStyle != StringEscaping::JSON) {
|
||||
out << "U";
|
||||
digits = 8;
|
||||
} else {
|
||||
auto surrogatePair = EncodeUTF16SurrogatePair(codePoint);
|
||||
WriteDoubleQuoteEscapeSequence(out, surrogatePair.first, stringEscapingStyle);
|
||||
WriteDoubleQuoteEscapeSequence(out, surrogatePair.second, stringEscapingStyle);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write digits into the escape sequence
|
||||
@@ -303,7 +317,7 @@ bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
|
||||
}
|
||||
|
||||
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
||||
bool escapeNonAscii) {
|
||||
StringEscaping::value stringEscaping) {
|
||||
out << "\"";
|
||||
int codePoint;
|
||||
for (std::string::const_iterator i = str.begin();
|
||||
@@ -327,16 +341,19 @@ bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
||||
case '\b':
|
||||
out << "\\b";
|
||||
break;
|
||||
case '\f':
|
||||
out << "\\f";
|
||||
break;
|
||||
default:
|
||||
if (codePoint < 0x20 ||
|
||||
(codePoint >= 0x80 &&
|
||||
codePoint <= 0xA0)) { // Control characters and non-breaking space
|
||||
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
||||
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
|
||||
} else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
|
||||
// escaped (YAML 1.2, sec. 5.2)
|
||||
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
||||
} else if (escapeNonAscii && codePoint > 0x7E) {
|
||||
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
||||
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
|
||||
} else if (stringEscaping == StringEscaping::NonAscii && codePoint > 0x7E) {
|
||||
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
|
||||
} else {
|
||||
WriteCodePoint(out, codePoint);
|
||||
}
|
||||
@@ -362,7 +379,7 @@ bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WriteChar(ostream_wrapper& out, char ch) {
|
||||
bool WriteChar(ostream_wrapper& out, char ch, StringEscaping::value stringEscapingStyle) {
|
||||
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
|
||||
out << ch;
|
||||
} else if (ch == '\"') {
|
||||
@@ -373,13 +390,17 @@ bool WriteChar(ostream_wrapper& out, char ch) {
|
||||
out << R"("\n")";
|
||||
} else if (ch == '\b') {
|
||||
out << R"("\b")";
|
||||
} else if (ch == '\r') {
|
||||
out << R"("\r")";
|
||||
} else if (ch == '\f') {
|
||||
out << R"("\f")";
|
||||
} else if (ch == '\\') {
|
||||
out << R"("\\")";
|
||||
} else if (0x20 <= ch && ch <= 0x7e) {
|
||||
out << "\"" << ch << "\"";
|
||||
} else {
|
||||
out << "\"";
|
||||
WriteDoubleQuoteEscapeSequence(out, ch);
|
||||
WriteDoubleQuoteEscapeSequence(out, ch, stringEscapingStyle);
|
||||
out << "\"";
|
||||
}
|
||||
return true;
|
||||
@@ -469,7 +490,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
|
||||
|
||||
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
|
||||
WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
|
||||
false);
|
||||
StringEscaping::None);
|
||||
return true;
|
||||
}
|
||||
} // namespace Utils
|
||||
|
@@ -24,6 +24,10 @@ struct StringFormat {
|
||||
enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
|
||||
};
|
||||
|
||||
struct StringEscaping {
|
||||
enum value { None, NonAscii, JSON };
|
||||
};
|
||||
|
||||
namespace Utils {
|
||||
StringFormat::value ComputeStringFormat(const std::string& str,
|
||||
EMITTER_MANIP strFormat,
|
||||
@@ -32,10 +36,11 @@ StringFormat::value ComputeStringFormat(const std::string& str,
|
||||
|
||||
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
|
||||
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
|
||||
bool escapeNonAscii);
|
||||
StringEscaping::value stringEscaping);
|
||||
bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
|
||||
std::size_t indent);
|
||||
bool WriteChar(ostream_wrapper& out, char ch);
|
||||
bool WriteChar(ostream_wrapper& out, char ch,
|
||||
StringEscaping::value stringEscapingStyle);
|
||||
bool WriteComment(ostream_wrapper& out, const std::string& str,
|
||||
std::size_t postCommentIndent);
|
||||
bool WriteAlias(ostream_wrapper& out, const std::string& str);
|
||||
|
@@ -813,7 +813,43 @@ TEST_F(EmitterTest, Unicode) {
|
||||
|
||||
TEST_F(EmitterTest, DoubleQuotedUnicode) {
|
||||
out << DoubleQuoted << "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
||||
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
|
||||
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
|
||||
}
|
||||
|
||||
TEST_F(EmitterTest, EscapedJsonString) {
|
||||
out.SetStringFormat(DoubleQuoted);
|
||||
out.SetOutputCharset(EscapeAsJson);
|
||||
out << "\" \\ "
|
||||
"\x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0A \x0B \x0C \x0D \x0E \x0F "
|
||||
"\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1A \x1B \x1C \x1D \x1E \x1F "
|
||||
"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
||||
|
||||
ExpectEmit(R"("\" \\ \u0001 \u0002 \u0003 \u0004 \u0005 \u0006 \u0007 \b \t )"
|
||||
R"(\n \u000b \f \r \u000e \u000f \u0010 \u0011 \u0012 \u0013 )"
|
||||
R"(\u0014 \u0015 \u0016 \u0017 \u0018 \u0019 \u001a \u001b )"
|
||||
R"(\u001c \u001d \u001e \u001f )"
|
||||
"$ \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
|
||||
}
|
||||
|
||||
TEST_F(EmitterTest, EscapedCharacters) {
|
||||
out << BeginSeq
|
||||
<< '\x00'
|
||||
<< '\x0C'
|
||||
<< '\x0D'
|
||||
<< EndSeq;
|
||||
|
||||
ExpectEmit("- \"\\x00\"\n- \"\\f\"\n- \"\\r\"");
|
||||
}
|
||||
|
||||
TEST_F(EmitterTest, CharactersEscapedAsJson) {
|
||||
out.SetOutputCharset(EscapeAsJson);
|
||||
out << BeginSeq
|
||||
<< '\x00'
|
||||
<< '\x0C'
|
||||
<< '\x0D'
|
||||
<< EndSeq;
|
||||
|
||||
ExpectEmit("- \"\\u0000\"\n- \"\\f\"\n- \"\\r\"");
|
||||
}
|
||||
|
||||
TEST_F(EmitterTest, DoubleQuotedString) {
|
||||
|
Reference in New Issue
Block a user