Add support for JSON-compatible string escapes (#485)

For completeness I've implemented escaping for characters outside the
basic multilingual plane, but it doesn't get used (as there's no
EscapeAsAsciiJson emitter option implemented).
This commit is contained in:
Oliver Hamlet
2020-07-14 03:16:34 +01:00
committed by GitHub
parent 370aceeaf8
commit c82d3129dd
6 changed files with 97 additions and 18 deletions

View File

@@ -686,14 +686,27 @@ void Emitter::StartedScalar() { m_pState->StartedScalar(); }
// *******************************************************************************************
// overloads of Write
StringEscaping::value GetStringEscapingStyle(const EMITTER_MANIP emitterManip) {
switch (emitterManip) {
case EscapeNonAscii:
return StringEscaping::NonAscii;
case EscapeAsJson:
return StringEscaping::JSON;
default:
return StringEscaping::None;
break;
}
}
Emitter& Emitter::Write(const std::string& str) {
if (!good())
return *this;
const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
StringEscaping::value stringEscaping = GetStringEscapingStyle(m_pState->GetOutputCharset());
const StringFormat::value strFormat =
Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
m_pState->CurGroupFlowType(), escapeNonAscii);
m_pState->CurGroupFlowType(), stringEscaping == StringEscaping::NonAscii);
if (strFormat == StringFormat::Literal)
m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
@@ -708,7 +721,7 @@ Emitter& Emitter::Write(const std::string& str) {
Utils::WriteSingleQuotedString(m_stream, str);
break;
case StringFormat::DoubleQuoted:
Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
Utils::WriteDoubleQuotedString(m_stream, str, stringEscaping);
break;
case StringFormat::Literal:
Utils::WriteLiteralString(m_stream, str,
@@ -814,8 +827,10 @@ Emitter& Emitter::Write(char ch) {
if (!good())
return *this;
PrepareNode(EmitterNodeType::Scalar);
Utils::WriteChar(m_stream, ch);
Utils::WriteChar(m_stream, ch, GetStringEscapingStyle(m_pState->GetOutputCharset()));
StartedScalar();
return *this;

View File

@@ -231,6 +231,7 @@ bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
switch (value) {
case EmitNonAscii:
case EscapeNonAscii:
case EscapeAsJson:
_Set(m_charset, value, scope);
return true;
default:

View File

@@ -218,20 +218,34 @@ bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
});
}
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
std::pair<uint16_t, uint16_t> EncodeUTF16SurrogatePair(int codePoint) {
const uint32_t leadOffset = 0xD800 - (0x10000 >> 10);
return {
leadOffset | (codePoint >> 10),
0xDC00 | (codePoint & 0x3FF),
};
}
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint, StringEscaping::value stringEscapingStyle) {
static const char hexDigits[] = "0123456789abcdef";
out << "\\";
int digits = 8;
if (codePoint < 0xFF) {
if (codePoint < 0xFF && stringEscapingStyle != StringEscaping::JSON) {
out << "x";
digits = 2;
} else if (codePoint < 0xFFFF) {
out << "u";
digits = 4;
} else {
} else if (stringEscapingStyle != StringEscaping::JSON) {
out << "U";
digits = 8;
} else {
auto surrogatePair = EncodeUTF16SurrogatePair(codePoint);
WriteDoubleQuoteEscapeSequence(out, surrogatePair.first, stringEscapingStyle);
WriteDoubleQuoteEscapeSequence(out, surrogatePair.second, stringEscapingStyle);
return;
}
// Write digits into the escape sequence
@@ -303,7 +317,7 @@ bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
}
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
bool escapeNonAscii) {
StringEscaping::value stringEscaping) {
out << "\"";
int codePoint;
for (std::string::const_iterator i = str.begin();
@@ -327,16 +341,19 @@ bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
case '\b':
out << "\\b";
break;
case '\f':
out << "\\f";
break;
default:
if (codePoint < 0x20 ||
(codePoint >= 0x80 &&
codePoint <= 0xA0)) { // Control characters and non-breaking space
WriteDoubleQuoteEscapeSequence(out, codePoint);
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
} else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
// escaped (YAML 1.2, sec. 5.2)
WriteDoubleQuoteEscapeSequence(out, codePoint);
} else if (escapeNonAscii && codePoint > 0x7E) {
WriteDoubleQuoteEscapeSequence(out, codePoint);
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
} else if (stringEscaping == StringEscaping::NonAscii && codePoint > 0x7E) {
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
} else {
WriteCodePoint(out, codePoint);
}
@@ -362,7 +379,7 @@ bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
return true;
}
bool WriteChar(ostream_wrapper& out, char ch) {
bool WriteChar(ostream_wrapper& out, char ch, StringEscaping::value stringEscapingStyle) {
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
out << ch;
} else if (ch == '\"') {
@@ -373,13 +390,17 @@ bool WriteChar(ostream_wrapper& out, char ch) {
out << R"("\n")";
} else if (ch == '\b') {
out << R"("\b")";
} else if (ch == '\r') {
out << R"("\r")";
} else if (ch == '\f') {
out << R"("\f")";
} else if (ch == '\\') {
out << R"("\\")";
} else if (0x20 <= ch && ch <= 0x7e) {
out << "\"" << ch << "\"";
} else {
out << "\"";
WriteDoubleQuoteEscapeSequence(out, ch);
WriteDoubleQuoteEscapeSequence(out, ch, stringEscapingStyle);
out << "\"";
}
return true;
@@ -469,7 +490,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
false);
StringEscaping::None);
return true;
}
} // namespace Utils

View File

@@ -24,6 +24,10 @@ struct StringFormat {
enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
};
struct StringEscaping {
enum value { None, NonAscii, JSON };
};
namespace Utils {
StringFormat::value ComputeStringFormat(const std::string& str,
EMITTER_MANIP strFormat,
@@ -32,10 +36,11 @@ StringFormat::value ComputeStringFormat(const std::string& str,
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
bool escapeNonAscii);
StringEscaping::value stringEscaping);
bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
std::size_t indent);
bool WriteChar(ostream_wrapper& out, char ch);
bool WriteChar(ostream_wrapper& out, char ch,
StringEscaping::value stringEscapingStyle);
bool WriteComment(ostream_wrapper& out, const std::string& str,
std::size_t postCommentIndent);
bool WriteAlias(ostream_wrapper& out, const std::string& str);