mirror of
https://github.com/jbeder/yaml-cpp.git
synced 2025-09-09 12:41:17 +00:00
Merged r270:HEAD of the emitting-unicode branch
This commit is contained in:
@@ -29,6 +29,7 @@ namespace YAML
|
|||||||
const std::string GetLastError() const;
|
const std::string GetLastError() const;
|
||||||
|
|
||||||
// global setters
|
// global setters
|
||||||
|
bool SetOutputCharset(EMITTER_MANIP value);
|
||||||
bool SetStringFormat(EMITTER_MANIP value);
|
bool SetStringFormat(EMITTER_MANIP value);
|
||||||
bool SetBoolFormat(EMITTER_MANIP value);
|
bool SetBoolFormat(EMITTER_MANIP value);
|
||||||
bool SetIntBase(EMITTER_MANIP value);
|
bool SetIntBase(EMITTER_MANIP value);
|
||||||
|
@@ -12,6 +12,10 @@ namespace YAML
|
|||||||
// general manipulators
|
// general manipulators
|
||||||
Auto,
|
Auto,
|
||||||
|
|
||||||
|
// output character set
|
||||||
|
EmitNonAscii,
|
||||||
|
EscapeNonAscii,
|
||||||
|
|
||||||
// string manipulators
|
// string manipulators
|
||||||
// Auto, // duplicate
|
// Auto, // duplicate
|
||||||
SingleQuoted,
|
SingleQuoted,
|
||||||
|
@@ -37,6 +37,11 @@ namespace YAML
|
|||||||
}
|
}
|
||||||
|
|
||||||
// global setters
|
// global setters
|
||||||
|
// Global setter for the output character set.
// Forwards 'value' to the emitter state with GLOBAL scope and returns
// whatever the state's SetOutputCharset reports.
bool Emitter::SetOutputCharset(EMITTER_MANIP value)
|
||||||
|
{
|
||||||
|
return m_pState->SetOutputCharset(value, GLOBAL);
|
||||||
|
}
|
||||||
|
|
||||||
bool Emitter::SetStringFormat(EMITTER_MANIP value)
|
bool Emitter::SetStringFormat(EMITTER_MANIP value)
|
||||||
{
|
{
|
||||||
return m_pState->SetStringFormat(value, GLOBAL);
|
return m_pState->SetStringFormat(value, GLOBAL);
|
||||||
@@ -485,13 +490,14 @@ namespace YAML
|
|||||||
PreAtomicWrite();
|
PreAtomicWrite();
|
||||||
EmitSeparationIfNecessary();
|
EmitSeparationIfNecessary();
|
||||||
|
|
||||||
|
bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
|
||||||
EMITTER_MANIP strFmt = m_pState->GetStringFormat();
|
EMITTER_MANIP strFmt = m_pState->GetStringFormat();
|
||||||
FLOW_TYPE flowType = m_pState->GetCurGroupFlowType();
|
FLOW_TYPE flowType = m_pState->GetCurGroupFlowType();
|
||||||
unsigned curIndent = m_pState->GetCurIndent();
|
unsigned curIndent = m_pState->GetCurIndent();
|
||||||
|
|
||||||
switch(strFmt) {
|
switch(strFmt) {
|
||||||
case Auto:
|
case Auto:
|
||||||
Utils::WriteString(m_stream, str, flowType == FT_FLOW);
|
Utils::WriteString(m_stream, str, flowType == FT_FLOW, escapeNonAscii);
|
||||||
break;
|
break;
|
||||||
case SingleQuoted:
|
case SingleQuoted:
|
||||||
if(!Utils::WriteSingleQuotedString(m_stream, str)) {
|
if(!Utils::WriteSingleQuotedString(m_stream, str)) {
|
||||||
@@ -500,11 +506,11 @@ namespace YAML
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case DoubleQuoted:
|
case DoubleQuoted:
|
||||||
Utils::WriteDoubleQuotedString(m_stream, str);
|
Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
|
||||||
break;
|
break;
|
||||||
case Literal:
|
case Literal:
|
||||||
if(flowType == FT_FLOW)
|
if(flowType == FT_FLOW)
|
||||||
Utils::WriteString(m_stream, str, flowType == FT_FLOW);
|
Utils::WriteString(m_stream, str, flowType == FT_FLOW, escapeNonAscii);
|
||||||
else
|
else
|
||||||
Utils::WriteLiteralString(m_stream, str, curIndent + m_pState->GetIndent());
|
Utils::WriteLiteralString(m_stream, str, curIndent + m_pState->GetIndent());
|
||||||
break;
|
break;
|
||||||
|
@@ -9,6 +9,7 @@ namespace YAML
|
|||||||
m_stateStack.push(ES_WAITING_FOR_DOC);
|
m_stateStack.push(ES_WAITING_FOR_DOC);
|
||||||
|
|
||||||
// set default global manipulators
|
// set default global manipulators
|
||||||
|
m_charset.set(EmitNonAscii);
|
||||||
m_strFmt.set(Auto);
|
m_strFmt.set(Auto);
|
||||||
m_boolFmt.set(TrueFalseBool);
|
m_boolFmt.set(TrueFalseBool);
|
||||||
m_boolLengthFmt.set(LongBool);
|
m_boolLengthFmt.set(LongBool);
|
||||||
@@ -43,6 +44,7 @@ namespace YAML
|
|||||||
// . Only the ones that make sense will be accepted
|
// . Only the ones that make sense will be accepted
|
||||||
void EmitterState::SetLocalValue(EMITTER_MANIP value)
|
void EmitterState::SetLocalValue(EMITTER_MANIP value)
|
||||||
{
|
{
|
||||||
|
SetOutputCharset(value, LOCAL);
|
||||||
SetStringFormat(value, LOCAL);
|
SetStringFormat(value, LOCAL);
|
||||||
SetBoolFormat(value, LOCAL);
|
SetBoolFormat(value, LOCAL);
|
||||||
SetBoolCaseFormat(value, LOCAL);
|
SetBoolCaseFormat(value, LOCAL);
|
||||||
@@ -133,6 +135,18 @@ namespace YAML
|
|||||||
m_modifiedSettings.clear();
|
m_modifiedSettings.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores the output character-set manipulator in m_charset for the given scope.
// Only EmitNonAscii and EscapeNonAscii are accepted (returns true); any other
// manipulator leaves m_charset untouched and returns false.
bool EmitterState::SetOutputCharset(EMITTER_MANIP value, FMT_SCOPE scope)
|
||||||
|
{
|
||||||
|
switch(value) {
|
||||||
|
case EmitNonAscii:
|
||||||
|
case EscapeNonAscii:
|
||||||
|
// both accepted values share the same handling
|
||||||
|
_Set(m_charset, value, scope);
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
// not a charset manipulator
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool EmitterState::SetStringFormat(EMITTER_MANIP value, FMT_SCOPE scope)
|
bool EmitterState::SetStringFormat(EMITTER_MANIP value, FMT_SCOPE scope)
|
||||||
{
|
{
|
||||||
switch(value) {
|
switch(value) {
|
||||||
|
@@ -108,6 +108,9 @@ namespace YAML
|
|||||||
void ClearModifiedSettings();
|
void ClearModifiedSettings();
|
||||||
|
|
||||||
// formatters
|
// formatters
|
||||||
|
bool SetOutputCharset(EMITTER_MANIP value, FMT_SCOPE scope);
|
||||||
|
EMITTER_MANIP GetOutputCharset() const { return m_charset.get(); }
|
||||||
|
|
||||||
bool SetStringFormat(EMITTER_MANIP value, FMT_SCOPE scope);
|
bool SetStringFormat(EMITTER_MANIP value, FMT_SCOPE scope);
|
||||||
EMITTER_MANIP GetStringFormat() const { return m_strFmt.get(); }
|
EMITTER_MANIP GetStringFormat() const { return m_strFmt.get(); }
|
||||||
|
|
||||||
@@ -149,6 +152,7 @@ namespace YAML
|
|||||||
// other state
|
// other state
|
||||||
std::stack <EMITTER_STATE> m_stateStack;
|
std::stack <EMITTER_STATE> m_stateStack;
|
||||||
|
|
||||||
|
Setting <EMITTER_MANIP> m_charset;
|
||||||
Setting <EMITTER_MANIP> m_strFmt;
|
Setting <EMITTER_MANIP> m_strFmt;
|
||||||
Setting <EMITTER_MANIP> m_boolFmt;
|
Setting <EMITTER_MANIP> m_boolFmt;
|
||||||
Setting <EMITTER_MANIP> m_boolLengthFmt;
|
Setting <EMITTER_MANIP> m_boolLengthFmt;
|
||||||
|
@@ -5,18 +5,129 @@
|
|||||||
#include "stringsource.h"
|
#include "stringsource.h"
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
namespace YAML
|
namespace YAML
|
||||||
{
|
{
|
||||||
namespace Utils
|
namespace Utils
|
||||||
{
|
{
|
||||||
namespace {
|
namespace {
|
||||||
bool IsPrintable(char ch) {
|
enum {REPLACEMENT_CHARACTER = 0xFFFD};
|
||||||
return (0x20 <= ch && ch <= 0x7E);
|
|
||||||
|
// Tests whether the code point 'ch' is an ns-anchor-char, i.e. whether it may
// appear in an anchor or alias name.
// Returns false for flow indicators, white space, line breaks, the byte order
// mark, control characters, surrogates, non-characters and anything outside
// the Unicode range; returns true for every other code point.
bool IsAnchorChar(int ch) { // test for ns-anchor-char
    switch (ch) {
        case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
        case ' ': case '\t': // s-white
        case 0xFEFF: // c-byte-order-mark
        case 0xA: case 0xD: // b-char
            return false;
        case 0x85: // accepted even though the rest of 0x7F-0x9F is rejected below
            return true;
    }

    if (ch < 0x20) // C0 controls (and negative values)
        return false;

    if (ch <= 0x7E) // printable ASCII, including '~' (0x7E) itself
        return true;

    if (ch < 0xA0) // DEL and the C1 controls
        return false;
    if (ch >= 0xD800 && ch <= 0xDFFF) // surrogates
        return false;
    if ((ch & 0xFFFE) == 0xFFFE) // xxFFFE / xxFFFF non-characters
        return false;
    if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) // non-character block
        return false;
    if (ch > 0x10FFFF) // beyond the Unicode range
        return false;

    return true;
}
|
||||||
|
|
||||||
bool IsValidPlainScalar(const std::string& str, bool inFlow) {
|
// Returns the length in bytes (1-4) of the UTF-8 sequence introduced by the
// lead byte 'ch', or -1 when 'ch' cannot start a sequence: a trailing byte
// (0x80-0xBF) or one of the bytes 0xF8-0xFF.
int Utf8BytesIndicated(char ch) {
    const int byteVal = static_cast<unsigned char>(ch);

    // 0xF8-0xFF have the same high nibble as the four-byte lead bytes but are
    // never lead bytes; reject them before dispatching on the nibble.
    if (byteVal >= 0xF8)
        return -1;

    switch (byteVal >> 4) {
        case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
            return 1; // 0xxxxxxx
        case 12: case 13:
            return 2; // 110xxxxx
        case 14:
            return 3; // 1110xxxx
        case 15:
            return 4; // 11110xxx
        default:
            return -1; // 10xxxxxx: trailing byte, not a lead byte
    }
}
|
||||||
|
|
||||||
|
// Returns true when the top two bits of 'ch' are 1 and 0 (bit pattern
// 10xxxxxx), the form of a non-leading byte in a multi-byte UTF-8 sequence.
bool IsTrailingByte(char ch) {
|
||||||
|
return (ch & 0xC0) == 0x80;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
|
||||||
|
if (first == last)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int nBytes = Utf8BytesIndicated(*first);
|
||||||
|
if (nBytes < 1) {
|
||||||
|
// Bad lead byte
|
||||||
|
++first;
|
||||||
|
codePoint = REPLACEMENT_CHARACTER;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nBytes == 1) {
|
||||||
|
codePoint = *first++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gather bits from trailing bytes
|
||||||
|
codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
|
||||||
|
++first;
|
||||||
|
--nBytes;
|
||||||
|
for (; nBytes > 0; ++first, --nBytes) {
|
||||||
|
if ((first == last) || !IsTrailingByte(*first)) {
|
||||||
|
codePoint = REPLACEMENT_CHARACTER;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
codePoint <<= 6;
|
||||||
|
codePoint |= *first & 0x3F;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for illegal code points
|
||||||
|
if (codePoint > 0x10FFFF)
|
||||||
|
codePoint = REPLACEMENT_CHARACTER;
|
||||||
|
else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
|
||||||
|
codePoint = REPLACEMENT_CHARACTER;
|
||||||
|
else if ((codePoint & 0xFFFE) == 0xFFFE)
|
||||||
|
codePoint = REPLACEMENT_CHARACTER;
|
||||||
|
else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
|
||||||
|
codePoint = REPLACEMENT_CHARACTER;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends the UTF-8 encoding of 'codePoint' to 'out'.
// Values outside 0..0x10FFFF are replaced by REPLACEMENT_CHARACTER first.
// Each range bound is inclusive so that 0x7F, 0x7FF and 0xFFFF are written in
// their shortest form rather than as an overlong sequence.
void WriteCodePoint(ostream& out, int codePoint) {
    if (codePoint < 0 || codePoint > 0x10FFFF) {
        codePoint = REPLACEMENT_CHARACTER;
    }
    if (codePoint <= 0x7F) {
        // one byte: 0xxxxxxx
        out << static_cast<char>(codePoint);
    } else if (codePoint <= 0x7FF) {
        // two bytes: 110xxxxx 10xxxxxx
        out << static_cast<char>(0xC0 | (codePoint >> 6))
            << static_cast<char>(0x80 | (codePoint & 0x3F));
    } else if (codePoint <= 0xFFFF) {
        // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        out << static_cast<char>(0xE0 | (codePoint >> 12))
            << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
            << static_cast<char>(0x80 | (codePoint & 0x3F));
    } else {
        // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        out << static_cast<char>(0xF0 | (codePoint >> 18))
            << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
            << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
            << static_cast<char>(0x80 | (codePoint & 0x3F));
    }
}
|
||||||
|
|
||||||
|
bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
|
||||||
// first check the start
|
// first check the start
|
||||||
const RegEx& start = (inFlow ? Exp::PlainScalarInFlow : Exp::PlainScalar);
|
const RegEx& start = (inFlow ? Exp::PlainScalarInFlow : Exp::PlainScalar);
|
||||||
if(!start.Matches(str))
|
if(!start.Matches(str))
|
||||||
@@ -29,177 +140,109 @@ namespace YAML
|
|||||||
// then check until something is disallowed
|
// then check until something is disallowed
|
||||||
const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow : Exp::EndScalar)
|
const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow : Exp::EndScalar)
|
||||||
|| (Exp::BlankOrBreak + Exp::Comment)
|
|| (Exp::BlankOrBreak + Exp::Comment)
|
||||||
|| (!Exp::Printable)
|
|| Exp::NotPrintable
|
||||||
|
|| Exp::Utf8_ByteOrderMark
|
||||||
|| Exp::Break
|
|| Exp::Break
|
||||||
|| Exp::Tab;
|
|| Exp::Tab;
|
||||||
StringCharSource buffer(str.c_str(), str.size());
|
StringCharSource buffer(str.c_str(), str.size());
|
||||||
while(buffer) {
|
while(buffer) {
|
||||||
if(disallowed.Matches(buffer))
|
if(disallowed.Matches(buffer))
|
||||||
return false;
|
return false;
|
||||||
|
if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
|
||||||
|
return false;
|
||||||
++buffer;
|
++buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef unsigned char byte;
|
void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
|
||||||
byte ToByte(char ch) { return static_cast<byte>(ch); }
|
static const char hexDigits[] = "0123456789abcdef";
|
||||||
|
|
||||||
typedef std::string::const_iterator StrIter;
|
char escSeq[] = "\\U00000000";
|
||||||
|
int digits = 8;
|
||||||
std::string WriteUnicode(unsigned value) {
|
if (codePoint < 0xFF) {
|
||||||
std::stringstream str;
|
escSeq[1] = 'x';
|
||||||
// TODO: for the common escaped characters, give their usual symbol
|
digits = 2;
|
||||||
if(value <= 0xFF)
|
} else if (codePoint < 0xFFFF) {
|
||||||
str << "\\x" << std::hex << std::setfill('0') << std::setw(2) << value;
|
escSeq[1] = 'u';
|
||||||
else if(value <= 0xFFFF)
|
digits = 4;
|
||||||
str << "\\u" << std::hex << std::setfill('0') << std::setw(4) << value;
|
|
||||||
else
|
|
||||||
str << "\\U" << std::hex << std::setfill('0') << std::setw(8) << value;
|
|
||||||
return str.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetBytesToRead
|
|
||||||
// . Returns the length of the UTF-8 sequence starting with 'signal'
|
|
||||||
int GetBytesToRead(byte signal) {
|
|
||||||
if(signal <= 0x7F) // ASCII
|
|
||||||
return 1;
|
|
||||||
else if(signal <= 0xBF) // invalid first characters
|
|
||||||
return 0;
|
|
||||||
else if(signal <= 0xDF) // Note: this allows "overlong" UTF8 (0xC0 - 0xC1) to pass unscathed. OK?
|
|
||||||
return 2;
|
|
||||||
else if(signal <= 0xEF)
|
|
||||||
return 3;
|
|
||||||
else
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReadBytes
|
|
||||||
// . Reads the next 'bytesToRead', if we can.
|
|
||||||
// . Returns zero if we fail, otherwise fills the byte buffer with
|
|
||||||
// the data and returns the number of bytes read.
|
|
||||||
int ReadBytes(byte bytes[4], StrIter start, StrIter end, int bytesToRead) {
|
|
||||||
for(int i=0;i<bytesToRead;i++) {
|
|
||||||
if(start == end)
|
|
||||||
return 0;
|
|
||||||
bytes[i] = ToByte(*start);
|
|
||||||
++start;
|
|
||||||
}
|
}
|
||||||
return bytesToRead;
|
|
||||||
|
// Write digits into the escape sequence
|
||||||
|
int i = 2;
|
||||||
|
for (; digits > 0; --digits, ++i) {
|
||||||
|
escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
|
||||||
|
}
|
||||||
|
|
||||||
|
escSeq[i] = 0; // terminate with NUL character
|
||||||
|
out << escSeq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValidUTF8
|
bool WriteAliasName(ostream& out, const std::string& str) {
|
||||||
// . Assumes bytes[0] is a valid signal byte with the right size passed
|
int codePoint;
|
||||||
bool IsValidUTF8(byte bytes[4], int size) {
|
for(std::string::const_iterator i = str.begin();
|
||||||
for(int i=1;i<size;i++)
|
GetNextCodePointAndAdvance(codePoint, i, str.end());
|
||||||
if(bytes[i] & 0x80 != 0x80)
|
)
|
||||||
|
{
|
||||||
|
if (!IsAnchorChar(codePoint))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
WriteCodePoint(out, codePoint);
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
byte UTF8SignalPrefix(int size) {
|
|
||||||
switch(size) {
|
|
||||||
case 1: return 0;
|
|
||||||
case 2: return 0xC0;
|
|
||||||
case 3: return 0xE0;
|
|
||||||
case 4: return 0xF0;
|
|
||||||
}
|
|
||||||
assert(false);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned UTF8ToUnicode(byte bytes[4], int size) {
|
|
||||||
unsigned value = bytes[0] - UTF8SignalPrefix(size);
|
|
||||||
for(int i=1;i<size;i++)
|
|
||||||
value = (value << 6) + (bytes[i] - 0x80);
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReadUTF8
|
|
||||||
// . Returns the Unicode code point starting at 'start',
|
|
||||||
// and sets 'bytesRead' to the length of the UTF-8 Sequence
|
|
||||||
// . If it's invalid UTF8, we set 'bytesRead' to zero.
|
|
||||||
unsigned ReadUTF8(StrIter start, StrIter end, int& bytesRead) {
|
|
||||||
int bytesToRead = GetBytesToRead(ToByte(*start));
|
|
||||||
if(!bytesToRead) {
|
|
||||||
bytesRead = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
byte bytes[4];
|
|
||||||
bytesRead = ReadBytes(bytes, start, end, bytesToRead);
|
|
||||||
if(!bytesRead)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if(!IsValidUTF8(bytes, bytesRead)) {
|
|
||||||
bytesRead = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return UTF8ToUnicode(bytes, bytesRead);
|
|
||||||
}
|
|
||||||
|
|
||||||
// WriteNonPrintable
|
|
||||||
// . Writes the next UTF-8 code point to the stream
|
|
||||||
int WriteNonPrintable(ostream& out, StrIter start, StrIter end) {
|
|
||||||
int bytesRead = 0;
|
|
||||||
unsigned value = ReadUTF8(start, end, bytesRead);
|
|
||||||
|
|
||||||
if(bytesRead == 0) {
|
|
||||||
// TODO: is it ok to just write the replacement character here,
|
|
||||||
// or should we instead write the invalid byte (as \xNN)?
|
|
||||||
out << WriteUnicode(0xFFFD);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
out << WriteUnicode(value);
|
|
||||||
return bytesRead;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WriteString(ostream& out, const std::string& str, bool inFlow)
|
bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
|
||||||
{
|
{
|
||||||
if(IsValidPlainScalar(str, inFlow)) {
|
if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
|
||||||
out << str;
|
out << str;
|
||||||
return true;
|
return true;
|
||||||
} else
|
} else
|
||||||
return WriteDoubleQuotedString(out, str);
|
return WriteDoubleQuotedString(out, str, escapeNonAscii);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WriteSingleQuotedString(ostream& out, const std::string& str)
|
bool WriteSingleQuotedString(ostream& out, const std::string& str)
|
||||||
{
|
{
|
||||||
out << "'";
|
out << "'";
|
||||||
for(std::size_t i=0;i<str.size();i++) {
|
int codePoint;
|
||||||
char ch = str[i];
|
for(std::string::const_iterator i = str.begin();
|
||||||
if(!IsPrintable(ch))
|
GetNextCodePointAndAdvance(codePoint, i, str.end());
|
||||||
return false;
|
)
|
||||||
|
{
|
||||||
|
if (codePoint == '\n')
|
||||||
|
return false; // We can't handle a new line and the attendant indentation yet
|
||||||
|
|
||||||
if(ch == '\'')
|
if (codePoint == '\'')
|
||||||
out << "''";
|
out << "''";
|
||||||
else
|
else
|
||||||
out << ch;
|
WriteCodePoint(out, codePoint);
|
||||||
}
|
}
|
||||||
out << "'";
|
out << "'";
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WriteDoubleQuotedString(ostream& out, const std::string& str)
|
bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
|
||||||
{
|
{
|
||||||
out << "\"";
|
out << "\"";
|
||||||
for(StrIter it=str.begin();it!=str.end();++it) {
|
int codePoint;
|
||||||
char ch = *it;
|
for(std::string::const_iterator i = str.begin();
|
||||||
if(IsPrintable(ch)) {
|
GetNextCodePointAndAdvance(codePoint, i, str.end());
|
||||||
if(ch == '\"')
|
)
|
||||||
out << "\\\"";
|
{
|
||||||
else if(ch == '\\')
|
if (codePoint == '\"')
|
||||||
out << "\\\\";
|
out << "\\\"";
|
||||||
else
|
else if (codePoint == '\\')
|
||||||
out << ch;
|
out << "\\\\";
|
||||||
} else {
|
else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space
|
||||||
int bytesRead = WriteNonPrintable(out, it, str.end());
|
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
||||||
if(bytesRead >= 1)
|
else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
|
||||||
it += (bytesRead - 1);
|
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
||||||
}
|
else if (escapeNonAscii && codePoint > 0x7E)
|
||||||
|
WriteDoubleQuoteEscapeSequence(out, codePoint);
|
||||||
|
else
|
||||||
|
WriteCodePoint(out, codePoint);
|
||||||
}
|
}
|
||||||
out << "\"";
|
out << "\"";
|
||||||
return true;
|
return true;
|
||||||
@@ -209,11 +252,15 @@ namespace YAML
|
|||||||
{
|
{
|
||||||
out << "|\n";
|
out << "|\n";
|
||||||
out << IndentTo(indent);
|
out << IndentTo(indent);
|
||||||
for(std::size_t i=0;i<str.size();i++) {
|
int codePoint;
|
||||||
if(str[i] == '\n')
|
for(std::string::const_iterator i = str.begin();
|
||||||
out << "\n" << IndentTo(indent);
|
GetNextCodePointAndAdvance(codePoint, i, str.end());
|
||||||
|
)
|
||||||
|
{
|
||||||
|
if (codePoint == '\n')
|
||||||
|
out << "\n" << IndentTo(indent);
|
||||||
else
|
else
|
||||||
out << str[i];
|
WriteCodePoint(out, codePoint);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -222,11 +269,15 @@ namespace YAML
|
|||||||
{
|
{
|
||||||
unsigned curIndent = out.col();
|
unsigned curIndent = out.col();
|
||||||
out << "#" << Indentation(postCommentIndent);
|
out << "#" << Indentation(postCommentIndent);
|
||||||
for(std::size_t i=0;i<str.size();i++) {
|
int codePoint;
|
||||||
if(str[i] == '\n')
|
for(std::string::const_iterator i = str.begin();
|
||||||
|
GetNextCodePointAndAdvance(codePoint, i, str.end());
|
||||||
|
)
|
||||||
|
{
|
||||||
|
if(codePoint == '\n')
|
||||||
out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
|
out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
|
||||||
else
|
else
|
||||||
out << str[i];
|
WriteCodePoint(out, codePoint);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -234,25 +285,13 @@ namespace YAML
|
|||||||
bool WriteAlias(ostream& out, const std::string& str)
|
bool WriteAlias(ostream& out, const std::string& str)
|
||||||
{
|
{
|
||||||
out << "*";
|
out << "*";
|
||||||
for(std::size_t i=0;i<str.size();i++) {
|
return WriteAliasName(out, str);
|
||||||
if(!IsPrintable(str[i]) || str[i] == ' ' || str[i] == '\t' || str[i] == '\n' || str[i] == '\r')
|
|
||||||
return false;
|
|
||||||
|
|
||||||
out << str[i];
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WriteAnchor(ostream& out, const std::string& str)
|
bool WriteAnchor(ostream& out, const std::string& str)
|
||||||
{
|
{
|
||||||
out << "&";
|
out << "&";
|
||||||
for(std::size_t i=0;i<str.size();i++) {
|
return WriteAliasName(out, str);
|
||||||
if(!IsPrintable(str[i]) || str[i] == ' ' || str[i] == '\t' || str[i] == '\n' || str[i] == '\r')
|
|
||||||
return false;
|
|
||||||
|
|
||||||
out << str[i];
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -11,9 +11,9 @@ namespace YAML
|
|||||||
{
|
{
|
||||||
namespace Utils
|
namespace Utils
|
||||||
{
|
{
|
||||||
bool WriteString(ostream& out, const std::string& str, bool inFlow);
|
bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii);
|
||||||
bool WriteSingleQuotedString(ostream& out, const std::string& str);
|
bool WriteSingleQuotedString(ostream& out, const std::string& str);
|
||||||
bool WriteDoubleQuotedString(ostream& out, const std::string& str);
|
bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii);
|
||||||
bool WriteLiteralString(ostream& out, const std::string& str, int indent);
|
bool WriteLiteralString(ostream& out, const std::string& str, int indent);
|
||||||
bool WriteComment(ostream& out, const std::string& str, int postCommentIndent);
|
bool WriteComment(ostream& out, const std::string& str, int postCommentIndent);
|
||||||
bool WriteAlias(ostream& out, const std::string& str);
|
bool WriteAlias(ostream& out, const std::string& str);
|
||||||
|
@@ -28,9 +28,9 @@ namespace YAML
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Str(char ch)
|
std::string Str(unsigned ch)
|
||||||
{
|
{
|
||||||
return std::string("") + ch;
|
return std::string("") + static_cast<char>(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Escape
|
// Escape
|
||||||
|
@@ -26,7 +26,12 @@ namespace YAML
|
|||||||
const RegEx Alpha = RegEx('a', 'z') || RegEx('A', 'Z');
|
const RegEx Alpha = RegEx('a', 'z') || RegEx('A', 'Z');
|
||||||
const RegEx AlphaNumeric = Alpha || Digit;
|
const RegEx AlphaNumeric = Alpha || Digit;
|
||||||
const RegEx Hex = Digit || RegEx('A', 'F') || RegEx('a', 'f');
|
const RegEx Hex = Digit || RegEx('A', 'F') || RegEx('a', 'f');
|
||||||
const RegEx Printable = RegEx(0x20, 0x7E);
|
// Valid Unicode code points that are not part of c-printable (YAML 1.2, sec. 5.1)
|
||||||
|
const RegEx NotPrintable = RegEx(0) ||
|
||||||
|
RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) ||
|
||||||
|
RegEx(0x0E, 0x1F) ||
|
||||||
|
(RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F')));
|
||||||
|
const RegEx Utf8_ByteOrderMark = RegEx("\xEF\xBB\xBF");
|
||||||
|
|
||||||
// actual tags
|
// actual tags
|
||||||
|
|
||||||
|
@@ -448,13 +448,26 @@ namespace Test
|
|||||||
desiredOutput = "- ~\n-\n null value: ~\n ~: null key";
|
desiredOutput = "- ~\n-\n null value: ~\n ~: null key";
|
||||||
}
|
}
|
||||||
|
|
||||||
void Unicode(YAML::Emitter& out, std::string& desiredOutput)
|
void EscapedUnicode(YAML::Emitter& out, std::string& desiredOutput)
|
||||||
{
|
{
|
||||||
out << "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
out << YAML::EscapeNonAscii << "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
||||||
|
|
||||||
desiredOutput = "\"$ \\xa2 \\u20ac \\U00024b62\"";
|
desiredOutput = "\"$ \\xa2 \\u20ac \\U00024b62\"";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emitter test: with no manipulators set, a string holding the UTF-8 bytes of
// '$', U+00A2, U+20AC and U+24B62 is expected to come out byte-for-byte
// unchanged and unquoted.
void Unicode(YAML::Emitter& out, std::string& desiredOutput)
|
||||||
|
{
|
||||||
|
out << "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
||||||
|
desiredOutput = "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emitter test: with the DoubleQuoted manipulator, the same UTF-8 string
// ('$', U+00A2, U+20AC, U+24B62) is expected to be wrapped in double quotes
// with the non-ASCII bytes left as raw UTF-8 rather than escaped.
void DoubleQuotedUnicode(YAML::Emitter& out, std::string& desiredOutput)
|
||||||
|
{
|
||||||
|
out << YAML::DoubleQuoted << "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
|
||||||
|
desiredOutput = "\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// incorrect emitting
|
// incorrect emitting
|
||||||
|
|
||||||
@@ -616,7 +629,9 @@ namespace Test
|
|||||||
RunEmitterTest(&Emitter::SimpleGlobalSettings, "simple global settings", passed, total);
|
RunEmitterTest(&Emitter::SimpleGlobalSettings, "simple global settings", passed, total);
|
||||||
RunEmitterTest(&Emitter::ComplexGlobalSettings, "complex global settings", passed, total);
|
RunEmitterTest(&Emitter::ComplexGlobalSettings, "complex global settings", passed, total);
|
||||||
RunEmitterTest(&Emitter::Null, "null", passed, total);
|
RunEmitterTest(&Emitter::Null, "null", passed, total);
|
||||||
|
RunEmitterTest(&Emitter::EscapedUnicode, "escaped unicode", passed, total);
|
||||||
RunEmitterTest(&Emitter::Unicode, "unicode", passed, total);
|
RunEmitterTest(&Emitter::Unicode, "unicode", passed, total);
|
||||||
|
RunEmitterTest(&Emitter::DoubleQuotedUnicode, "double quoted unicode", passed, total);
|
||||||
|
|
||||||
RunEmitterErrorTest(&Emitter::ExtraEndSeq, "extra EndSeq", passed, total);
|
RunEmitterErrorTest(&Emitter::ExtraEndSeq, "extra EndSeq", passed, total);
|
||||||
RunEmitterErrorTest(&Emitter::ExtraEndMap, "extra EndMap", passed, total);
|
RunEmitterErrorTest(&Emitter::ExtraEndMap, "extra EndMap", passed, total);
|
||||||
|
Reference in New Issue
Block a user