diff --git a/src/emitterutils.cpp b/src/emitterutils.cpp index f6ac3d3..f4b7a57 100644 --- a/src/emitterutils.cpp +++ b/src/emitterutils.cpp @@ -5,6 +5,7 @@ #include "stringsource.h" #include #include +#include namespace YAML { @@ -41,17 +42,11 @@ namespace YAML return true; } - unsigned ToUnsigned(char ch) { return static_cast(static_cast(ch)); } - unsigned AdvanceAndGetNextChar(std::string::const_iterator& it, std::string::const_iterator end) { - std::string::const_iterator jt = it; - ++jt; - if(jt == end) - return 0; - - ++it; - return ToUnsigned(*it); - } + typedef unsigned char byte; + byte ToByte(char ch) { return static_cast(ch); } + typedef std::string::const_iterator StrIter; + std::string WriteUnicode(unsigned value) { std::stringstream str; // TODO: for the common escaped characters, give their usual symbol @@ -64,74 +59,101 @@ namespace YAML return str.str(); } - std::string WriteSingleByte(unsigned ch) { - return WriteUnicode(ch); + // GetBytesToRead + // . Returns the length of the UTF-8 sequence starting with 'signal' + int GetBytesToRead(byte signal) { + if(signal <= 0x7F) // ASCII + return 1; + else if(signal <= 0xBF) // invalid first characters + return 0; + else if(signal <= 0xDF) // Note: this allows "overlong" UTF8 (0xC0 - 0xC1) to pass unscathed. OK? + return 2; + else if(signal <= 0xEF) + return 3; + else + return 4; } - std::string WriteTwoBytes(unsigned ch, unsigned ch1) { - // Note: if no second byte is provided (signalled by ch1 == 0) - // then we just write the first one as a single byte. - // Should we throw an error instead? Or write something else? - // (The same question goes for the other WriteNBytes functions) - if(ch1 == 0) - return WriteSingleByte(ch); - - unsigned value = ((ch - 0xC0) << 6) + (ch1 - 0x80); - return WriteUnicode(value); + // ReadBytes + // . Reads the next 'bytesToRead', if we can. + // . Returns zero if we fail, otherwise fills the byte buffer with + // the data and returns the number of bytes read. + int ReadBytes(byte bytes[4], StrIter start, StrIter end, int bytesToRead) { + for(int i=0;i= 1) + it += (bytesRead - 1); } } out << "\"";