Switched the Exp:: regexes to functions that lazily evaluate their regexes

This commit is contained in:
Jesse Beder
2009-11-04 22:56:59 +00:00
parent 3779e4255d
commit 9a21a3ec8d
6 changed files with 182 additions and 83 deletions

View File

@@ -129,7 +129,7 @@ namespace YAML
bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
// first check the start
const RegEx& start = (inFlow ? Exp::PlainScalarInFlow : Exp::PlainScalar);
const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
if(!start.Matches(str))
return false;
@@ -138,12 +138,12 @@ namespace YAML
return false;
// then check until something is disallowed
const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow : Exp::EndScalar)
|| (Exp::BlankOrBreak + Exp::Comment)
|| Exp::NotPrintable
|| Exp::Utf8_ByteOrderMark
|| Exp::Break
|| Exp::Tab;
const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
|| (Exp::BlankOrBreak() + Exp::Comment())
|| Exp::NotPrintable()
|| Exp::Utf8_ByteOrderMark()
|| Exp::Break()
|| Exp::Tab();
StringCharSource buffer(str.c_str(), str.size());
while(buffer) {
if(disallowed.Matches(buffer))
@@ -299,7 +299,7 @@ namespace YAML
out << "!<";
StringCharSource buffer(str.c_str(), str.size());
while(buffer) {
int n = Exp::URI.Match(buffer);
int n = Exp::URI().Match(buffer);
if(n <= 0)
return false;

165
src/exp.h
View File

@@ -17,54 +17,153 @@ namespace YAML
namespace Exp
{
// misc
const RegEx Space = RegEx(' ');
const RegEx Tab = RegEx('\t');
const RegEx Blank = Space || Tab;
const RegEx Break = RegEx('\n') || RegEx("\r\n");
const RegEx BlankOrBreak = Blank || Break;
const RegEx Digit = RegEx('0', '9');
const RegEx Alpha = RegEx('a', 'z') || RegEx('A', 'Z');
const RegEx AlphaNumeric = Alpha || Digit;
const RegEx Word = AlphaNumeric || RegEx('-');
const RegEx Hex = Digit || RegEx('A', 'F') || RegEx('a', 'f');
inline const RegEx& Space() {
static const RegEx e = RegEx(' ');
return e;
}
inline const RegEx& Tab() {
static const RegEx e = RegEx('\t');
return e;
}
inline const RegEx& Blank() {
static const RegEx e = Space() || Tab();
return e;
}
inline const RegEx& Break() {
static const RegEx e = RegEx('\n') || RegEx("\r\n");
return e;
}
inline const RegEx& BlankOrBreak() {
static const RegEx e = Blank() || Break();
return e;
}
inline const RegEx& Digit() {
static const RegEx e = RegEx('0', '9');
return e;
}
inline const RegEx& Alpha() {
static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z');
return e;
}
inline const RegEx& AlphaNumeric() {
static const RegEx e = Alpha() || Digit();
return e;
}
inline const RegEx& Word() {
static const RegEx e = AlphaNumeric() || RegEx('-');
return e;
}
inline const RegEx& Hex() {
static const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f');
return e;
}
// Valid Unicode code points that are not part of c-printable (YAML 1.2, sec. 5.1)
const RegEx NotPrintable = RegEx(0) ||
inline const RegEx& NotPrintable() {
static const RegEx e = RegEx(0) ||
RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) ||
RegEx(0x0E, 0x1F) ||
(RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F')));
const RegEx Utf8_ByteOrderMark = RegEx("\xEF\xBB\xBF");
return e;
}
inline const RegEx& Utf8_ByteOrderMark() {
static const RegEx e = RegEx("\xEF\xBB\xBF");
return e;
}
// actual tags
const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx());
const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx());
const RegEx DocIndicator = DocStart || DocEnd;
const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx());
const RegEx Key = RegEx('?'),
KeyInFlow = RegEx('?') + BlankOrBreak;
const RegEx Value = RegEx(':') + (BlankOrBreak || RegEx()),
ValueInFlow = RegEx(':') + (BlankOrBreak || RegEx(",}", REGEX_OR)),
ValueInJSONFlow = RegEx(':');
const RegEx Comment = RegEx('#');
const RegEx AnchorEnd = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak;
const RegEx URI = Word || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) || (RegEx('%') + Hex + Hex);
const RegEx Tag = Word || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) || (RegEx('%') + Hex + Hex);
inline const RegEx& DocStart() {
static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& DocEnd() {
static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& DocIndicator() {
static const RegEx e = DocStart || DocEnd;
return e;
}
inline const RegEx& BlockEntry() {
static const RegEx e = RegEx('-') + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& Key() {
static const RegEx e = RegEx('?');
return e;
}
inline const RegEx& KeyInFlow() {
static const RegEx e = RegEx('?') + BlankOrBreak();
return e;
}
inline const RegEx& Value() {
static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& ValueInFlow() {
static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR));
return e;
}
inline const RegEx& ValueInJSONFlow() {
static const RegEx e = RegEx(':');
return e;
}
inline const RegEx Comment() {
static const RegEx e = RegEx('#');
return e;
}
inline const RegEx& AnchorEnd() {
static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak();
return e;
}
inline const RegEx& URI() {
static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) || (RegEx('%') + Hex() + Hex());
return e;
}
inline const RegEx& Tag() {
static const RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) || (RegEx('%') + Hex() + Hex());
return e;
}
// Plain scalar rules:
// . Cannot start with a blank.
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
// . In the block context - ? : must be not be followed with a space.
// . In the flow context ? is illegal and : and - must not be followed with a space.
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)),
PlainScalarInFlow = !(BlankOrBreak || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-:", REGEX_OR) + Blank));
const RegEx EndScalar = RegEx(':') + (BlankOrBreak || RegEx()),
EndScalarInFlow = (RegEx(':') + (BlankOrBreak || RegEx(",]}", REGEX_OR))) || RegEx(",?[]{}", REGEX_OR);
inline const RegEx& PlainScalar() {
static const RegEx e = !(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank()));
return e;
}
inline const RegEx& PlainScalarInFlow() {
static const RegEx e = !(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-:", REGEX_OR) + Blank()));
return e;
}
inline const RegEx& EndScalar() {
static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx());
return e;
}
inline const RegEx& EndScalarInFlow() {
static const RegEx e = (RegEx(':') + (BlankOrBreak() || RegEx(",]}", REGEX_OR))) || RegEx(",?[]{}", REGEX_OR);
return e;
}
const RegEx EscSingleQuote = RegEx("\'\'");
const RegEx EscBreak = RegEx('\\') + Break;
inline const RegEx& EscSingleQuote() {
static const RegEx e = RegEx("\'\'");
return e;
}
inline const RegEx& EscBreak() {
static const RegEx e = RegEx('\\') + Break();
return e;
}
const RegEx ChompIndicator = RegEx("+-", REGEX_OR);
const RegEx Chomp = (ChompIndicator + Digit) || (Digit + ChompIndicator) || ChompIndicator || Digit;
inline const RegEx& ChompIndicator() {
static const RegEx e = RegEx("+-", REGEX_OR);
return e;
}
inline const RegEx& Chomp() {
static const RegEx e = (ChompIndicator() + Digit()) || (Digit() + ChompIndicator()) || ChompIndicator() || Digit();
return e;
}
// and some functions
std::string Escape(Stream& in);

View File

@@ -119,10 +119,10 @@ namespace YAML
return ScanDirective();
// document token
if(INPUT.column() == 0 && Exp::DocStart.Matches(INPUT))
if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT))
return ScanDocStart();
if(INPUT.column() == 0 && Exp::DocEnd.Matches(INPUT))
if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT))
return ScanDocEnd();
// flow start/end/entry
@@ -136,10 +136,10 @@ namespace YAML
return ScanFlowEntry();
// block/map stuff
if(Exp::BlockEntry.Matches(INPUT))
if(Exp::BlockEntry().Matches(INPUT))
return ScanBlockEntry();
if((InBlockContext() ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT))
return ScanKey();
if(GetValueRegex().Matches(INPUT))
@@ -161,7 +161,7 @@ namespace YAML
return ScanQuotedScalar();
// plain scalars
if((InBlockContext() ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT))
return ScanPlainScalar();
// don't know what it is!
@@ -175,24 +175,24 @@ namespace YAML
while(1) {
// first eat whitespace
while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
if(InBlockContext() && Exp::Tab.Matches(INPUT))
if(InBlockContext() && Exp::Tab().Matches(INPUT))
m_simpleKeyAllowed = false;
INPUT.eat(1);
}
// then eat a comment
if(Exp::Comment.Matches(INPUT)) {
if(Exp::Comment().Matches(INPUT)) {
// eat until line break
while(INPUT && !Exp::Break.Matches(INPUT))
while(INPUT && !Exp::Break().Matches(INPUT))
INPUT.eat(1);
}
// if it's NOT a line break, then we're done!
if(!Exp::Break.Matches(INPUT))
if(!Exp::Break().Matches(INPUT))
break;
// otherwise, let's eat the line break and keep going
int n = Exp::Break.Match(INPUT);
int n = Exp::Break().Match(INPUT);
INPUT.eat(n);
// oh yeah, and let's get rid of that simple key
@@ -231,9 +231,9 @@ namespace YAML
const RegEx& Scanner::GetValueRegex() const
{
if(InBlockContext())
return Exp::Value;
return Exp::Value();
return m_canBeJSONFlow ? Exp::ValueInJSONFlow : Exp::ValueInFlow;
return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
}
// StartStream
@@ -323,7 +323,7 @@ namespace YAML
const IndentMarker& indent = *m_indents.top();
if(indent.column < INPUT.column())
break;
if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry.Matches(INPUT)))
if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT)))
break;
PopIndent();

View File

@@ -32,12 +32,12 @@ namespace YAML
std::size_t lastNonWhitespaceChar = scalar.size();
bool escapedNewline = false;
while(!params.end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) {
while(!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
if(!INPUT)
break;
// document indicator?
if(INPUT.column() == 0 && Exp::DocIndicator.Matches(INPUT)) {
if(INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
if(params.onDocIndicator == BREAK)
break;
else if(params.onDocIndicator == THROW)
@@ -48,7 +48,7 @@ namespace YAML
pastOpeningBreak = true;
// escaped newline? (only if we're escaping on slash)
if(params.escape == '\\' && Exp::EscBreak.Matches(INPUT)) {
if(params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
// eat escape character and get out (but preserve trailing whitespace!)
INPUT.get();
lastNonWhitespaceChar = scalar.size();
@@ -78,7 +78,7 @@ namespace YAML
}
// doc indicator?
if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator.Matches(INPUT))
if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT))
break;
// are we done via character match?
@@ -95,7 +95,7 @@ namespace YAML
// ********************************
// Phase #2: eat line ending
n = Exp::Break.Match(INPUT);
n = Exp::Break().Match(INPUT);
INPUT.eat(n);
// ********************************
@@ -110,7 +110,7 @@ namespace YAML
params.indent = std::max(params.indent, INPUT.column());
// and then the rest of the whitespace
while(Exp::Blank.Matches(INPUT)) {
while(Exp::Blank().Matches(INPUT)) {
// we check for tabs that masquerade as indentation
if(INPUT.peek() == '\t'&& INPUT.column() < params.indent && params.onTabInIndentation == THROW)
throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
@@ -122,8 +122,8 @@ namespace YAML
}
// was this an empty line?
bool nextEmptyLine = Exp::Break.Matches(INPUT);
bool nextMoreIndented = Exp::Blank.Matches(INPUT);
bool nextEmptyLine = Exp::Break().Matches(INPUT);
bool nextMoreIndented = Exp::Blank().Matches(INPUT);
if(params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
foldedNewlineStartedMoreIndented = moreIndented;

View File

@@ -19,7 +19,7 @@ namespace YAML
return tag;
}
int n = Exp::URI.Match(INPUT);
int n = Exp::URI().Match(INPUT);
if(n <= 0)
break;
@@ -44,7 +44,7 @@ namespace YAML
int n = 0;
if(canBeHandle) {
n = Exp::Word.Match(INPUT);
n = Exp::Word().Match(INPUT);
if(n <= 0) {
canBeHandle = false;
firstNonWordChar = INPUT.mark();
@@ -52,7 +52,7 @@ namespace YAML
}
if(!canBeHandle)
n = Exp::Tag.Match(INPUT);
n = Exp::Tag().Match(INPUT);
if(n <= 0)
break;
@@ -68,7 +68,7 @@ namespace YAML
std::string tag;
while(INPUT) {
int n = Exp::Tag.Match(INPUT);
int n = Exp::Tag().Match(INPUT);
if(n <= 0)
break;

View File

@@ -31,22 +31,22 @@ namespace YAML
INPUT.eat(1);
// read name
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
token.value += INPUT.get();
// read parameters
while(1) {
// first get rid of whitespace
while(Exp::Blank.Matches(INPUT))
while(Exp::Blank().Matches(INPUT))
INPUT.eat(1);
// break on newline or comment
if(!INPUT || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
break;
// now read parameter
std::string param;
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
param += INPUT.get();
token.params.push_back(param);
@@ -238,7 +238,7 @@ namespace YAML
alias = (indicator == Keys::Alias);
// now eat the content
while(Exp::AlphaNumeric.Matches(INPUT))
while(Exp::AlphaNumeric().Matches(INPUT))
name += INPUT.get();
// we need to have read SOMETHING!
@@ -246,7 +246,7 @@ namespace YAML
throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
// and needs to end correctly
if(INPUT && !Exp::AnchorEnd.Matches(INPUT))
if(INPUT && !Exp::AnchorEnd().Matches(INPUT))
throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
// and we're done
@@ -297,7 +297,7 @@ namespace YAML
// set up the scanning parameters
ScanScalarParams params;
params.end = (InFlowContext() ? Exp::EndScalarInFlow : Exp::EndScalar) || (Exp::BlankOrBreak + Exp::Comment);
params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment());
params.eatEnd = false;
params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
params.fold = FOLD_FLOW;
@@ -337,7 +337,7 @@ namespace YAML
// setup the scanning parameters
ScanScalarParams params;
params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
params.eatEnd = true;
params.escape = (single ? '\'' : '\\');
params.indent = 0;
@@ -384,14 +384,14 @@ namespace YAML
// eat chomping/indentation indicators
params.chomp = CLIP;
int n = Exp::Chomp.Match(INPUT);
int n = Exp::Chomp().Match(INPUT);
for(int i=0;i<n;i++) {
char ch = INPUT.get();
if(ch == '+')
params.chomp = KEEP;
else if(ch == '-')
params.chomp = STRIP;
else if(Exp::Digit.Matches(ch)) {
else if(Exp::Digit().Matches(ch)) {
if(ch == '0')
throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
@@ -401,16 +401,16 @@ namespace YAML
}
// now eat whitespace
while(Exp::Blank.Matches(INPUT))
while(Exp::Blank().Matches(INPUT))
INPUT.eat(1);
// and comments to the end of the line
if(Exp::Comment.Matches(INPUT))
while(INPUT && !Exp::Break.Matches(INPUT))
if(Exp::Comment().Matches(INPUT))
while(INPUT && !Exp::Break().Matches(INPUT))
INPUT.eat(1);
// if it's not a line break, then we ran into a bad character inline
if(INPUT && !Exp::Break.Matches(INPUT))
if(INPUT && !Exp::Break().Matches(INPUT))
throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
// set the initial indentation