From 9a21a3ec8d7d73a6a2309c9f508ab68eb4a683ce Mon Sep 17 00:00:00 2001 From: Jesse Beder Date: Wed, 4 Nov 2009 22:56:59 +0000 Subject: [PATCH] Switched the Exp:: regexes to functions that lazily evaluate their regexes --- src/emitterutils.cpp | 16 ++-- src/exp.h | 171 ++++++++++++++++++++++++++++++++++--------- src/scanner.cpp | 26 +++---- src/scanscalar.cpp | 16 ++-- src/scantag.cpp | 8 +- src/scantoken.cpp | 28 +++---- 6 files changed, 182 insertions(+), 83 deletions(-) diff --git a/src/emitterutils.cpp b/src/emitterutils.cpp index 202750a..62a7ef3 100644 --- a/src/emitterutils.cpp +++ b/src/emitterutils.cpp @@ -129,7 +129,7 @@ namespace YAML bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) { // first check the start - const RegEx& start = (inFlow ? Exp::PlainScalarInFlow : Exp::PlainScalar); + const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar()); if(!start.Matches(str)) return false; @@ -138,12 +138,12 @@ namespace YAML return false; // then check until something is disallowed - const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow : Exp::EndScalar) - || (Exp::BlankOrBreak + Exp::Comment) - || Exp::NotPrintable - || Exp::Utf8_ByteOrderMark - || Exp::Break - || Exp::Tab; + const RegEx& disallowed = (inFlow ? 
Exp::EndScalarInFlow() : Exp::EndScalar()) + || (Exp::BlankOrBreak() + Exp::Comment()) + || Exp::NotPrintable() + || Exp::Utf8_ByteOrderMark() + || Exp::Break() + || Exp::Tab(); StringCharSource buffer(str.c_str(), str.size()); while(buffer) { if(disallowed.Matches(buffer)) @@ -299,7 +299,7 @@ namespace YAML out << "!<"; StringCharSource buffer(str.c_str(), str.size()); while(buffer) { - int n = Exp::URI.Match(buffer); + int n = Exp::URI().Match(buffer); if(n <= 0) return false; diff --git a/src/exp.h b/src/exp.h index f8dbc29..acebc18 100644 --- a/src/exp.h +++ b/src/exp.h @@ -17,54 +17,153 @@ namespace YAML namespace Exp { // misc - const RegEx Space = RegEx(' '); - const RegEx Tab = RegEx('\t'); - const RegEx Blank = Space || Tab; - const RegEx Break = RegEx('\n') || RegEx("\r\n"); - const RegEx BlankOrBreak = Blank || Break; - const RegEx Digit = RegEx('0', '9'); - const RegEx Alpha = RegEx('a', 'z') || RegEx('A', 'Z'); - const RegEx AlphaNumeric = Alpha || Digit; - const RegEx Word = AlphaNumeric || RegEx('-'); - const RegEx Hex = Digit || RegEx('A', 'F') || RegEx('a', 'f'); + inline const RegEx& Space() { + static const RegEx e = RegEx(' '); + return e; + } + inline const RegEx& Tab() { + static const RegEx e = RegEx('\t'); + return e; + } + inline const RegEx& Blank() { + static const RegEx e = Space() || Tab(); + return e; + } + inline const RegEx& Break() { + static const RegEx e = RegEx('\n') || RegEx("\r\n"); + return e; + } + inline const RegEx& BlankOrBreak() { + static const RegEx e = Blank() || Break(); + return e; + } + inline const RegEx& Digit() { + static const RegEx e = RegEx('0', '9'); + return e; + } + inline const RegEx& Alpha() { + static const RegEx e = RegEx('a', 'z') || RegEx('A', 'Z'); + return e; + } + inline const RegEx& AlphaNumeric() { + static const RegEx e = Alpha() || Digit(); + return e; + } + inline const RegEx& Word() { + static const RegEx e = AlphaNumeric() || RegEx('-'); + return e; + } + inline const RegEx& Hex() { + static 
const RegEx e = Digit() || RegEx('A', 'F') || RegEx('a', 'f'); + return e; + } // Valid Unicode code points that are not part of c-printable (YAML 1.2, sec. 5.1) - const RegEx NotPrintable = RegEx(0) || - RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) || - RegEx(0x0E, 0x1F) || - (RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F'))); - const RegEx Utf8_ByteOrderMark = RegEx("\xEF\xBB\xBF"); + inline const RegEx& NotPrintable() { + static const RegEx e = RegEx(0) || + RegEx("\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F", REGEX_OR) || + RegEx(0x0E, 0x1F) || + (RegEx('\xC2') + (RegEx('\x80', '\x84') || RegEx('\x86', '\x9F'))); + return e; + } + inline const RegEx& Utf8_ByteOrderMark() { + static const RegEx e = RegEx("\xEF\xBB\xBF"); + return e; + } // actual tags - const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx()); - const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx()); - const RegEx DocIndicator = DocStart || DocEnd; - const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx()); - const RegEx Key = RegEx('?'), - KeyInFlow = RegEx('?') + BlankOrBreak; - const RegEx Value = RegEx(':') + (BlankOrBreak || RegEx()), - ValueInFlow = RegEx(':') + (BlankOrBreak || RegEx(",}", REGEX_OR)), - ValueInJSONFlow = RegEx(':'); - const RegEx Comment = RegEx('#'); - const RegEx AnchorEnd = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak; - const RegEx URI = Word || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) || (RegEx('%') + Hex + Hex); - const RegEx Tag = Word || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) || (RegEx('%') + Hex + Hex); + inline const RegEx& DocStart() { + static const RegEx e = RegEx("---") + (BlankOrBreak() || RegEx()); + return e; + } + inline const RegEx& DocEnd() { + static const RegEx e = RegEx("...") + (BlankOrBreak() || RegEx()); + return e; + } + inline const RegEx& DocIndicator() { + static const RegEx e = DocStart() || DocEnd(); + return e; + } + inline const RegEx& BlockEntry() { + static const RegEx e = RegEx('-') +
(BlankOrBreak() || RegEx()); + return e; + } + inline const RegEx& Key() { + static const RegEx e = RegEx('?'); + return e; + } + inline const RegEx& KeyInFlow() { + static const RegEx e = RegEx('?') + BlankOrBreak(); + return e; + } + inline const RegEx& Value() { + static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx()); + return e; + } + inline const RegEx& ValueInFlow() { + static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx(",}", REGEX_OR)); + return e; + } + inline const RegEx& ValueInJSONFlow() { + static const RegEx e = RegEx(':'); + return e; + } + inline const RegEx& Comment() { + static const RegEx e = RegEx('#'); + return e; + } + inline const RegEx& AnchorEnd() { + static const RegEx e = RegEx("?:,]}%@`", REGEX_OR) || BlankOrBreak(); + return e; + } + inline const RegEx& URI() { + static const RegEx e = Word() || RegEx("#;/?:@&=+$,_.!~*'()[]", REGEX_OR) || (RegEx('%') + Hex() + Hex()); + return e; + } + inline const RegEx& Tag() { + static const RegEx e = Word() || RegEx("#;/?:@&=+$_.~*'", REGEX_OR) || (RegEx('%') + Hex() + Hex()); + return e; + } // Plain scalar rules: // . Cannot start with a blank. // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ ` // . In the block context - ? : must be not be followed with a space. // . In the flow context ? is illegal and : and - must not be followed with a space.
- const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)), - PlainScalarInFlow = !(BlankOrBreak || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-:", REGEX_OR) + Blank)); - const RegEx EndScalar = RegEx(':') + (BlankOrBreak || RegEx()), - EndScalarInFlow = (RegEx(':') + (BlankOrBreak || RegEx(",]}", REGEX_OR))) || RegEx(",?[]{}", REGEX_OR); + inline const RegEx& PlainScalar() { + static const RegEx e = !(BlankOrBreak() || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank())); + return e; + } + inline const RegEx& PlainScalarInFlow() { + static const RegEx e = !(BlankOrBreak() || RegEx("?,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-:", REGEX_OR) + Blank())); + return e; + } + inline const RegEx& EndScalar() { + static const RegEx e = RegEx(':') + (BlankOrBreak() || RegEx()); + return e; + } + inline const RegEx& EndScalarInFlow() { + static const RegEx e = (RegEx(':') + (BlankOrBreak() || RegEx(",]}", REGEX_OR))) || RegEx(",?[]{}", REGEX_OR); + return e; + } - const RegEx EscSingleQuote = RegEx("\'\'"); - const RegEx EscBreak = RegEx('\\') + Break; + inline const RegEx& EscSingleQuote() { + static const RegEx e = RegEx("\'\'"); + return e; + } + inline const RegEx& EscBreak() { + static const RegEx e = RegEx('\\') + Break(); + return e; + } - const RegEx ChompIndicator = RegEx("+-", REGEX_OR); - const RegEx Chomp = (ChompIndicator + Digit) || (Digit + ChompIndicator) || ChompIndicator || Digit; + inline const RegEx& ChompIndicator() { + static const RegEx e = RegEx("+-", REGEX_OR); + return e; + } + inline const RegEx& Chomp() { + static const RegEx e = (ChompIndicator() + Digit()) || (Digit() + ChompIndicator()) || ChompIndicator() || Digit(); + return e; + } // and some functions std::string Escape(Stream& in); diff --git a/src/scanner.cpp b/src/scanner.cpp index 13963a4..33052c2 100644 --- a/src/scanner.cpp +++ b/src/scanner.cpp @@ -119,10 +119,10 @@ namespace YAML return 
ScanDirective(); // document token - if(INPUT.column() == 0 && Exp::DocStart.Matches(INPUT)) + if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) return ScanDocStart(); - if(INPUT.column() == 0 && Exp::DocEnd.Matches(INPUT)) + if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) return ScanDocEnd(); // flow start/end/entry @@ -136,10 +136,10 @@ namespace YAML return ScanFlowEntry(); // block/map stuff - if(Exp::BlockEntry.Matches(INPUT)) + if(Exp::BlockEntry().Matches(INPUT)) return ScanBlockEntry(); - if((InBlockContext() ? Exp::Key : Exp::KeyInFlow).Matches(INPUT)) + if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) return ScanKey(); if(GetValueRegex().Matches(INPUT)) @@ -161,7 +161,7 @@ namespace YAML return ScanQuotedScalar(); // plain scalars - if((InBlockContext() ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT)) + if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT)) return ScanPlainScalar(); // don't know what it is! @@ -175,24 +175,24 @@ namespace YAML while(1) { // first eat whitespace while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { - if(InBlockContext() && Exp::Tab.Matches(INPUT)) + if(InBlockContext() && Exp::Tab().Matches(INPUT)) m_simpleKeyAllowed = false; INPUT.eat(1); } // then eat a comment - if(Exp::Comment.Matches(INPUT)) { + if(Exp::Comment().Matches(INPUT)) { // eat until line break - while(INPUT && !Exp::Break.Matches(INPUT)) + while(INPUT && !Exp::Break().Matches(INPUT)) INPUT.eat(1); } // if it's NOT a line break, then we're done! - if(!Exp::Break.Matches(INPUT)) + if(!Exp::Break().Matches(INPUT)) break; // otherwise, let's eat the line break and keep going - int n = Exp::Break.Match(INPUT); + int n = Exp::Break().Match(INPUT); INPUT.eat(n); // oh yeah, and let's get rid of that simple key @@ -231,9 +231,9 @@ namespace YAML const RegEx& Scanner::GetValueRegex() const { if(InBlockContext()) - return Exp::Value; + return Exp::Value(); - return m_canBeJSONFlow ? 
Exp::ValueInJSONFlow : Exp::ValueInFlow; + return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow(); } // StartStream @@ -323,7 +323,7 @@ namespace YAML const IndentMarker& indent = *m_indents.top(); if(indent.column < INPUT.column()) break; - if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry.Matches(INPUT))) + if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT))) break; PopIndent(); diff --git a/src/scanscalar.cpp b/src/scanscalar.cpp index f7f3519..50b8bf7 100644 --- a/src/scanscalar.cpp +++ b/src/scanscalar.cpp @@ -32,12 +32,12 @@ namespace YAML std::size_t lastNonWhitespaceChar = scalar.size(); bool escapedNewline = false; - while(!params.end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) { + while(!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) { if(!INPUT) break; // document indicator? - if(INPUT.column() == 0 && Exp::DocIndicator.Matches(INPUT)) { + if(INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { if(params.onDocIndicator == BREAK) break; else if(params.onDocIndicator == THROW) @@ -48,7 +48,7 @@ namespace YAML pastOpeningBreak = true; // escaped newline? (only if we're escaping on slash) - if(params.escape == '\\' && Exp::EscBreak.Matches(INPUT)) { + if(params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { // eat escape character and get out (but preserve trailing whitespace!) INPUT.get(); lastNonWhitespaceChar = scalar.size(); @@ -78,7 +78,7 @@ namespace YAML } // doc indicator? - if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator.Matches(INPUT)) + if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) break; // are we done via character match? 
@@ -95,7 +95,7 @@ namespace YAML // ******************************** // Phase #2: eat line ending - n = Exp::Break.Match(INPUT); + n = Exp::Break().Match(INPUT); INPUT.eat(n); // ******************************** @@ -110,7 +110,7 @@ namespace YAML params.indent = std::max(params.indent, INPUT.column()); // and then the rest of the whitespace - while(Exp::Blank.Matches(INPUT)) { + while(Exp::Blank().Matches(INPUT)) { // we check for tabs that masquerade as indentation if(INPUT.peek() == '\t'&& INPUT.column() < params.indent && params.onTabInIndentation == THROW) throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); @@ -122,8 +122,8 @@ namespace YAML } // was this an empty line? - bool nextEmptyLine = Exp::Break.Matches(INPUT); - bool nextMoreIndented = Exp::Blank.Matches(INPUT); + bool nextEmptyLine = Exp::Break().Matches(INPUT); + bool nextMoreIndented = Exp::Blank().Matches(INPUT); if(params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) foldedNewlineStartedMoreIndented = moreIndented; diff --git a/src/scantag.cpp b/src/scantag.cpp index 17a6d65..f08218e 100644 --- a/src/scantag.cpp +++ b/src/scantag.cpp @@ -19,7 +19,7 @@ namespace YAML return tag; } - int n = Exp::URI.Match(INPUT); + int n = Exp::URI().Match(INPUT); if(n <= 0) break; @@ -44,7 +44,7 @@ namespace YAML int n = 0; if(canBeHandle) { - n = Exp::Word.Match(INPUT); + n = Exp::Word().Match(INPUT); if(n <= 0) { canBeHandle = false; firstNonWordChar = INPUT.mark(); @@ -52,7 +52,7 @@ namespace YAML } if(!canBeHandle) - n = Exp::Tag.Match(INPUT); + n = Exp::Tag().Match(INPUT); if(n <= 0) break; @@ -68,7 +68,7 @@ namespace YAML std::string tag; while(INPUT) { - int n = Exp::Tag.Match(INPUT); + int n = Exp::Tag().Match(INPUT); if(n <= 0) break; diff --git a/src/scantoken.cpp b/src/scantoken.cpp index 4320364..3e7e1cc 100644 --- a/src/scantoken.cpp +++ b/src/scantoken.cpp @@ -31,22 +31,22 @@ namespace YAML INPUT.eat(1); // read name - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) 
+ while(INPUT && !Exp::BlankOrBreak().Matches(INPUT)) token.value += INPUT.get(); // read parameters while(1) { // first get rid of whitespace - while(Exp::Blank.Matches(INPUT)) + while(Exp::Blank().Matches(INPUT)) INPUT.eat(1); // break on newline or comment - if(!INPUT || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT)) + if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT)) break; // now read parameter std::string param; - while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) + while(INPUT && !Exp::BlankOrBreak().Matches(INPUT)) param += INPUT.get(); token.params.push_back(param); @@ -238,7 +238,7 @@ namespace YAML alias = (indicator == Keys::Alias); // now eat the content - while(Exp::AlphaNumeric.Matches(INPUT)) + while(Exp::AlphaNumeric().Matches(INPUT)) name += INPUT.get(); // we need to have read SOMETHING! @@ -246,7 +246,7 @@ namespace YAML throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND); // and needs to end correctly - if(INPUT && !Exp::AnchorEnd.Matches(INPUT)) + if(INPUT && !Exp::AnchorEnd().Matches(INPUT)) throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR); // and we're done @@ -297,7 +297,7 @@ namespace YAML // set up the scanning parameters ScanScalarParams params; - params.end = (InFlowContext() ? Exp::EndScalarInFlow : Exp::EndScalar) || (Exp::BlankOrBreak + Exp::Comment); + params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment()); params.eatEnd = false; params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1); params.fold = FOLD_FLOW; @@ -337,7 +337,7 @@ namespace YAML // setup the scanning parameters ScanScalarParams params; - params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote)); + params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote)); params.eatEnd = true; params.escape = (single ? 
'\'' : '\\'); params.indent = 0; @@ -384,14 +384,14 @@ namespace YAML // eat chomping/indentation indicators params.chomp = CLIP; - int n = Exp::Chomp.Match(INPUT); + int n = Exp::Chomp().Match(INPUT); for(int i=0;i