Switched to reading the entire file into a buffer at the start.
This speeds it up a TON (like 100x).

This commit is contained in:
Jesse Beder
2009-02-01 20:48:43 +00:00
parent a6d5902ebf
commit f9c0725684
9 changed files with 72 additions and 75 deletions

View File

@@ -3,7 +3,6 @@
#include "regex.h" #include "regex.h"
#include <string> #include <string>
#include <ios> #include <ios>
#include <cstdio>
#include "stream.h" #include "stream.h"
namespace YAML namespace YAML
@@ -24,10 +23,10 @@ namespace YAML
// actual tags // actual tags
const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx()); const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx());
const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx()); const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx());
const RegEx DocIndicator = DocStart || DocEnd; const RegEx DocIndicator = DocStart || DocEnd;
const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF)); const RegEx BlockEntry = RegEx('-') + BlankOrBreak;
const RegEx Key = RegEx('?'), const RegEx Key = RegEx('?'),
KeyInFlow = RegEx('?') + BlankOrBreak; KeyInFlow = RegEx('?') + BlankOrBreak;
const RegEx Value = RegEx(':') + BlankOrBreak, const RegEx Value = RegEx(':') + BlankOrBreak,

View File

@@ -90,12 +90,12 @@ namespace YAML
return Match(str) >= 0; return Match(str) >= 0;
} }
bool RegEx::Matches(std::istream& in) const bool RegEx::Matches(const char *buffer) const
{ {
return Match(in) >= 0; return Match(buffer) >= 0;
} }
bool RegEx::Matches(Stream& in) const bool RegEx::Matches(const Stream& in) const
{ {
return Match(in) >= 0; return Match(in) >= 0;
} }
@@ -115,9 +115,9 @@ namespace YAML
} }
// Match // Match
int RegEx::Match(Stream& in) const int RegEx::Match(const Stream& in) const
{ {
return Match(in.stream()); return Match(in.current());
} }
// Match // Match
@@ -126,19 +126,12 @@ namespace YAML
// . Note: the istream is not a const reference, but we guarantee // . Note: the istream is not a const reference, but we guarantee
// that the pointer will be in the same spot, and we'll clear its // that the pointer will be in the same spot, and we'll clear its
// flags before we end. // flags before we end.
int RegEx::Match(std::istream& in) const int RegEx::Match(const char *buffer) const
{ {
if(!m_pOp) if(!m_pOp)
return -1; return -1;
int pos = in.tellg(); return m_pOp->Match(buffer, *this);
int ret = m_pOp->Match(in, *this);
// reset input stream!
in.clear();
in.seekg(pos);
return ret;
} }
RegEx operator ! (const RegEx& ex) RegEx operator ! (const RegEx& ex)
@@ -184,9 +177,9 @@ namespace YAML
} }
int RegEx::MatchOperator::Match(std::istream& in, const RegEx& regex) const int RegEx::MatchOperator::Match(const char *buffer, const RegEx& regex) const
{ {
if(!in || in.peek() != regex.m_a) if(buffer[0] != regex.m_a)
return -1; return -1;
return 1; return 1;
} }
@@ -199,9 +192,9 @@ namespace YAML
return 1; return 1;
} }
int RegEx::RangeOperator::Match(std::istream& in, const RegEx& regex) const int RegEx::RangeOperator::Match(const char *buffer, const RegEx& regex) const
{ {
if(!in || regex.m_a > in.peek() || regex.m_z < in.peek()) if(regex.m_a > buffer[0] || regex.m_z < buffer[0])
return -1; return -1;
return 1; return 1;
} }
@@ -217,10 +210,10 @@ namespace YAML
return -1; return -1;
} }
int RegEx::OrOperator::Match(std::istream& in, const RegEx& regex) const int RegEx::OrOperator::Match(const char *buffer, const RegEx& regex) const
{ {
for(unsigned i=0;i<regex.m_params.size();i++) { for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in); int n = regex.m_params[i].Match(buffer);
if(n >= 0) if(n >= 0)
return n; return n;
} }
@@ -244,11 +237,11 @@ namespace YAML
return first; return first;
} }
int RegEx::AndOperator::Match(std::istream& in, const RegEx& regex) const int RegEx::AndOperator::Match(const char *buffer, const RegEx& regex) const
{ {
int first = -1; int first = -1;
for(unsigned i=0;i<regex.m_params.size();i++) { for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in); int n = regex.m_params[i].Match(buffer);
if(n == -1) if(n == -1)
return -1; return -1;
if(i == 0) if(i == 0)
@@ -267,11 +260,11 @@ namespace YAML
return 1; return 1;
} }
int RegEx::NotOperator::Match(std::istream& in, const RegEx& regex) const int RegEx::NotOperator::Match(const char *buffer, const RegEx& regex) const
{ {
if(regex.m_params.empty()) if(regex.m_params.empty())
return -1; return -1;
if(regex.m_params[0].Match(in) >= 0) if(regex.m_params[0].Match(buffer) >= 0)
return -1; return -1;
return 1; return 1;
} }
@@ -289,16 +282,15 @@ namespace YAML
return offset; return offset;
} }
int RegEx::SeqOperator::Match(std::istream& in, const RegEx& regex) const int RegEx::SeqOperator::Match(const char *buffer, const RegEx& regex) const
{ {
int offset = 0; int offset = 0;
for(unsigned i=0;i<regex.m_params.size();i++) { for(unsigned i=0;i<regex.m_params.size();i++) {
int n = regex.m_params[i].Match(in); int n = regex.m_params[i].Match(buffer + offset);
if(n == -1) if(n == -1)
return -1; return -1;
offset += n; offset += n;
in.ignore(n);
} }
return offset; return offset;

View File

@@ -2,7 +2,6 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <ios>
namespace YAML namespace YAML
{ {
@@ -19,37 +18,37 @@ namespace YAML
struct Operator { struct Operator {
virtual ~Operator() {} virtual ~Operator() {}
virtual int Match(const std::string& str, const RegEx& regex) const = 0; virtual int Match(const std::string& str, const RegEx& regex) const = 0;
virtual int Match(std::istream& in, const RegEx& regex) const = 0; virtual int Match(const char *buffer, const RegEx& regex) const = 0;
}; };
struct MatchOperator: public Operator { struct MatchOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const; virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const; virtual int Match(const char *buffer, const RegEx& regex) const;
}; };
struct RangeOperator: public Operator { struct RangeOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const; virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const; virtual int Match(const char *buffer, const RegEx& regex) const;
}; };
struct OrOperator: public Operator { struct OrOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const; virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const; virtual int Match(const char *buffer, const RegEx& regex) const;
}; };
struct AndOperator: public Operator { struct AndOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const; virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const; virtual int Match(const char *buffer, const RegEx& regex) const;
}; };
struct NotOperator: public Operator { struct NotOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const; virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const; virtual int Match(const char *buffer, const RegEx& regex) const;
}; };
struct SeqOperator: public Operator { struct SeqOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const; virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const; virtual int Match(const char *buffer, const RegEx& regex) const;
}; };
public: public:
@@ -66,11 +65,11 @@ namespace YAML
bool Matches(char ch) const; bool Matches(char ch) const;
bool Matches(const std::string& str) const; bool Matches(const std::string& str) const;
bool Matches(std::istream& in) const; bool Matches(const char *buffer) const;
bool Matches(Stream& in) const; bool Matches(const Stream& in) const;
int Match(const std::string& str) const; int Match(const std::string& str) const;
int Match(std::istream& in) const; int Match(const char *buffer) const;
int Match(Stream& in) const; int Match(const Stream& in) const;
friend RegEx operator ! (const RegEx& ex); friend RegEx operator ! (const RegEx& ex);
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2); friend RegEx operator || (const RegEx& ex1, const RegEx& ex2);

View File

@@ -100,7 +100,7 @@ namespace YAML
// ***** // *****
// end of stream // end of stream
if(INPUT.peek() == EOF) if(!INPUT)
return EndStream(); return EndStream();
if(INPUT.column == 0 && INPUT.peek() == Keys::Directive) if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)

View File

@@ -32,7 +32,7 @@ namespace YAML
// and saves it on a stack. // and saves it on a stack.
void Scanner::InsertSimpleKey() void Scanner::InsertSimpleKey()
{ {
SimpleKey key(INPUT.pos(), INPUT.line, INPUT.column, m_flowLevel); SimpleKey key(INPUT.pos, INPUT.line, INPUT.column, m_flowLevel);
// first add a map start, if necessary // first add a map start, if necessary
key.pMapStart = PushIndentTo(INPUT.column, false); key.pMapStart = PushIndentTo(INPUT.column, false);
@@ -78,7 +78,7 @@ namespace YAML
isValid = false; isValid = false;
// also needs to be less than 1024 characters and inline // also needs to be less than 1024 characters and inline
if(INPUT.line != key.line || INPUT.pos() - key.pos > 1024) if(INPUT.line != key.line || INPUT.pos - key.pos > 1024)
isValid = false; isValid = false;
// invalidate key // invalidate key

View File

@@ -4,26 +4,39 @@
namespace YAML namespace YAML
{ {
int Stream::pos() const Stream::Stream(std::istream& input): buffer(0), pos(0), line(0), column(0), size(0)
{ {
return input.tellg(); std::streambuf *pBuf = input.rdbuf();
// store entire file in buffer
size = pBuf->pubseekoff(0, std::ios::end, std::ios::in);
pBuf->pubseekpos(0, std::ios::in);
buffer = new char[size];
pBuf->sgetn(buffer, size);
} }
Stream::~Stream()
{
delete [] buffer;
}
char Stream::peek() char Stream::peek()
{ {
return input.peek(); return buffer[pos];
} }
Stream::operator bool() Stream::operator bool() const
{ {
return input.good(); return pos < size;
} }
// get // get
// . Extracts a character from the stream and updates our position // . Extracts a character from the stream and updates our position
char Stream::get() char Stream::get()
{ {
char ch = input.get(); char ch = buffer[pos];
pos++;
column++; column++;
if(ch == '\n') { if(ch == '\n') {
column = 0; column = 0;

View File

@@ -5,21 +5,24 @@
namespace YAML namespace YAML
{ {
struct Stream class Stream
{ {
Stream(std::istream& input_): input(input_), line(0), column(0) {} public:
Stream(std::istream& input);
~Stream();
int pos() const; operator bool() const;
operator bool(); bool operator !() const { return !static_cast <bool>(*this); }
bool operator !() { return !(*this); }
std::istream& stream() const { return input; } const char *current() const { return buffer + pos; }
char peek(); char peek();
char get(); char get();
std::string get(int n); std::string get(int n);
void eat(int n = 1); void eat(int n = 1);
std::istream& input; int pos, line, column, size;
int line, column;
private:
char *buffer;
}; };
} }

View File

@@ -7,20 +7,13 @@
void run() void run()
{ {
std::ifstream fin("tests/test.yaml"); std::ifstream fin("tests/test.yaml");
YAML::Parser parser(fin);
try { while(parser)
YAML::Parser parser(fin); {
YAML::Node doc; YAML::Node doc;
parser.GetNextDocument(doc); parser.GetNextDocument(doc);
std::cout << doc;
std::cout << "name: " << doc["name"] << "\n";
std::cout << "age: " << doc["age"] << "\n";
} catch(YAML::TypedKeyNotFound <std::string>& e) {
std::cout << "Key '" << e.key << "' not found at line " << e.line+1 << ", col " << e.column+1 << "\n";
} catch(YAML::KeyNotFound& e) {
std::cout << "Key not found at line " << e.line+1 << ", col " << e.column+1 << "\n";
} catch(YAML::Exception& e) {
std::cout << "Error at line " << e.line+1 << ", col " << e.column+1 << ": " << e.msg << "\n";
} }
} }

View File

@@ -1,3 +1 @@
name: Brett Favre - test
position: QB
teams: [ Falcons, Packers, Jets ]