Update documentation for Scanner and AnchorDict, and fix formatting.

This commit is contained in:
Jesse Beder
2016-05-12 23:05:28 -05:00
parent a45a61742b
commit 148da47114
3 changed files with 146 additions and 89 deletions

View File

@@ -12,11 +12,13 @@
#include "../anchor.h" #include "../anchor.h"
namespace YAML { namespace YAML {
/// AnchorDict /**
/// . An object that stores and retrieves values correlating to anchor_t * An object that stores and retrieves values correlating to {@link anchor_t}
/// values. * values.
/// . Efficient implementation that can make assumptions about how anchor_t *
/// values are assigned by the Parser class. * <p>Efficient implementation that can make assumptions about how
* {@code anchor_t} values are assigned by the {@link Parser} class.
*/
template <class T> template <class T>
class AnchorDict { class AnchorDict {
public: public:

View File

@@ -16,23 +16,17 @@ Scanner::Scanner(std::istream& in)
Scanner::~Scanner() {} Scanner::~Scanner() {}
// empty
// . Returns true if there are no more tokens to be read
bool Scanner::empty() { bool Scanner::empty() {
EnsureTokensInQueue(); EnsureTokensInQueue();
return m_tokens.empty(); return m_tokens.empty();
} }
// pop
// . Simply removes the next token on the queue.
void Scanner::pop() { void Scanner::pop() {
EnsureTokensInQueue(); EnsureTokensInQueue();
if (!m_tokens.empty()) if (!m_tokens.empty())
m_tokens.pop(); m_tokens.pop();
} }
// peek
// . Returns (but does not remove) the next token on the queue.
Token& Scanner::peek() { Token& Scanner::peek() {
EnsureTokensInQueue(); EnsureTokensInQueue();
assert(!m_tokens.empty()); // should we be asserting here? I mean, we really assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
@@ -49,21 +43,17 @@ Token& Scanner::peek() {
return m_tokens.front(); return m_tokens.front();
} }
// mark
// . Returns the current mark in the stream
Mark Scanner::mark() const { return INPUT.mark(); } Mark Scanner::mark() const { return INPUT.mark(); }
// EnsureTokensInQueue
// . Scan until there's a valid token at the front of the queue,
// or we're sure the queue is empty.
void Scanner::EnsureTokensInQueue() { void Scanner::EnsureTokensInQueue() {
while (1) { while (1) {
if (!m_tokens.empty()) { if (!m_tokens.empty()) {
Token& token = m_tokens.front(); Token& token = m_tokens.front();
// if this guy's valid, then we're done // if this guy's valid, then we're done
if (token.status == Token::VALID) if (token.status == Token::VALID) {
return; return;
}
// here's where we clean up the impossible tokens // here's where we clean up the impossible tokens
if (token.status == Token::INVALID) { if (token.status == Token::INVALID) {
@@ -75,23 +65,23 @@ void Scanner::EnsureTokensInQueue() {
} }
// no token? maybe we've actually finished // no token? maybe we've actually finished
if (m_endedStream) if (m_endedStream) {
return; return;
}
// no? then scan... // no? then scan...
ScanNextToken(); ScanNextToken();
} }
} }
// ScanNextToken
// . The main scanning function; here we branch out and
// scan whatever the next token should be.
void Scanner::ScanNextToken() { void Scanner::ScanNextToken() {
if (m_endedStream) if (m_endedStream) {
return; return;
}
if (!m_startedStream) if (!m_startedStream) {
return StartStream(); return StartStream();
}
// get rid of whitespace, etc. (in between tokens it should be irrelevant) // get rid of whitespace, etc. (in between tokens it should be irrelevant)
ScanToNextToken(); ScanToNextToken();
@@ -104,85 +94,102 @@ void Scanner::ScanNextToken() {
// ***** // *****
// end of stream // end of stream
if (!INPUT) if (!INPUT) {
return EndStream(); return EndStream();
}
if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
return ScanDirective(); return ScanDirective();
}
// document token // document token
if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
return ScanDocStart(); return ScanDocStart();
}
if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
return ScanDocEnd(); return ScanDocEnd();
}
// flow start/end/entry // flow start/end/entry
if (INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart) if (INPUT.peek() == Keys::FlowSeqStart ||
INPUT.peek() == Keys::FlowMapStart) {
return ScanFlowStart(); return ScanFlowStart();
}
if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
return ScanFlowEnd(); return ScanFlowEnd();
}
if (INPUT.peek() == Keys::FlowEntry) if (INPUT.peek() == Keys::FlowEntry) {
return ScanFlowEntry(); return ScanFlowEntry();
}
// block/map stuff // block/map stuff
if (Exp::BlockEntry().Matches(INPUT)) if (Exp::BlockEntry().Matches(INPUT)) {
return ScanBlockEntry(); return ScanBlockEntry();
}
if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
return ScanKey(); return ScanKey();
}
if (GetValueRegex().Matches(INPUT)) if (GetValueRegex().Matches(INPUT)) {
return ScanValue(); return ScanValue();
}
// alias/anchor // alias/anchor
if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
return ScanAnchorOrAlias(); return ScanAnchorOrAlias();
}
// tag // tag
if (INPUT.peek() == Keys::Tag) if (INPUT.peek() == Keys::Tag) {
return ScanTag(); return ScanTag();
}
// special scalars // special scalars
if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
INPUT.peek() == Keys::FoldedScalar)) INPUT.peek() == Keys::FoldedScalar)) {
return ScanBlockScalar(); return ScanBlockScalar();
}
if (INPUT.peek() == '\'' || INPUT.peek() == '\"') if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
return ScanQuotedScalar(); return ScanQuotedScalar();
}
// plain scalars // plain scalars
if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()) if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
.Matches(INPUT)) .Matches(INPUT)) {
return ScanPlainScalar(); return ScanPlainScalar();
}
// don't know what it is! // don't know what it is!
throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN); throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
} }
// ScanToNextToken
// . Eats input until we reach the next token-like thing.
void Scanner::ScanToNextToken() { void Scanner::ScanToNextToken() {
while (1) { while (1) {
// first eat whitespace // first eat whitespace
while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
if (InBlockContext() && Exp::Tab().Matches(INPUT)) if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
}
INPUT.eat(1); INPUT.eat(1);
} }
// then eat a comment // then eat a comment
if (Exp::Comment().Matches(INPUT)) { if (Exp::Comment().Matches(INPUT)) {
// eat until line break // eat until line break
while (INPUT && !Exp::Break().Matches(INPUT)) while (INPUT && !Exp::Break().Matches(INPUT)) {
INPUT.eat(1); INPUT.eat(1);
} }
}
// if it's NOT a line break, then we're done! // if it's NOT a line break, then we're done!
if (!Exp::Break().Matches(INPUT)) if (!Exp::Break().Matches(INPUT)) {
break; break;
}
// otherwise, let's eat the line break and keep going // otherwise, let's eat the line break and keep going
int n = Exp::Break().Match(INPUT); int n = Exp::Break().Match(INPUT);
@@ -192,10 +199,11 @@ void Scanner::ScanToNextToken() {
InvalidateSimpleKey(); InvalidateSimpleKey();
// new line - we may be able to accept a simple key now // new line - we may be able to accept a simple key now
if (InBlockContext()) if (InBlockContext()) {
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
} }
} }
}
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// Misc. helpers // Misc. helpers
@@ -210,40 +218,39 @@ void Scanner::ScanToNextToken() {
// that they can't contribute to indentation, so once you've seen a tab in a // that they can't contribute to indentation, so once you've seen a tab in a
// line, you can't start a simple key // line, you can't start a simple key
bool Scanner::IsWhitespaceToBeEaten(char ch) { bool Scanner::IsWhitespaceToBeEaten(char ch) {
if (ch == ' ') if (ch == ' ') {
return true; return true;
}
if (ch == '\t') if (ch == '\t') {
return true; return true;
}
return false; return false;
} }
// GetValueRegex
// . Get the appropriate regex to check if it's a value token
const RegEx& Scanner::GetValueRegex() const { const RegEx& Scanner::GetValueRegex() const {
if (InBlockContext()) if (InBlockContext()) {
return Exp::Value(); return Exp::Value();
}
return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow(); return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
} }
// StartStream
// . Set the initial conditions for starting a stream.
void Scanner::StartStream() { void Scanner::StartStream() {
m_startedStream = true; m_startedStream = true;
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
std::unique_ptr<IndentMarker> pIndent(new IndentMarker(-1, IndentMarker::NONE)); std::unique_ptr<IndentMarker> pIndent(
new IndentMarker(-1, IndentMarker::NONE));
m_indentRefs.push_back(std::move(pIndent)); m_indentRefs.push_back(std::move(pIndent));
m_indents.push(&m_indentRefs.back()); m_indents.push(&m_indentRefs.back());
} }
// EndStream
// . Close out the stream, finish up, etc.
void Scanner::EndStream() { void Scanner::EndStream() {
// force newline // force newline
if (INPUT.column() > 0) if (INPUT.column() > 0) {
INPUT.ResetColumn(); INPUT.ResetColumn();
}
PopAllIndents(); PopAllIndents();
PopAllSimpleKeys(); PopAllSimpleKeys();
@@ -271,27 +278,26 @@ Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
throw std::runtime_error("yaml-cpp: internal error, invalid indent type"); throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
} }
// PushIndentTo
// . Pushes an indentation onto the stack, and enqueues the
// proper token (sequence start or mapping start).
// . Returns the indent marker it generates (if any).
Scanner::IndentMarker* Scanner::PushIndentTo(int column, Scanner::IndentMarker* Scanner::PushIndentTo(int column,
IndentMarker::INDENT_TYPE type) { IndentMarker::INDENT_TYPE type) {
// are we in flow? // are we in flow?
if (InFlowContext()) if (InFlowContext()) {
return 0; return 0;
}
std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type)); std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
IndentMarker& indent = *pIndent; IndentMarker& indent = *pIndent;
const IndentMarker& lastIndent = *m_indents.top(); const IndentMarker& lastIndent = *m_indents.top();
// is this actually an indentation? // is this actually an indentation?
if (indent.column < lastIndent.column) if (indent.column < lastIndent.column) {
return 0; return 0;
}
if (indent.column == lastIndent.column && if (indent.column == lastIndent.column &&
!(indent.type == IndentMarker::SEQ && !(indent.type == IndentMarker::SEQ &&
lastIndent.type == IndentMarker::MAP)) lastIndent.type == IndentMarker::MAP)) {
return 0; return 0;
}
// push a start token // push a start token
indent.pStartToken = PushToken(GetStartTokenFor(type)); indent.pStartToken = PushToken(GetStartTokenFor(type));
@@ -302,53 +308,50 @@ Scanner::IndentMarker* Scanner::PushIndentTo(int column,
return &m_indentRefs.back(); return &m_indentRefs.back();
} }
// PopIndentToHere
// . Pops indentations off the stack until we reach the current indentation
// level,
// and enqueues the proper token each time.
// . Then pops all invalid indentations off.
void Scanner::PopIndentToHere() { void Scanner::PopIndentToHere() {
// are we in flow? // are we in flow?
if (InFlowContext()) if (InFlowContext()) {
return; return;
}
// now pop away // now pop away
while (!m_indents.empty()) { while (!m_indents.empty()) {
const IndentMarker& indent = *m_indents.top(); const IndentMarker& indent = *m_indents.top();
if (indent.column < INPUT.column()) if (indent.column < INPUT.column()) {
break; break;
}
if (indent.column == INPUT.column() && if (indent.column == INPUT.column() &&
!(indent.type == IndentMarker::SEQ && !(indent.type == IndentMarker::SEQ &&
!Exp::BlockEntry().Matches(INPUT))) !Exp::BlockEntry().Matches(INPUT))) {
break; break;
}
PopIndent(); PopIndent();
} }
while (!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID) while (!m_indents.empty() &&
m_indents.top()->status == IndentMarker::INVALID) {
PopIndent(); PopIndent();
} }
}
// PopAllIndents
// . Pops all indentations (except for the base empty one) off the stack,
// and enqueues the proper token each time.
void Scanner::PopAllIndents() { void Scanner::PopAllIndents() {
// are we in flow? // are we in flow?
if (InFlowContext()) if (InFlowContext()) {
return; return;
}
// now pop away // now pop away
while (!m_indents.empty()) { while (!m_indents.empty()) {
const IndentMarker& indent = *m_indents.top(); const IndentMarker& indent = *m_indents.top();
if (indent.type == IndentMarker::NONE) if (indent.type == IndentMarker::NONE) {
break; break;
}
PopIndent(); PopIndent();
} }
} }
// PopIndent
// . Pops a single indent, pushing the proper token
void Scanner::PopIndent() { void Scanner::PopIndent() {
const IndentMarker& indent = *m_indents.top(); const IndentMarker& indent = *m_indents.top();
m_indents.pop(); m_indents.pop();
@@ -358,23 +361,20 @@ void Scanner::PopIndent() {
return; return;
} }
if (indent.type == IndentMarker::SEQ) if (indent.type == IndentMarker::SEQ) {
m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark())); m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
else if (indent.type == IndentMarker::MAP) } else if (indent.type == IndentMarker::MAP) {
m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark())); m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
} }
}
// GetTopIndent
int Scanner::GetTopIndent() const { int Scanner::GetTopIndent() const {
if (m_indents.empty()) if (m_indents.empty()) {
return 0; return 0;
}
return m_indents.top()->column; return m_indents.top()->column;
} }
// ThrowParserException
// . Throws a ParserException with the current token location
// (if available).
// . Does not parse any more tokens.
void Scanner::ThrowParserException(const std::string& msg) const { void Scanner::ThrowParserException(const std::string& msg) const {
Mark mark = Mark::null_mark(); Mark mark = Mark::null_mark();
if (!m_tokens.empty()) { if (!m_tokens.empty()) {
@@ -383,4 +383,4 @@ void Scanner::ThrowParserException(const std::string& msg) const {
} }
throw ParserException(mark, msg); throw ParserException(mark, msg);
} }
} } // namespace YAML

View File

@@ -24,15 +24,24 @@ namespace YAML {
class Node; class Node;
class RegEx; class RegEx;
/**
* A scanner transforms a stream of characters into a stream of tokens.
*/
class Scanner { class Scanner {
public: public:
Scanner(std::istream &in); explicit Scanner(std::istream &in);
~Scanner(); ~Scanner();
// token queue management (hopefully this looks kinda stl-ish) /** Returns true if there are no more tokens to be read. */
bool empty(); bool empty();
/** Removes the next token in the queue. */
void pop(); void pop();
/** Returns, but does not remove, the next token in the queue. */
Token &peek(); Token &peek();
/** Returns the current mark in the input stream. */
Mark mark() const; Mark mark() const;
private: private:
@@ -52,11 +61,29 @@ class Scanner {
private: private:
// scanning // scanning
/**
* Scans until there's a valid token at the front of the queue, or the queue
* is empty. The state can be checked by {@link #empty}, and the next token
* retrieved by {@link #peek}.
*/
void EnsureTokensInQueue(); void EnsureTokensInQueue();
/**
* The main scanning function; this method branches out to scan whatever the
* next token should be.
*/
void ScanNextToken(); void ScanNextToken();
/** Eats the input stream until it reaches the next token-like thing. */
void ScanToNextToken(); void ScanToNextToken();
/** Sets the initial conditions for starting a stream. */
void StartStream(); void StartStream();
/** Closes out the stream, finishes up, etc. */
void EndStream(); void EndStream();
Token *PushToken(Token::TYPE type); Token *PushToken(Token::TYPE type);
bool InFlowContext() const { return !m_flows.empty(); } bool InFlowContext() const { return !m_flows.empty(); }
@@ -64,9 +91,29 @@ class Scanner {
std::size_t GetFlowLevel() const { return m_flows.size(); } std::size_t GetFlowLevel() const { return m_flows.size(); }
Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const; Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
/**
* Pushes an indentation onto the stack, and enqueues the proper token
* (sequence start or mapping start).
*
* @return the indent marker it generates (if any).
*/
IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type); IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
/**
* Pops indentations off the stack until it reaches the current indentation
* level, and enqueues the proper token each time. Then pops all invalid
* indentations off.
*/
void PopIndentToHere(); void PopIndentToHere();
/**
* Pops all indentations (except for the base empty one) off the stack, and
* enqueues the proper token each time.
*/
void PopAllIndents(); void PopAllIndents();
/** Pops a single indent, pushing the proper token. */
void PopIndent(); void PopIndent();
int GetTopIndent() const; int GetTopIndent() const;
@@ -78,9 +125,17 @@ class Scanner {
bool VerifySimpleKey(); bool VerifySimpleKey();
void PopAllSimpleKeys(); void PopAllSimpleKeys();
/**
* Throws a ParserException with the current token location (if available),
* and does not parse any more tokens.
*/
void ThrowParserException(const std::string &msg) const; void ThrowParserException(const std::string &msg) const;
bool IsWhitespaceToBeEaten(char ch); bool IsWhitespaceToBeEaten(char ch);
/**
* Returns the appropriate regex to check if the next token is a value token.
*/
const RegEx &GetValueRegex() const; const RegEx &GetValueRegex() const;
struct SimpleKey { struct SimpleKey {