mirror of
https://github.com/jbeder/yaml-cpp.git
synced 2025-09-09 12:41:17 +00:00

This improves performance on the test.yaml attached to #158 by about 25% on my machine as compared to the previous commit (0.25s -> 0.20s), as measured by `time build/util/parse < test.yaml > /dev/null`.
226 lines
6.5 KiB
C++
226 lines
6.5 KiB
C++
#include "scanscalar.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include "exp.h"
|
|
#include "regeximpl.h"
|
|
#include "stream.h"
|
|
#include "yaml-cpp/exceptions.h" // IWYU pragma: keep
|
|
|
|
namespace YAML {
|
|
// ScanScalar
|
|
// . This is where the scalar magic happens.
|
|
//
|
|
// . We do the scanning in three phases:
|
|
// 1. Scan until newline
|
|
// 2. Eat newline
|
|
// 3. Scan leading blanks.
|
|
//
|
|
// . Depending on the parameters given, we store or stop
|
|
// and different places in the above flow.
|
|
std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
|
|
bool foundNonEmptyLine = false;
|
|
bool pastOpeningBreak = (params.fold == FOLD_FLOW);
|
|
bool emptyLine = false, moreIndented = false;
|
|
int foldedNewlineCount = 0;
|
|
bool foldedNewlineStartedMoreIndented = false;
|
|
std::size_t lastEscapedChar = std::string::npos;
|
|
std::string scalar;
|
|
params.leadingSpaces = false;
|
|
|
|
if (!params.end) {
|
|
params.end = &Exp::Empty();
|
|
}
|
|
|
|
while (INPUT) {
|
|
// ********************************
|
|
// Phase #1: scan until line ending
|
|
|
|
std::size_t lastNonWhitespaceChar = scalar.size();
|
|
bool escapedNewline = false;
|
|
while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
|
|
if (!INPUT)
|
|
break;
|
|
|
|
// document indicator?
|
|
if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
|
|
if (params.onDocIndicator == BREAK)
|
|
break;
|
|
else if (params.onDocIndicator == THROW)
|
|
throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
|
|
}
|
|
|
|
foundNonEmptyLine = true;
|
|
pastOpeningBreak = true;
|
|
|
|
// escaped newline? (only if we're escaping on slash)
|
|
if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
|
|
// eat escape character and get out (but preserve trailing whitespace!)
|
|
INPUT.get();
|
|
lastNonWhitespaceChar = scalar.size();
|
|
lastEscapedChar = scalar.size();
|
|
escapedNewline = true;
|
|
break;
|
|
}
|
|
|
|
// escape this?
|
|
if (INPUT.peek() == params.escape) {
|
|
scalar += Exp::Escape(INPUT);
|
|
lastNonWhitespaceChar = scalar.size();
|
|
lastEscapedChar = scalar.size();
|
|
continue;
|
|
}
|
|
|
|
// otherwise, just add the damn character
|
|
char ch = INPUT.get();
|
|
scalar += ch;
|
|
if (ch != ' ' && ch != '\t')
|
|
lastNonWhitespaceChar = scalar.size();
|
|
}
|
|
|
|
// eof? if we're looking to eat something, then we throw
|
|
if (!INPUT) {
|
|
if (params.eatEnd)
|
|
throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
|
|
break;
|
|
}
|
|
|
|
// doc indicator?
|
|
if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
|
|
Exp::DocIndicator().Matches(INPUT))
|
|
break;
|
|
|
|
// are we done via character match?
|
|
int n = params.end->Match(INPUT);
|
|
if (n >= 0) {
|
|
if (params.eatEnd)
|
|
INPUT.eat(n);
|
|
break;
|
|
}
|
|
|
|
// do we remove trailing whitespace?
|
|
if (params.fold == FOLD_FLOW)
|
|
scalar.erase(lastNonWhitespaceChar);
|
|
|
|
// ********************************
|
|
// Phase #2: eat line ending
|
|
n = Exp::Break().Match(INPUT);
|
|
INPUT.eat(n);
|
|
|
|
// ********************************
|
|
// Phase #3: scan initial spaces
|
|
|
|
// first the required indentation
|
|
while (INPUT.peek() == ' ' && (INPUT.column() < params.indent ||
|
|
(params.detectIndent && !foundNonEmptyLine)))
|
|
INPUT.eat(1);
|
|
|
|
// update indent if we're auto-detecting
|
|
if (params.detectIndent && !foundNonEmptyLine)
|
|
params.indent = std::max(params.indent, INPUT.column());
|
|
|
|
// and then the rest of the whitespace
|
|
while (Exp::Blank().Matches(INPUT)) {
|
|
// we check for tabs that masquerade as indentation
|
|
if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
|
|
params.onTabInIndentation == THROW)
|
|
throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
|
|
|
|
if (!params.eatLeadingWhitespace)
|
|
break;
|
|
|
|
INPUT.eat(1);
|
|
}
|
|
|
|
// was this an empty line?
|
|
bool nextEmptyLine = Exp::Break().Matches(INPUT);
|
|
bool nextMoreIndented = Exp::Blank().Matches(INPUT);
|
|
if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
|
|
foldedNewlineStartedMoreIndented = moreIndented;
|
|
|
|
// for block scalars, we always start with a newline, so we should ignore it
|
|
// (not fold or keep)
|
|
if (pastOpeningBreak) {
|
|
switch (params.fold) {
|
|
case DONT_FOLD:
|
|
scalar += "\n";
|
|
break;
|
|
case FOLD_BLOCK:
|
|
if (!emptyLine && !nextEmptyLine && !moreIndented &&
|
|
!nextMoreIndented && INPUT.column() >= params.indent)
|
|
scalar += " ";
|
|
else if (nextEmptyLine)
|
|
foldedNewlineCount++;
|
|
else
|
|
scalar += "\n";
|
|
|
|
if (!nextEmptyLine && foldedNewlineCount > 0) {
|
|
scalar += std::string(foldedNewlineCount - 1, '\n');
|
|
if (foldedNewlineStartedMoreIndented ||
|
|
nextMoreIndented | !foundNonEmptyLine)
|
|
scalar += "\n";
|
|
foldedNewlineCount = 0;
|
|
}
|
|
break;
|
|
case FOLD_FLOW:
|
|
if (nextEmptyLine)
|
|
scalar += "\n";
|
|
else if (!emptyLine && !nextEmptyLine && !escapedNewline)
|
|
scalar += " ";
|
|
break;
|
|
}
|
|
}
|
|
|
|
emptyLine = nextEmptyLine;
|
|
moreIndented = nextMoreIndented;
|
|
pastOpeningBreak = true;
|
|
|
|
// are we done via indentation?
|
|
if (!emptyLine && INPUT.column() < params.indent) {
|
|
params.leadingSpaces = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// post-processing
|
|
if (params.trimTrailingSpaces) {
|
|
std::size_t pos = scalar.find_last_not_of(' ');
|
|
if (lastEscapedChar != std::string::npos) {
|
|
if (pos < lastEscapedChar || pos == std::string::npos)
|
|
pos = lastEscapedChar;
|
|
}
|
|
if (pos < scalar.size())
|
|
scalar.erase(pos + 1);
|
|
}
|
|
|
|
switch (params.chomp) {
|
|
case CLIP: {
|
|
std::size_t pos = scalar.find_last_not_of('\n');
|
|
if (lastEscapedChar != std::string::npos) {
|
|
if (pos < lastEscapedChar || pos == std::string::npos)
|
|
pos = lastEscapedChar;
|
|
}
|
|
if (pos == std::string::npos)
|
|
scalar.erase();
|
|
else if (pos + 1 < scalar.size())
|
|
scalar.erase(pos + 2);
|
|
} break;
|
|
case STRIP: {
|
|
std::size_t pos = scalar.find_last_not_of('\n');
|
|
if (lastEscapedChar != std::string::npos) {
|
|
if (pos < lastEscapedChar || pos == std::string::npos)
|
|
pos = lastEscapedChar;
|
|
}
|
|
if (pos == std::string::npos)
|
|
scalar.erase();
|
|
else if (pos < scalar.size())
|
|
scalar.erase(pos + 1);
|
|
} break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return scalar;
|
|
}
|
|
}
|