Commit 9b4db068 authored by Jesse Beder's avatar Jesse Beder
Browse files

Run clang-format

parent e40ed4f9
#include "regex.h" #include "regex.h"
namespace YAML namespace YAML {
{ // constructors
// constructors RegEx::RegEx() : m_op(REGEX_EMPTY) {}
RegEx::RegEx(): m_op(REGEX_EMPTY)
{ RegEx::RegEx(REGEX_OP op) : m_op(op) {}
}
RegEx::RegEx(char ch) : m_op(REGEX_MATCH), m_a(ch) {}
RegEx::RegEx(REGEX_OP op): m_op(op)
{ RegEx::RegEx(char a, char z) : m_op(REGEX_RANGE), m_a(a), m_z(z) {}
}
RegEx::RegEx(const std::string& str, REGEX_OP op) : m_op(op) {
RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_a(ch) for (std::size_t i = 0; i < str.size(); i++)
{ m_params.push_back(RegEx(str[i]));
} }
RegEx::RegEx(char a, char z): m_op(REGEX_RANGE), m_a(a), m_z(z) // combination constructors
{ RegEx operator!(const RegEx & ex) {
} RegEx ret(REGEX_NOT);
ret.m_params.push_back(ex);
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op) return ret;
{
for(std::size_t i=0;i<str.size();i++)
m_params.push_back(RegEx(str[i]));
}
// combination constructors
RegEx operator ! (const RegEx& ex)
{
RegEx ret(REGEX_NOT);
ret.m_params.push_back(ex);
return ret;
}
RegEx operator || (const RegEx& ex1, const RegEx& ex2)
{
RegEx ret(REGEX_OR);
ret.m_params.push_back(ex1);
ret.m_params.push_back(ex2);
return ret;
}
RegEx operator && (const RegEx& ex1, const RegEx& ex2)
{
RegEx ret(REGEX_AND);
ret.m_params.push_back(ex1);
ret.m_params.push_back(ex2);
return ret;
}
RegEx operator + (const RegEx& ex1, const RegEx& ex2)
{
RegEx ret(REGEX_SEQ);
ret.m_params.push_back(ex1);
ret.m_params.push_back(ex2);
return ret;
}
} }
// Alternation: builds a REGEX_OR node whose children are copies of ex1 and
// ex2, stored in that order.
RegEx operator||(const RegEx& ex1, const RegEx& ex2) {
  RegEx alternation(REGEX_OR);
  alternation.m_params.push_back(ex1);
  alternation.m_params.push_back(ex2);
  return alternation;
}
// Conjunction: builds a REGEX_AND node whose children are copies of ex1 and
// ex2, stored in that order.
RegEx operator&&(const RegEx& ex1, const RegEx& ex2) {
  RegEx conjunction(REGEX_AND);
  conjunction.m_params.push_back(ex1);
  conjunction.m_params.push_back(ex2);
  return conjunction;
}
// Concatenation: builds a REGEX_SEQ node whose children are copies of ex1
// and ex2, stored in that order.
RegEx operator+(const RegEx& ex1, const RegEx& ex2) {
  RegEx sequence(REGEX_SEQ);
  sequence.m_params.push_back(ex1);
  sequence.m_params.push_back(ex2);
  return sequence;
}
}
#ifndef REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <vector> #include <vector>
#include <string> #include <string>
namespace YAML namespace YAML {
{ class Stream;
class Stream;
// Node kinds of the simplified regular-expression tree (stored in
// RegEx::m_op). The numeric values are the ones the compiler would assign
// implicitly; they are spelled out only for readability.
enum REGEX_OP {
  REGEX_EMPTY = 0,  // default-constructed RegEx
  REGEX_MATCH = 1,  // a single character, RegEx(char)
  REGEX_RANGE = 2,  // a character range, RegEx(char, char)
  REGEX_OR = 3,     // built by operator||
  REGEX_AND = 4,    // built by operator&&
  REGEX_NOT = 5,    // built by operator!
  REGEX_SEQ = 6     // built by operator+; default op for RegEx(std::string)
};
// simplified regular expressions
// . Only straightforward matches (no repeated characters)
// . Only matches from start of string
class RegEx {
public:
RegEx();
RegEx(char ch);
RegEx(char a, char z);
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
~RegEx() {}
enum REGEX_OP { REGEX_EMPTY, REGEX_MATCH, REGEX_RANGE, REGEX_OR, REGEX_AND, REGEX_NOT, REGEX_SEQ }; friend RegEx operator!(const RegEx & ex);
friend RegEx operator||(const RegEx& ex1, const RegEx& ex2);
friend RegEx operator&&(const RegEx& ex1, const RegEx& ex2);
friend RegEx operator+(const RegEx& ex1, const RegEx& ex2);
// simplified regular expressions bool Matches(char ch) const;
// . Only straightforward matches (no repeated characters) bool Matches(const std::string& str) const;
// . Only matches from start of string bool Matches(const Stream& in) const;
class RegEx template <typename Source>
{ bool Matches(const Source& source) const;
public:
RegEx();
RegEx(char ch);
RegEx(char a, char z);
RegEx(const std::string& str, REGEX_OP op = REGEX_SEQ);
~RegEx() {}
friend RegEx operator ! (const RegEx& ex); int Match(const std::string& str) const;
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2); int Match(const Stream& in) const;
friend RegEx operator && (const RegEx& ex1, const RegEx& ex2); template <typename Source>
friend RegEx operator + (const RegEx& ex1, const RegEx& ex2); int Match(const Source& source) const;
bool Matches(char ch) const;
bool Matches(const std::string& str) const;
bool Matches(const Stream& in) const;
template <typename Source> bool Matches(const Source& source) const;
int Match(const std::string& str) const; private:
int Match(const Stream& in) const; RegEx(REGEX_OP op);
template <typename Source> int Match(const Source& source) const;
private: template <typename Source>
RegEx(REGEX_OP op); bool IsValidSource(const Source& source) const;
template <typename Source>
template <typename Source> bool IsValidSource(const Source& source) const; int MatchUnchecked(const Source& source) const;
template <typename Source> int MatchUnchecked(const Source& source) const;
template <typename Source> int MatchOpEmpty(const Source& source) const; template <typename Source>
template <typename Source> int MatchOpMatch(const Source& source) const; int MatchOpEmpty(const Source& source) const;
template <typename Source> int MatchOpRange(const Source& source) const; template <typename Source>
template <typename Source> int MatchOpOr(const Source& source) const; int MatchOpMatch(const Source& source) const;
template <typename Source> int MatchOpAnd(const Source& source) const; template <typename Source>
template <typename Source> int MatchOpNot(const Source& source) const; int MatchOpRange(const Source& source) const;
template <typename Source> int MatchOpSeq(const Source& source) const; template <typename Source>
int MatchOpOr(const Source& source) const;
template <typename Source>
int MatchOpAnd(const Source& source) const;
template <typename Source>
int MatchOpNot(const Source& source) const;
template <typename Source>
int MatchOpSeq(const Source& source) const;
private: private:
REGEX_OP m_op; REGEX_OP m_op;
char m_a, m_z; char m_a, m_z;
std::vector <RegEx> m_params; std::vector<RegEx> m_params;
}; };
} }
#include "regeximpl.h" #include "regeximpl.h"
#endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // REGEX_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include "stream.h" #include "stream.h"
#include "stringsource.h" #include "stringsource.h"
#include "streamcharsource.h" #include "streamcharsource.h"
namespace YAML namespace YAML {
{ // query matches
// query matches inline bool RegEx::Matches(char ch) const {
inline bool RegEx::Matches(char ch) const { std::string str;
std::string str; str += ch;
str += ch; return Matches(str);
return Matches(str); }
}
inline bool RegEx::Matches(const std::string& str) const {
inline bool RegEx::Matches(const std::string& str) const { return Match(str) >= 0;
return Match(str) >= 0; }
}
inline bool RegEx::Matches(const Stream& in) const { return Match(in) >= 0; }
inline bool RegEx::Matches(const Stream& in) const {
return Match(in) >= 0; template <typename Source>
} inline bool RegEx::Matches(const Source& source) const {
return Match(source) >= 0;
template <typename Source> }
inline bool RegEx::Matches(const Source& source) const {
return Match(source) >= 0; // Match
} // . Matches the given string against this regular expression.
// . Returns the number of characters matched.
// Match // . Returns -1 if no characters were matched (the reason for
// . Matches the given string against this regular expression. // not returning zero is that we may have an empty regex
// . Returns the number of characters matched. // which is ALWAYS successful at matching zero characters).
// . Returns -1 if no characters were matched (the reason for // . REMEMBER that we only match from the start of the buffer!
// not returning zero is that we may have an empty regex inline int RegEx::Match(const std::string& str) const {
// which is ALWAYS successful at matching zero characters). StringCharSource source(str.c_str(), str.size());
// . REMEMBER that we only match from the start of the buffer! return Match(source);
inline int RegEx::Match(const std::string& str) const }
{
StringCharSource source(str.c_str(), str.size()); inline int RegEx::Match(const Stream& in) const {
return Match(source); StreamCharSource source(in);
} return Match(source);
}
inline int RegEx::Match(const Stream& in) const
{ template <typename Source>
StreamCharSource source(in); inline bool RegEx::IsValidSource(const Source& source) const {
return Match(source); return source;
} }
template <typename Source> template <>
inline bool RegEx::IsValidSource(const Source& source) const inline bool RegEx::IsValidSource<StringCharSource>(
{ const StringCharSource& source) const {
return source; switch (m_op) {
} case REGEX_MATCH:
case REGEX_RANGE:
template<> return source;
inline bool RegEx::IsValidSource<StringCharSource>(const StringCharSource&source) const default:
{ return true;
switch(m_op) { }
case REGEX_MATCH: }
case REGEX_RANGE:
return source; template <typename Source>
default: inline int RegEx::Match(const Source& source) const {
return true; return IsValidSource(source) ? MatchUnchecked(source) : -1;
} }
}
template <typename Source>
template <typename Source> inline int RegEx::MatchUnchecked(const Source& source) const {
inline int RegEx::Match(const Source& source) const switch (m_op) {
{ case REGEX_EMPTY:
return IsValidSource(source) ? MatchUnchecked(source) : -1; return MatchOpEmpty(source);
} case REGEX_MATCH:
return MatchOpMatch(source);
template <typename Source> case REGEX_RANGE:
inline int RegEx::MatchUnchecked(const Source& source) const return MatchOpRange(source);
{ case REGEX_OR:
switch(m_op) { return MatchOpOr(source);
case REGEX_EMPTY: case REGEX_AND:
return MatchOpEmpty(source); return MatchOpAnd(source);
case REGEX_MATCH: case REGEX_NOT:
return MatchOpMatch(source); return MatchOpNot(source);
case REGEX_RANGE: case REGEX_SEQ:
return MatchOpRange(source); return MatchOpSeq(source);
case REGEX_OR: }
return MatchOpOr(source);
case REGEX_AND: return -1;
return MatchOpAnd(source); }
case REGEX_NOT:
return MatchOpNot(source); //////////////////////////////////////////////////////////////////////////////
case REGEX_SEQ: // Operators
return MatchOpSeq(source); // Note: the convention MatchOp*<Source> is that we can assume
} // IsSourceValid(source).
// So we do all our checks *before* we call these functions
return -1;
} // EmptyOperator
template <typename Source>
////////////////////////////////////////////////////////////////////////////// inline int RegEx::MatchOpEmpty(const Source& source) const {
// Operators return source[0] == Stream::eof() ? 0 : -1;
// Note: the convention MatchOp*<Source> is that we can assume IsSourceValid(source). }
// So we do all our checks *before* we call these functions
template <>
// EmptyOperator inline int RegEx::MatchOpEmpty<StringCharSource>(const StringCharSource& source)
template <typename Source> const {
inline int RegEx::MatchOpEmpty(const Source& source) const { return !source
return source[0] == Stream::eof() ? 0 : -1; ? 0
} : -1; // the empty regex only is successful on the empty string
}
template <>
inline int RegEx::MatchOpEmpty<StringCharSource>(const StringCharSource& source) const { // MatchOperator
return !source ? 0 : -1; // the empty regex only is successful on the empty string template <typename Source>
} inline int RegEx::MatchOpMatch(const Source& source) const {
if (source[0] != m_a)
// MatchOperator return -1;
template <typename Source> return 1;
inline int RegEx::MatchOpMatch(const Source& source) const { }
if(source[0] != m_a)
return -1; // RangeOperator
return 1; template <typename Source>
} inline int RegEx::MatchOpRange(const Source& source) const {
if (m_a > source[0] || m_z < source[0])
// RangeOperator return -1;
template <typename Source> return 1;
inline int RegEx::MatchOpRange(const Source& source) const { }
if(m_a > source[0] || m_z < source[0])
return -1; // OrOperator
return 1; template <typename Source>
} inline int RegEx::MatchOpOr(const Source& source) const {
for (std::size_t i = 0; i < m_params.size(); i++) {
// OrOperator int n = m_params[i].MatchUnchecked(source);
template <typename Source> if (n >= 0)
inline int RegEx::MatchOpOr(const Source& source) const { return n;
for(std::size_t i=0;i<m_params.size();i++) { }
int n = m_params[i].MatchUnchecked(source); return -1;
if(n >= 0) }
return n;
} // AndOperator
return -1; // Note: 'AND' is a little funny, since we may be required to match things
} // of different lengths. If we find a match, we return the length of
// the FIRST entry on the list.
// AndOperator template <typename Source>
// Note: 'AND' is a little funny, since we may be required to match things inline int RegEx::MatchOpAnd(const Source& source) const {
// of different lengths. If we find a match, we return the length of int first = -1;
// the FIRST entry on the list. for (std::size_t i = 0; i < m_params.size(); i++) {
template <typename Source> int n = m_params[i].MatchUnchecked(source);
inline int RegEx::MatchOpAnd(const Source& source) const { if (n == -1)
int first = -1; return -1;
for(std::size_t i=0;i<m_params.size();i++) { if (i == 0)
int n = m_params[i].MatchUnchecked(source); first = n;
if(n == -1) }
return -1; return first;
if(i == 0) }
first = n;
} // NotOperator
return first; template <typename Source>
} inline int RegEx::MatchOpNot(const Source& source) const {
if (m_params.empty())
// NotOperator return -1;
template <typename Source> if (m_params[0].MatchUnchecked(source) >= 0)
inline int RegEx::MatchOpNot(const Source& source) const { return -1;
if(m_params.empty()) return 1;
return -1; }
if(m_params[0].MatchUnchecked(source) >= 0)
return -1; // SeqOperator
return 1; template <typename Source>
} inline int RegEx::MatchOpSeq(const Source& source) const {
int offset = 0;
// SeqOperator for (std::size_t i = 0; i < m_params.size(); i++) {
template <typename Source> int n = m_params[i].Match(source + offset); // note Match, not
inline int RegEx::MatchOpSeq(const Source& source) const { // MatchUnchecked because we
int offset = 0; // need to check validity after
for(std::size_t i=0;i<m_params.size();i++) { // the offset
int n = m_params[i].Match(source + offset); // note Match, not MatchUnchecked because we need to check validity after the offset if (n == -1)
if(n == -1) return -1;
return -1; offset += n;
offset += n; }
}
return offset;
return offset; }
} }
}
#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#endif // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -5,41 +5,38 @@ ...@@ -5,41 +5,38 @@
#include <cassert> #include <cassert>
#include <memory> #include <memory>
namespace YAML namespace YAML {
{ Scanner::Scanner(std::istream& in)
Scanner::Scanner(std::istream& in) : INPUT(in),
: INPUT(in), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_canBeJSONFlow(false) m_startedStream(false),
{ m_endedStream(false),
} m_simpleKeyAllowed(false),
m_canBeJSONFlow(false) {}
Scanner::~Scanner()
{ Scanner::~Scanner() {}
}
// empty
// empty // . Returns true if there are no more tokens to be read
// . Returns true if there are no more tokens to be read bool Scanner::empty() {
bool Scanner::empty() EnsureTokensInQueue();
{ return m_tokens.empty();
EnsureTokensInQueue(); }
return m_tokens.empty();
} // pop
// . Simply removes the next token on the queue.
// pop void Scanner::pop() {
// . Simply removes the next token on the queue. EnsureTokensInQueue();
void Scanner::pop() if (!m_tokens.empty())
{ m_tokens.pop();
EnsureTokensInQueue(); }
if(!m_tokens.empty())
m_tokens.pop(); // peek
} // . Returns (but does not remove) the next token on the queue.
Token& Scanner::peek() {
// peek EnsureTokensInQueue();
// . Returns (but does not remove) the next token on the queue. assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
Token& Scanner::peek() // just be checking
{ // if it's empty before peeking.
EnsureTokensInQueue();
assert(!m_tokens.empty()); // should we be asserting here? I mean, we really just be checking
// if it's empty before peeking.
#if 0 #if 0
static Token *pLast = 0; static Token *pLast = 0;
...@@ -48,347 +45,340 @@ namespace YAML ...@@ -48,347 +45,340 @@ namespace YAML
pLast = &m_tokens.front(); pLast = &m_tokens.front();
#endif #endif
return m_tokens.front(); return m_tokens.front();
} }
// mark
// . Reports the position the input stream is currently at (forwards to
//   INPUT.mark()).
Mark Scanner::mark() const {
  const Mark current = INPUT.mark();
  return current;
}
// EnsureTokensInQueue
// . Scan until there's a valid token at the front of the queue,
// or we're sure the queue is empty.
void Scanner::EnsureTokensInQueue() {
while (1) {
if (!m_tokens.empty()) {
Token& token = m_tokens.front();
// if this guy's valid, then we're done
if (token.status == Token::VALID)
return;
// here's where we clean up the impossible tokens
if (token.status == Token::INVALID) {
m_tokens.pop();
continue;
}
// note: what's left are the unverified tokens
}
// no token? maybe we've actually finished
if (m_endedStream)
return;
// no? then scan...
ScanNextToken();
}
}
// ScanNextToken
// . The scanner's dispatcher: inspects the upcoming input and hands off to
//   the Scan* routine for whichever token starts there.
// . The checks run in a fixed order; the first one that applies wins.
// . Throws ParserException (ErrorMsg::UNKNOWN_TOKEN) if nothing applies.
void Scanner::ScanNextToken() {
  if (m_endedStream) {
    return;
  }

  if (!m_startedStream) {
    StartStream();
    return;
  }

  // skip whitespace, comments and line breaks (between tokens they are
  // irrelevant)
  ScanToNextToken();

  // the new position may close some open blocks
  PopIndentToHere();

  // *****
  // From here on, branch on the next few characters.
  // *****

  // end of stream
  if (!INPUT) {
    EndStream();
    return;
  }

  // directive (only at the start of a line)
  if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
    ScanDirective();
    return;
  }

  // document start/end markers (only at the start of a line)
  if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
    ScanDocStart();
    return;
  }

  if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
    ScanDocEnd();
    return;
  }

  // flow start/end/entry
  if (INPUT.peek() == Keys::FlowSeqStart ||
      INPUT.peek() == Keys::FlowMapStart) {
    ScanFlowStart();
    return;
  }

  if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
    ScanFlowEnd();
    return;
  }

  if (INPUT.peek() == Keys::FlowEntry) {
    ScanFlowEntry();
    return;
  }

  // block entry, key and value indicators
  if (Exp::BlockEntry().Matches(INPUT)) {
    ScanBlockEntry();
    return;
  }

  if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
    ScanKey();
    return;
  }

  if (GetValueRegex().Matches(INPUT)) {
    ScanValue();
    return;
  }

  // alias/anchor
  if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
    ScanAnchorOrAlias();
    return;
  }

  // tag
  if (INPUT.peek() == Keys::Tag) {
    ScanTag();
    return;
  }

  // block scalars (literal/folded) are only recognised in block context
  if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
                           INPUT.peek() == Keys::FoldedScalar)) {
    ScanBlockScalar();
    return;
  }

  // quoted scalars
  if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
    ScanQuotedScalar();
    return;
  }

  // plain scalars
  if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
          .Matches(INPUT)) {
    ScanPlainScalar();
    return;
  }

  // nothing recognised this input
  throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}
// ScanToNextToken
// . Eats input until we reach the next token-like thing.
void Scanner::ScanToNextToken() {
while (1) {
// first eat whitespace
while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
if (InBlockContext() && Exp::Tab().Matches(INPUT))
m_simpleKeyAllowed = false;
INPUT.eat(1);
}
// mark // then eat a comment
// . Returns the current mark in the stream if (Exp::Comment().Matches(INPUT)) {
Mark Scanner::mark() const // eat until line break
{ while (INPUT && !Exp::Break().Matches(INPUT))
return INPUT.mark(); INPUT.eat(1);
} }
// EnsureTokensInQueue // if it's NOT a line break, then we're done!
// . Scan until there's a valid token at the front of the queue, if (!Exp::Break().Matches(INPUT))
// or we're sure the queue is empty. break;
void Scanner::EnsureTokensInQueue()
{ // otherwise, let's eat the line break and keep going
while(1) { int n = Exp::Break().Match(INPUT);
if(!m_tokens.empty()) { INPUT.eat(n);
Token& token = m_tokens.front();
// oh yeah, and let's get rid of that simple key
// if this guy's valid, then we're done InvalidateSimpleKey();
if(token.status == Token::VALID)
return; // new line - we may be able to accept a simple key now
if (InBlockContext())
// here's where we clean up the impossible tokens m_simpleKeyAllowed = true;
if(token.status == Token::INVALID) { }
m_tokens.pop(); }
continue;
} ///////////////////////////////////////////////////////////////////////
// Misc. helpers
// note: what's left are the unverified tokens
} // IsWhitespaceToBeEaten
// . We can eat whitespace if it's a space or tab
// no token? maybe we've actually finished // . Note: originally tabs in block context couldn't be eaten
if(m_endedStream) // "where a simple key could be allowed
return; // (i.e., not at the beginning of a line, or following '-', '?', or
// ':')"
// no? then scan... // I think this is wrong, since tabs can be non-content whitespace; it's just
ScanNextToken(); // that they can't contribute to indentation, so once you've seen a tab in a
} // line, you can't start a simple key
} bool Scanner::IsWhitespaceToBeEaten(char ch) {
if (ch == ' ')
// ScanNextToken return true;
// . The main scanning function; here we branch out and
// scan whatever the next token should be. if (ch == '\t')
void Scanner::ScanNextToken() return true;
{
if(m_endedStream) return false;
return; }
if(!m_startedStream) // GetValueRegex
return StartStream(); // . Get the appropriate regex to check if it's a value token
const RegEx& Scanner::GetValueRegex() const {
// get rid of whitespace, etc. (in between tokens it should be irrelevent) if (InBlockContext())
ScanToNextToken(); return Exp::Value();
// maybe need to end some blocks return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
PopIndentToHere(); }
// ***** // StartStream
// And now branch based on the next few characters! // . Set the initial conditions for starting a stream.
// ***** void Scanner::StartStream() {
m_startedStream = true;
// end of stream m_simpleKeyAllowed = true;
if(!INPUT) std::auto_ptr<IndentMarker> pIndent(new IndentMarker(-1, IndentMarker::NONE));
return EndStream(); m_indentRefs.push_back(pIndent);
m_indents.push(&m_indentRefs.back());
if(INPUT.column() == 0 && INPUT.peek() == Keys::Directive) }
return ScanDirective();
// EndStream
// document token // . Close out the stream, finish up, etc.
if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) void Scanner::EndStream() {
return ScanDocStart(); // force newline
if (INPUT.column() > 0)
if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) INPUT.ResetColumn();
return ScanDocEnd();
PopAllIndents();
// flow start/end/entry PopAllSimpleKeys();
if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
return ScanFlowStart(); m_simpleKeyAllowed = false;
m_endedStream = true;
if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
return ScanFlowEnd();
if(INPUT.peek() == Keys::FlowEntry)
return ScanFlowEntry();
// block/map stuff
if(Exp::BlockEntry().Matches(INPUT))
return ScanBlockEntry();
if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT))
return ScanKey();
if(GetValueRegex().Matches(INPUT))
return ScanValue();
// alias/anchor
if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
return ScanAnchorOrAlias();
// tag
if(INPUT.peek() == Keys::Tag)
return ScanTag();
// special scalars
if(InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
return ScanBlockScalar();
if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
return ScanQuotedScalar();
// plain scalars
if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT))
return ScanPlainScalar();
// don't know what it is!
throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}
// ScanToNextToken
// . Eats input until we reach the next token-like thing.
void Scanner::ScanToNextToken()
{
while(1) {
// first eat whitespace
while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
if(InBlockContext() && Exp::Tab().Matches(INPUT))
m_simpleKeyAllowed = false;
INPUT.eat(1);
}
// then eat a comment
if(Exp::Comment().Matches(INPUT)) {
// eat until line break
while(INPUT && !Exp::Break().Matches(INPUT))
INPUT.eat(1);
}
// if it's NOT a line break, then we're done!
if(!Exp::Break().Matches(INPUT))
break;
// otherwise, let's eat the line break and keep going
int n = Exp::Break().Match(INPUT);
INPUT.eat(n);
// oh yeah, and let's get rid of that simple key
InvalidateSimpleKey();
// new line - we may be able to accept a simple key now
if(InBlockContext())
m_simpleKeyAllowed = true;
}
}
///////////////////////////////////////////////////////////////////////
// Misc. helpers
// IsWhitespaceToBeEaten
// . We can eat whitespace if it's a space or tab
// . Note: originally tabs in block context couldn't be eaten
// "where a simple key could be allowed
// (i.e., not at the beginning of a line, or following '-', '?', or ':')"
// I think this is wrong, since tabs can be non-content whitespace; it's just
// that they can't contribute to indentation, so once you've seen a tab in a
// line, you can't start a simple key
bool Scanner::IsWhitespaceToBeEaten(char ch)
{
if(ch == ' ')
return true;
if(ch == '\t')
return true;
return false;
}
// GetValueRegex
// . Get the appropriate regex to check if it's a value token
const RegEx& Scanner::GetValueRegex() const
{
if(InBlockContext())
return Exp::Value();
return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
}
// StartStream
// . Set the initial conditions for starting a stream.
void Scanner::StartStream()
{
m_startedStream = true;
m_simpleKeyAllowed = true;
std::auto_ptr<IndentMarker> pIndent(new IndentMarker(-1, IndentMarker::NONE));
m_indentRefs.push_back(pIndent);
m_indents.push(&m_indentRefs.back());
}
// EndStream
// . Close out the stream, finish up, etc.
void Scanner::EndStream()
{
// force newline
if(INPUT.column() > 0)
INPUT.ResetColumn();
PopAllIndents();
PopAllSimpleKeys();
m_simpleKeyAllowed = false;
m_endedStream = true;
}
// PushToken
// . Appends a token of the given type, stamped with the current stream
//   mark, to the token queue.
// . Returns a pointer to the queued copy.
Token* Scanner::PushToken(Token::TYPE type) {
  const Token token(type, INPUT.mark());
  m_tokens.push(token);
  return &m_tokens.back();
}
// GetStartTokenFor
// . Maps an indent type to the token that opens that kind of block:
//   SEQ -> BLOCK_SEQ_START, MAP -> BLOCK_MAP_START.
// . Any other type (including NONE) is an internal error: it trips the
//   assert, and when asserts are compiled out it throws std::runtime_error.
Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  if (type == IndentMarker::SEQ)
    return Token::BLOCK_SEQ_START;
  if (type == IndentMarker::MAP)
    return Token::BLOCK_MAP_START;

  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
}
// PushIndentTo
// . In block context, records a new indentation level and enqueues the
//   matching start token (block sequence or block mapping).
// . Returns the stored marker, or 0 when nothing was pushed: in flow
//   context, when the new marker's column is smaller than the current
//   indent's, or when the columns are equal and this is not a sequence
//   on top of a mapping.
Scanner::IndentMarker* Scanner::PushIndentTo(int column,
                                             IndentMarker::INDENT_TYPE type) {
  // indentation is not tracked in flow context
  if (InFlowContext())
    return 0;

  std::auto_ptr<IndentMarker> pMarker(new IndentMarker(column, type));
  IndentMarker& marker = *pMarker;
  const IndentMarker& current = *m_indents.top();

  // decide whether this really opens a new level
  const bool shallower = marker.column < current.column;
  const bool sameColumn = marker.column == current.column;
  const bool seqOnMap =
      marker.type == IndentMarker::SEQ && current.type == IndentMarker::MAP;
  if (shallower || (sameColumn && !seqOnMap))
    return 0;

  // enqueue the start token; the marker remembers where it is
  marker.pStartToken = PushToken(GetStartTokenFor(type));

  // track the level on the indent stack, then store the marker itself
  m_indents.push(&marker);
  m_indentRefs.push_back(pMarker);
  return &m_indentRefs.back();
}
// PopIndentToHere
// . Pops indentations off the stack until we reach the current indentation level,
// and enqueues the proper token each time.
// . Then pops all invalid indentations off.
void Scanner::PopIndentToHere()
{
// are we in flow?
if(InFlowContext())
return;
// now pop away
while(!m_indents.empty()) {
const IndentMarker& indent = *m_indents.top();
if(indent.column < INPUT.column())
break;
if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT)))
break;
PopIndent();
}
while(!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID)
PopIndent();
}
// PopAllIndents
// . In block context, pops every indentation level down to (but not
//   including) the first marker of type NONE; each pop goes through
//   PopIndent().
void Scanner::PopAllIndents() {
  // indentation is not tracked in flow context
  if (InFlowContext())
    return;

  while (!m_indents.empty() && m_indents.top()->type != IndentMarker::NONE) {
    PopIndent();
  }
}
// PopIndent
// . Pops a single indent, pushing the proper token
void Scanner::PopIndent()
{
const IndentMarker& indent = *m_indents.top();
m_indents.pop();
if(indent.status != IndentMarker::VALID) {
InvalidateSimpleKey();
return;
}
if(indent.type == IndentMarker::SEQ)
m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
else if(indent.type == IndentMarker::MAP)
m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
}
// GetTopIndent
// . Column of the innermost indentation level, or 0 when the indent
//   stack is empty.
int Scanner::GetTopIndent() const {
  return m_indents.empty() ? 0 : m_indents.top()->column;
}
// ThrowParserException
// . Throws a ParserException with the current token location
// (if available).
// . Does not parse any more tokens.
void Scanner::ThrowParserException(const std::string& msg) const
{
Mark mark = Mark::null_mark();
if(!m_tokens.empty()) {
const Token& token = m_tokens.front();
mark = token.mark;
}
throw ParserException(mark, msg);
}
} }
// PushToken
// . Appends a token of the given type, stamped with the current stream
//   mark, to the token queue.
// . Returns a pointer to the queued copy.
Token* Scanner::PushToken(Token::TYPE type) {
  const Token token(type, INPUT.mark());
  m_tokens.push(token);
  return &m_tokens.back();
}
// GetStartTokenFor
// . Maps an indent type to the token that opens that kind of block:
//   SEQ -> BLOCK_SEQ_START, MAP -> BLOCK_MAP_START.
// . Any other type (including NONE) is an internal error: it trips the
//   assert, and when asserts are compiled out it throws std::runtime_error.
Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  if (type == IndentMarker::SEQ)
    return Token::BLOCK_SEQ_START;
  if (type == IndentMarker::MAP)
    return Token::BLOCK_MAP_START;

  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
}
// PushIndentTo
// . In block context, records a new indentation level and enqueues the
//   matching start token (block sequence or block mapping).
// . Returns the stored marker, or 0 when nothing was pushed: in flow
//   context, when the new marker's column is smaller than the current
//   indent's, or when the columns are equal and this is not a sequence
//   on top of a mapping.
Scanner::IndentMarker* Scanner::PushIndentTo(int column,
                                             IndentMarker::INDENT_TYPE type) {
  // indentation is not tracked in flow context
  if (InFlowContext())
    return 0;

  std::auto_ptr<IndentMarker> pMarker(new IndentMarker(column, type));
  IndentMarker& marker = *pMarker;
  const IndentMarker& current = *m_indents.top();

  // decide whether this really opens a new level
  const bool shallower = marker.column < current.column;
  const bool sameColumn = marker.column == current.column;
  const bool seqOnMap =
      marker.type == IndentMarker::SEQ && current.type == IndentMarker::MAP;
  if (shallower || (sameColumn && !seqOnMap))
    return 0;

  // enqueue the start token; the marker remembers where it is
  marker.pStartToken = PushToken(GetStartTokenFor(type));

  // track the level on the indent stack, then store the marker itself
  m_indents.push(&marker);
  m_indentRefs.push_back(pMarker);
  return &m_indentRefs.back();
}
// PopIndentToHere
// . Pops indentations off the stack until we reach the current indentation
// level,
// and enqueues the proper token each time.
// . Then pops all invalid indentations off.
void Scanner::PopIndentToHere() {
// are we in flow?
if (InFlowContext())
return;
// now pop away
while (!m_indents.empty()) {
const IndentMarker& indent = *m_indents.top();
if (indent.column < INPUT.column())
break;
if (indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ &&
!Exp::BlockEntry().Matches(INPUT)))
break;
PopIndent();
}
while (!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID)
PopIndent();
}
// PopAllIndents
// . In block context, pops every indentation level down to (but not
//   including) the first marker of type NONE; each pop goes through
//   PopIndent().
void Scanner::PopAllIndents() {
  // indentation is not tracked in flow context
  if (InFlowContext())
    return;

  while (!m_indents.empty() && m_indents.top()->type != IndentMarker::NONE) {
    PopIndent();
  }
}
// PopIndent
// . Pops a single indent, pushing the proper token
void Scanner::PopIndent() {
const IndentMarker& indent = *m_indents.top();
m_indents.pop();
if (indent.status != IndentMarker::VALID) {
InvalidateSimpleKey();
return;
}
if (indent.type == IndentMarker::SEQ)
m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
else if (indent.type == IndentMarker::MAP)
m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
}
// GetTopIndent
// . Returns the column of the indentation level on top of the stack,
//   or 0 when the stack is empty.
int Scanner::GetTopIndent() const {
  return m_indents.empty() ? 0 : m_indents.top()->column;
}
// ThrowParserException
// . Throws a ParserException with the current token location
// (if available).
// . Does not parse any more tokens.
void Scanner::ThrowParserException(const std::string& msg) const {
Mark mark = Mark::null_mark();
if (!m_tokens.empty()) {
const Token& token = m_tokens.front();
mark = token.mark;
}
throw ParserException(mark, msg);
}
}
#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <ios> #include <ios>
#include <string> #include <string>
#include <queue> #include <queue>
...@@ -16,118 +17,127 @@ ...@@ -16,118 +17,127 @@
#include "stream.h" #include "stream.h"
#include "token.h" #include "token.h"
namespace YAML namespace YAML {
{ class Node;
class Node; class RegEx;
class RegEx;
class Scanner {
class Scanner public:
{ Scanner(std::istream &in);
public: ~Scanner();
Scanner(std::istream& in);
~Scanner(); // token queue management (hopefully this looks kinda stl-ish)
bool empty();
// token queue management (hopefully this looks kinda stl-ish) void pop();
bool empty(); Token &peek();
void pop(); Mark mark() const;
Token& peek();
Mark mark() const; private:
struct IndentMarker {
private: enum INDENT_TYPE {
struct IndentMarker { MAP,
enum INDENT_TYPE { MAP, SEQ, NONE }; SEQ,
enum STATUS { VALID, INVALID, UNKNOWN }; NONE
IndentMarker(int column_, INDENT_TYPE type_): column(column_), type(type_), status(VALID), pStartToken(0) {} };
enum STATUS {
int column; VALID,
INDENT_TYPE type; INVALID,
STATUS status; UNKNOWN
Token *pStartToken; };
}; IndentMarker(int column_, INDENT_TYPE type_)
: column(column_), type(type_), status(VALID), pStartToken(0) {}
enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
int column;
private: INDENT_TYPE type;
// scanning STATUS status;
void EnsureTokensInQueue(); Token *pStartToken;
void ScanNextToken(); };
void ScanToNextToken();
void StartStream(); enum FLOW_MARKER {
void EndStream(); FLOW_MAP,
Token *PushToken(Token::TYPE type); FLOW_SEQ
};
bool InFlowContext() const { return !m_flows.empty(); }
bool InBlockContext() const { return m_flows.empty(); } private:
int GetFlowLevel() const { return m_flows.size(); } // scanning
void EnsureTokensInQueue();
Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const; void ScanNextToken();
IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type); void ScanToNextToken();
void PopIndentToHere(); void StartStream();
void PopAllIndents(); void EndStream();
void PopIndent(); Token *PushToken(Token::TYPE type);
int GetTopIndent() const;
bool InFlowContext() const { return !m_flows.empty(); }
// checking input bool InBlockContext() const { return m_flows.empty(); }
bool CanInsertPotentialSimpleKey() const; int GetFlowLevel() const { return m_flows.size(); }
bool ExistsActiveSimpleKey() const;
void InsertPotentialSimpleKey(); Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
void InvalidateSimpleKey(); IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
bool VerifySimpleKey(); void PopIndentToHere();
void PopAllSimpleKeys(); void PopAllIndents();
void PopIndent();
void ThrowParserException(const std::string& msg) const; int GetTopIndent() const;
bool IsWhitespaceToBeEaten(char ch); // checking input
const RegEx& GetValueRegex() const; bool CanInsertPotentialSimpleKey() const;
bool ExistsActiveSimpleKey() const;
struct SimpleKey { void InsertPotentialSimpleKey();
SimpleKey(const Mark& mark_, int flowLevel_); void InvalidateSimpleKey();
bool VerifySimpleKey();
void Validate(); void PopAllSimpleKeys();
void Invalidate();
void ThrowParserException(const std::string &msg) const;
Mark mark;
int flowLevel; bool IsWhitespaceToBeEaten(char ch);
IndentMarker *pIndent; const RegEx &GetValueRegex() const;
Token *pMapStart, *pKey;
}; struct SimpleKey {
SimpleKey(const Mark &mark_, int flowLevel_);
// and the tokens
void ScanDirective(); void Validate();
void ScanDocStart(); void Invalidate();
void ScanDocEnd();
void ScanBlockSeqStart(); Mark mark;
void ScanBlockMapSTart(); int flowLevel;
void ScanBlockEnd(); IndentMarker *pIndent;
void ScanBlockEntry(); Token *pMapStart, *pKey;
void ScanFlowStart(); };
void ScanFlowEnd();
void ScanFlowEntry(); // and the tokens
void ScanKey(); void ScanDirective();
void ScanValue(); void ScanDocStart();
void ScanAnchorOrAlias(); void ScanDocEnd();
void ScanTag(); void ScanBlockSeqStart();
void ScanPlainScalar(); void ScanBlockMapSTart();
void ScanQuotedScalar(); void ScanBlockEnd();
void ScanBlockScalar(); void ScanBlockEntry();
void ScanFlowStart();
private: void ScanFlowEnd();
// the stream void ScanFlowEntry();
Stream INPUT; void ScanKey();
void ScanValue();
// the output (tokens) void ScanAnchorOrAlias();
std::queue<Token> m_tokens; void ScanTag();
void ScanPlainScalar();
// state info void ScanQuotedScalar();
bool m_startedStream, m_endedStream; void ScanBlockScalar();
bool m_simpleKeyAllowed;
bool m_canBeJSONFlow; private:
std::stack<SimpleKey> m_simpleKeys; // the stream
std::stack<IndentMarker *> m_indents; Stream INPUT;
ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
std::stack<FLOW_MARKER> m_flows; // the output (tokens)
}; std::queue<Token> m_tokens;
// state info
bool m_startedStream, m_endedStream;
bool m_simpleKeyAllowed;
bool m_canBeJSONFlow;
std::stack<SimpleKey> m_simpleKeys;
std::stack<IndentMarker *> m_indents;
ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection"
std::stack<FLOW_MARKER> m_flows;
};
} }
#endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -4,211 +4,215 @@ ...@@ -4,211 +4,215 @@
#include "yaml-cpp/exceptions.h" #include "yaml-cpp/exceptions.h"
#include "token.h" #include "token.h"
namespace YAML namespace YAML {
{ // ScanScalar
// ScanScalar // . This is where the scalar magic happens.
// . This is where the scalar magic happens. //
// // . We do the scanning in three phases:
// . We do the scanning in three phases: // 1. Scan until newline
// 1. Scan until newline // 2. Eat newline
// 2. Eat newline // 3. Scan leading blanks.
// 3. Scan leading blanks. //
// // . Depending on the parameters given, we store or stop
// . Depending on the parameters given, we store or stop // and different places in the above flow.
// and different places in the above flow. std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) bool foundNonEmptyLine = false;
{ bool pastOpeningBreak = (params.fold == FOLD_FLOW);
bool foundNonEmptyLine = false; bool emptyLine = false, moreIndented = false;
bool pastOpeningBreak = (params.fold == FOLD_FLOW); int foldedNewlineCount = 0;
bool emptyLine = false, moreIndented = false; bool foldedNewlineStartedMoreIndented = false;
int foldedNewlineCount = 0; std::size_t lastEscapedChar = std::string::npos;
bool foldedNewlineStartedMoreIndented = false; std::string scalar;
std::size_t lastEscapedChar = std::string::npos; params.leadingSpaces = false;
std::string scalar;
params.leadingSpaces = false; while (INPUT) {
// ********************************
while(INPUT) { // Phase #1: scan until line ending
// ********************************
// Phase #1: scan until line ending std::size_t lastNonWhitespaceChar = scalar.size();
bool escapedNewline = false;
std::size_t lastNonWhitespaceChar = scalar.size(); while (!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
bool escapedNewline = false; if (!INPUT)
while(!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) { break;
if(!INPUT)
break; // document indicator?
if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
// document indicator? if (params.onDocIndicator == BREAK)
if(INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) { break;
if(params.onDocIndicator == BREAK) else if (params.onDocIndicator == THROW)
break; throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
else if(params.onDocIndicator == THROW) }
throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
} foundNonEmptyLine = true;
pastOpeningBreak = true;
foundNonEmptyLine = true;
pastOpeningBreak = true; // escaped newline? (only if we're escaping on slash)
if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
// escaped newline? (only if we're escaping on slash) // eat escape character and get out (but preserve trailing whitespace!)
if(params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) { INPUT.get();
// eat escape character and get out (but preserve trailing whitespace!) lastNonWhitespaceChar = scalar.size();
INPUT.get(); lastEscapedChar = scalar.size();
lastNonWhitespaceChar = scalar.size(); escapedNewline = true;
lastEscapedChar = scalar.size(); break;
escapedNewline = true; }
break;
} // escape this?
if (INPUT.peek() == params.escape) {
// escape this? scalar += Exp::Escape(INPUT);
if(INPUT.peek() == params.escape) { lastNonWhitespaceChar = scalar.size();
scalar += Exp::Escape(INPUT); lastEscapedChar = scalar.size();
lastNonWhitespaceChar = scalar.size(); continue;
lastEscapedChar = scalar.size(); }
continue;
} // otherwise, just add the damn character
char ch = INPUT.get();
// otherwise, just add the damn character scalar += ch;
char ch = INPUT.get(); if (ch != ' ' && ch != '\t')
scalar += ch; lastNonWhitespaceChar = scalar.size();
if(ch != ' ' && ch != '\t') }
lastNonWhitespaceChar = scalar.size();
} // eof? if we're looking to eat something, then we throw
if (!INPUT) {
// eof? if we're looking to eat something, then we throw if (params.eatEnd)
if(!INPUT) { throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
if(params.eatEnd) break;
throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR); }
break;
} // doc indicator?
if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
// doc indicator? Exp::DocIndicator().Matches(INPUT))
if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) break;
break;
// are we done via character match?
// are we done via character match? int n = params.end.Match(INPUT);
int n = params.end.Match(INPUT); if (n >= 0) {
if(n >= 0) { if (params.eatEnd)
if(params.eatEnd) INPUT.eat(n);
INPUT.eat(n); break;
break; }
}
// do we remove trailing whitespace?
// do we remove trailing whitespace? if (params.fold == FOLD_FLOW)
if(params.fold == FOLD_FLOW) scalar.erase(lastNonWhitespaceChar);
scalar.erase(lastNonWhitespaceChar);
// ********************************
// ******************************** // Phase #2: eat line ending
// Phase #2: eat line ending n = Exp::Break().Match(INPUT);
n = Exp::Break().Match(INPUT); INPUT.eat(n);
INPUT.eat(n);
// ********************************
// ******************************** // Phase #3: scan initial spaces
// Phase #3: scan initial spaces
// first the required indentation
// first the required indentation while (INPUT.peek() == ' ' && (INPUT.column() < params.indent ||
while(INPUT.peek() == ' ' && (INPUT.column() < params.indent || (params.detectIndent && !foundNonEmptyLine))) (params.detectIndent && !foundNonEmptyLine)))
INPUT.eat(1); INPUT.eat(1);
// update indent if we're auto-detecting // update indent if we're auto-detecting
if(params.detectIndent && !foundNonEmptyLine) if (params.detectIndent && !foundNonEmptyLine)
params.indent = std::max(params.indent, INPUT.column()); params.indent = std::max(params.indent, INPUT.column());
// and then the rest of the whitespace // and then the rest of the whitespace
while(Exp::Blank().Matches(INPUT)) { while (Exp::Blank().Matches(INPUT)) {
// we check for tabs that masquerade as indentation // we check for tabs that masquerade as indentation
if(INPUT.peek() == '\t'&& INPUT.column() < params.indent && params.onTabInIndentation == THROW) if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION); params.onTabInIndentation == THROW)
throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
if(!params.eatLeadingWhitespace)
break; if (!params.eatLeadingWhitespace)
break;
INPUT.eat(1);
} INPUT.eat(1);
}
// was this an empty line?
bool nextEmptyLine = Exp::Break().Matches(INPUT); // was this an empty line?
bool nextMoreIndented = Exp::Blank().Matches(INPUT); bool nextEmptyLine = Exp::Break().Matches(INPUT);
if(params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine) bool nextMoreIndented = Exp::Blank().Matches(INPUT);
foldedNewlineStartedMoreIndented = moreIndented; if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
foldedNewlineStartedMoreIndented = moreIndented;
// for block scalars, we always start with a newline, so we should ignore it (not fold or keep)
if(pastOpeningBreak) { // for block scalars, we always start with a newline, so we should ignore it
switch(params.fold) { // (not fold or keep)
case DONT_FOLD: if (pastOpeningBreak) {
scalar += "\n"; switch (params.fold) {
break; case DONT_FOLD:
case FOLD_BLOCK: scalar += "\n";
if(!emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented && INPUT.column() >= params.indent) break;
scalar += " "; case FOLD_BLOCK:
else if(nextEmptyLine) if (!emptyLine && !nextEmptyLine && !moreIndented &&
foldedNewlineCount++; !nextMoreIndented && INPUT.column() >= params.indent)
else scalar += " ";
scalar += "\n"; else if (nextEmptyLine)
foldedNewlineCount++;
if(!nextEmptyLine && foldedNewlineCount > 0) { else
scalar += std::string(foldedNewlineCount - 1, '\n'); scalar += "\n";
if(foldedNewlineStartedMoreIndented || nextMoreIndented | !foundNonEmptyLine)
scalar += "\n"; if (!nextEmptyLine && foldedNewlineCount > 0) {
foldedNewlineCount = 0; scalar += std::string(foldedNewlineCount - 1, '\n');
} if (foldedNewlineStartedMoreIndented ||
break; nextMoreIndented | !foundNonEmptyLine)
case FOLD_FLOW: scalar += "\n";
if(nextEmptyLine) foldedNewlineCount = 0;
scalar += "\n"; }
else if(!emptyLine && !nextEmptyLine && !escapedNewline) break;
scalar += " "; case FOLD_FLOW:
break; if (nextEmptyLine)
} scalar += "\n";
} else if (!emptyLine && !nextEmptyLine && !escapedNewline)
scalar += " ";
emptyLine = nextEmptyLine; break;
moreIndented = nextMoreIndented; }
pastOpeningBreak = true; }
// are we done via indentation? emptyLine = nextEmptyLine;
if(!emptyLine && INPUT.column() < params.indent) { moreIndented = nextMoreIndented;
params.leadingSpaces = true; pastOpeningBreak = true;
break;
} // are we done via indentation?
} if (!emptyLine && INPUT.column() < params.indent) {
params.leadingSpaces = true;
// post-processing break;
if(params.trimTrailingSpaces) { }
std::size_t pos = scalar.find_last_not_of(' '); }
if(lastEscapedChar != std::string::npos) {
if(pos < lastEscapedChar || pos == std::string::npos) // post-processing
pos = lastEscapedChar; if (params.trimTrailingSpaces) {
} std::size_t pos = scalar.find_last_not_of(' ');
if(pos < scalar.size()) if (lastEscapedChar != std::string::npos) {
scalar.erase(pos + 1); if (pos < lastEscapedChar || pos == std::string::npos)
} pos = lastEscapedChar;
}
switch(params.chomp) { if (pos < scalar.size())
case CLIP: { scalar.erase(pos + 1);
std::size_t pos = scalar.find_last_not_of('\n'); }
if(lastEscapedChar != std::string::npos) {
if(pos < lastEscapedChar || pos == std::string::npos) switch (params.chomp) {
pos = lastEscapedChar; case CLIP: {
} std::size_t pos = scalar.find_last_not_of('\n');
if(pos == std::string::npos) if (lastEscapedChar != std::string::npos) {
scalar.erase(); if (pos < lastEscapedChar || pos == std::string::npos)
else if(pos + 1 < scalar.size()) pos = lastEscapedChar;
scalar.erase(pos + 2); }
} break; if (pos == std::string::npos)
case STRIP: { scalar.erase();
std::size_t pos = scalar.find_last_not_of('\n'); else if (pos + 1 < scalar.size())
if(lastEscapedChar != std::string::npos) { scalar.erase(pos + 2);
if(pos < lastEscapedChar || pos == std::string::npos) } break;
pos = lastEscapedChar; case STRIP: {
} std::size_t pos = scalar.find_last_not_of('\n');
if(pos == std::string::npos) if (lastEscapedChar != std::string::npos) {
scalar.erase(); if (pos < lastEscapedChar || pos == std::string::npos)
else if(pos < scalar.size()) pos = lastEscapedChar;
scalar.erase(pos + 1); }
} break; if (pos == std::string::npos)
default: scalar.erase();
break; else if (pos < scalar.size())
} scalar.erase(pos + 1);
} break;
return scalar; default:
} break;
}
return scalar;
}
} }
#ifndef SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <string> #include <string>
#include "regex.h" #include "regex.h"
#include "stream.h" #include "stream.h"
namespace YAML namespace YAML {
{ enum CHOMP {
enum CHOMP { STRIP = -1, CLIP, KEEP }; STRIP = -1,
enum ACTION { NONE, BREAK, THROW }; CLIP,
enum FOLD { DONT_FOLD, FOLD_BLOCK, FOLD_FLOW }; KEEP
};
struct ScanScalarParams { enum ACTION {
ScanScalarParams(): eatEnd(false), indent(0), detectIndent(false), eatLeadingWhitespace(0), escape(0), fold(DONT_FOLD), NONE,
trimTrailingSpaces(0), chomp(CLIP), onDocIndicator(NONE), onTabInIndentation(NONE), leadingSpaces(false) {} BREAK,
THROW
// input: };
RegEx end; // what condition ends this scalar? enum FOLD {
bool eatEnd; // should we eat that condition when we see it? DONT_FOLD,
int indent; // what level of indentation should be eaten and ignored? FOLD_BLOCK,
bool detectIndent; // should we try to autodetect the indent? FOLD_FLOW
bool eatLeadingWhitespace; // should we continue eating this delicious indentation after 'indent' spaces? };
char escape; // what character do we escape on (i.e., slash or single quote) (0 for none)
FOLD fold; // how do we fold line ends? struct ScanScalarParams {
bool trimTrailingSpaces; // do we remove all trailing spaces (at the very end) ScanScalarParams()
CHOMP chomp; // do we strip, clip, or keep trailing newlines (at the very end) : eatEnd(false),
// Note: strip means kill all, clip means keep at most one, keep means keep all indent(0),
ACTION onDocIndicator; // what do we do if we see a document indicator? detectIndent(false),
ACTION onTabInIndentation; // what do we do if we see a tab where we should be seeing indentation spaces eatLeadingWhitespace(0),
escape(0),
// output: fold(DONT_FOLD),
bool leadingSpaces; trimTrailingSpaces(0),
}; chomp(CLIP),
onDocIndicator(NONE),
std::string ScanScalar(Stream& INPUT, ScanScalarParams& info); onTabInIndentation(NONE),
} leadingSpaces(false) {}
#endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66 // input:
RegEx end; // what condition ends this scalar?
bool eatEnd; // should we eat that condition when we see it?
int indent; // what level of indentation should be eaten and ignored?
bool detectIndent; // should we try to autodetect the indent?
bool eatLeadingWhitespace; // should we continue eating this delicious
// indentation after 'indent' spaces?
char escape; // what character do we escape on (i.e., slash or single quote)
// (0 for none)
FOLD fold; // how do we fold line ends?
bool trimTrailingSpaces; // do we remove all trailing spaces (at the very
// end)
CHOMP chomp; // do we strip, clip, or keep trailing newlines (at the very
// end)
// Note: strip means kill all, clip means keep at most one, keep means keep
// all
ACTION onDocIndicator; // what do we do if we see a document indicator?
ACTION onTabInIndentation; // what do we do if we see a tab where we should
// be seeing indentation spaces
// output:
bool leadingSpaces;
};
std::string ScanScalar(Stream& INPUT, ScanScalarParams& info);
}
#endif // SCANSCALAR_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -3,82 +3,77 @@ ...@@ -3,82 +3,77 @@
#include "exp.h" #include "exp.h"
#include "yaml-cpp/exceptions.h" #include "yaml-cpp/exceptions.h"
namespace YAML namespace YAML {
{ const std::string ScanVerbatimTag(Stream& INPUT) {
const std::string ScanVerbatimTag(Stream& INPUT) std::string tag;
{
std::string tag; // eat the start character
INPUT.get();
// eat the start character
INPUT.get(); while (INPUT) {
if (INPUT.peek() == Keys::VerbatimTagEnd) {
while(INPUT) { // eat the end character
if(INPUT.peek() == Keys::VerbatimTagEnd) { INPUT.get();
// eat the end character return tag;
INPUT.get(); }
return tag;
} int n = Exp::URI().Match(INPUT);
if (n <= 0)
int n = Exp::URI().Match(INPUT); break;
if(n <= 0)
break; tag += INPUT.get(n);
}
tag += INPUT.get(n);
} throw ParserException(INPUT.mark(), ErrorMsg::END_OF_VERBATIM_TAG);
throw ParserException(INPUT.mark(), ErrorMsg::END_OF_VERBATIM_TAG);
}
const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle)
{
std::string tag;
canBeHandle = true;
Mark firstNonWordChar;
while(INPUT) {
if(INPUT.peek() == Keys::Tag) {
if(!canBeHandle)
throw ParserException(firstNonWordChar, ErrorMsg::CHAR_IN_TAG_HANDLE);
break;
}
int n = 0;
if(canBeHandle) {
n = Exp::Word().Match(INPUT);
if(n <= 0) {
canBeHandle = false;
firstNonWordChar = INPUT.mark();
}
}
if(!canBeHandle)
n = Exp::Tag().Match(INPUT);
if(n <= 0)
break;
tag += INPUT.get(n);
}
return tag;
}
const std::string ScanTagSuffix(Stream& INPUT)
{
std::string tag;
while(INPUT) {
int n = Exp::Tag().Match(INPUT);
if(n <= 0)
break;
tag += INPUT.get(n);
}
if(tag.empty())
throw ParserException(INPUT.mark(), ErrorMsg::TAG_WITH_NO_SUFFIX);
return tag;
}
} }
// ScanTagHandle
// . Reads characters from INPUT and returns them, stopping at the first
//   Keys::Tag character (which is left unread), at end of input, or when
//   nothing more matches.
// . Starts out matching Exp::Word(); once that fails, 'canBeHandle' is set to
//   false and matching continues with Exp::Tag() instead.
// . Throws ParserException (CHAR_IN_TAG_HANDLE), marked at the first
//   non-word character, if Keys::Tag shows up after 'canBeHandle' was
//   cleared.
const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle) {
  std::string handle;
  Mark firstNonWordChar;
  canBeHandle = true;

  while (INPUT) {
    if (INPUT.peek() == Keys::Tag) {
      // reaching the tag character is only fine if everything so far was a
      // word character
      if (canBeHandle)
        break;
      throw ParserException(firstNonWordChar, ErrorMsg::CHAR_IN_TAG_HANDLE);
    }

    int length = 0;
    if (canBeHandle) {
      length = Exp::Word().Match(INPUT);
      if (length <= 0) {
        // remember where the word characters stopped
        canBeHandle = false;
        firstNonWordChar = INPUT.mark();
      }
    }

    // no longer a possible handle: accept general tag characters
    if (!canBeHandle)
      length = Exp::Tag().Match(INPUT);

    if (length <= 0)
      break;

    handle += INPUT.get(length);
  }

  return handle;
}
// ScanTagSuffix
// . Reads from INPUT for as long as Exp::Tag() keeps matching and returns
//   what was read.
// . Throws ParserException (TAG_WITH_NO_SUFFIX) at the current input mark if
//   nothing was read.
const std::string ScanTagSuffix(Stream& INPUT) {
  std::string suffix;

  while (INPUT) {
    const int length = Exp::Tag().Match(INPUT);
    if (length <= 0)
      break;

    suffix += INPUT.get(length);
  }

  if (suffix.empty())
    throw ParserException(INPUT.mark(), ErrorMsg::TAG_WITH_NO_SUFFIX);

  return suffix;
}
}
#ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <string> #include <string>
#include "stream.h" #include "stream.h"
namespace YAML namespace YAML {
{ const std::string ScanVerbatimTag(Stream& INPUT);
const std::string ScanVerbatimTag(Stream& INPUT); const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle);
const std::string ScanTagHandle(Stream& INPUT, bool& canBeHandle); const std::string ScanTagSuffix(Stream& INPUT);
const std::string ScanTagSuffix(Stream& INPUT);
} }
#endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // SCANTAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -7,433 +7,426 @@ ...@@ -7,433 +7,426 @@
#include "tag.h" #include "tag.h"
#include <sstream> #include <sstream>
namespace YAML namespace YAML {
{ ///////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////// // Specialization for scanning specific tokens
// Specialization for scanning specific tokens
// Directive
// Directive // . Note: no semantic checking is done here (that's for the parser to do)
// . Note: no semantic checking is done here (that's for the parser to do) void Scanner::ScanDirective() {
void Scanner::ScanDirective() std::string name;
{ std::vector<std::string> params;
std::string name;
std::vector <std::string> params; // pop indents and simple keys
PopAllIndents();
// pop indents and simple keys PopAllSimpleKeys();
PopAllIndents();
PopAllSimpleKeys(); m_simpleKeyAllowed = false;
m_canBeJSONFlow = false;
m_simpleKeyAllowed = false;
m_canBeJSONFlow = false; // store pos and eat indicator
Token token(Token::DIRECTIVE, INPUT.mark());
// store pos and eat indicator INPUT.eat(1);
Token token(Token::DIRECTIVE, INPUT.mark());
INPUT.eat(1); // read name
while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
// read name token.value += INPUT.get();
while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
token.value += INPUT.get(); // read parameters
while (1) {
// read parameters // first get rid of whitespace
while(1) { while (Exp::Blank().Matches(INPUT))
// first get rid of whitespace INPUT.eat(1);
while(Exp::Blank().Matches(INPUT))
INPUT.eat(1); // break on newline or comment
if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
// break on newline or comment break;
if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
break; // now read parameter
std::string param;
// now read parameter while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
std::string param; param += INPUT.get();
while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
param += INPUT.get(); token.params.push_back(param);
}
token.params.push_back(param);
} m_tokens.push(token);
}
m_tokens.push(token);
} // DocStart
void Scanner::ScanDocStart() {
// DocStart PopAllIndents();
void Scanner::ScanDocStart() PopAllSimpleKeys();
{ m_simpleKeyAllowed = false;
PopAllIndents(); m_canBeJSONFlow = false;
PopAllSimpleKeys();
m_simpleKeyAllowed = false; // eat
m_canBeJSONFlow = false; Mark mark = INPUT.mark();
INPUT.eat(3);
// eat m_tokens.push(Token(Token::DOC_START, mark));
Mark mark = INPUT.mark(); }
INPUT.eat(3);
m_tokens.push(Token(Token::DOC_START, mark)); // DocEnd
} void Scanner::ScanDocEnd() {
PopAllIndents();
// DocEnd PopAllSimpleKeys();
void Scanner::ScanDocEnd() m_simpleKeyAllowed = false;
{ m_canBeJSONFlow = false;
PopAllIndents();
PopAllSimpleKeys(); // eat
m_simpleKeyAllowed = false; Mark mark = INPUT.mark();
m_canBeJSONFlow = false; INPUT.eat(3);
m_tokens.push(Token(Token::DOC_END, mark));
// eat }
Mark mark = INPUT.mark();
INPUT.eat(3); // FlowStart
m_tokens.push(Token(Token::DOC_END, mark)); void Scanner::ScanFlowStart() {
} // flows can be simple keys
InsertPotentialSimpleKey();
// FlowStart m_simpleKeyAllowed = true;
void Scanner::ScanFlowStart() m_canBeJSONFlow = false;
{
// flows can be simple keys // eat
InsertPotentialSimpleKey(); Mark mark = INPUT.mark();
m_simpleKeyAllowed = true; char ch = INPUT.get();
m_canBeJSONFlow = false; FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
m_flows.push(flowType);
// eat Token::TYPE type =
Mark mark = INPUT.mark(); (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
char ch = INPUT.get(); m_tokens.push(Token(type, mark));
FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP); }
m_flows.push(flowType);
Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START); // FlowEnd
m_tokens.push(Token(type, mark)); void Scanner::ScanFlowEnd() {
} if (InBlockContext())
throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
// FlowEnd
void Scanner::ScanFlowEnd() // we might have a solo entry in the flow context
{ if (InFlowContext()) {
if(InBlockContext()) if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END); m_tokens.push(Token(Token::VALUE, INPUT.mark()));
else if (m_flows.top() == FLOW_SEQ)
// we might have a solo entry in the flow context InvalidateSimpleKey();
if(InFlowContext()) { }
if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
m_tokens.push(Token(Token::VALUE, INPUT.mark())); m_simpleKeyAllowed = false;
else if(m_flows.top() == FLOW_SEQ) m_canBeJSONFlow = true;
InvalidateSimpleKey();
} // eat
Mark mark = INPUT.mark();
m_simpleKeyAllowed = false; char ch = INPUT.get();
m_canBeJSONFlow = true;
// check that it matches the start
// eat FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
Mark mark = INPUT.mark(); if (m_flows.top() != flowType)
char ch = INPUT.get(); throw ParserException(mark, ErrorMsg::FLOW_END);
m_flows.pop();
// check that it matches the start
FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP); Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
if(m_flows.top() != flowType) m_tokens.push(Token(type, mark));
throw ParserException(mark, ErrorMsg::FLOW_END); }
m_flows.pop();
// FlowEntry
Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END); void Scanner::ScanFlowEntry() {
m_tokens.push(Token(type, mark)); // we might have a solo entry in the flow context
} if (InFlowContext()) {
if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
// FlowEntry m_tokens.push(Token(Token::VALUE, INPUT.mark()));
void Scanner::ScanFlowEntry() else if (m_flows.top() == FLOW_SEQ)
{ InvalidateSimpleKey();
// we might have a solo entry in the flow context }
if(InFlowContext()) {
if(m_flows.top() == FLOW_MAP && VerifySimpleKey()) m_simpleKeyAllowed = true;
m_tokens.push(Token(Token::VALUE, INPUT.mark())); m_canBeJSONFlow = false;
else if(m_flows.top() == FLOW_SEQ)
InvalidateSimpleKey(); // eat
} Mark mark = INPUT.mark();
INPUT.eat(1);
m_simpleKeyAllowed = true; m_tokens.push(Token(Token::FLOW_ENTRY, mark));
m_canBeJSONFlow = false; }
// eat // BlockEntry
Mark mark = INPUT.mark(); void Scanner::ScanBlockEntry() {
INPUT.eat(1); // we better be in the block context!
m_tokens.push(Token(Token::FLOW_ENTRY, mark)); if (InFlowContext())
} throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
// BlockEntry // can we put it here?
void Scanner::ScanBlockEntry() if (!m_simpleKeyAllowed)
{ throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
// we better be in the block context!
if(InFlowContext()) PushIndentTo(INPUT.column(), IndentMarker::SEQ);
throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
// can we put it here?
if(!m_simpleKeyAllowed) // eat
throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY); Mark mark = INPUT.mark();
INPUT.eat(1);
PushIndentTo(INPUT.column(), IndentMarker::SEQ); m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
m_simpleKeyAllowed = true; }
m_canBeJSONFlow = false;
// Key
// eat void Scanner::ScanKey() {
Mark mark = INPUT.mark(); // handle keys diffently in the block context (and manage indents)
INPUT.eat(1); if (InBlockContext()) {
m_tokens.push(Token(Token::BLOCK_ENTRY, mark)); if (!m_simpleKeyAllowed)
} throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
// Key PushIndentTo(INPUT.column(), IndentMarker::MAP);
void Scanner::ScanKey() }
{
// handle keys diffently in the block context (and manage indents) // can only put a simple key here if we're in block context
if(InBlockContext()) { m_simpleKeyAllowed = InBlockContext();
if(!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY); // eat
Mark mark = INPUT.mark();
PushIndentTo(INPUT.column(), IndentMarker::MAP); INPUT.eat(1);
} m_tokens.push(Token(Token::KEY, mark));
}
// can only put a simple key here if we're in block context
m_simpleKeyAllowed = InBlockContext(); // Value
void Scanner::ScanValue() {
// eat // and check that simple key
Mark mark = INPUT.mark(); bool isSimpleKey = VerifySimpleKey();
INPUT.eat(1); m_canBeJSONFlow = false;
m_tokens.push(Token(Token::KEY, mark));
} if (isSimpleKey) {
// can't follow a simple key with another simple key (dunno why, though - it
// Value // seems fine)
void Scanner::ScanValue() m_simpleKeyAllowed = false;
{ } else {
// and check that simple key // handle values diffently in the block context (and manage indents)
bool isSimpleKey = VerifySimpleKey(); if (InBlockContext()) {
m_canBeJSONFlow = false; if (!m_simpleKeyAllowed)
throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
if(isSimpleKey) {
// can't follow a simple key with another simple key (dunno why, though - it seems fine) PushIndentTo(INPUT.column(), IndentMarker::MAP);
m_simpleKeyAllowed = false; }
} else {
// handle values diffently in the block context (and manage indents) // can only put a simple key here if we're in block context
if(InBlockContext()) { m_simpleKeyAllowed = InBlockContext();
if(!m_simpleKeyAllowed) }
throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
// eat
PushIndentTo(INPUT.column(), IndentMarker::MAP); Mark mark = INPUT.mark();
} INPUT.eat(1);
m_tokens.push(Token(Token::VALUE, mark));
// can only put a simple key here if we're in block context }
m_simpleKeyAllowed = InBlockContext();
} // AnchorOrAlias
void Scanner::ScanAnchorOrAlias() {
// eat bool alias;
Mark mark = INPUT.mark(); std::string name;
INPUT.eat(1);
m_tokens.push(Token(Token::VALUE, mark)); // insert a potential simple key
} InsertPotentialSimpleKey();
m_simpleKeyAllowed = false;
// AnchorOrAlias m_canBeJSONFlow = false;
void Scanner::ScanAnchorOrAlias()
{ // eat the indicator
bool alias; Mark mark = INPUT.mark();
std::string name; char indicator = INPUT.get();
alias = (indicator == Keys::Alias);
// insert a potential simple key
InsertPotentialSimpleKey(); // now eat the content
m_simpleKeyAllowed = false; while (INPUT && Exp::Anchor().Matches(INPUT))
m_canBeJSONFlow = false; name += INPUT.get();
// eat the indicator // we need to have read SOMETHING!
Mark mark = INPUT.mark(); if (name.empty())
char indicator = INPUT.get(); throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
alias = (indicator == Keys::Alias); : ErrorMsg::ANCHOR_NOT_FOUND);
// now eat the content // and needs to end correctly
while(INPUT && Exp::Anchor().Matches(INPUT)) if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
name += INPUT.get(); throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
: ErrorMsg::CHAR_IN_ANCHOR);
// we need to have read SOMETHING!
if(name.empty()) // and we're done
throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND); Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
token.value = name;
// and needs to end correctly m_tokens.push(token);
if(INPUT && !Exp::AnchorEnd().Matches(INPUT)) }
throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
// Tag
// and we're done void Scanner::ScanTag() {
Token token(alias ? Token::ALIAS : Token::ANCHOR, mark); // insert a potential simple key
token.value = name; InsertPotentialSimpleKey();
m_tokens.push(token); m_simpleKeyAllowed = false;
} m_canBeJSONFlow = false;
// Tag Token token(Token::TAG, INPUT.mark());
void Scanner::ScanTag()
{ // eat the indicator
// insert a potential simple key INPUT.get();
InsertPotentialSimpleKey();
m_simpleKeyAllowed = false; if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
m_canBeJSONFlow = false; std::string tag = ScanVerbatimTag(INPUT);
Token token(Token::TAG, INPUT.mark()); token.value = tag;
token.data = Tag::VERBATIM;
// eat the indicator } else {
INPUT.get(); bool canBeHandle;
token.value = ScanTagHandle(INPUT, canBeHandle);
if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){ if (!canBeHandle && token.value.empty())
std::string tag = ScanVerbatimTag(INPUT); token.data = Tag::NON_SPECIFIC;
else if (token.value.empty())
token.value = tag; token.data = Tag::SECONDARY_HANDLE;
token.data = Tag::VERBATIM; else
} else { token.data = Tag::PRIMARY_HANDLE;
bool canBeHandle;
token.value = ScanTagHandle(INPUT, canBeHandle); // is there a suffix?
if(!canBeHandle && token.value.empty()) if (canBeHandle && INPUT.peek() == Keys::Tag) {
token.data = Tag::NON_SPECIFIC; // eat the indicator
else if(token.value.empty()) INPUT.get();
token.data = Tag::SECONDARY_HANDLE; token.params.push_back(ScanTagSuffix(INPUT));
else token.data = Tag::NAMED_HANDLE;
token.data = Tag::PRIMARY_HANDLE; }
}
// is there a suffix?
if(canBeHandle && INPUT.peek() == Keys::Tag) { m_tokens.push(token);
// eat the indicator }
INPUT.get();
token.params.push_back(ScanTagSuffix(INPUT)); // PlainScalar
token.data = Tag::NAMED_HANDLE; void Scanner::ScanPlainScalar() {
} std::string scalar;
}
// set up the scanning parameters
m_tokens.push(token); ScanScalarParams params;
} params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) ||
(Exp::BlankOrBreak() + Exp::Comment());
// PlainScalar params.eatEnd = false;
void Scanner::ScanPlainScalar() params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
{ params.fold = FOLD_FLOW;
std::string scalar; params.eatLeadingWhitespace = true;
params.trimTrailingSpaces = true;
// set up the scanning parameters params.chomp = STRIP;
ScanScalarParams params; params.onDocIndicator = BREAK;
params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment()); params.onTabInIndentation = THROW;
params.eatEnd = false;
params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1); // insert a potential simple key
params.fold = FOLD_FLOW; InsertPotentialSimpleKey();
params.eatLeadingWhitespace = true;
params.trimTrailingSpaces = true; Mark mark = INPUT.mark();
params.chomp = STRIP; scalar = ScanScalar(INPUT, params);
params.onDocIndicator = BREAK;
params.onTabInIndentation = THROW; // can have a simple key only if we ended the scalar by starting a new line
m_simpleKeyAllowed = params.leadingSpaces;
// insert a potential simple key m_canBeJSONFlow = false;
InsertPotentialSimpleKey();
// finally, check and see if we ended on an illegal character
Mark mark = INPUT.mark(); // if(Exp::IllegalCharInScalar.Matches(INPUT))
scalar = ScanScalar(INPUT, params); // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
// can have a simple key only if we ended the scalar by starting a new line Token token(Token::PLAIN_SCALAR, mark);
m_simpleKeyAllowed = params.leadingSpaces; token.value = scalar;
m_canBeJSONFlow = false; m_tokens.push(token);
}
// finally, check and see if we ended on an illegal character
//if(Exp::IllegalCharInScalar.Matches(INPUT)) // QuotedScalar
// throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR); void Scanner::ScanQuotedScalar() {
std::string scalar;
Token token(Token::PLAIN_SCALAR, mark);
token.value = scalar; // peek at single or double quote (don't eat because we need to preserve (for
m_tokens.push(token); // the time being) the input position)
} char quote = INPUT.peek();
bool single = (quote == '\'');
// QuotedScalar
void Scanner::ScanQuotedScalar() // setup the scanning parameters
{ ScanScalarParams params;
std::string scalar; params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
params.eatEnd = true;
// peek at single or double quote (don't eat because we need to preserve (for the time being) the input position) params.escape = (single ? '\'' : '\\');
char quote = INPUT.peek(); params.indent = 0;
bool single = (quote == '\''); params.fold = FOLD_FLOW;
params.eatLeadingWhitespace = true;
// setup the scanning parameters params.trimTrailingSpaces = false;
ScanScalarParams params; params.chomp = CLIP;
params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote)); params.onDocIndicator = THROW;
params.eatEnd = true;
params.escape = (single ? '\'' : '\\'); // insert a potential simple key
params.indent = 0; InsertPotentialSimpleKey();
params.fold = FOLD_FLOW;
params.eatLeadingWhitespace = true; Mark mark = INPUT.mark();
params.trimTrailingSpaces = false;
params.chomp = CLIP; // now eat that opening quote
params.onDocIndicator = THROW; INPUT.get();
// insert a potential simple key // and scan
InsertPotentialSimpleKey(); scalar = ScanScalar(INPUT, params);
m_simpleKeyAllowed = false;
Mark mark = INPUT.mark(); m_canBeJSONFlow = true;
// now eat that opening quote Token token(Token::NON_PLAIN_SCALAR, mark);
INPUT.get(); token.value = scalar;
m_tokens.push(token);
// and scan }
scalar = ScanScalar(INPUT, params);
m_simpleKeyAllowed = false; // BlockScalarToken
m_canBeJSONFlow = true; // . These need a little extra processing beforehand.
// . We need to scan the line where the indicator is (this doesn't count as part
Token token(Token::NON_PLAIN_SCALAR, mark); // of the scalar),
token.value = scalar; // and then we need to figure out what level of indentation we'll be using.
m_tokens.push(token); void Scanner::ScanBlockScalar() {
} std::string scalar;
// BlockScalarToken ScanScalarParams params;
// . These need a little extra processing beforehand. params.indent = 1;
// . We need to scan the line where the indicator is (this doesn't count as part of the scalar), params.detectIndent = true;
// and then we need to figure out what level of indentation we'll be using.
void Scanner::ScanBlockScalar() // eat block indicator ('|' or '>')
{ Mark mark = INPUT.mark();
std::string scalar; char indicator = INPUT.get();
params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
ScanScalarParams params;
params.indent = 1; // eat chomping/indentation indicators
params.detectIndent = true; params.chomp = CLIP;
int n = Exp::Chomp().Match(INPUT);
// eat block indicator ('|' or '>') for (int i = 0; i < n; i++) {
Mark mark = INPUT.mark(); char ch = INPUT.get();
char indicator = INPUT.get(); if (ch == '+')
params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD); params.chomp = KEEP;
else if (ch == '-')
// eat chomping/indentation indicators params.chomp = STRIP;
params.chomp = CLIP; else if (Exp::Digit().Matches(ch)) {
int n = Exp::Chomp().Match(INPUT); if (ch == '0')
for(int i=0;i<n;i++) { throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
char ch = INPUT.get();
if(ch == '+') params.indent = ch - '0';
params.chomp = KEEP; params.detectIndent = false;
else if(ch == '-') }
params.chomp = STRIP; }
else if(Exp::Digit().Matches(ch)) {
if(ch == '0') // now eat whitespace
throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK); while (Exp::Blank().Matches(INPUT))
INPUT.eat(1);
params.indent = ch - '0';
params.detectIndent = false; // and comments to the end of the line
} if (Exp::Comment().Matches(INPUT))
} while (INPUT && !Exp::Break().Matches(INPUT))
INPUT.eat(1);
// now eat whitespace
while(Exp::Blank().Matches(INPUT)) // if it's not a line break, then we ran into a bad character inline
INPUT.eat(1); if (INPUT && !Exp::Break().Matches(INPUT))
throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
// and comments to the end of the line
if(Exp::Comment().Matches(INPUT)) // set the initial indentation
while(INPUT && !Exp::Break().Matches(INPUT)) if (GetTopIndent() >= 0)
INPUT.eat(1); params.indent += GetTopIndent();
// if it's not a line break, then we ran into a bad character inline params.eatLeadingWhitespace = false;
if(INPUT && !Exp::Break().Matches(INPUT)) params.trimTrailingSpaces = false;
throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK); params.onTabInIndentation = THROW;
// set the initial indentation scalar = ScanScalar(INPUT, params);
if(GetTopIndent() >= 0)
params.indent += GetTopIndent(); // simple keys always ok after block scalars (since we're gonna start a new
// line anyways)
params.eatLeadingWhitespace = false; m_simpleKeyAllowed = true;
params.trimTrailingSpaces = false; m_canBeJSONFlow = false;
params.onTabInIndentation = THROW;
Token token(Token::NON_PLAIN_SCALAR, mark);
scalar = ScanScalar(INPUT, params); token.value = scalar;
m_tokens.push(token);
// simple keys always ok after block scalars (since we're gonna start a new line anyways) }
m_simpleKeyAllowed = true;
m_canBeJSONFlow = false;
Token token(Token::NON_PLAIN_SCALAR, mark);
token.value = scalar;
m_tokens.push(token);
}
} }
#ifndef SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "yaml-cpp/noncopyable.h" #include "yaml-cpp/noncopyable.h"
namespace YAML namespace YAML {
{ class SettingChangeBase;
class SettingChangeBase;
template <typename T>
template <typename T> class Setting {
class Setting public:
{ Setting() : m_value() {}
public:
Setting(): m_value() {} const T get() const { return m_value; }
std::auto_ptr<SettingChangeBase> set(const T& value);
const T get() const { return m_value; } void restore(const Setting<T>& oldSetting) { m_value = oldSetting.get(); }
std::auto_ptr <SettingChangeBase> set(const T& value);
void restore(const Setting<T>& oldSetting) { private:
m_value = oldSetting.get(); T m_value;
} };
private: class SettingChangeBase {
T m_value; public:
}; virtual ~SettingChangeBase() {}
virtual void pop() = 0;
class SettingChangeBase };
{
public: template <typename T>
virtual ~SettingChangeBase() {} class SettingChange : public SettingChangeBase {
virtual void pop() = 0; public:
}; SettingChange(Setting<T>* pSetting) : m_pCurSetting(pSetting) {
// copy old setting to save its state
template <typename T> m_oldSetting = *pSetting;
class SettingChange: public SettingChangeBase }
{
public: virtual void pop() { m_pCurSetting->restore(m_oldSetting); }
SettingChange(Setting<T> *pSetting): m_pCurSetting(pSetting) {
// copy old setting to save its state private:
m_oldSetting = *pSetting; Setting<T>* m_pCurSetting;
} Setting<T> m_oldSetting;
};
virtual void pop() {
m_pCurSetting->restore(m_oldSetting); template <typename T>
} inline std::auto_ptr<SettingChangeBase> Setting<T>::set(const T& value) {
std::auto_ptr<SettingChangeBase> pChange(new SettingChange<T>(this));
private: m_value = value;
Setting<T> *m_pCurSetting; return pChange;
Setting<T> m_oldSetting; }
};
class SettingChanges : private noncopyable {
template <typename T> public:
inline std::auto_ptr <SettingChangeBase> Setting<T>::set(const T& value) { SettingChanges() {}
std::auto_ptr <SettingChangeBase> pChange(new SettingChange<T> (this)); ~SettingChanges() { clear(); }
m_value = value;
return pChange; void clear() {
} restore();
class SettingChanges: private noncopyable for (setting_changes::const_iterator it = m_settingChanges.begin();
{ it != m_settingChanges.end(); ++it)
public: delete *it;
SettingChanges() {} m_settingChanges.clear();
~SettingChanges() { clear(); } }
void clear() { void restore() {
restore(); for (setting_changes::const_iterator it = m_settingChanges.begin();
it != m_settingChanges.end(); ++it)
for(setting_changes::const_iterator it=m_settingChanges.begin();it!=m_settingChanges.end();++it) (*it)->pop();
delete *it; }
m_settingChanges.clear();
} void push(std::auto_ptr<SettingChangeBase> pSettingChange) {
m_settingChanges.push_back(pSettingChange.release());
void restore() { }
for(setting_changes::const_iterator it=m_settingChanges.begin();it!=m_settingChanges.end();++it)
(*it)->pop(); // like std::auto_ptr - assignment is transfer of ownership
} SettingChanges& operator=(SettingChanges& rhs) {
if (this == &rhs)
void push(std::auto_ptr <SettingChangeBase> pSettingChange) { return *this;
m_settingChanges.push_back(pSettingChange.release());
} clear();
m_settingChanges = rhs.m_settingChanges;
// like std::auto_ptr - assignment is transfer of ownership rhs.m_settingChanges.clear();
SettingChanges& operator = (SettingChanges& rhs) { return *this;
if(this == &rhs) }
return *this;
private:
clear(); typedef std::vector<SettingChangeBase*> setting_changes;
m_settingChanges = rhs.m_settingChanges; setting_changes m_settingChanges;
rhs.m_settingChanges.clear(); };
return *this;
}
private:
typedef std::vector <SettingChangeBase *> setting_changes;
setting_changes m_settingChanges;
};
} }
#endif // SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // SETTING_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -3,137 +3,126 @@ ...@@ -3,137 +3,126 @@
#include "yaml-cpp/exceptions.h" #include "yaml-cpp/exceptions.h"
#include "exp.h" #include "exp.h"
namespace YAML namespace YAML {
{ Scanner::SimpleKey::SimpleKey(const Mark& mark_, int flowLevel_)
Scanner::SimpleKey::SimpleKey(const Mark& mark_, int flowLevel_) : mark(mark_), flowLevel(flowLevel_), pIndent(0), pMapStart(0), pKey(0) {}
: mark(mark_), flowLevel(flowLevel_), pIndent(0), pMapStart(0), pKey(0)
{ void Scanner::SimpleKey::Validate() {
} // Note: pIndent will *not* be garbage here;
// we "garbage collect" them so we can
void Scanner::SimpleKey::Validate() // always refer to them
{ if (pIndent)
// Note: pIndent will *not* be garbage here; pIndent->status = IndentMarker::VALID;
// we "garbage collect" them so we can if (pMapStart)
// always refer to them pMapStart->status = Token::VALID;
if(pIndent) if (pKey)
pIndent->status = IndentMarker::VALID; pKey->status = Token::VALID;
if(pMapStart)
pMapStart->status = Token::VALID;
if(pKey)
pKey->status = Token::VALID;
}
void Scanner::SimpleKey::Invalidate()
{
if(pIndent)
pIndent->status = IndentMarker::INVALID;
if(pMapStart)
pMapStart->status = Token::INVALID;
if(pKey)
pKey->status = Token::INVALID;
}
// CanInsertPotentialSimpleKey
bool Scanner::CanInsertPotentialSimpleKey() const
{
if(!m_simpleKeyAllowed)
return false;
return !ExistsActiveSimpleKey();
}
// ExistsActiveSimpleKey
// . Returns true if there's a potential simple key at our flow level
// (there's allowed at most one per flow level, i.e., at the start of the flow start token)
bool Scanner::ExistsActiveSimpleKey() const
{
if(m_simpleKeys.empty())
return false;
const SimpleKey& key = m_simpleKeys.top();
return key.flowLevel == GetFlowLevel();
}
// InsertPotentialSimpleKey
// . If we can, add a potential simple key to the queue,
// and save it on a stack.
void Scanner::InsertPotentialSimpleKey()
{
if(!CanInsertPotentialSimpleKey())
return;
SimpleKey key(INPUT.mark(), GetFlowLevel());
// first add a map start, if necessary
if(InBlockContext()) {
key.pIndent = PushIndentTo(INPUT.column(), IndentMarker::MAP);
if(key.pIndent) {
key.pIndent->status = IndentMarker::UNKNOWN;
key.pMapStart = key.pIndent->pStartToken;
key.pMapStart->status = Token::UNVERIFIED;
}
}
// then add the (now unverified) key
m_tokens.push(Token(Token::KEY, INPUT.mark()));
key.pKey = &m_tokens.back();
key.pKey->status = Token::UNVERIFIED;
m_simpleKeys.push(key);
}
// InvalidateSimpleKey
// . Automatically invalidate the simple key in our flow level
void Scanner::InvalidateSimpleKey()
{
if(m_simpleKeys.empty())
return;
// grab top key
SimpleKey& key = m_simpleKeys.top();
if(key.flowLevel != GetFlowLevel())
return;
key.Invalidate();
m_simpleKeys.pop();
}
// VerifySimpleKey
// . Determines whether the latest simple key to be added is valid,
// and if so, makes it valid.
bool Scanner::VerifySimpleKey()
{
if(m_simpleKeys.empty())
return false;
// grab top key
SimpleKey key = m_simpleKeys.top();
// only validate if we're in the correct flow level
if(key.flowLevel != GetFlowLevel())
return false;
m_simpleKeys.pop();
bool isValid = true;
// needs to be less than 1024 characters and inline
if(INPUT.line() != key.mark.line || INPUT.pos() - key.mark.pos > 1024)
isValid = false;
// invalidate key
if(isValid)
key.Validate();
else
key.Invalidate();
return isValid;
}
void Scanner::PopAllSimpleKeys()
{
while(!m_simpleKeys.empty())
m_simpleKeys.pop();
}
} }
void Scanner::SimpleKey::Invalidate() {
if (pIndent)
pIndent->status = IndentMarker::INVALID;
if (pMapStart)
pMapStart->status = Token::INVALID;
if (pKey)
pKey->status = Token::INVALID;
}
// CanInsertPotentialSimpleKey
bool Scanner::CanInsertPotentialSimpleKey() const {
if (!m_simpleKeyAllowed)
return false;
return !ExistsActiveSimpleKey();
}
// ExistsActiveSimpleKey
// . Returns true if there's a potential simple key at our flow level
// (there's allowed at most one per flow level, i.e., at the start of the flow
// start token)
bool Scanner::ExistsActiveSimpleKey() const {
if (m_simpleKeys.empty())
return false;
const SimpleKey& key = m_simpleKeys.top();
return key.flowLevel == GetFlowLevel();
}
// InsertPotentialSimpleKey
// . If we can, add a potential simple key to the queue,
// and save it on a stack.
void Scanner::InsertPotentialSimpleKey() {
if (!CanInsertPotentialSimpleKey())
return;
SimpleKey key(INPUT.mark(), GetFlowLevel());
// first add a map start, if necessary
if (InBlockContext()) {
key.pIndent = PushIndentTo(INPUT.column(), IndentMarker::MAP);
if (key.pIndent) {
key.pIndent->status = IndentMarker::UNKNOWN;
key.pMapStart = key.pIndent->pStartToken;
key.pMapStart->status = Token::UNVERIFIED;
}
}
// then add the (now unverified) key
m_tokens.push(Token(Token::KEY, INPUT.mark()));
key.pKey = &m_tokens.back();
key.pKey->status = Token::UNVERIFIED;
m_simpleKeys.push(key);
}
// InvalidateSimpleKey
// . Automatically invalidate the simple key in our flow level
void Scanner::InvalidateSimpleKey() {
if (m_simpleKeys.empty())
return;
// grab top key
SimpleKey& key = m_simpleKeys.top();
if (key.flowLevel != GetFlowLevel())
return;
key.Invalidate();
m_simpleKeys.pop();
}
// VerifySimpleKey
// . Determines whether the latest simple key to be added is valid,
// and if so, makes it valid.
bool Scanner::VerifySimpleKey() {
if (m_simpleKeys.empty())
return false;
// grab top key
SimpleKey key = m_simpleKeys.top();
// only validate if we're in the correct flow level
if (key.flowLevel != GetFlowLevel())
return false;
m_simpleKeys.pop();
bool isValid = true;
// needs to be less than 1024 characters and inline
if (INPUT.line() != key.mark.line || INPUT.pos() - key.mark.pos > 1024)
isValid = false;
// invalidate key
if (isValid)
key.Validate();
else
key.Invalidate();
return isValid;
}
void Scanner::PopAllSimpleKeys() {
while (!m_simpleKeys.empty())
m_simpleKeys.pop();
}
}
...@@ -10,385 +10,394 @@ ...@@ -10,385 +10,394 @@
#include <cstdio> #include <cstdio>
#include <algorithm> #include <algorithm>
namespace YAML namespace YAML {
{ SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives)
SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives): m_scanner(scanner), m_directives(directives), m_pCollectionStack(new CollectionStack), m_curAnchor(0) : m_scanner(scanner),
{ m_directives(directives),
} m_pCollectionStack(new CollectionStack),
m_curAnchor(0) {}
SingleDocParser::~SingleDocParser()
{ SingleDocParser::~SingleDocParser() {}
}
// HandleDocument
// HandleDocument // . Handles the next document
// . Handles the next document // . Throws a ParserException on error.
// . Throws a ParserException on error. void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
void SingleDocParser::HandleDocument(EventHandler& eventHandler) assert(!m_scanner.empty()); // guaranteed that there are tokens
{ assert(!m_curAnchor);
assert(!m_scanner.empty()); // guaranteed that there are tokens
assert(!m_curAnchor); eventHandler.OnDocumentStart(m_scanner.peek().mark);
eventHandler.OnDocumentStart(m_scanner.peek().mark); // eat doc start
if (m_scanner.peek().type == Token::DOC_START)
// eat doc start m_scanner.pop();
if(m_scanner.peek().type == Token::DOC_START)
m_scanner.pop(); // recurse!
HandleNode(eventHandler);
// recurse!
HandleNode(eventHandler); eventHandler.OnDocumentEnd();
eventHandler.OnDocumentEnd(); // and finally eat any doc ends we see
while (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END)
// and finally eat any doc ends we see m_scanner.pop();
while(!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END) }
m_scanner.pop();
} void SingleDocParser::HandleNode(EventHandler& eventHandler) {
// an empty node *is* a possibility
void SingleDocParser::HandleNode(EventHandler& eventHandler) if (m_scanner.empty()) {
{ eventHandler.OnNull(m_scanner.mark(), NullAnchor);
// an empty node *is* a possibility return;
if(m_scanner.empty()) { }
eventHandler.OnNull(m_scanner.mark(), NullAnchor);
return; // save location
} Mark mark = m_scanner.peek().mark;
// save location // special case: a value node by itself must be a map, with no header
Mark mark = m_scanner.peek().mark; if (m_scanner.peek().type == Token::VALUE) {
eventHandler.OnMapStart(mark, "?", NullAnchor);
// special case: a value node by itself must be a map, with no header HandleMap(eventHandler);
if(m_scanner.peek().type == Token::VALUE) { eventHandler.OnMapEnd();
eventHandler.OnMapStart(mark, "?", NullAnchor); return;
HandleMap(eventHandler); }
eventHandler.OnMapEnd();
return; // special case: an alias node
} if (m_scanner.peek().type == Token::ALIAS) {
eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value));
// special case: an alias node m_scanner.pop();
if(m_scanner.peek().type == Token::ALIAS) { return;
eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value)); }
m_scanner.pop();
return; std::string tag;
} anchor_t anchor;
ParseProperties(tag, anchor);
std::string tag;
anchor_t anchor; const Token& token = m_scanner.peek();
ParseProperties(tag, anchor);
if (token.type == Token::PLAIN_SCALAR && token.value == "null") {
const Token& token = m_scanner.peek(); eventHandler.OnNull(mark, anchor);
m_scanner.pop();
if(token.type == Token::PLAIN_SCALAR && token.value == "null") { return;
eventHandler.OnNull(mark, anchor); }
m_scanner.pop();
return; // add non-specific tags
} if (tag.empty())
tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?");
// add non-specific tags
if(tag.empty()) // now split based on what kind of node we should be
tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?"); switch (token.type) {
case Token::PLAIN_SCALAR:
// now split based on what kind of node we should be case Token::NON_PLAIN_SCALAR:
switch(token.type) { eventHandler.OnScalar(mark, tag, anchor, token.value);
case Token::PLAIN_SCALAR: m_scanner.pop();
case Token::NON_PLAIN_SCALAR: return;
eventHandler.OnScalar(mark, tag, anchor, token.value); case Token::FLOW_SEQ_START:
m_scanner.pop(); case Token::BLOCK_SEQ_START:
return; eventHandler.OnSequenceStart(mark, tag, anchor);
case Token::FLOW_SEQ_START: HandleSequence(eventHandler);
case Token::BLOCK_SEQ_START: eventHandler.OnSequenceEnd();
eventHandler.OnSequenceStart(mark, tag, anchor); return;
HandleSequence(eventHandler); case Token::FLOW_MAP_START:
eventHandler.OnSequenceEnd(); case Token::BLOCK_MAP_START:
return; eventHandler.OnMapStart(mark, tag, anchor);
case Token::FLOW_MAP_START: HandleMap(eventHandler);
case Token::BLOCK_MAP_START: eventHandler.OnMapEnd();
eventHandler.OnMapStart(mark, tag, anchor); return;
HandleMap(eventHandler); case Token::KEY:
eventHandler.OnMapEnd(); // compact maps can only go in a flow sequence
return; if (m_pCollectionStack->GetCurCollectionType() ==
case Token::KEY: CollectionType::FlowSeq) {
// compact maps can only go in a flow sequence eventHandler.OnMapStart(mark, tag, anchor);
if(m_pCollectionStack->GetCurCollectionType() == CollectionType::FlowSeq) { HandleMap(eventHandler);
eventHandler.OnMapStart(mark, tag, anchor); eventHandler.OnMapEnd();
HandleMap(eventHandler); return;
eventHandler.OnMapEnd(); }
return; break;
} default:
break; break;
default: }
break;
} if (tag == "?")
eventHandler.OnNull(mark, anchor);
if(tag == "?") else
eventHandler.OnNull(mark, anchor); eventHandler.OnScalar(mark, tag, anchor, "");
else }
eventHandler.OnScalar(mark, tag, anchor, "");
} void SingleDocParser::HandleSequence(EventHandler& eventHandler) {
// split based on start token
void SingleDocParser::HandleSequence(EventHandler& eventHandler) switch (m_scanner.peek().type) {
{ case Token::BLOCK_SEQ_START:
// split based on start token HandleBlockSequence(eventHandler);
switch(m_scanner.peek().type) { break;
case Token::BLOCK_SEQ_START: HandleBlockSequence(eventHandler); break; case Token::FLOW_SEQ_START:
case Token::FLOW_SEQ_START: HandleFlowSequence(eventHandler); break; HandleFlowSequence(eventHandler);
default: break; break;
} default:
} break;
}
void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) }
{
// eat start token void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
m_scanner.pop(); // eat start token
m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq); m_scanner.pop();
m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq);
while(1) {
if(m_scanner.empty()) while (1) {
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ); if (m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ);
Token token = m_scanner.peek();
if(token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END) Token token = m_scanner.peek();
throw ParserException(token.mark, ErrorMsg::END_OF_SEQ); if (token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END)
throw ParserException(token.mark, ErrorMsg::END_OF_SEQ);
m_scanner.pop();
if(token.type == Token::BLOCK_SEQ_END) m_scanner.pop();
break; if (token.type == Token::BLOCK_SEQ_END)
break;
// check for null
if(!m_scanner.empty()) { // check for null
const Token& token = m_scanner.peek(); if (!m_scanner.empty()) {
if(token.type == Token::BLOCK_ENTRY || token.type == Token::BLOCK_SEQ_END) { const Token& token = m_scanner.peek();
eventHandler.OnNull(token.mark, NullAnchor); if (token.type == Token::BLOCK_ENTRY ||
continue; token.type == Token::BLOCK_SEQ_END) {
} eventHandler.OnNull(token.mark, NullAnchor);
} continue;
}
HandleNode(eventHandler); }
}
HandleNode(eventHandler);
m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq); }
}
m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq);
void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) }
{
// eat start token void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) {
m_scanner.pop(); // eat start token
m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq); m_scanner.pop();
m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq);
while(1) {
if(m_scanner.empty()) while (1) {
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW); if (m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
// first check for end
if(m_scanner.peek().type == Token::FLOW_SEQ_END) { // first check for end
m_scanner.pop(); if (m_scanner.peek().type == Token::FLOW_SEQ_END) {
break; m_scanner.pop();
} break;
}
// then read the node
HandleNode(eventHandler); // then read the node
HandleNode(eventHandler);
if(m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW); if (m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
// now eat the separator (or could be a sequence end, which we ignore - but if it's neither, then it's a bad node)
Token& token = m_scanner.peek(); // now eat the separator (or could be a sequence end, which we ignore - but
if(token.type == Token::FLOW_ENTRY) // if it's neither, then it's a bad node)
m_scanner.pop(); Token& token = m_scanner.peek();
else if(token.type != Token::FLOW_SEQ_END) if (token.type == Token::FLOW_ENTRY)
throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW); m_scanner.pop();
} else if (token.type != Token::FLOW_SEQ_END)
throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW);
m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq); }
}
m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq);
void SingleDocParser::HandleMap(EventHandler& eventHandler) }
{
// split based on start token void SingleDocParser::HandleMap(EventHandler& eventHandler) {
switch(m_scanner.peek().type) { // split based on start token
case Token::BLOCK_MAP_START: HandleBlockMap(eventHandler); break; switch (m_scanner.peek().type) {
case Token::FLOW_MAP_START: HandleFlowMap(eventHandler); break; case Token::BLOCK_MAP_START:
case Token::KEY: HandleCompactMap(eventHandler); break; HandleBlockMap(eventHandler);
case Token::VALUE: HandleCompactMapWithNoKey(eventHandler); break; break;
default: break; case Token::FLOW_MAP_START:
} HandleFlowMap(eventHandler);
} break;
case Token::KEY:
void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) HandleCompactMap(eventHandler);
{ break;
// eat start token case Token::VALUE:
m_scanner.pop(); HandleCompactMapWithNoKey(eventHandler);
m_pCollectionStack->PushCollectionType(CollectionType::BlockMap); break;
default:
while(1) { break;
if(m_scanner.empty()) }
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP); }
Token token = m_scanner.peek(); void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) {
if(token.type != Token::KEY && token.type != Token::VALUE && token.type != Token::BLOCK_MAP_END) // eat start token
throw ParserException(token.mark, ErrorMsg::END_OF_MAP); m_scanner.pop();
m_pCollectionStack->PushCollectionType(CollectionType::BlockMap);
if(token.type == Token::BLOCK_MAP_END) {
m_scanner.pop(); while (1) {
break; if (m_scanner.empty())
} throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP);
// grab key (if non-null) Token token = m_scanner.peek();
if(token.type == Token::KEY) { if (token.type != Token::KEY && token.type != Token::VALUE &&
m_scanner.pop(); token.type != Token::BLOCK_MAP_END)
HandleNode(eventHandler); throw ParserException(token.mark, ErrorMsg::END_OF_MAP);
} else {
eventHandler.OnNull(token.mark, NullAnchor); if (token.type == Token::BLOCK_MAP_END) {
} m_scanner.pop();
break;
// now grab value (optional) }
if(!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
m_scanner.pop(); // grab key (if non-null)
HandleNode(eventHandler); if (token.type == Token::KEY) {
} else { m_scanner.pop();
eventHandler.OnNull(token.mark, NullAnchor); HandleNode(eventHandler);
} } else {
} eventHandler.OnNull(token.mark, NullAnchor);
}
m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
} // now grab value (optional)
if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) m_scanner.pop();
{ HandleNode(eventHandler);
// eat start token } else {
m_scanner.pop(); eventHandler.OnNull(token.mark, NullAnchor);
m_pCollectionStack->PushCollectionType(CollectionType::FlowMap); }
}
while(1) {
if(m_scanner.empty()) m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW); }
Token& token = m_scanner.peek(); void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) {
const Mark mark = token.mark; // eat start token
// first check for end m_scanner.pop();
if(token.type == Token::FLOW_MAP_END) { m_pCollectionStack->PushCollectionType(CollectionType::FlowMap);
m_scanner.pop();
break; while (1) {
} if (m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
// grab key (if non-null)
if(token.type == Token::KEY) { Token& token = m_scanner.peek();
m_scanner.pop(); const Mark mark = token.mark;
HandleNode(eventHandler); // first check for end
} else { if (token.type == Token::FLOW_MAP_END) {
eventHandler.OnNull(mark, NullAnchor); m_scanner.pop();
} break;
}
// now grab value (optional)
if(!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) { // grab key (if non-null)
m_scanner.pop(); if (token.type == Token::KEY) {
HandleNode(eventHandler); m_scanner.pop();
} else { HandleNode(eventHandler);
eventHandler.OnNull(mark, NullAnchor); } else {
} eventHandler.OnNull(mark, NullAnchor);
}
if(m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW); // now grab value (optional)
if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
// now eat the separator (or could be a map end, which we ignore - but if it's neither, then it's a bad node) m_scanner.pop();
Token& nextToken = m_scanner.peek(); HandleNode(eventHandler);
if(nextToken.type == Token::FLOW_ENTRY) } else {
m_scanner.pop(); eventHandler.OnNull(mark, NullAnchor);
else if(nextToken.type != Token::FLOW_MAP_END) }
throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW);
} if (m_scanner.empty())
throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
} // now eat the separator (or could be a map end, which we ignore - but if
// it's neither, then it's a bad node)
// . Single "key: value" pair in a flow sequence Token& nextToken = m_scanner.peek();
void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) if (nextToken.type == Token::FLOW_ENTRY)
{ m_scanner.pop();
m_pCollectionStack->PushCollectionType(CollectionType::CompactMap); else if (nextToken.type != Token::FLOW_MAP_END)
throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW);
// grab key }
Mark mark = m_scanner.peek().mark;
m_scanner.pop(); m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
HandleNode(eventHandler); }
// now grab value (optional) // . Single "key: value" pair in a flow sequence
if(!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) { void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) {
m_scanner.pop(); m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
HandleNode(eventHandler);
} else { // grab key
eventHandler.OnNull(mark, NullAnchor); Mark mark = m_scanner.peek().mark;
} m_scanner.pop();
HandleNode(eventHandler);
m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
} // now grab value (optional)
if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
// . Single ": value" pair in a flow sequence m_scanner.pop();
void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) HandleNode(eventHandler);
{ } else {
m_pCollectionStack->PushCollectionType(CollectionType::CompactMap); eventHandler.OnNull(mark, NullAnchor);
}
// null key
eventHandler.OnNull(m_scanner.peek().mark, NullAnchor); m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
}
// grab value
m_scanner.pop(); // . Single ": value" pair in a flow sequence
HandleNode(eventHandler); void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) {
m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
} // null key
eventHandler.OnNull(m_scanner.peek().mark, NullAnchor);
// ParseProperties
// . Grabs any tag or anchor tokens and deals with them. // grab value
void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor) m_scanner.pop();
{ HandleNode(eventHandler);
tag.clear();
anchor = NullAnchor; m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
}
while(1) {
if(m_scanner.empty()) // ParseProperties
return; // . Grabs any tag or anchor tokens and deals with them.
void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor) {
switch(m_scanner.peek().type) { tag.clear();
case Token::TAG: ParseTag(tag); break; anchor = NullAnchor;
case Token::ANCHOR: ParseAnchor(anchor); break;
default: return; while (1) {
} if (m_scanner.empty())
} return;
}
switch (m_scanner.peek().type) {
void SingleDocParser::ParseTag(std::string& tag) case Token::TAG:
{ ParseTag(tag);
Token& token = m_scanner.peek(); break;
if(!tag.empty()) case Token::ANCHOR:
throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS); ParseAnchor(anchor);
break;
Tag tagInfo(token); default:
tag = tagInfo.Translate(m_directives); return;
m_scanner.pop(); }
} }
}
void SingleDocParser::ParseAnchor(anchor_t& anchor)
{ void SingleDocParser::ParseTag(std::string& tag) {
Token& token = m_scanner.peek(); Token& token = m_scanner.peek();
if(anchor) if (!tag.empty())
throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS); throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS);
anchor = RegisterAnchor(token.value); Tag tagInfo(token);
m_scanner.pop(); tag = tagInfo.Translate(m_directives);
} m_scanner.pop();
}
anchor_t SingleDocParser::RegisterAnchor(const std::string& name)
{ void SingleDocParser::ParseAnchor(anchor_t& anchor) {
if(name.empty()) Token& token = m_scanner.peek();
return NullAnchor; if (anchor)
throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS);
return m_anchors[name] = ++m_curAnchor;
} anchor = RegisterAnchor(token.value);
m_scanner.pop();
anchor_t SingleDocParser::LookupAnchor(const Mark& mark, const std::string& name) const }
{
Anchors::const_iterator it = m_anchors.find(name); anchor_t SingleDocParser::RegisterAnchor(const std::string& name) {
if(it == m_anchors.end()) if (name.empty())
throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR); return NullAnchor;
return it->second; return m_anchors[name] = ++m_curAnchor;
} }
anchor_t SingleDocParser::LookupAnchor(const Mark& mark,
const std::string& name) const {
Anchors::const_iterator it = m_anchors.find(name);
if (it == m_anchors.end())
throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR);
return it->second;
}
} }
#ifndef SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include "yaml-cpp/anchor.h" #include "yaml-cpp/anchor.h"
#include "yaml-cpp/noncopyable.h" #include "yaml-cpp/noncopyable.h"
#include <string> #include <string>
#include <map> #include <map>
#include <memory> #include <memory>
namespace YAML namespace YAML {
{ struct Directives;
struct Directives; struct Mark;
struct Mark; struct Token;
struct Token; class CollectionStack;
class CollectionStack; class EventHandler;
class EventHandler; class Node;
class Node; class Scanner;
class Scanner;
class SingleDocParser : private noncopyable {
class SingleDocParser: private noncopyable public:
{ SingleDocParser(Scanner& scanner, const Directives& directives);
public: ~SingleDocParser();
SingleDocParser(Scanner& scanner, const Directives& directives);
~SingleDocParser(); void HandleDocument(EventHandler& eventHandler);
private:
void HandleNode(EventHandler& eventHandler);
void HandleSequence(EventHandler& eventHandler);
void HandleBlockSequence(EventHandler& eventHandler);
void HandleFlowSequence(EventHandler& eventHandler);
void HandleMap(EventHandler& eventHandler);
void HandleBlockMap(EventHandler& eventHandler);
void HandleFlowMap(EventHandler& eventHandler);
void HandleCompactMap(EventHandler& eventHandler);
void HandleCompactMapWithNoKey(EventHandler& eventHandler);
void ParseProperties(std::string& tag, anchor_t& anchor);
void ParseTag(std::string& tag);
void ParseAnchor(anchor_t& anchor);
anchor_t RegisterAnchor(const std::string& name);
anchor_t LookupAnchor(const Mark& mark, const std::string& name) const;
private:
Scanner& m_scanner;
const Directives& m_directives;
std::auto_ptr<CollectionStack> m_pCollectionStack;
void HandleDocument(EventHandler& eventHandler); typedef std::map<std::string, anchor_t> Anchors;
Anchors m_anchors;
private: anchor_t m_curAnchor;
void HandleNode(EventHandler& eventHandler); };
void HandleSequence(EventHandler& eventHandler);
void HandleBlockSequence(EventHandler& eventHandler);
void HandleFlowSequence(EventHandler& eventHandler);
void HandleMap(EventHandler& eventHandler);
void HandleBlockMap(EventHandler& eventHandler);
void HandleFlowMap(EventHandler& eventHandler);
void HandleCompactMap(EventHandler& eventHandler);
void HandleCompactMapWithNoKey(EventHandler& eventHandler);
void ParseProperties(std::string& tag, anchor_t& anchor);
void ParseTag(std::string& tag);
void ParseAnchor(anchor_t& anchor);
anchor_t RegisterAnchor(const std::string& name);
anchor_t LookupAnchor(const Mark& mark, const std::string& name) const;
private:
Scanner& m_scanner;
const Directives& m_directives;
std::auto_ptr<CollectionStack> m_pCollectionStack;
typedef std::map<std::string, anchor_t> Anchors;
Anchors m_anchors;
anchor_t m_curAnchor;
};
} }
#endif // SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // SINGLEDOCPARSER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -6,442 +6,439 @@ ...@@ -6,442 +6,439 @@
#define YAML_PREFETCH_SIZE 2048 #define YAML_PREFETCH_SIZE 2048
#endif #endif
#define S_ARRAY_SIZE( A ) (sizeof(A)/sizeof(*(A))) #define S_ARRAY_SIZE(A) (sizeof(A) / sizeof(*(A)))
#define S_ARRAY_END( A ) ((A) + S_ARRAY_SIZE(A)) #define S_ARRAY_END(A) ((A) + S_ARRAY_SIZE(A))
#define CP_REPLACEMENT_CHARACTER (0xFFFD) #define CP_REPLACEMENT_CHARACTER (0xFFFD)
namespace YAML namespace YAML {
{ enum UtfIntroState {
enum UtfIntroState { uis_start,
uis_start, uis_utfbe_b1,
uis_utfbe_b1, uis_utf32be_b2,
uis_utf32be_b2, uis_utf32be_bom3,
uis_utf32be_bom3, uis_utf32be,
uis_utf32be, uis_utf16be,
uis_utf16be, uis_utf16be_bom1,
uis_utf16be_bom1, uis_utfle_bom1,
uis_utfle_bom1, uis_utf16le_bom2,
uis_utf16le_bom2, uis_utf32le_bom3,
uis_utf32le_bom3, uis_utf16le,
uis_utf16le, uis_utf32le,
uis_utf32le, uis_utf8_imp,
uis_utf8_imp, uis_utf16le_imp,
uis_utf16le_imp, uis_utf32le_imp3,
uis_utf32le_imp3, uis_utf8_bom1,
uis_utf8_bom1, uis_utf8_bom2,
uis_utf8_bom2, uis_utf8,
uis_utf8, uis_error
uis_error };
};
enum UtfIntroCharType {
enum UtfIntroCharType { uict00,
uict00, uictBB,
uictBB, uictBF,
uictBF, uictEF,
uictEF, uictFE,
uictFE, uictFF,
uictFF, uictAscii,
uictAscii, uictOther,
uictOther, uictMax
uictMax };
};
static bool s_introFinalState[] = {false, // uis_start
static bool s_introFinalState[] = { false, // uis_utfbe_b1
false, //uis_start false, // uis_utf32be_b2
false, //uis_utfbe_b1 false, // uis_utf32be_bom3
false, //uis_utf32be_b2 true, // uis_utf32be
false, //uis_utf32be_bom3 true, // uis_utf16be
true, //uis_utf32be false, // uis_utf16be_bom1
true, //uis_utf16be false, // uis_utfle_bom1
false, //uis_utf16be_bom1 false, // uis_utf16le_bom2
false, //uis_utfle_bom1 false, // uis_utf32le_bom3
false, //uis_utf16le_bom2 true, // uis_utf16le
false, //uis_utf32le_bom3 true, // uis_utf32le
true, //uis_utf16le false, // uis_utf8_imp
true, //uis_utf32le false, // uis_utf16le_imp
false, //uis_utf8_imp false, // uis_utf32le_imp3
false, //uis_utf16le_imp false, // uis_utf8_bom1
false, //uis_utf32le_imp3 false, // uis_utf8_bom2
false, //uis_utf8_bom1 true, // uis_utf8
false, //uis_utf8_bom2 true, // uis_error
true, //uis_utf8 };
true, //uis_error
}; static UtfIntroState s_introTransitions[][uictMax] = {
// uict00, uictBB, uictBF, uictEF,
static UtfIntroState s_introTransitions[][uictMax] = { // uictFE, uictFF, uictAscii, uictOther
// uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther {uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1,
{uis_utfbe_b1, uis_utf8, uis_utf8, uis_utf8_bom1, uis_utf16be_bom1, uis_utfle_bom1, uis_utf8_imp, uis_utf8}, uis_utf16be_bom1, uis_utfle_bom1, uis_utf8_imp, uis_utf8},
{uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8}, {uis_utf32be_b2, uis_utf8, uis_utf8, uis_utf8,
{uis_utf32be, uis_utf8, uis_utf8, uis_utf8, uis_utf32be_bom3, uis_utf8, uis_utf8, uis_utf8}, uis_utf8, uis_utf8, uis_utf16be, uis_utf8},
{uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf32be, uis_utf8, uis_utf8}, {uis_utf32be, uis_utf8, uis_utf8, uis_utf8,
{uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be}, uis_utf32be_bom3, uis_utf8, uis_utf8, uis_utf8},
{uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be}, {uis_utf8, uis_utf8, uis_utf8, uis_utf8,
{uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16be, uis_utf8, uis_utf8}, uis_utf8, uis_utf32be, uis_utf8, uis_utf8},
{uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf16le_bom2, uis_utf8, uis_utf8, uis_utf8}, {uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be,
{uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, uis_utf32be, uis_utf32be, uis_utf32be, uis_utf32be},
{uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, {uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be,
{uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, uis_utf16be, uis_utf16be, uis_utf16be, uis_utf16be},
{uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le}, {uis_utf8, uis_utf8, uis_utf8, uis_utf8,
{uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, uis_utf8, uis_utf16be, uis_utf8, uis_utf8},
{uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, {uis_utf8, uis_utf8, uis_utf8, uis_utf8,
{uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le}, uis_utf16le_bom2, uis_utf8, uis_utf8, uis_utf8},
{uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, {uis_utf32le_bom3, uis_utf16le, uis_utf16le, uis_utf16le,
{uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
{uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le,
}; uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
{uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le,
static char s_introUngetCount[][uictMax] = { uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
// uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther {uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le,
{0, 1, 1, 0, 0, 0, 0, 1}, uis_utf32le, uis_utf32le, uis_utf32le, uis_utf32le},
{0, 2, 2, 2, 2, 2, 2, 2}, {uis_utf16le_imp, uis_utf8, uis_utf8, uis_utf8,
{3, 3, 3, 3, 0, 3, 3, 3}, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
{4, 4, 4, 4, 4, 0, 4, 4}, {uis_utf32le_imp3, uis_utf16le, uis_utf16le, uis_utf16le,
{1, 1, 1, 1, 1, 1, 1, 1}, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
{1, 1, 1, 1, 1, 1, 1, 1}, {uis_utf32le, uis_utf16le, uis_utf16le, uis_utf16le,
{2, 2, 2, 2, 2, 0, 2, 2}, uis_utf16le, uis_utf16le, uis_utf16le, uis_utf16le},
{2, 2, 2, 2, 0, 2, 2, 2}, {uis_utf8, uis_utf8_bom2, uis_utf8, uis_utf8,
{0, 1, 1, 1, 1, 1, 1, 1}, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
{0, 2, 2, 2, 2, 2, 2, 2}, {uis_utf8, uis_utf8, uis_utf8, uis_utf8,
{1, 1, 1, 1, 1, 1, 1, 1}, uis_utf8, uis_utf8, uis_utf8, uis_utf8},
{1, 1, 1, 1, 1, 1, 1, 1}, {uis_utf8, uis_utf8, uis_utf8, uis_utf8,
{0, 2, 2, 2, 2, 2, 2, 2}, uis_utf8, uis_utf8, uis_utf8, uis_utf8}, };
{0, 3, 3, 3, 3, 3, 3, 3},
{4, 4, 4, 4, 4, 4, 4, 4}, static char s_introUngetCount[][uictMax] = {
{2, 0, 2, 2, 2, 2, 2, 2}, // uict00, uictBB, uictBF, uictEF, uictFE, uictFF, uictAscii, uictOther
{3, 3, 0, 3, 3, 3, 3, 3}, {0, 1, 1, 0, 0, 0, 0, 1},
{1, 1, 1, 1, 1, 1, 1, 1}, {0, 2, 2, 2, 2, 2, 2, 2},
}; {3, 3, 3, 3, 0, 3, 3, 3},
{4, 4, 4, 4, 4, 0, 4, 4},
inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) {1, 1, 1, 1, 1, 1, 1, 1},
{ {1, 1, 1, 1, 1, 1, 1, 1},
if (std::istream::traits_type::eof() == ch) { {2, 2, 2, 2, 2, 0, 2, 2},
return uictOther; {2, 2, 2, 2, 0, 2, 2, 2},
} {0, 1, 1, 1, 1, 1, 1, 1},
{0, 2, 2, 2, 2, 2, 2, 2},
switch (ch) { {1, 1, 1, 1, 1, 1, 1, 1},
case 0: return uict00; {1, 1, 1, 1, 1, 1, 1, 1},
case 0xBB: return uictBB; {0, 2, 2, 2, 2, 2, 2, 2},
case 0xBF: return uictBF; {0, 3, 3, 3, 3, 3, 3, 3},
case 0xEF: return uictEF; {4, 4, 4, 4, 4, 4, 4, 4},
case 0xFE: return uictFE; {2, 0, 2, 2, 2, 2, 2, 2},
case 0xFF: return uictFF; {3, 3, 0, 3, 3, 3, 3, 3},
} {1, 1, 1, 1, 1, 1, 1, 1}, };
if ((ch > 0) && (ch < 0xFF)) { inline UtfIntroCharType IntroCharTypeOf(std::istream::int_type ch) {
return uictAscii; if (std::istream::traits_type::eof() == ch) {
} return uictOther;
}
return uictOther;
} switch (ch) {
case 0:
inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits, unsigned char rshift) return uict00;
{ case 0xBB:
const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits); return uictBB;
const unsigned char mask = (0xFF >> (lead_bits + 1)); case 0xBF:
return static_cast<char>(static_cast<unsigned char>( return uictBF;
header | ((ch >> rshift) & mask) case 0xEF:
)); return uictEF;
} case 0xFE:
return uictFE;
inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) case 0xFF:
{ return uictFF;
// We are not allowed to queue the Stream::eof() codepoint, so }
// replace it with CP_REPLACEMENT_CHARACTER
if (static_cast<unsigned long>(Stream::eof()) == ch) if ((ch > 0) && (ch < 0xFF)) {
{ return uictAscii;
ch = CP_REPLACEMENT_CHARACTER; }
}
return uictOther;
if (ch < 0x80) }
{
q.push_back(Utf8Adjust(ch, 0, 0)); inline char Utf8Adjust(unsigned long ch, unsigned char lead_bits,
} unsigned char rshift) {
else if (ch < 0x800) const unsigned char header = ((1 << lead_bits) - 1) << (8 - lead_bits);
{ const unsigned char mask = (0xFF >> (lead_bits + 1));
q.push_back(Utf8Adjust(ch, 2, 6)); return static_cast<char>(
q.push_back(Utf8Adjust(ch, 1, 0)); static_cast<unsigned char>(header | ((ch >> rshift) & mask)));
} }
else if (ch < 0x10000)
{ inline void QueueUnicodeCodepoint(std::deque<char>& q, unsigned long ch) {
q.push_back(Utf8Adjust(ch, 3, 12)); // We are not allowed to queue the Stream::eof() codepoint, so
q.push_back(Utf8Adjust(ch, 1, 6)); // replace it with CP_REPLACEMENT_CHARACTER
q.push_back(Utf8Adjust(ch, 1, 0)); if (static_cast<unsigned long>(Stream::eof()) == ch) {
} ch = CP_REPLACEMENT_CHARACTER;
else }
{
q.push_back(Utf8Adjust(ch, 4, 18)); if (ch < 0x80) {
q.push_back(Utf8Adjust(ch, 1, 12)); q.push_back(Utf8Adjust(ch, 0, 0));
q.push_back(Utf8Adjust(ch, 1, 6)); } else if (ch < 0x800) {
q.push_back(Utf8Adjust(ch, 1, 0)); q.push_back(Utf8Adjust(ch, 2, 6));
} q.push_back(Utf8Adjust(ch, 1, 0));
} } else if (ch < 0x10000) {
q.push_back(Utf8Adjust(ch, 3, 12));
Stream::Stream(std::istream& input) q.push_back(Utf8Adjust(ch, 1, 6));
: m_input(input), q.push_back(Utf8Adjust(ch, 1, 0));
m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]), } else {
m_nPrefetchedAvailable(0), m_nPrefetchedUsed(0) q.push_back(Utf8Adjust(ch, 4, 18));
{ q.push_back(Utf8Adjust(ch, 1, 12));
typedef std::istream::traits_type char_traits; q.push_back(Utf8Adjust(ch, 1, 6));
q.push_back(Utf8Adjust(ch, 1, 0));
if(!input) }
return; }
// Determine (or guess) the character-set by reading the BOM, if any. See Stream::Stream(std::istream& input)
// the YAML specification for the determination algorithm. : m_input(input),
char_traits::int_type intro[4]; m_pPrefetched(new unsigned char[YAML_PREFETCH_SIZE]),
int nIntroUsed = 0; m_nPrefetchedAvailable(0),
UtfIntroState state = uis_start; m_nPrefetchedUsed(0) {
for(; !s_introFinalState[state]; ) { typedef std::istream::traits_type char_traits;
std::istream::int_type ch = input.get();
intro[nIntroUsed++] = ch; if (!input)
UtfIntroCharType charType = IntroCharTypeOf(ch); return;
UtfIntroState newState = s_introTransitions[state][charType];
int nUngets = s_introUngetCount[state][charType]; // Determine (or guess) the character-set by reading the BOM, if any. See
if(nUngets > 0) { // the YAML specification for the determination algorithm.
input.clear(); char_traits::int_type intro[4];
for(; nUngets > 0; --nUngets) { int nIntroUsed = 0;
if(char_traits::eof() != intro[--nIntroUsed]) UtfIntroState state = uis_start;
input.putback(char_traits::to_char_type(intro[nIntroUsed])); for (; !s_introFinalState[state];) {
} std::istream::int_type ch = input.get();
} intro[nIntroUsed++] = ch;
state = newState; UtfIntroCharType charType = IntroCharTypeOf(ch);
} UtfIntroState newState = s_introTransitions[state][charType];
int nUngets = s_introUngetCount[state][charType];
switch (state) { if (nUngets > 0) {
case uis_utf8: m_charSet = utf8; break; input.clear();
case uis_utf16le: m_charSet = utf16le; break; for (; nUngets > 0; --nUngets) {
case uis_utf16be: m_charSet = utf16be; break; if (char_traits::eof() != intro[--nIntroUsed])
case uis_utf32le: m_charSet = utf32le; break; input.putback(char_traits::to_char_type(intro[nIntroUsed]));
case uis_utf32be: m_charSet = utf32be; break; }
default: m_charSet = utf8; break; }
} state = newState;
}
ReadAheadTo(0);
} switch (state) {
case uis_utf8:
Stream::~Stream() m_charSet = utf8;
{ break;
delete[] m_pPrefetched; case uis_utf16le:
} m_charSet = utf16le;
break;
char Stream::peek() const case uis_utf16be:
{ m_charSet = utf16be;
if (m_readahead.empty()) break;
{ case uis_utf32le:
return Stream::eof(); m_charSet = utf32le;
} break;
case uis_utf32be:
return m_readahead[0]; m_charSet = utf32be;
} break;
default:
Stream::operator bool() const m_charSet = utf8;
{ break;
return m_input.good() || (!m_readahead.empty() && m_readahead[0] != Stream::eof()); }
}
ReadAheadTo(0);
// get }
// . Extracts a character from the stream and updates our position
char Stream::get() Stream::~Stream() { delete[] m_pPrefetched; }
{
char ch = peek(); char Stream::peek() const {
AdvanceCurrent(); if (m_readahead.empty()) {
m_mark.column++; return Stream::eof();
}
if(ch == '\n') {
m_mark.column = 0; return m_readahead[0];
m_mark.line++; }
}
Stream::operator bool() const {
return ch; return m_input.good() ||
} (!m_readahead.empty() && m_readahead[0] != Stream::eof());
}
// get
// . Extracts 'n' characters from the stream and updates our position // get
std::string Stream::get(int n) // . Extracts a character from the stream and updates our position
{ char Stream::get() {
std::string ret; char ch = peek();
ret.reserve(n); AdvanceCurrent();
for(int i=0;i<n;i++) m_mark.column++;
ret += get();
return ret; if (ch == '\n') {
} m_mark.column = 0;
m_mark.line++;
// eat }
// . Eats 'n' characters and updates our position.
void Stream::eat(int n) return ch;
{ }
for(int i=0;i<n;i++)
get(); // get
} // . Extracts 'n' characters from the stream and updates our position
std::string Stream::get(int n) {
void Stream::AdvanceCurrent() std::string ret;
{ ret.reserve(n);
if (!m_readahead.empty()) for (int i = 0; i < n; i++)
{ ret += get();
m_readahead.pop_front(); return ret;
m_mark.pos++; }
}
// eat
ReadAheadTo(0); // . Eats 'n' characters and updates our position.
} void Stream::eat(int n) {
for (int i = 0; i < n; i++)
bool Stream::_ReadAheadTo(size_t i) const get();
{ }
while (m_input.good() && (m_readahead.size() <= i))
{ void Stream::AdvanceCurrent() {
switch (m_charSet) if (!m_readahead.empty()) {
{ m_readahead.pop_front();
case utf8: StreamInUtf8(); break; m_mark.pos++;
case utf16le: StreamInUtf16(); break; }
case utf16be: StreamInUtf16(); break;
case utf32le: StreamInUtf32(); break; ReadAheadTo(0);
case utf32be: StreamInUtf32(); break; }
}
} bool Stream::_ReadAheadTo(size_t i) const {
while (m_input.good() && (m_readahead.size() <= i)) {
// signal end of stream switch (m_charSet) {
if(!m_input.good()) case utf8:
m_readahead.push_back(Stream::eof()); StreamInUtf8();
break;
return m_readahead.size() > i; case utf16le:
} StreamInUtf16();
break;
void Stream::StreamInUtf8() const case utf16be:
{ StreamInUtf16();
unsigned char b = GetNextByte(); break;
if (m_input.good()) case utf32le:
{ StreamInUtf32();
m_readahead.push_back(b); break;
} case utf32be:
} StreamInUtf32();
break;
void Stream::StreamInUtf16() const }
{ }
unsigned long ch = 0;
unsigned char bytes[2]; // signal end of stream
int nBigEnd = (m_charSet == utf16be) ? 0 : 1; if (!m_input.good())
m_readahead.push_back(Stream::eof());
bytes[0] = GetNextByte();
bytes[1] = GetNextByte(); return m_readahead.size() > i;
if (!m_input.good()) }
{
return; void Stream::StreamInUtf8() const {
} unsigned char b = GetNextByte();
ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | if (m_input.good()) {
static_cast<unsigned long>(bytes[1 ^ nBigEnd]); m_readahead.push_back(b);
}
if (ch >= 0xDC00 && ch < 0xE000) }
{
// Trailing (low) surrogate...ugh, wrong order void Stream::StreamInUtf16() const {
QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); unsigned long ch = 0;
return; unsigned char bytes[2];
} int nBigEnd = (m_charSet == utf16be) ? 0 : 1;
else if (ch >= 0xD800 && ch < 0xDC00)
{ bytes[0] = GetNextByte();
// ch is a leading (high) surrogate bytes[1] = GetNextByte();
if (!m_input.good()) {
// Four byte UTF-8 code point return;
}
// Read the trailing (low) surrogate ch = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
for (;;) static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
{
bytes[0] = GetNextByte(); if (ch >= 0xDC00 && ch < 0xE000) {
bytes[1] = GetNextByte(); // Trailing (low) surrogate...ugh, wrong order
if (!m_input.good()) QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
{ return;
QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); } else if (ch >= 0xD800 && ch < 0xDC00) {
return; // ch is a leading (high) surrogate
}
unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) | // Four byte UTF-8 code point
static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
if (chLow < 0xDC00 || ch >= 0xE000) // Read the trailing (low) surrogate
{ for (;;) {
// Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the stream. bytes[0] = GetNextByte();
QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER); bytes[1] = GetNextByte();
if (!m_input.good()) {
// Deal with the next UTF-16 unit QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
if (chLow < 0xD800 || ch >= 0xE000) return;
{ }
// Easiest case: queue the codepoint and return unsigned long chLow = (static_cast<unsigned long>(bytes[nBigEnd]) << 8) |
QueueUnicodeCodepoint(m_readahead, ch); static_cast<unsigned long>(bytes[1 ^ nBigEnd]);
return; if (chLow < 0xDC00 || ch >= 0xE000) {
} // Trouble...not a low surrogate. Dump a REPLACEMENT CHARACTER into the
else // stream.
{ QueueUnicodeCodepoint(m_readahead, CP_REPLACEMENT_CHARACTER);
// Start the loop over with the new high surrogate
ch = chLow; // Deal with the next UTF-16 unit
continue; if (chLow < 0xD800 || ch >= 0xE000) {
} // Easiest case: queue the codepoint and return
} QueueUnicodeCodepoint(m_readahead, ch);
return;
// Select the payload bits from the high surrogate } else {
ch &= 0x3FF; // Start the loop over with the new high surrogate
ch <<= 10; ch = chLow;
continue;
// Include bits from low surrogate }
ch |= (chLow & 0x3FF); }
// Add the surrogacy offset // Select the payload bits from the high surrogate
ch += 0x10000; ch &= 0x3FF;
} ch <<= 10;
}
// Include bits from low surrogate
QueueUnicodeCodepoint(m_readahead, ch); ch |= (chLow & 0x3FF);
}
// Add the surrogacy offset
inline char* ReadBuffer(unsigned char* pBuffer) ch += 0x10000;
{ }
return reinterpret_cast<char*>(pBuffer); }
}
QueueUnicodeCodepoint(m_readahead, ch);
unsigned char Stream::GetNextByte() const }
{
if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) inline char* ReadBuffer(unsigned char* pBuffer) {
{ return reinterpret_cast<char*>(pBuffer);
std::streambuf *pBuf = m_input.rdbuf(); }
m_nPrefetchedAvailable = static_cast<std::size_t>(pBuf->sgetn(ReadBuffer(m_pPrefetched), YAML_PREFETCH_SIZE));
m_nPrefetchedUsed = 0; unsigned char Stream::GetNextByte() const {
if (!m_nPrefetchedAvailable) if (m_nPrefetchedUsed >= m_nPrefetchedAvailable) {
{ std::streambuf* pBuf = m_input.rdbuf();
m_input.setstate(std::ios_base::eofbit); m_nPrefetchedAvailable = static_cast<std::size_t>(
} pBuf->sgetn(ReadBuffer(m_pPrefetched), YAML_PREFETCH_SIZE));
m_nPrefetchedUsed = 0;
if (0 == m_nPrefetchedAvailable) if (!m_nPrefetchedAvailable) {
{ m_input.setstate(std::ios_base::eofbit);
return 0; }
}
} if (0 == m_nPrefetchedAvailable) {
return 0;
return m_pPrefetched[m_nPrefetchedUsed++]; }
} }
void Stream::StreamInUtf32() const return m_pPrefetched[m_nPrefetchedUsed++];
{ }
static int indexes[2][4] = {
{3, 2, 1, 0}, void Stream::StreamInUtf32() const {
{0, 1, 2, 3} static int indexes[2][4] = {{3, 2, 1, 0}, {0, 1, 2, 3}};
};
unsigned long ch = 0;
unsigned long ch = 0; unsigned char bytes[4];
unsigned char bytes[4]; int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0];
int* pIndexes = (m_charSet == utf32be) ? indexes[1] : indexes[0];
bytes[0] = GetNextByte();
bytes[0] = GetNextByte(); bytes[1] = GetNextByte();
bytes[1] = GetNextByte(); bytes[2] = GetNextByte();
bytes[2] = GetNextByte(); bytes[3] = GetNextByte();
bytes[3] = GetNextByte(); if (!m_input.good()) {
if (!m_input.good()) return;
{ }
return;
} for (int i = 0; i < 4; ++i) {
ch <<= 8;
for (int i = 0; i < 4; ++i) ch |= bytes[pIndexes[i]];
{ }
ch <<= 8;
ch |= bytes[pIndexes[i]]; QueueUnicodeCodepoint(m_readahead, ch);
} }
QueueUnicodeCodepoint(m_readahead, ch);
}
} }
#ifndef STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include "yaml-cpp/noncopyable.h" #include "yaml-cpp/noncopyable.h"
#include "yaml-cpp/mark.h" #include "yaml-cpp/mark.h"
#include <cstddef> #include <cstddef>
...@@ -15,65 +16,67 @@ ...@@ -15,65 +16,67 @@
#include <set> #include <set>
#include <string> #include <string>
namespace YAML namespace YAML {
{ class Stream : private noncopyable {
class Stream: private noncopyable public:
{ friend class StreamCharSource;
public:
friend class StreamCharSource; Stream(std::istream& input);
~Stream();
Stream(std::istream& input);
~Stream(); operator bool() const;
bool operator!() const { return !static_cast<bool>(*this); }
char peek() const;
char get();
std::string get(int n);
void eat(int n = 1);
operator bool() const; static char eof() { return 0x04; }
bool operator !() const { return !static_cast <bool>(*this); }
char peek() const; const Mark mark() const { return m_mark; }
char get(); int pos() const { return m_mark.pos; }
std::string get(int n); int line() const { return m_mark.line; }
void eat(int n = 1); int column() const { return m_mark.column; }
void ResetColumn() { m_mark.column = 0; }
static char eof() { return 0x04; } private:
enum CharacterSet {
const Mark mark() const { return m_mark; } utf8,
int pos() const { return m_mark.pos; } utf16le,
int line() const { return m_mark.line; } utf16be,
int column() const { return m_mark.column; } utf32le,
void ResetColumn() { m_mark.column = 0; } utf32be
};
private: std::istream& m_input;
enum CharacterSet {utf8, utf16le, utf16be, utf32le, utf32be}; Mark m_mark;
std::istream& m_input; CharacterSet m_charSet;
Mark m_mark; mutable std::deque<char> m_readahead;
unsigned char* const m_pPrefetched;
CharacterSet m_charSet; mutable size_t m_nPrefetchedAvailable;
mutable std::deque<char> m_readahead; mutable size_t m_nPrefetchedUsed;
unsigned char* const m_pPrefetched;
mutable size_t m_nPrefetchedAvailable;
mutable size_t m_nPrefetchedUsed;
void AdvanceCurrent();
char CharAt(size_t i) const;
bool ReadAheadTo(size_t i) const;
bool _ReadAheadTo(size_t i) const;
void StreamInUtf8() const;
void StreamInUtf16() const;
void StreamInUtf32() const;
unsigned char GetNextByte() const;
};
// CharAt void AdvanceCurrent();
// . Unchecked access char CharAt(size_t i) const;
inline char Stream::CharAt(size_t i) const { bool ReadAheadTo(size_t i) const;
return m_readahead[i]; bool _ReadAheadTo(size_t i) const;
} void StreamInUtf8() const;
void StreamInUtf16() const;
inline bool Stream::ReadAheadTo(size_t i) const { void StreamInUtf32() const;
if(m_readahead.size() > i) unsigned char GetNextByte() const;
return true; };
return _ReadAheadTo(i);
} // CharAt
// . Unchecked access
inline char Stream::CharAt(size_t i) const {
  // Plain indexed read of the read-ahead buffer; no bounds check is made
  // here, so the caller must ensure index i is already buffered.
  const char buffered = m_readahead[i];
  return buffered;
}
inline bool Stream::ReadAheadTo(size_t i) const {
  // Returns true at once when index i is already inside the read-ahead
  // buffer; otherwise the answer is whatever _ReadAheadTo(i) reports.
  return i < m_readahead.size() || _ReadAheadTo(i);
}
} }
#endif // STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // STREAM_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#ifndef STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include "yaml-cpp/noncopyable.h" #include "yaml-cpp/noncopyable.h"
#include <cstddef> #include <cstddef>
namespace YAML namespace YAML {
{ class StreamCharSource {
class StreamCharSource public:
{ StreamCharSource(const Stream& stream) : m_offset(0), m_stream(stream) {}
public: StreamCharSource(const StreamCharSource& source)
StreamCharSource(const Stream& stream): m_offset(0), m_stream(stream) {} : m_offset(source.m_offset), m_stream(source.m_stream) {}
StreamCharSource(const StreamCharSource& source): m_offset(source.m_offset), m_stream(source.m_stream) {} ~StreamCharSource() {}
~StreamCharSource() {}
operator bool() const;
operator bool() const; char operator[](std::size_t i) const { return m_stream.CharAt(m_offset + i); }
char operator [] (std::size_t i) const { return m_stream.CharAt(m_offset + i); } bool operator!() const { return !static_cast<bool>(*this); }
bool operator !() const { return !static_cast<bool>(*this); }
const StreamCharSource operator+(int i) const;
const StreamCharSource operator + (int i) const;
private:
private: std::size_t m_offset;
std::size_t m_offset; const Stream& m_stream;
const Stream& m_stream;
StreamCharSource& operator=(const StreamCharSource&); // non-assignable
StreamCharSource& operator = (const StreamCharSource&); // non-assignable };
};
inline StreamCharSource::operator bool() const {
inline StreamCharSource::operator bool() const { return m_stream.ReadAheadTo(m_offset);
return m_stream.ReadAheadTo(m_offset); }
}
inline const StreamCharSource StreamCharSource::operator+(int i) const {
inline const StreamCharSource StreamCharSource::operator + (int i) const { StreamCharSource source(*this);
StreamCharSource source(*this); if (static_cast<int>(source.m_offset) + i >= 0)
if(static_cast<int> (source.m_offset) + i >= 0) source.m_offset += i;
source.m_offset += i; else
else source.m_offset = 0;
source.m_offset = 0; return source;
return source; }
}
} }
#endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // STREAMCHARSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <cstddef> #include <cstddef>
namespace YAML namespace YAML {
{ class StringCharSource {
class StringCharSource public:
{ StringCharSource(const char* str, std::size_t size)
public: : m_str(str), m_size(size), m_offset(0) {}
StringCharSource(const char *str, std::size_t size): m_str(str), m_size(size), m_offset(0) {}
operator bool() const { return m_offset < m_size; }
operator bool() const { return m_offset < m_size; } char operator[](std::size_t i) const { return m_str[m_offset + i]; }
char operator [] (std::size_t i) const { return m_str[m_offset + i]; } bool operator!() const { return !static_cast<bool>(*this); }
bool operator !() const { return !static_cast<bool>(*this); }
const StringCharSource operator+(int i) const {
const StringCharSource operator + (int i) const { StringCharSource source(*this);
StringCharSource source(*this); if (static_cast<int>(source.m_offset) + i >= 0)
if(static_cast<int> (source.m_offset) + i >= 0) source.m_offset += i;
source.m_offset += i; else
else source.m_offset = 0;
source.m_offset = 0; return source;
return source; }
}
StringCharSource& operator++() {
StringCharSource& operator ++ () { ++m_offset;
++m_offset; return *this;
return *this; }
}
StringCharSource& operator+=(std::size_t offset) {
StringCharSource& operator += (std::size_t offset) { m_offset += offset;
m_offset += offset; return *this;
return *this; }
}
private: private:
const char *m_str; const char* m_str;
std::size_t m_size; std::size_t m_size;
std::size_t m_offset; std::size_t m_offset;
}; };
} }
#endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // STRINGSOURCE_H_62B23520_7C8E_11DE_8A39_0800200C9A66
...@@ -4,49 +4,45 @@ ...@@ -4,49 +4,45 @@
#include <cassert> #include <cassert>
#include <stdexcept> #include <stdexcept>
namespace YAML namespace YAML {
{ Tag::Tag(const Token& token) : type(static_cast<TYPE>(token.data)) {
Tag::Tag(const Token& token): type(static_cast<TYPE>(token.data)) switch (type) {
{ case VERBATIM:
switch(type) { value = token.value;
case VERBATIM: break;
value = token.value; case PRIMARY_HANDLE:
break; value = token.value;
case PRIMARY_HANDLE: break;
value = token.value; case SECONDARY_HANDLE:
break; value = token.value;
case SECONDARY_HANDLE: break;
value = token.value; case NAMED_HANDLE:
break; handle = token.value;
case NAMED_HANDLE: value = token.params[0];
handle = token.value; break;
value = token.params[0]; case NON_SPECIFIC:
break; break;
case NON_SPECIFIC: default:
break; assert(false);
default: }
assert(false);
}
}
// Builds the textual form of this tag.
//  - VERBATIM:      the stored value, unchanged.
//  - NON_SPECIFIC:  the literal "!".
//  - *_HANDLE:      directives.TranslateTagHandle(<handle text>) followed by
//                   the stored value, where the handle text is "!", "!!" or
//                   "!" + handle + "!".
// Any other type trips assert(false); if asserts are compiled out, a
// std::runtime_error is thrown instead.
const std::string Tag::Translate(const Directives& directives)
{
	// Tag types that need no directive lookup are answered up front.
	if(type == VERBATIM)
		return value;
	if(type == NON_SPECIFIC) {
		// TODO:
		return "!";
	}

	// Remaining valid types differ only in the handle text that is looked up.
	switch(type) {
		case PRIMARY_HANDLE:
			return directives.TranslateTagHandle("!") + value;
		case SECONDARY_HANDLE:
			return directives.TranslateTagHandle("!!") + value;
		case NAMED_HANDLE:
			return directives.TranslateTagHandle("!" + handle + "!") + value;
		default:
			break;
	}

	// Unknown tag type: programming error.
	assert(false);
	throw std::runtime_error("yaml-cpp: internal error, bad tag type");
}
} }
// Builds the textual form of this tag. Verbatim tags return their stored
// value unchanged, handle-based tags prepend the result of
// directives.TranslateTagHandle() for the matching handle text, and a
// non-specific tag yields "!". An unrecognized type trips assert(false);
// if asserts are compiled out, a std::runtime_error is thrown instead.
const std::string Tag::Translate(const Directives& directives) {
  if (type == VERBATIM) {
    return value;
  }
  if (type == PRIMARY_HANDLE) {
    return directives.TranslateTagHandle("!") + value;
  }
  if (type == SECONDARY_HANDLE) {
    return directives.TranslateTagHandle("!!") + value;
  }
  if (type == NAMED_HANDLE) {
    // Named handles are looked up in their full "!name!" form.
    return directives.TranslateTagHandle("!" + handle + "!") + value;
  }
  if (type == NON_SPECIFIC) {
    // TODO:
    return "!";
  }

  // No known tag type matched: programming error.
  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, bad tag type");
}
}
#ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #ifndef TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #define TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#if defined(_MSC_VER) || (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 #if defined(_MSC_VER) || \
(defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
(__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4
#pragma once #pragma once
#endif #endif
#include <string> #include <string>
namespace YAML namespace YAML {
{ struct Token;
struct Token; struct Directives;
struct Directives;
struct Tag { struct Tag {
enum TYPE { enum TYPE {
VERBATIM, PRIMARY_HANDLE, SECONDARY_HANDLE, NAMED_HANDLE, NON_SPECIFIC VERBATIM,
}; PRIMARY_HANDLE,
SECONDARY_HANDLE,
Tag(const Token& token); NAMED_HANDLE,
const std::string Translate(const Directives& directives); NON_SPECIFIC
};
TYPE type;
std::string handle, value; Tag(const Token& token);
}; const std::string Translate(const Directives& directives);
TYPE type;
std::string handle, value;
};
} }
#endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66 #endif // TAG_H_62B23520_7C8E_11DE_8A39_0800200C9A66
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment