You need to sign in or sign up before continuing.
Commit 8fca02fb authored by Jesse Beder's avatar Jesse Beder
Browse files

Split off the specific regular expressions, and the specialized token-scanning...

Split off the specific regular expressions, and the specialized token-scanning functions, into their own files.
parent de290681
#pragma once
#include "regex.h"
namespace YAML
{
////////////////////////////////////////////////////////////////////////////////
// Here we store a bunch of expressions for matching different parts of the file.
namespace Exp
{
// misc
const RegEx Blank = RegEx(' ') || RegEx('\t');
const RegEx Break = RegEx('\n');
const RegEx BlankOrBreak = Blank || Break;
// actual tags
const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
const RegEx Key = RegEx('?'),
KeyInFlow = RegEx('?') + BlankOrBreak;
const RegEx Value = RegEx(':'),
ValueInFlow = RegEx(':') + BlankOrBreak;
const RegEx Comment = RegEx('#');
// Plain scalar rules:
// . Cannot start with a blank.
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
// . In the block context - ? : must be not be followed with a space.
// . In the flow context ? : are illegal and - must not be followed with a space.
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)),
PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
const RegEx EndScalar = RegEx(':') + BlankOrBreak,
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}");
}
namespace Keys
{
const char FlowSeqStart = '[';
const char FlowSeqEnd = ']';
const char FlowMapStart = '{';
const char FlowMapEnd = '}';
const char FlowEntry = ',';
const char Alias = '*';
const char Anchor = '&';
const char Tag = '!';
const char LiteralScalar = '|';
const char FoldedScalar = '>';
}
}
#include "scanner.h" #include "scanner.h"
#include "token.h" #include "token.h"
#include "exceptions.h" #include "exceptions.h"
#include "exp.h"
#include <iostream> #include <iostream>
namespace YAML namespace YAML
...@@ -124,9 +125,6 @@ namespace YAML ...@@ -124,9 +125,6 @@ namespace YAML
return Exp::PlainScalar.Matches(INPUT); return Exp::PlainScalar.Matches(INPUT);
} }
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// ScanAndEnqueue // ScanAndEnqueue
// . Scans the token, then pushes it in the queue. // . Scans the token, then pushes it in the queue.
// . Note: we also use a set of "limbo tokens", i.e., tokens // . Note: we also use a set of "limbo tokens", i.e., tokens
...@@ -141,290 +139,6 @@ namespace YAML ...@@ -141,290 +139,6 @@ namespace YAML
m_limboTokens.erase(pToken); m_limboTokens.erase(pToken);
} }
// StreamStartToken
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
m_startedStream = true;
m_simpleKeyAllowed = true;
m_indents.push(-1);
return pToken;
}
// StreamEndToken
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
// force newline
if(m_column > 0)
m_column = 0;
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
m_endedStream = true;
return pToken;
}
// DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
PopIndentTo(m_column);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// DocumentEndToken
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// FlowSeqStartToken
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowMapStartToken
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowSeqEndToken
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowMapEndToken
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowEntryToken
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
{
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// BlockEntryToken
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
{
// we better be in the block context!
if(m_flowLevel == 0) {
// can we put it here?
if(!m_simpleKeyAllowed)
throw IllegalBlockEntry();
PushIndentTo(m_column, true); // , -1
} else {
// TODO: throw?
}
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// KeyToken
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapKey();
PushIndentTo(m_column, false);
}
// TODO: "remove simple key"
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// ValueToken
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
{
// TODO: Is it a simple key?
if(false) {
} else {
// If not, ...
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapValue();
PushIndentTo(m_column, false);
}
}
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// PlainScalarToken
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
// TODO: "save simple key"
m_simpleKeyAllowed = false;
// now eat and store the scalar
std::string scalar, whitespace, leadingBreaks, trailingBreaks;
bool leadingBlanks = false;
while(INPUT) {
// doc start/end tokens
if(IsDocumentStart() || IsDocumentEnd())
break;
// comment
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
if(leadingBlanks) {
if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') {
// fold line break?
if(trailingBreaks.empty())
scalar += ' ';
else {
scalar += trailingBreaks;
trailingBreaks = "";
}
} else {
scalar += leadingBreaks + trailingBreaks;
leadingBreaks = "";
trailingBreaks = "";
}
} else if(!whitespace.empty()) {
scalar += whitespace;
whitespace = "";
}
// finally, read the character!
scalar += GetChar();
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
// maybe store this character
if(!leadingBlanks)
whitespace += GetChar();
else
Eat(1);
} else {
// where to store this character?
if(!leadingBlanks) {
leadingBlanks = true;
whitespace = "";
leadingBreaks += GetChar();
} else
trailingBreaks += GetChar();
}
}
// and finally break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
}
// now modify our token
pToken->SetValue(scalar);
if(leadingBlanks)
m_simpleKeyAllowed = true;
return pToken;
}
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// The main scanning function // The main scanning function
......
...@@ -5,56 +5,11 @@ ...@@ -5,56 +5,11 @@
#include <queue> #include <queue>
#include <stack> #include <stack>
#include <set> #include <set>
#include "regex.h"
namespace YAML namespace YAML
{ {
class Token; class Token;
namespace Exp
{
// misc
const RegEx Blank = RegEx(' ') || RegEx('\t');
const RegEx Break = RegEx('\n');
const RegEx BlankOrBreak = Blank || Break;
// actual tags
const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
const RegEx Key = RegEx('?'),
KeyInFlow = RegEx('?') + BlankOrBreak;
const RegEx Value = RegEx(':'),
ValueInFlow = RegEx(':') + BlankOrBreak;
const RegEx Comment = RegEx('#');
// Plain scalar rules:
// . Cannot start with a blank.
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
// . In the block context - ? : must be not be followed with a space.
// . In the flow context ? : are illegal and - must not be followed with a space.
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)),
PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
const RegEx EndScalar = RegEx(':') + BlankOrBreak,
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}");
}
namespace Keys
{
const char FlowSeqStart = '[';
const char FlowSeqEnd = ']';
const char FlowMapStart = '{';
const char FlowMapEnd = '}';
const char FlowEntry = ',';
const char Alias = '*';
const char Anchor = '&';
const char Tag = '!';
const char LiteralScalar = '|';
const char FoldedScalar = '>';
}
class Scanner class Scanner
{ {
public: public:
......
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
namespace YAML
{
///////////////////////////////////////////////////////////////////////
// Specialization for scanning specific tokens
// StreamStartToken
template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
{
m_startedStream = true;
m_simpleKeyAllowed = true;
m_indents.push(-1);
return pToken;
}
// StreamEndToken
template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
{
// force newline
if(m_column > 0)
m_column = 0;
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
m_endedStream = true;
return pToken;
}
// DocumentStartToken
template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
{
PopIndentTo(m_column);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// DocumentEndToken
template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
{
PopIndentTo(-1);
// TODO: "reset simple keys"
m_simpleKeyAllowed = false;
// eat
Eat(3);
return pToken;
}
// FlowSeqStartToken
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowMapStartToken
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{
// TODO: "save simple key"
// TODO: increase flow level
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// FlowSeqEndToken
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowMapEndToken
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{
// TODO: "remove simple key"
// TODO: decrease flow level
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// FlowEntryToken
template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
{
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// BlockEntryToken
template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
{
// we better be in the block context!
if(m_flowLevel == 0) {
// can we put it here?
if(!m_simpleKeyAllowed)
throw IllegalBlockEntry();
PushIndentTo(m_column, true); // , -1
} else {
// TODO: throw?
}
// TODO: "remove simple key"
m_simpleKeyAllowed = true;
// eat
Eat(1);
return pToken;
}
// KeyToken
template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
{
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapKey();
PushIndentTo(m_column, false);
}
// TODO: "remove simple key"
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// ValueToken
template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
{
// TODO: Is it a simple key?
if(false) {
} else {
// If not, ...
// are we in block context?
if(m_flowLevel == 0) {
if(!m_simpleKeyAllowed)
throw IllegalMapValue();
PushIndentTo(m_column, false);
}
}
// can only put a simple key here if we're in block context
if(m_flowLevel == 0)
m_simpleKeyAllowed = true;
else
m_simpleKeyAllowed = false;
// eat
Eat(1);
return pToken;
}
// PlainScalarToken
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
// TODO: "save simple key"
m_simpleKeyAllowed = false;
// now eat and store the scalar
std::string scalar, whitespace, leadingBreaks, trailingBreaks;
bool leadingBlanks = false;
while(INPUT) {
// doc start/end tokens
if(IsDocumentStart() || IsDocumentEnd())
break;
// comment
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
if(leadingBlanks) {
if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') {
// fold line break?
if(trailingBreaks.empty())
scalar += ' ';
else {
scalar += trailingBreaks;
trailingBreaks = "";
}
} else {
scalar += leadingBreaks + trailingBreaks;
leadingBreaks = "";
trailingBreaks = "";
}
} else if(!whitespace.empty()) {
scalar += whitespace;
whitespace = "";
}
// finally, read the character!
scalar += GetChar();
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
if(leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
// maybe store this character
if(!leadingBlanks)
whitespace += GetChar();
else
Eat(1);
} else {
// where to store this character?
if(!leadingBlanks) {
leadingBlanks = true;
whitespace = "";
leadingBreaks += GetChar();
} else
trailingBreaks += GetChar();
}
}
// and finally break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
}
// now modify our token
pToken->SetValue(scalar);
if(leadingBlanks)
m_simpleKeyAllowed = true;
return pToken;
}
}
...@@ -197,6 +197,10 @@ ...@@ -197,6 +197,10 @@
RelativePath=".\scanner.cpp" RelativePath=".\scanner.cpp"
> >
</File> </File>
<File
RelativePath=".\scantoken.cpp"
>
</File>
<File <File
RelativePath=".\sequence.cpp" RelativePath=".\sequence.cpp"
> >
...@@ -219,6 +223,10 @@ ...@@ -219,6 +223,10 @@
RelativePath=".\exceptions.h" RelativePath=".\exceptions.h"
> >
</File> </File>
<File
RelativePath=".\exp.h"
>
</File>
<File <File
RelativePath=".\map.h" RelativePath=".\map.h"
> >
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment