Split off the specific regular expressions, and the specialized token-scanning...

Split off the specific regular expressions, and the specialized token-scanning functions, into their own files.

Split off the specific regular expressions, and the specialized token-scanning...
Split off the specific regular expressions, and the specialized token-scanning functions, into their own files.
8fca02fb · Jesse Beder · de290681 · 8fca02fb · 8fca02fb · 8fca02fb
Commit 8fca02fb authored Jun 27, 2008 by Jesse Beder
Hide whitespace changes
Inline Side-by-side

Showing with 356 additions and 332 deletions

exp.h exp.h +53 -0

scanner.cpp scanner.cpp +1 -287

scanner.h scanner.h +0 -45

scantoken.cpp scantoken.cpp +294 -0

yaml-reader.vcproj yaml-reader.vcproj +8 -0

No files found.
--- a/exp.h
+++ b/exp.h
+#pragma once
+#include "regex.h"
+namespace YAML
+{
+	////////////////////////////////////////////////////////////////////////////////
+	// Here we store a bunch of expressions for matching different parts of the file.
+	// NOTE(review): a RegEx built from a string with no operator is used below as a
+	// literal sequence (e.g. "---"); REGEX_OR is passed wherever any single character
+	// of the string should match - confirm against regex.h.
+	namespace Exp
+	{
+		// misc
+		const RegEx Blank = RegEx(' ') || RegEx('\t');
+		const RegEx Break = RegEx('\n');
+		const RegEx BlankOrBreak = Blank || Break;
+		// actual tags
+		const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
+		const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
+		const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
+		const RegEx Key = RegEx('?'),
+		            KeyInFlow = RegEx('?') + BlankOrBreak;
+		const RegEx Value = RegEx(':'),
+		            ValueInFlow = RegEx(':') + BlankOrBreak;
+		const RegEx Comment = RegEx('#');
+		// Plain scalar rules:
+		// . Cannot start with a blank.
+		// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
+		// . In the block context - ? : must not be followed with a space.
+		// . In the flow context ? : are illegal and - must not be followed with a space.
+		// (the indicator sets are character classes, hence REGEX_OR on each of them)
+		const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)),
+		            PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
+		// a ':' that is immediately followed by a non-blank, non-break character
+		const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
+		// what ends a plain scalar: ": " in either context, plus any single flow indicator in flow context
+		const RegEx EndScalar = RegEx(':') + BlankOrBreak,
+		            EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}", REGEX_OR);
+	}
+	// Single-character indicators.
+	namespace Keys
+	{
+		const char FlowSeqStart = '[';
+		const char FlowSeqEnd = ']';
+		const char FlowMapStart = '{';
+		const char FlowMapEnd = '}';
+		const char FlowEntry = ',';
+		const char Alias = '*';
+		const char Anchor = '&';
+		const char Tag = '!';
+		const char LiteralScalar = '|';
+		const char FoldedScalar = '>';
+	}
+}
--- a/scanner.cpp
+++ b/scanner.cpp
 #include "scanner.h"
 #include "token.h"
 #include "exceptions.h"
+#include "exp.h"
 #include <iostream>
 namespace YAML
@@ -124,9 +125,6 @@ namespace YAML
 		return Exp::PlainScalar.Matches(INPUT);
 	}
-	///////////////////////////////////////////////////////////////////////
-	// Specialization for scanning specific tokens
 	// ScanAndEnqueue
 	// . Scans the token, then pushes it in the queue.
 	// . Note: we also use a set of "limbo tokens", i.e., tokens
@@ -141,290 +139,6 @@ namespace YAML
 		m_limboTokens.erase(pToken);
 	}
-	// StreamStartToken
-	template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
-	{
-		m_startedStream = true;
-		m_simpleKeyAllowed = true;
-		m_indents.push(-1);
-		return pToken;
-	}
-	// StreamEndToken
-	template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
-	{
-		// force newline
-		if(m_column > 0)
-			m_column = 0;
-		PopIndentTo(-1);
-		// TODO: "reset simple keys"
-		m_simpleKeyAllowed = false;
-		m_endedStream = true;
-		return pToken;
-	}
-	// DocumentStartToken
-	template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
-	{
-		PopIndentTo(m_column);
-		// TODO: "reset simple keys"
-		m_simpleKeyAllowed = false;
-		// eat
-		Eat(3);
-		return pToken;
-	}
-	// DocumentEndToken
-	template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
-	{
-		PopIndentTo(-1);
-		// TODO: "reset simple keys"
-		m_simpleKeyAllowed = false;
-		// eat
-		Eat(3);
-		return pToken;
-	}
-	// FlowSeqStartToken
-	template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
-	{
-		// TODO: "save simple key"
-		// TODO: increase flow level
-		m_simpleKeyAllowed = true;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// FlowMapStartToken
-	template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
-	{
-		// TODO: "save simple key"
-		// TODO: increase flow level
-		m_simpleKeyAllowed = true;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// FlowSeqEndToken
-	template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
-	{
-		// TODO: "remove simple key"
-		// TODO: decrease flow level
-		m_simpleKeyAllowed = false;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// FlowMapEndToken
-	template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
-	{
-		// TODO: "remove simple key"
-		// TODO: decrease flow level
-		m_simpleKeyAllowed = false;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// FlowEntryToken
-	template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
-	{
-		// TODO: "remove simple key"
-		m_simpleKeyAllowed = true;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// BlockEntryToken
-	template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
-	{
-		// we better be in the block context!
-		if(m_flowLevel == 0) {
-			// can we put it here?
-			if(!m_simpleKeyAllowed)
-				throw IllegalBlockEntry();
-			PushIndentTo(m_column, true);	// , -1
-		} else {
-			// TODO: throw?
-		}
-		// TODO: "remove simple key"
-		m_simpleKeyAllowed = true;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// KeyToken
-	template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
-	{
-		// are we in block context?
-		if(m_flowLevel == 0) {
-			if(!m_simpleKeyAllowed)
-				throw IllegalMapKey();
-			PushIndentTo(m_column, false);
-		}
-		// TODO: "remove simple key"
-		// can only put a simple key here if we're in block context
-		if(m_flowLevel == 0)
-			m_simpleKeyAllowed = true;
-		else
-			m_simpleKeyAllowed = false;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// ValueToken
-	template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
-	{
-		// TODO: Is it a simple key?
-		if(false) {
-		} else {
-			// If not, ...
-			// are we in block context?
-			if(m_flowLevel == 0) {
-				if(!m_simpleKeyAllowed)
-					throw IllegalMapValue();
-				PushIndentTo(m_column, false);
-			}
-		}
-		// can only put a simple key here if we're in block context
-		if(m_flowLevel == 0)
-			m_simpleKeyAllowed = true;
-		else
-			m_simpleKeyAllowed = false;
-		// eat
-		Eat(1);
-		return pToken;
-	}
-	// PlainScalarToken
-	template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
-	{
-		// TODO: "save simple key"
-		m_simpleKeyAllowed = false;
-		// now eat and store the scalar
-		std::string scalar, whitespace, leadingBreaks, trailingBreaks;
-		bool leadingBlanks = false;
-		while(INPUT) {
-			// doc start/end tokens
-			if(IsDocumentStart() || IsDocumentEnd())
-				break;
-			// comment
-			if(Exp::Comment.Matches(INPUT))
-				break;
-			// first eat non-blanks
-			while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
-				// illegal colon in flow context
-				if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
-					throw IllegalScalar();
-				// characters that might end the scalar
-				if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
-					break;
-				if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
-					break;
-				if(leadingBlanks) {
-					if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') {
-						// fold line break?
-						if(trailingBreaks.empty())
-							scalar += ' ';
-						else {
-							scalar += trailingBreaks;
-							trailingBreaks = "";
-						}
-					} else {
-						scalar += leadingBreaks + trailingBreaks;
-						leadingBreaks = "";
-						trailingBreaks = "";
-					}
-				} else if(!whitespace.empty()) {
-					scalar += whitespace;
-					whitespace = "";
-				}
-				// finally, read the character!
-				scalar += GetChar();
-			}
-			// did we hit a non-blank character that ended us?
-			if(!Exp::BlankOrBreak.Matches(INPUT))
-				break;
-			// now eat blanks
-			while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
-				if(Exp::Blank.Matches(INPUT)) {
-					if(leadingBlanks && m_column <= m_indents.top())
-						throw IllegalTabInScalar();
-					// maybe store this character
-					if(!leadingBlanks)
-						whitespace += GetChar();
-					else
-						Eat(1);
-				} else {
-					// where to store this character?
-					if(!leadingBlanks) {
-						leadingBlanks = true;
-						whitespace = "";
-						leadingBreaks += GetChar();
-					} else
-						trailingBreaks += GetChar();
-				}
-			}
-			// and finally break if we're below the indentation level
-			if(m_flowLevel == 0 && m_column <= m_indents.top())
-				break;
-		}
-		// now modify our token
-		pToken->SetValue(scalar);
-		if(leadingBlanks)
-			m_simpleKeyAllowed = true;
-		return pToken;
-	}
 	///////////////////////////////////////////////////////////////////////
 	// The main scanning function

--- a/scanner.h
+++ b/scanner.h
@@ -5,56 +5,11 @@
 #include <queue>
 #include <stack>
 #include <set>
-#include "regex.h"
 namespace YAML
 {
 	class Token;
-	namespace Exp
-	{
-		// misc
-		const RegEx Blank = RegEx(' ') || RegEx('\t');
-		const RegEx Break = RegEx('\n');
-		const RegEx BlankOrBreak = Blank || Break;
-		// actual tags
-		const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
-		const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
-		const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
-		const RegEx Key = RegEx('?'),
-		            KeyInFlow = RegEx('?') + BlankOrBreak;
-		const RegEx Value = RegEx(':'),
-		            ValueInFlow = RegEx(':') + BlankOrBreak;
-		const RegEx Comment = RegEx('#');
-		// Plain scalar rules:
-		// . Cannot start with a blank.
-		// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
-		// . In the block context - ? : must be not be followed with a space.
-		// . In the flow context ? : are illegal and - must not be followed with a space.
-		const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)),
-	                PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
-		const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
-		const RegEx EndScalar = RegEx(':') + BlankOrBreak,
-		            EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}");
-	}
-	namespace Keys
-	{
-		const char FlowSeqStart = '[';
-		const char FlowSeqEnd = ']';
-		const char FlowMapStart = '{';
-		const char FlowMapEnd = '}';
-		const char FlowEntry = ',';
-		const char Alias = '*';
-		const char Anchor = '&';
-		const char Tag = '!';
-		const char LiteralScalar = '|';
-		const char FoldedScalar = '>';
-	}
 	class Scanner
 	{
 	public:

--- a/scantoken.cpp
+++ b/scantoken.cpp
+#include "scanner.h"
+#include "token.h"
+#include "exceptions.h"
+#include "exp.h"
+namespace YAML
+{
+	///////////////////////////////////////////////////////////////////////
+	// Specialization for scanning specific tokens
+	// StreamStartToken
+	// . Seeds the indentation stack with -1, allows a simple key,
+	//   and records that the stream has started.
+	template <> StreamStartToken *Scanner::ScanToken(StreamStartToken *pToken)
+	{
+		m_indents.push(-1);
+		m_simpleKeyAllowed = true;
+		m_startedStream = true;
+		return pToken;
+	}
+	// StreamEndToken
+	// . Resets a positive column to zero, pops the indentation back to -1,
+	//   disallows a simple key, and records that the stream has ended.
+	template <> StreamEndToken *Scanner::ScanToken(StreamEndToken *pToken)
+	{
+		// force newline (a non-positive column is left as it is)
+		if(m_column > 0) {
+			m_column = 0;
+		}
+
+		PopIndentTo(-1);
+
+		// TODO: "reset simple keys"
+		m_endedStream = true;
+		m_simpleKeyAllowed = false;
+		return pToken;
+	}
+	// DocumentStartToken
+	// . Pops the indentation to the current column, disallows a simple key,
+	//   and consumes the three-character marker.
+	template <> DocumentStartToken *Scanner::ScanToken(DocumentStartToken *pToken)
+	{
+		const int markerLength = 3;
+
+		PopIndentTo(m_column);
+
+		// TODO: "reset simple keys"
+		m_simpleKeyAllowed = false;
+
+		Eat(markerLength);
+		return pToken;
+	}
+	// DocumentEndToken
+	// . Pops the indentation back to -1, disallows a simple key,
+	//   and consumes the three-character marker.
+	template <> DocumentEndToken *Scanner::ScanToken(DocumentEndToken *pToken)
+	{
+		const int markerLength = 3;
+
+		PopIndentTo(-1);
+
+		// TODO: "reset simple keys"
+		m_simpleKeyAllowed = false;
+
+		Eat(markerLength);
+		return pToken;
+	}
+	// FlowSeqStartToken
+	// . Allows a simple key and consumes the one-character indicator.
+	template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
+	{
+		const int indicatorLength = 1;
+
+		// TODO: "save simple key"
+		// TODO: increase flow level
+		m_simpleKeyAllowed = true;
+
+		Eat(indicatorLength);
+		return pToken;
+	}
+	// FlowMapStartToken
+	// . Allows a simple key and consumes the one-character indicator.
+	template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
+	{
+		const int indicatorLength = 1;
+
+		// TODO: "save simple key"
+		// TODO: increase flow level
+		m_simpleKeyAllowed = true;
+
+		Eat(indicatorLength);
+		return pToken;
+	}
+	// FlowSeqEndToken
+	// . Disallows a simple key and consumes the one-character indicator.
+	template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
+	{
+		const int indicatorLength = 1;
+
+		// TODO: "remove simple key"
+		// TODO: decrease flow level
+		m_simpleKeyAllowed = false;
+
+		Eat(indicatorLength);
+		return pToken;
+	}
+	// FlowMapEndToken
+	// . Disallows a simple key and consumes the one-character indicator.
+	template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
+	{
+		const int indicatorLength = 1;
+
+		// TODO: "remove simple key"
+		// TODO: decrease flow level
+		m_simpleKeyAllowed = false;
+
+		Eat(indicatorLength);
+		return pToken;
+	}
+	// FlowEntryToken
+	// . Allows a simple key and consumes the one-character indicator.
+	template <> FlowEntryToken *Scanner::ScanToken(FlowEntryToken *pToken)
+	{
+		const int indicatorLength = 1;
+
+		// TODO: "remove simple key"
+		m_simpleKeyAllowed = true;
+
+		Eat(indicatorLength);
+		return pToken;
+	}
+	// BlockEntryToken
+	// . In block context (flow level zero): throws IllegalBlockEntry if a simple key
+	//   is not allowed here, otherwise pushes the current column as an indentation.
+	// . In either context: allows a simple key and consumes the one-character indicator.
+	template <> BlockEntryToken *Scanner::ScanToken(BlockEntryToken *pToken)
+	{
+		// we better be in the block context!
+		// TODO: throw if we are not?
+		const bool inBlockContext = (m_flowLevel == 0);
+		if(inBlockContext) {
+			// can we put it here?
+			if(!m_simpleKeyAllowed) {
+				throw IllegalBlockEntry();
+			}
+			PushIndentTo(m_column, true);	// , -1
+		}
+
+		// TODO: "remove simple key"
+		m_simpleKeyAllowed = true;
+
+		Eat(1);
+		return pToken;
+	}
+	// KeyToken
+	// . In block context (flow level zero): throws IllegalMapKey if a simple key
+	//   is not allowed here, otherwise pushes the current column as an indentation.
+	// . Afterwards a simple key is allowed only in block context.
+	// . Consumes the one-character indicator.
+	template <> KeyToken *Scanner::ScanToken(KeyToken *pToken)
+	{
+		// are we in block context?
+		if(m_flowLevel == 0) {
+			if(!m_simpleKeyAllowed) {
+				throw IllegalMapKey();
+			}
+			PushIndentTo(m_column, false);
+		}
+
+		// TODO: "remove simple key"
+
+		// can only put a simple key here if we're in block context
+		m_simpleKeyAllowed = (m_flowLevel == 0);
+
+		Eat(1);
+		return pToken;
+	}
+	// ValueToken
+	// . In block context (flow level zero): throws IllegalMapValue if a simple key
+	//   is not allowed here, otherwise pushes the current column as an indentation.
+	// . Afterwards a simple key is allowed only in block context.
+	// . Consumes the one-character indicator.
+	template <> ValueToken *Scanner::ScanToken(ValueToken *pToken)
+	{
+		// TODO: Is it a simple key?
+		// Simple keys are not detected yet, so every value is handled as "not a simple key".
+
+		// are we in block context?
+		if(m_flowLevel == 0) {
+			if(!m_simpleKeyAllowed) {
+				throw IllegalMapValue();
+			}
+			PushIndentTo(m_column, false);
+		}
+
+		// can only put a simple key here if we're in block context
+		m_simpleKeyAllowed = (m_flowLevel == 0);
+
+		Eat(1);
+		return pToken;
+	}
+	// PlainScalarToken
+	// . Reads a plain scalar from INPUT and stores its text in the token.
+	// . Stops at a document start/end, a comment, a context-specific end-of-scalar
+	//   expression, or (in block context) once the column is at or below the
+	//   current indentation.
+	// . A single line break between two runs of text is folded into one space;
+	//   any further breaks are kept as they are.
+	// . Throws IllegalScalar for a ':' followed by a non-blank in flow context, and
+	//   IllegalTabInScalar for a blank at or below the indentation after a line break.
+	// . On return a simple key is allowed only if the scan ended right after a line break.
+	template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
+	{
+		// TODO: "save simple key"
+		m_simpleKeyAllowed = false;
+		// now eat and store the scalar
+		// . whitespace: blanks seen since the last text on the same line
+		// . leadingBreaks: the first line break since the last text
+		// . trailingBreaks: every line break after that first one
+		// . leadingBlanks: true while a line break is pending (not yet written to the scalar)
+		std::string scalar, whitespace, leadingBreaks, trailingBreaks;
+		bool leadingBlanks = false;
+		while(INPUT) {
+			// doc start/end tokens
+			if(IsDocumentStart() || IsDocumentEnd())
+				break;
+			// comment
+			if(Exp::Comment.Matches(INPUT))
+				break;
+			// first eat non-blanks
+			while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
+				// illegal colon in flow context
+				if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
+					throw IllegalScalar();
+				// characters that might end the scalar
+				if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
+					break;
+				if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
+					break;
+				if(leadingBlanks) {
+					if(!leadingBreaks.empty() && leadingBreaks[0] == '\n') {
+						// fold line break?
+						if(trailingBreaks.empty())
+							scalar += ' ';
+						else {
+							scalar += trailingBreaks;
+							trailingBreaks = "";
+						}
+						leadingBreaks = "";
+					} else {
+						scalar += leadingBreaks + trailingBreaks;
+						leadingBreaks = "";
+						trailingBreaks = "";
+					}
+					// the pending break has been written exactly once; without this reset
+					// every following character would get another separator in front of it
+					leadingBlanks = false;
+				} else if(!whitespace.empty()) {
+					scalar += whitespace;
+					whitespace = "";
+				}
+				// finally, read the character!
+				scalar += GetChar();
+			}
+			// did we hit a non-blank character that ended us?
+			if(!Exp::BlankOrBreak.Matches(INPUT))
+				break;
+			// now eat blanks
+			while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
+				if(Exp::Blank.Matches(INPUT)) {
+					if(leadingBlanks && m_column <= m_indents.top())
+						throw IllegalTabInScalar();
+					// maybe store this character
+					if(!leadingBlanks)
+						whitespace += GetChar();
+					else
+						Eat(1);
+				} else {
+					// where to store this character?
+					if(!leadingBlanks) {
+						leadingBlanks = true;
+						whitespace = "";
+						leadingBreaks += GetChar();
+					} else
+						trailingBreaks += GetChar();
+				}
+			}
+			// and finally break if we're below the indentation level
+			if(m_flowLevel == 0 && m_column <= m_indents.top())
+				break;
+		}
+		// now modify our token
+		pToken->SetValue(scalar);
+		if(leadingBlanks)
+			m_simpleKeyAllowed = true;
+		return pToken;
+	}
+}
--- a/yaml-reader.vcproj
+++ b/yaml-reader.vcproj
@@ -197,6 +197,10 @@
 				RelativePath=".\scanner.cpp"
 				>
 			</File>
+			<File
+				RelativePath=".\scantoken.cpp"
+				>
+			</File>
 			<File
 				RelativePath=".\sequence.cpp"
 				>
@@ -219,6 +223,10 @@
 				RelativePath=".\exceptions.h"
 				>
 			</File>
+			<File
+				RelativePath=".\exp.h"
+				>
+			</File>
 			<File
 				RelativePath=".\map.h"
 				>