scanner.h 2.77 KB
Newer Older
Jesse Beder's avatar
Jesse Beder committed
1
2
3
4
5
6
#pragma once

#include <ios>
#include <string>
#include <queue>
#include <stack>
Jesse Beder's avatar
Jesse Beder committed
7
#include <set>
8
#include "regex.h"
Jesse Beder's avatar
Jesse Beder committed
9
10
11
12
13

namespace YAML
{
	class Token;

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
	// Regular expressions (built from the project's RegEx combinators) that the
	// scanner uses to recognise YAML indicators and scalar boundaries.
	//
	// Note on construction: in this namespace a string passed together with
	// REGEX_OR is used as a set of alternative characters, while a string
	// passed on its own is used for a literal sequence (e.g. "---").
	namespace Exp
	{
		// misc
		const RegEx Blank = RegEx(' ') || RegEx('\t');
		const RegEx Break = RegEx('\n');
		const RegEx BlankOrBreak = Blank || Break;

		// actual tags

		// Document markers: the three-character marker followed by a blank,
		// a line break, EOF, or the empty RegEx().
		const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
		const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());

		// A '-' followed by a blank, a line break or EOF.
		const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));

		// The "InFlow" variants additionally require a trailing blank or break.
		const RegEx Key = RegEx('?'),
		            KeyInFlow = RegEx('?') + BlankOrBreak;
		const RegEx Value = RegEx(':'),
		            ValueInFlow = RegEx(':') + BlankOrBreak;
		const RegEx Comment = RegEx('#');

		// Plain scalar rules:
		// . Cannot start with a blank.
		// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
		// . In the block context - ? : must not be followed with a space.
		// . In the flow context ? : are illegal and - must not be followed with a space.
		//
		// "-?:" is a set of alternatives (any one of '-', '?', ':' followed by
		// a blank), so it needs REGEX_OR just like the other character sets.
		const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)),
		            PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));

		// A ':' that is immediately followed by something other than a blank or break.
		const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;

		// Scalar terminators. In the flow context any single one of the listed
		// indicator characters ends the scalar, hence REGEX_OR on the set.
		const RegEx EndScalar = RegEx(':') + BlankOrBreak,
		            EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}", REGEX_OR);
	}

Jesse Beder's avatar
Jesse Beder committed
44
45
	// Single-character indicators that the scanner compares against directly
	// (as opposed to the multi-character patterns expressed as RegEx objects).
	namespace Keys
	{
		// flow collection delimiters
		const char FlowSeqStart = '[';
		const char FlowSeqEnd = ']';
		const char FlowMapStart = '{';
		const char FlowMapEnd = '}';
		const char FlowEntry = ',';

		// node properties
		const char Alias = '*';
		const char Anchor = '&';
		const char Tag = '!';

		// block scalar headers
		const char LiteralScalar = '|';
		const char FoldedScalar = '>';
	}

	// Reads characters from an input stream and produces Token objects.
	//
	// NOTE(review): the class keeps raw Token pointers and declares a
	// destructor; if the destructor releases those tokens, copying a Scanner
	// would be unsafe. Confirm against the implementation before relying on
	// the implicitly generated copy constructor.
	class Scanner
	{
	public:
		// Binds the scanner to `in`. Only a reference is stored, so the
		// stream must outlive the scanner.
		Scanner(std::istream& in);
		~Scanner();

		// Token-level scanning steps (definitions live in the implementation file).
		void ScanNextToken();
		void ScanToNextToken();

		// Indentation bookkeeping; presumably operates on m_indents -- TODO confirm.
		void PushIndentTo(int column, bool sequence);
		void PopIndentTo(int column);

		void Scan();

	private:
		// low-level character access
		char GetChar();
		void Eat(int n = 1);
		void EatLineBreak();

		// predicates used while scanning
		bool IsWhitespaceToBeEaten(char ch);
		bool IsDocumentStart();
		bool IsDocumentEnd();
		bool IsBlockEntry();
		bool IsKey();
		bool IsValue();
		bool IsPlainScalar();

		// per-token-type scanning helpers, parameterised on the token class
		template <typename T> void ScanAndEnqueue(T *pToken);
		template <typename T> T *ScanToken(T *pToken);

	private:
		// the stream
		std::istream& INPUT;
		int m_column;

		// the output (tokens)
		std::queue <Token *> m_tokens;
		std::set <Token *> m_limboTokens;

		// state info
		bool m_startedStream, m_endedStream;
		bool m_simpleKeyAllowed;
		int m_flowLevel;                // number of unclosed '[' and '{' indicators
		std::stack <int> m_indents;
	};
}