scanner.h 2.8 KB
Newer Older
beder's avatar
beder committed
1
2
3
4
5
6
#pragma once

#include <ios>
#include <string>
#include <queue>
#include <stack>
beder's avatar
beder committed
7
#include <set>
8
#include "regex.h"
beder's avatar
beder committed
9
10
11
12
13

namespace YAML
{
	class Token;

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
	// Regular expressions used to recognise YAML indicators and scalar
	// boundaries. They are composed from RegEx (declared in regex.h) using its
	// overloaded operators; judging by the rules written out below, `||` is
	// alternation, `+` is sequencing and `!` is negation.
	//
	// A RegEx built from a string without an operation is used in this header
	// for literal sequences ("---", "..."); a *set* of characters therefore has
	// to be built with REGEX_OR.
	namespace Exp
	{
		// misc
		const RegEx Blank = RegEx(' ') || RegEx('\t');
		const RegEx Break = RegEx('\n');
		const RegEx BlankOrBreak = Blank || Break;

		// actual tags

		// document markers: the marker followed by a blank, a break, EOF or an empty RegEx
		const RegEx DocStart = RegEx("---") + (BlankOrBreak || RegEx(EOF) || RegEx());
		const RegEx DocEnd = RegEx("...") + (BlankOrBreak || RegEx(EOF) || RegEx());
		const RegEx BlockEntry = RegEx('-') + (BlankOrBreak || RegEx(EOF));
		const RegEx Key = RegEx('?'),
		            KeyInFlow = RegEx('?') + BlankOrBreak;
		const RegEx Value = RegEx(':'),
		            ValueInFlow = RegEx(':') + BlankOrBreak;
		const RegEx Comment = RegEx('#');

		// Plain scalar rules:
		// . Cannot start with a blank.
		// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
		// . In the block context - ? : must not be followed with a space.
		// . In the flow context ? : are illegal and - must not be followed with a space.
		//
		// "-?:" is a set of single indicators (any one of them followed by a
		// blank), so it needs REGEX_OR just like the other character sets here.
		const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)),
		            PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
		const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;

		// End-of-scalar detection. In the flow context any single one of the
		// listed indicators ends the scalar, hence REGEX_OR on the set.
		const RegEx EndScalar = RegEx(':') + BlankOrBreak,
		            EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}", REGEX_OR);
	}

beder's avatar
beder committed
44
45
	namespace Keys
	{
beder's avatar
beder committed
46
47
48
49
50
51
52
53
54
55
		// flow sequence / flow mapping delimiters and the entry separator
		const char FlowSeqStart = '[',
		           FlowSeqEnd = ']';
		const char FlowMapStart = '{',
		           FlowMapEnd = '}';
		const char FlowEntry = ',';

		// alias, anchor and tag indicators
		const char Alias = '*',
		           Anchor = '&',
		           Tag = '!';

		// literal ('|') and folded ('>') scalar indicators
		const char LiteralScalar = '|',
		           FoldedScalar = '>';
beder's avatar
beder committed
56
57
58
59
60
61
62
63
	}

	// Scanner: reads characters from an input stream and accumulates Token
	// pointers in a queue, tracking the current column, flow nesting level and
	// indentation stack while doing so.
	//
	// Only the declarations live in this header; the member functions are
	// defined elsewhere.
	class Scanner
	{
	public:
		// Binds the scanner to `in`. Only a reference to the stream is stored
		// (see INPUT below), so the stream must outlive the scanner.
		Scanner(std::istream& in);
		~Scanner();

		// token scanning
		void ScanNextToken();
		void ScanToNextToken();

		// indentation bookkeeping (operates on m_indents — presumably; confirm
		// against the definitions)
		void PushIndentTo(int column, bool sequence);
		void PopIndentTo(int column);

		void Scan();

	private:
		// Not copyable. The class declares a destructor, keeps a reference to
		// the stream and stores raw Token pointers, so a compiler-generated
		// copy would alias the same stream and the same tokens. These two are
		// declared but intentionally never defined.
		Scanner(const Scanner&);
		Scanner& operator = (const Scanner&);

		// low-level character access on the input
		char GetChar();
		void Eat(int n = 1);
		std::string Peek(int n);

		void EatLineBreak();

		// predicates used while scanning
		bool IsWhitespaceToBeEaten(char ch);
		bool IsDocumentStart();
		bool IsDocumentEnd();
		bool IsBlockEntry();
		bool IsKey();
		bool IsValue();
		bool IsPlainScalar();

		// token helpers, templated on the concrete token type T
		template <typename T> void ScanAndEnqueue(T *pToken);
		template <typename T> T *ScanToken(T *pToken);

	private:
		// the stream
		std::istream& INPUT;
		int m_column;

		// the output (tokens)
		std::queue <Token *> m_tokens;
		// NOTE(review): exact role of the "limbo" set is not visible from this
		// header — presumably tokens created but not (yet) placed in m_tokens.
		std::set <Token *> m_limboTokens;

		// state info
		bool m_startedStream, m_endedStream;
		bool m_simpleKeyAllowed;
		int m_flowLevel;                // number of unclosed '[' and '{' indicators
		std::stack <int> m_indents;
	};
}