scanner.cpp 6.91 KB
Newer Older
beder's avatar
beder committed
1
2
#include "scanner.h"
#include "token.h"
beder's avatar
beder committed
3
#include "exceptions.h"
4
#include "exp.h"
beder's avatar
beder committed
5
#include <iostream>
beder's avatar
beder committed
6
7
8
9

namespace YAML
{
	// Scanner
	// . Initializes INPUT from 'in' and resets the scanning state: the stream
	//   is neither started nor ended, simple keys are not allowed yet, and
	//   the flow level is zero.
	// . No scanning happens here; tokens are produced on demand by
	//   GetNextToken().
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0)
	{
	}

	// ~Scanner
	// . Frees every token the scanner still holds: the ones left in the
	//   queue and the ones parked in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each token before it is popped
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens were inserted by ScanAndEnqueue() and are only still
		// here if they never made it out of the set again
		std::set <Token *>::const_iterator it = m_limboTokens.begin();
		while(it != m_limboTokens.end()) {
			delete *it;
			++it;
		}
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
beder's avatar
beder committed
36
	bool Scanner::IsWhitespaceToBeEaten(char ch)
beder's avatar
beder committed
37
38
39
40
41
42
43
44
45
46
	{
		if(ch == ' ')
			return true;

		if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

beder's avatar
beder committed
47
48
49
50
51
52
53
54
55
56
57
58
59
60
	// ScanAndEnqueue
	// . Scans the token, then pushes it in the queue.
	// . Note: we also use a set of "limbo tokens", i.e., tokens
	//   that haven't yet been pushed. This way, if ScanToken()
	//   throws an exception, we'll be keeping track of 'pToken'
	//   somewhere, and it will be automatically cleaned up when
	//   the Scanner destructs.
	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
	{
		m_limboTokens.insert(pToken);
		m_tokens.push(ScanToken(pToken));
		m_limboTokens.erase(pToken);
	}

beder's avatar
beder committed
61
62
63
	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
beder's avatar
beder committed
64
	void Scanner::ScanNextToken()
beder's avatar
beder committed
65
	{
66
67
68
		if(m_endedStream)
			return;

beder's avatar
beder committed
69
		if(!m_startedStream)
beder's avatar
beder committed
70
			return ScanAndEnqueue(new StreamStartToken);
beder's avatar
beder committed
71

beder's avatar
beder committed
72
		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
beder's avatar
beder committed
73
		ScanToNextToken();
beder's avatar
beder committed
74
75

		// check the latest simple key
76
		VerifySimpleKey();
beder's avatar
beder committed
77
78

		// maybe need to end some blocks
79
		PopIndentTo(INPUT.column);
beder's avatar
beder committed
80

beder's avatar
beder committed
81
82
83
84
85
		// *****
		// And now branch based on the next few characters!
		// *****

		// end of stream
beder's avatar
beder committed
86
		if(INPUT.peek() == EOF)
beder's avatar
beder committed
87
			return ScanAndEnqueue(new StreamEndToken);
beder's avatar
beder committed
88

beder's avatar
beder committed
89
90
91
		if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)
			return ScanAndEnqueue(new DirectiveToken);

beder's avatar
beder committed
92
		// document token
beder's avatar
beder committed
93
		if(INPUT.column == 0 && Exp::DocStart.Matches(INPUT))
beder's avatar
beder committed
94
			return ScanAndEnqueue(new DocumentStartToken);
beder's avatar
beder committed
95

beder's avatar
beder committed
96
		if(INPUT.column == 0 && Exp::DocEnd.Matches(INPUT))
beder's avatar
beder committed
97
			return ScanAndEnqueue(new DocumentEndToken);
beder's avatar
beder committed
98

beder's avatar
beder committed
99
		// flow start/end/entry
beder's avatar
beder committed
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

beder's avatar
beder committed
115
		// block/map stuff
beder's avatar
beder committed
116
		if(Exp::BlockEntry.Matches(INPUT))
beder's avatar
beder committed
117
118
			return ScanAndEnqueue(new BlockEntryToken);

beder's avatar
beder committed
119
		if((m_flowLevel == 0 ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
beder's avatar
beder committed
120
121
			return ScanAndEnqueue(new KeyToken);

beder's avatar
beder committed
122
		if((m_flowLevel == 0 ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
beder's avatar
beder committed
123
124
			return ScanAndEnqueue(new ValueToken);

beder's avatar
beder committed
125
		// alias/anchor
beder's avatar
beder committed
126
127
128
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
			return ScanAndEnqueue(new AnchorToken);

beder's avatar
beder committed
129
130
131
		// tag
		if(INPUT.peek() == Keys::Tag)
			return ScanAndEnqueue(new TagToken);
beder's avatar
beder committed
132

beder's avatar
beder committed
133
134
135
		// special scalars
		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
			return ScanAndEnqueue(new BlockScalarToken);
beder's avatar
beder committed
136

beder's avatar
beder committed
137
138
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanAndEnqueue(new QuotedScalarToken);
beder's avatar
beder committed
139
140

		// plain scalars
beder's avatar
beder committed
141
		if((m_flowLevel == 0 ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
beder's avatar
beder committed
142
143
144
145
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
beder's avatar
beder committed
146
147
148
149
150
151
152
153
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
beder's avatar
beder committed
154
			while(IsWhitespaceToBeEaten(INPUT.peek()))
155
				INPUT.Eat(1);
beder's avatar
beder committed
156
157

			// then eat a comment
beder's avatar
beder committed
158
			if(Exp::Comment.Matches(INPUT)) {
beder's avatar
beder committed
159
				// eat until line break
beder's avatar
beder committed
160
				while(INPUT && !Exp::Break.Matches(INPUT))
161
					INPUT.Eat(1);
beder's avatar
beder committed
162
163
164
			}

			// if it's NOT a line break, then we're done!
beder's avatar
beder committed
165
			if(!Exp::Break.Matches(INPUT))
beder's avatar
beder committed
166
167
168
				break;

			// otherwise, let's eat the line break and keep going
169
			INPUT.EatLineBreak();
beder's avatar
beder committed
170

beder's avatar
beder committed
171
			// oh yeah, and let's get rid of that simple key
172
			VerifySimpleKey();
beder's avatar
beder committed
173

beder's avatar
beder committed
174
175
176
177
178
179
			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

beder's avatar
beder committed
180
181
182
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
beder's avatar
beder committed
183
184
	// . Returns the token it generates (if any).
	Token *Scanner::PushIndentTo(int column, bool sequence)
beder's avatar
beder committed
185
186
187
	{
		// are we in flow?
		if(m_flowLevel > 0)
beder's avatar
beder committed
188
			return 0;
beder's avatar
beder committed
189
190
191

		// is this actually an indentation?
		if(column <= m_indents.top())
beder's avatar
beder committed
192
			return 0;
beder's avatar
beder committed
193
194
195
196
197
198
199

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
beder's avatar
beder committed
200
201

		return m_tokens.front();
beder's avatar
beder committed
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
	}

	// PopIndentTo
	// . Unwinds the indentation stack down to 'column': every stored
	//   indentation that is deeper than 'column' is popped, and a
	//   BlockEndToken is enqueued for each one.
	// . Does nothing in flow context.
	void Scanner::PopIndentTo(int column)
	{
		// indentation is only tracked outside of flow context
		if(m_flowLevel > 0)
			return;

		while(!m_indents.empty()) {
			// stop at the first level that is not deeper than the target
			if(m_indents.top() <= column)
				break;

			m_indents.pop();
			m_tokens.push(new BlockEndToken);
		}
	}

220
221
222
	// GetNextToken
	// . Returns the next token on the queue, scanning only if we need to.
	// . The returned token has been removed from the queue.
	// . Returns 0 when no token is available and the stream has ended.
	Token *Scanner::GetNextToken()
	{
		for(;;) {
			// peek at the head of the queue, if there is one
			Token *pHead = m_tokens.empty() ? 0 : m_tokens.front();

			if(pHead) {
				// impossible tokens are cleaned up here and never handed out
				if(pHead->status == TS_INVALID) {
					m_tokens.pop();
					delete pHead;
					continue;
				}

				// anything except an unverified token is ready to go
				if(pHead->status != TS_UNVERIFIED) {
					m_tokens.pop();
					return pHead;
				}

				// an unverified token stays where it is; we have to scan
				// further before its fate is known
			}

			// nothing to hand out - maybe we've actually finished
			if(m_endedStream)
				return 0;

			// otherwise scan some more and look again
			ScanNextToken();
		}
	}

	// Scan
	// . Temporary function for testing: pulls tokens with GetNextToken()
	//   until it returns 0, prints each token's dynamic type name and the
	//   token itself to std::cout, then deletes it.
	// . NOTE(review): typeid needs <typeinfo>; this file does not include it
	//   directly - confirm one of the project headers does.
	void Scanner::Scan()
	{
		for(Token *pToken = GetNextToken(); pToken; pToken = GetNextToken()) {
			std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
			delete pToken;
		}
	}
}