scanner.cpp 7.41 KB
Newer Older
Jesse Beder's avatar
Jesse Beder committed
1
2
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include <iostream>
#include <typeinfo>
Jesse Beder's avatar
Jesse Beder committed
6
7
8
9

namespace YAML
{
	// Builds a scanner that reads from 'in'. Every state flag starts out
	// false and the flow level starts at zero.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0)
	{
	}

	// Frees every token the scanner still holds: the ones waiting in the
	// queue and the ones parked in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each token as it reaches the front
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens are kept here precisely so that they get released too
		std::set <Token *>::const_iterator it = m_limboTokens.begin();
		while(it != m_limboTokens.end()) {
			delete *it;
			++it;
		}
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
Jesse Beder's avatar
Jesse Beder committed
36
	bool Scanner::IsWhitespaceToBeEaten(char ch)
Jesse Beder's avatar
Jesse Beder committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
	{
		if(ch == ' ')
			return true;

		if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

	// IsDocumentStart
	bool Scanner::IsDocumentStart()
	{
		// needs to be at the start of a new line
51
		if(INPUT.column != 0)
Jesse Beder's avatar
Jesse Beder committed
52
53
			return false;

54
		return Exp::DocStart.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
55
56
57
58
59
60
	}

	// IsDocumentEnd
	bool Scanner::IsDocumentEnd()
	{
		// needs to be at the start of a new line
61
		if(INPUT.column != 0)
Jesse Beder's avatar
Jesse Beder committed
62
63
			return false;

64
		return Exp::DocEnd.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
65
66
	}

Jesse Beder's avatar
Jesse Beder committed
67
68
69
	// IsBlockEntry
	bool Scanner::IsBlockEntry()
	{
70
		return Exp::BlockEntry.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
71
72
73
74
75
	}

	// IsKey
	bool Scanner::IsKey()
	{
76
		if(m_flowLevel > 0)
77
78
			return Exp::KeyInFlow.Matches(INPUT);
		return Exp::Key.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
79
80
81
82
83
	}

	// IsValue
	bool Scanner::IsValue()
	{
84
		if(m_flowLevel > 0)
85
86
			return Exp::ValueInFlow.Matches(INPUT);
		return Exp::Value.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
87
88
89
90
91
	}

	// IsPlainScalar
	bool Scanner::IsPlainScalar()
	{
92
		if(m_flowLevel > 0)
93
94
			return Exp::PlainScalarInFlow.Matches(INPUT);
		return Exp::PlainScalar.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
	}

	// ScanAndEnqueue
	// . Scans the token, then pushes it in the queue.
	// . Note: we also use a set of "limbo tokens", i.e., tokens
	//   that haven't yet been pushed. This way, if ScanToken()
	//   throws an exception, we'll be keeping track of 'pToken'
	//   somewhere, and it will be automatically cleaned up when
	//   the Scanner destructs.
	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
	{
		m_limboTokens.insert(pToken);
		m_tokens.push(ScanToken(pToken));
		m_limboTokens.erase(pToken);
	}

Jesse Beder's avatar
Jesse Beder committed
111
112
113
	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
Jesse Beder's avatar
Jesse Beder committed
114
	void Scanner::ScanNextToken()
Jesse Beder's avatar
Jesse Beder committed
115
	{
116
117
118
		if(m_endedStream)
			return;

Jesse Beder's avatar
Jesse Beder committed
119
		if(!m_startedStream)
Jesse Beder's avatar
Jesse Beder committed
120
			return ScanAndEnqueue(new StreamStartToken);
Jesse Beder's avatar
Jesse Beder committed
121

Jesse Beder's avatar
Jesse Beder committed
122
		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
Jesse Beder's avatar
Jesse Beder committed
123
		ScanToNextToken();
Jesse Beder's avatar
Jesse Beder committed
124
125

		// check the latest simple key
126
		VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
127
128

		// maybe need to end some blocks
129
		PopIndentTo(INPUT.column);
Jesse Beder's avatar
Jesse Beder committed
130

Jesse Beder's avatar
Jesse Beder committed
131
132
133
134
135
		// *****
		// And now branch based on the next few characters!
		// *****

		// end of stream
Jesse Beder's avatar
Jesse Beder committed
136
		if(INPUT.peek() == EOF)
Jesse Beder's avatar
Jesse Beder committed
137
			return ScanAndEnqueue(new StreamEndToken);
Jesse Beder's avatar
Jesse Beder committed
138

Jesse Beder's avatar
Jesse Beder committed
139
		// document token
Jesse Beder's avatar
Jesse Beder committed
140
		if(IsDocumentStart())
Jesse Beder's avatar
Jesse Beder committed
141
			return ScanAndEnqueue(new DocumentStartToken);
Jesse Beder's avatar
Jesse Beder committed
142
143

		if(IsDocumentEnd())
Jesse Beder's avatar
Jesse Beder committed
144
			return ScanAndEnqueue(new DocumentEndToken);
Jesse Beder's avatar
Jesse Beder committed
145

Jesse Beder's avatar
Jesse Beder committed
146
		// flow start/end/entry
Jesse Beder's avatar
Jesse Beder committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

Jesse Beder's avatar
Jesse Beder committed
162
		// block/map stuff
Jesse Beder's avatar
Jesse Beder committed
163
164
165
166
167
168
169
170
171
		if(IsBlockEntry())
			return ScanAndEnqueue(new BlockEntryToken);

		if(IsKey())
			return ScanAndEnqueue(new KeyToken);

		if(IsValue())
			return ScanAndEnqueue(new ValueToken);

Jesse Beder's avatar
Jesse Beder committed
172
173
174
175
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
			return ScanAndEnqueue(new AnchorToken);

		// TODO: tag
Jesse Beder's avatar
Jesse Beder committed
176

177
178
179
		// special scalars
		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
			return ScanAndEnqueue(new BlockScalarToken);
Jesse Beder's avatar
Jesse Beder committed
180

181
182
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanAndEnqueue(new QuotedScalarToken);
Jesse Beder's avatar
Jesse Beder committed
183
184
185
186
187
188
189

		// plain scalars
		if(IsPlainScalar())
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
Jesse Beder's avatar
Jesse Beder committed
190
191
192
193
194
195
196
197
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
Jesse Beder's avatar
Jesse Beder committed
198
			while(IsWhitespaceToBeEaten(INPUT.peek()))
199
				INPUT.Eat(1);
Jesse Beder's avatar
Jesse Beder committed
200
201

			// then eat a comment
Jesse Beder's avatar
Jesse Beder committed
202
			if(Exp::Comment.Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
203
				// eat until line break
Jesse Beder's avatar
Jesse Beder committed
204
				while(INPUT && !Exp::Break.Matches(INPUT))
205
					INPUT.Eat(1);
Jesse Beder's avatar
Jesse Beder committed
206
207
208
			}

			// if it's NOT a line break, then we're done!
Jesse Beder's avatar
Jesse Beder committed
209
			if(!Exp::Break.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
210
211
212
				break;

			// otherwise, let's eat the line break and keep going
213
			INPUT.EatLineBreak();
Jesse Beder's avatar
Jesse Beder committed
214

Jesse Beder's avatar
Jesse Beder committed
215
			// oh yeah, and let's get rid of that simple key
216
			VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
217

Jesse Beder's avatar
Jesse Beder committed
218
219
220
221
222
223
			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

Jesse Beder's avatar
Jesse Beder committed
224
225
226
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
Jesse Beder's avatar
Jesse Beder committed
227
228
	// . Returns the token it generates (if any).
	Token *Scanner::PushIndentTo(int column, bool sequence)
Jesse Beder's avatar
Jesse Beder committed
229
230
231
	{
		// are we in flow?
		if(m_flowLevel > 0)
Jesse Beder's avatar
Jesse Beder committed
232
			return 0;
Jesse Beder's avatar
Jesse Beder committed
233
234
235

		// is this actually an indentation?
		if(column <= m_indents.top())
Jesse Beder's avatar
Jesse Beder committed
236
			return 0;
Jesse Beder's avatar
Jesse Beder committed
237
238
239
240
241
242
243

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
Jesse Beder's avatar
Jesse Beder committed
244
245

		return m_tokens.front();
Jesse Beder's avatar
Jesse Beder committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
	}

	// PopIndentTo
	// . Pops every indentation deeper than 'column' off the stack,
	//   enqueueing one BlockEndToken per indentation popped.
	void Scanner::PopIndentTo(int column)
	{
		// nothing to pop while the flow level is above zero
		if(m_flowLevel > 0)
			return;

		for(;;) {
			// stop once the stack is empty or its top is no deeper than 'column'
			if(m_indents.empty() || m_indents.top() <= column)
				break;

			m_indents.pop();
			m_tokens.push(new BlockEndToken);
		}
	}

264
265
266
	// GetNextToken
	// . Returns the next token on the queue, scanning more input only
	//   when we need to.
	// . Returns 0 once the stream has ended and no token can be handed out.
	Token *Scanner::GetNextToken()
	{
		for(;;) {
			// peek at the head of the queue, if there is one
			Token *pFront = m_tokens.empty() ? 0 : m_tokens.front();

			if(pFront) {
				// (here's where we clean up the impossible tokens)
				if(pFront->status == TS_INVALID) {
					m_tokens.pop();
					delete pFront;
					continue;
				}

				// an unverified token can't be handed out yet; anything
				// else is exactly what the caller wants
				const bool mustWait = (pFront->status == TS_UNVERIFIED);
				if(!mustWait) {
					m_tokens.pop();
					return pFront;
				}
			}

			// nothing to hand out - maybe we've actually finished
			if(m_endedStream)
				return 0;

			// otherwise scan some more and look again
			ScanNextToken();
		}
	}

	// temporary function for testing
	// . Pulls tokens with GetNextToken() until it returns null, printing each
	//   token's typeid name and the token itself to std::cout, then deleting it.
	// . typeid requires <typeinfo> to be included.
	void Scanner::Scan()
	{
		while(Token *pToken = GetNextToken()) {
			try {
				std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
			} catch(...) {
				// the token has been dequeued and is deleted here on the
				// normal path, so don't leak it if printing throws
				delete pToken;
				throw;
			}

			delete pToken;
		}
	}
}