// scanner.cpp
#include "crt.h"
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include <cassert>

namespace YAML
{
	// Scanner
	// . Initializes INPUT from the given stream. Nothing is scanned yet:
	//   the stream is flagged as neither started nor ended, and simple
	//   keys stay disallowed until StartStream() turns them on.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false)
	{
	}

	// ~Scanner
	// . No explicit cleanup is performed here.
	Scanner::~Scanner() {}

	// empty
	// . Scans ahead as far as necessary, then reports whether the token
	//   queue is empty (i.e., there are no more tokens to be read).
	bool Scanner::empty()
	{
		EnsureTokensInQueue();

		const bool noTokensLeft = m_tokens.empty();
		return noTokensLeft;
	}

	// pop
	// . Simply removes the next token on the queue.
	void Scanner::pop()
	{
		EnsureTokensInQueue();
32
33
		if(!m_tokens.empty()) {
			// Saved anchors shouldn't survive popping the document end marker
34
			if (m_tokens.front().type == Token::DOC_END) {
35
36
				ClearAnchors();
			}
37
			m_tokens.pop();
38
		}
39
40
41
42
43
44
45
46
47
48
	}

	// peek
	// . Returns a reference to the token at the front of the queue
	//   without removing it.
	// . The queue must not be empty; this is only enforced by an assert,
	//   so callers should check empty() before peeking.
	Token& Scanner::peek()
	{
		EnsureTokensInQueue();

		// open question from the original author: should this be an assert
		// at all, given that callers ought to test for emptiness first?
		assert(!m_tokens.empty());

#if 0
		static Token *pLast = 0;
		if(pLast != &m_tokens.front())
			std::cerr << "peek: " << m_tokens.front() << "\n";
		pLast = &m_tokens.front();
#endif

		Token& next = m_tokens.front();
		return next;
	}

	// EnsureTokensInQueue
	// . Scan until there's a valid token at the front of the queue,
	//   or we're sure the queue is empty.
	void Scanner::EnsureTokensInQueue()
	{
		while(1) {
			if(!m_tokens.empty()) {
				Token& token = m_tokens.front();

				// if this guy's valid, then we're done
69
				if(token.status == Token::VALID)
70
71
72
					return;

				// here's where we clean up the impossible tokens
73
				if(token.status == Token::INVALID) {
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
					m_tokens.pop();
					continue;
				}

				// note: what's left are the unverified tokens
			}

			// no token? maybe we've actually finished
			if(m_endedStream)
				return;

			// no? then scan...
			ScanNextToken();
		}
	}

	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
	void Scanner::ScanNextToken()
	{
		if(m_endedStream)
			return;

		if(!m_startedStream)
			return StartStream();

		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
		ScanToNextToken();

		// maybe need to end some blocks
105
		PopIndentToHere();
106
107
108
109

		// *****
		// And now branch based on the next few characters!
		// *****
110
		
111
		// end of stream
112
		if(!INPUT)
113
114
			return EndStream();

115
		if(INPUT.column() == 0 && INPUT.peek() == Keys::Directive)
116
117
118
			return ScanDirective();

		// document token
119
		if(INPUT.column() == 0 && Exp::DocStart.Matches(INPUT))
120
121
			return ScanDocStart();

122
		if(INPUT.column() == 0 && Exp::DocEnd.Matches(INPUT))
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
			return ScanDocEnd();

		// flow start/end/entry
		if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
			return ScanFlowStart();

		if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
			return ScanFlowEnd();
	
		if(INPUT.peek() == Keys::FlowEntry)
			return ScanFlowEntry();

		// block/map stuff
		if(Exp::BlockEntry.Matches(INPUT))
			return ScanBlockEntry();

139
		if((InBlockContext() ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
140
141
			return ScanKey();

142
		if((InBlockContext() ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
143
144
145
146
147
148
149
150
151
152
153
			return ScanValue();

		// alias/anchor
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
			return ScanAnchorOrAlias();

		// tag
		if(INPUT.peek() == Keys::Tag)
			return ScanTag();

		// special scalars
154
		if(InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
155
156
157
158
159
160
			return ScanBlockScalar();

		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanQuotedScalar();

		// plain scalars
161
		if((InBlockContext() ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
162
163
164
			return ScanPlainScalar();

		// don't know what it is!
165
		throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
166
167
168
169
170
171
172
173
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
174
			while(INPUT && IsWhitespaceToBeEaten(INPUT.peek()))
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
				INPUT.eat(1);

			// then eat a comment
			if(Exp::Comment.Matches(INPUT)) {
				// eat until line break
				while(INPUT && !Exp::Break.Matches(INPUT))
					INPUT.eat(1);
			}

			// if it's NOT a line break, then we're done!
			if(!Exp::Break.Matches(INPUT))
				break;

			// otherwise, let's eat the line break and keep going
			int n = Exp::Break.Match(INPUT);
			INPUT.eat(n);

			// oh yeah, and let's get rid of that simple key
193
			InvalidateSimpleKey();
194
195

			// new line - we may be able to accept a simple key now
196
			if(InBlockContext())
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
				m_simpleKeyAllowed = true;
        }
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
	bool Scanner::IsWhitespaceToBeEaten(char ch)
	{
		if(ch == ' ')
			return true;

216
		if(ch == '\t' && (InFlowContext() || !m_simpleKeyAllowed))
217
218
219
220
221
222
223
224
225
226
227
			return true;

		return false;
	}

	// StartStream
	// . Set the initial conditions for starting a stream.
	void Scanner::StartStream()
	{
		m_startedStream = true;
		m_simpleKeyAllowed = true;
228
		m_indents.push(IndentMarker(-1, IndentMarker::NONE));
229
		m_anchors.clear();
230
231
232
233
234
235
236
	}

	// EndStream
	// . Close out the stream, finish up, etc.
	void Scanner::EndStream()
	{
		// force newline
237
238
		if(INPUT.column() > 0)
			INPUT.ResetColumn();
239

240
		PopAllIndents();
241
		PopAllSimpleKeys();
242
243
244
245
246
247
248
249

		m_simpleKeyAllowed = false;
		m_endedStream = true;
	}

	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
250
251
	// . Returns the indent marker it generates (if any).
	Scanner::IndentMarker *Scanner::PushIndentTo(int column, IndentMarker::INDENT_TYPE type)
252
253
	{
		// are we in flow?
254
		if(InFlowContext())
255
			return 0;
256
257
258
		
		IndentMarker indent(column, type);
		const IndentMarker& lastIndent = m_indents.top();
259
260

		// is this actually an indentation?
261
262
263
		if(indent.column < lastIndent.column)
			return 0;
		if(indent.column == lastIndent.column && !(indent.type == IndentMarker::SEQ && lastIndent.type == IndentMarker::MAP))
264
265
			return 0;

266
		// push a start token
267
		if(type == IndentMarker::SEQ)
268
			m_tokens.push(Token(Token::BLOCK_SEQ_START, INPUT.mark()));
269
		else if(type == IndentMarker::MAP)
270
			m_tokens.push(Token(Token::BLOCK_MAP_START, INPUT.mark()));
271
272
		else
			assert(false);
273
		indent.pStartToken = &m_tokens.back();
274

275
276
277
		// and then the indent
		m_indents.push(indent);
		return &m_indents.top();
278
279
	}

280
281
	// PopIndentToHere
	// . Pops indentations off the stack until we reach the current indentation level,
282
	//   and enqueues the proper token each time.
283
	void Scanner::PopIndentToHere()
284
285
	{
		// are we in flow?
286
		if(InFlowContext())
287
288
289
			return;

		// now pop away
290
291
292
293
294
295
296
297
		while(!m_indents.empty()) {
			const IndentMarker& indent = m_indents.top();
			if(indent.column < INPUT.column())
				break;
			if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry.Matches(INPUT)))
				break;
				
			PopIndent();
298
299
		}
	}
300
301
	
	// PopAllIndents
302
	// . Pops all indentations (except for the base empty one) off the stack,
303
304
305
306
	//   and enqueues the proper token each time.
	void Scanner::PopAllIndents()
	{
		// are we in flow?
307
		if(InFlowContext())
308
309
310
			return;

		// now pop away
311
312
313
314
315
		while(!m_indents.empty()) {
			const IndentMarker& indent = m_indents.top();
			if(indent.type == IndentMarker::NONE)
				break;
			
316
			PopIndent();
317
		}
318
319
320
321
322
323
	}
	
	// PopIndent
	// . Pops a single indent, pushing the proper token
	void Scanner::PopIndent()
	{
324
325
		IndentMarker indent = m_indents.top();
		IndentMarker::INDENT_TYPE type = indent.type;
326
		m_indents.pop();
327
328
		if(!indent.isValid) {
			InvalidateSimpleKey();
329
			return;
330
		}
331
		
332
		if(type == IndentMarker::SEQ)
333
			m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
334
		else if(type == IndentMarker::MAP)
335
			m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
336
337
338
339
340
341
342
343
344
	}

	// GetTopIndent
	// . Returns the column of the indent on top of the stack, or 0 if
	//   the stack is empty.
	int Scanner::GetTopIndent() const
	{
		return m_indents.empty() ? 0 : m_indents.top().column;
	}

	// Save
	// . Saves a pointer to the Node object referenced by a particular anchor
	//   name.
	void Scanner::Save(const std::string& anchor, Node* value)
	{
		m_anchors[anchor] = value;
	}

	// Retrieve
	// . Looks up the Node pointer previously saved under an anchor name.
	// . Throws a ParserException (ErrorMsg::UNKNOWN_ANCHOR) if the anchor
	//   has not been defined.
	const Node *Scanner::Retrieve(const std::string& anchor) const
	{
		typedef std::map<std::string, const Node *> AnchorMap;

		const AnchorMap::const_iterator found = m_anchors.find(anchor);
		if(found == m_anchors.end())
			ThrowParserException(ErrorMsg::UNKNOWN_ANCHOR);

		return found->second;
	}

	// ThrowParserException
	// . Throws a ParserException with the current token location
	//   (if available).
	// . Does not parse any more tokens.
	void Scanner::ThrowParserException(const std::string& msg) const
	{
375
		Mark mark = Mark::null();
376
377
		if(!m_tokens.empty()) {
			const Token& token = m_tokens.front();
378
			mark = token.mark;
379
		}
380
		throw ParserException(mark, msg);
381
382
383
384
385
386
	}

	// ClearAnchors
	// . Forgets every anchor saved so far.
	void Scanner::ClearAnchors()
	{
		m_anchors.clear();
	}
}