scanner.cpp 9.61 KB
Newer Older
1
2
#include "scanner.h"
#include "token.h"
#include "yaml-cpp/exceptions.h"
#include "exp.h"
#include <cassert>
#include <memory>
#include <stdexcept>
7
8
9
10

namespace YAML
{
	// Scanner
	// . Initializes INPUT from the given stream and starts with every
	//   state flag cleared.
	Scanner::Scanner(std::istream& in)
		: INPUT(in)
		, m_startedStream(false)
		, m_endedStream(false)
		, m_simpleKeyAllowed(false)
		, m_canBeJSONFlow(false)
	{
	}

	// ~Scanner
	// . Empty body: no explicit cleanup is performed here.
	Scanner::~Scanner() {}

	// empty
	// . Tells the caller whether any token is left to read.
	// . Scans ahead first (EnsureTokensInQueue), so the answer is based on
	//   the queue after that call rather than on its current contents.
	bool Scanner::empty()
	{
		EnsureTokensInQueue();

		const bool nothingLeft = m_tokens.empty();
		return nothingLeft;
	}

	// pop
	// . Simply removes the next token on the queue.
	void Scanner::pop()
	{
		EnsureTokensInQueue();
32
		if(!m_tokens.empty())
33
34
35
36
37
38
39
40
41
42
43
			m_tokens.pop();
	}

	// peek
	// . Returns (but does not remove) the next token on the queue.
	// . Callers should check empty() before peeking. Peeking with no token
	//   available is a usage error: it trips the assert in debug builds and
	//   throws std::runtime_error when asserts are compiled out, instead of
	//   calling front() on an empty queue.
	Token& Scanner::peek()
	{
		EnsureTokensInQueue();

		// The assert alone disappears under NDEBUG, so back it with a throw
		// (same assert-then-throw pattern as GetStartTokenFor).
		assert(!m_tokens.empty());
		if(m_tokens.empty())
			throw std::runtime_error("yaml-cpp: internal error, peek() called with no tokens left");

#if 0
		static Token *pLast = 0;
		if(pLast != &m_tokens.front())
			std::cerr << "peek: " << m_tokens.front() << "\n";
		pLast = &m_tokens.front();
#endif

		return m_tokens.front();
	}

54
55
56
57
58
59
60
    // mark
    // . Reports the mark INPUT currently returns, i.e. the scanner's
    //   present position in the stream.
    Mark Scanner::mark() const
    {
        const Mark current = INPUT.mark();
        return current;
    }

61
62
63
64
65
66
67
68
69
70
	// EnsureTokensInQueue
	// . Scan until there's a valid token at the front of the queue,
	//   or we're sure the queue is empty.
	void Scanner::EnsureTokensInQueue()
	{
		while(1) {
			if(!m_tokens.empty()) {
				Token& token = m_tokens.front();

				// if this guy's valid, then we're done
71
				if(token.status == Token::VALID)
72
73
74
					return;

				// here's where we clean up the impossible tokens
75
				if(token.status == Token::INVALID) {
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
					m_tokens.pop();
					continue;
				}

				// note: what's left are the unverified tokens
			}

			// no token? maybe we've actually finished
			if(m_endedStream)
				return;

			// no? then scan...
			ScanNextToken();
		}
	}

	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
	void Scanner::ScanNextToken()
	{
		if(m_endedStream)
			return;

		if(!m_startedStream)
			return StartStream();

		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
		ScanToNextToken();

		// maybe need to end some blocks
107
		PopIndentToHere();
108
109
110
111

		// *****
		// And now branch based on the next few characters!
		// *****
112
		
113
		// end of stream
114
		if(!INPUT)
115
116
			return EndStream();

117
		if(INPUT.column() == 0 && INPUT.peek() == Keys::Directive)
118
119
120
			return ScanDirective();

		// document token
121
		if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT))
122
123
			return ScanDocStart();

124
		if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT))
125
126
127
128
129
130
131
132
133
134
135
136
137
			return ScanDocEnd();

		// flow start/end/entry
		if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
			return ScanFlowStart();

		if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
			return ScanFlowEnd();
	
		if(INPUT.peek() == Keys::FlowEntry)
			return ScanFlowEntry();

		// block/map stuff
138
		if(Exp::BlockEntry().Matches(INPUT))
139
140
			return ScanBlockEntry();

141
		if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT))
142
143
			return ScanKey();

144
		if(GetValueRegex().Matches(INPUT))
145
146
147
148
149
150
151
152
153
154
155
			return ScanValue();

		// alias/anchor
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
			return ScanAnchorOrAlias();

		// tag
		if(INPUT.peek() == Keys::Tag)
			return ScanTag();

		// special scalars
156
		if(InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
157
158
159
160
161
162
			return ScanBlockScalar();

		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanQuotedScalar();

		// plain scalars
163
		if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT))
164
165
166
			return ScanPlainScalar();

		// don't know what it is!
167
		throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
168
169
170
171
172
173
174
175
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
176
			while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
177
				if(InBlockContext() && Exp::Tab().Matches(INPUT))
178
					m_simpleKeyAllowed = false;
179
				INPUT.eat(1);
180
			}
181
182

			// then eat a comment
183
			if(Exp::Comment().Matches(INPUT)) {
184
				// eat until line break
185
				while(INPUT && !Exp::Break().Matches(INPUT))
186
187
188
189
					INPUT.eat(1);
			}

			// if it's NOT a line break, then we're done!
190
			if(!Exp::Break().Matches(INPUT))
191
192
193
				break;

			// otherwise, let's eat the line break and keep going
194
			int n = Exp::Break().Match(INPUT);
195
196
197
			INPUT.eat(n);

			// oh yeah, and let's get rid of that simple key
198
			InvalidateSimpleKey();
199
200

			// new line - we may be able to accept a simple key now
201
			if(InBlockContext())
202
203
204
205
206
207
208
209
				m_simpleKeyAllowed = true;
        }
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
210
211
212
213
214
215
216
	// . We can eat whitespace if it's a space or tab
	// . Note: originally tabs in block context couldn't be eaten
	//         "where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')"
	//   I think this is wrong, since tabs can be non-content whitespace; it's just
	//   that they can't contribute to indentation, so once you've seen a tab in a
	//   line, you can't start a simple key
217
218
219
220
221
	bool Scanner::IsWhitespaceToBeEaten(char ch)
	{
		if(ch == ' ')
			return true;

222
		if(ch == '\t')
223
224
225
226
227
			return true;

		return false;
	}

228
229
230
231
232
	// GetValueRegex
	// . Picks the regex used to recognize a value token:
	//   Exp::Value() in block context; in flow context either
	//   Exp::ValueInJSONFlow() or Exp::ValueInFlow(), depending on
	//   m_canBeJSONFlow.
	const RegEx& Scanner::GetValueRegex() const
	{
		if(!InBlockContext()) {
			if(m_canBeJSONFlow)
				return Exp::ValueInJSONFlow();
			return Exp::ValueInFlow();
		}

		return Exp::Value();
	}

238
239
240
241
242
243
	// StartStream
	// . Set the initial conditions for starting a stream.
	void Scanner::StartStream()
	{
		m_startedStream = true;
		m_simpleKeyAllowed = true;
244
		std::auto_ptr<IndentMarker> pIndent(new IndentMarker(-1, IndentMarker::NONE));
245
		m_indentRefs.push_back(pIndent);
246
		m_indents.push(&m_indentRefs.back());
247
248
249
250
251
252
253
	}

	// EndStream
	// . Close out the stream, finish up, etc.
	void Scanner::EndStream()
	{
		// force newline
254
255
		if(INPUT.column() > 0)
			INPUT.ResetColumn();
256

257
		PopAllIndents();
258
		PopAllSimpleKeys();
259
260
261
262
263

		m_simpleKeyAllowed = false;
		m_endedStream = true;
	}

264
265
266
267
268
269
270
271
272
273
274
275
276
277
	// PushToken
	// . Appends a token of the given type, stamped with the current input
	//   mark, to the queue and returns a pointer to the queued token.
	Token *Scanner::PushToken(Token::TYPE type)
	{
		m_tokens.push(Token(type, INPUT.mark()));

		Token& queued = m_tokens.back();
		return &queued;
	}

	// GetStartTokenFor
	// . Maps an indent type to the token that opens it:
	//   SEQ -> BLOCK_SEQ_START, MAP -> BLOCK_MAP_START.
	// . Any other type (including NONE) is an internal error: asserts in
	//   debug builds and throws std::runtime_error otherwise.
	Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const
	{
		if(type == IndentMarker::SEQ)
			return Token::BLOCK_SEQ_START;

		if(type == IndentMarker::MAP)
			return Token::BLOCK_MAP_START;

		// no start token exists for anything else
		assert(false);
		throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
	}

281
282
283
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
284
285
	// . Returns the indent marker it generates (if any).
	Scanner::IndentMarker *Scanner::PushIndentTo(int column, IndentMarker::INDENT_TYPE type)
286
287
	{
		// are we in flow?
288
		if(InFlowContext())
289
			return 0;
290
		
291
292
293
		std::auto_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
		IndentMarker& indent = *pIndent;
		const IndentMarker& lastIndent = *m_indents.top();
294
295

		// is this actually an indentation?
296
297
298
		if(indent.column < lastIndent.column)
			return 0;
		if(indent.column == lastIndent.column && !(indent.type == IndentMarker::SEQ && lastIndent.type == IndentMarker::MAP))
299
300
			return 0;

301
		// push a start token
302
		indent.pStartToken = PushToken(GetStartTokenFor(type));
303

304
		// and then the indent
305
		m_indents.push(&indent);
306
307
		m_indentRefs.push_back(pIndent);
		return &m_indentRefs.back();
308
309
	}

310
311
	// PopIndentToHere
	// . Pops indentations off the stack until we reach the current indentation level,
312
	//   and enqueues the proper token each time.
313
	// . Then pops all invalid indentations off.
314
	void Scanner::PopIndentToHere()
315
316
	{
		// are we in flow?
317
		if(InFlowContext())
318
319
320
			return;

		// now pop away
321
		while(!m_indents.empty()) {
322
			const IndentMarker& indent = *m_indents.top();
323
324
			if(indent.column < INPUT.column())
				break;
325
			if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT)))
326
327
328
				break;
				
			PopIndent();
329
		}
330
331
332
		
		while(!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID)
			PopIndent();
333
	}
334
335
	
	// PopAllIndents
336
	// . Pops all indentations (except for the base empty one) off the stack,
337
338
339
340
	//   and enqueues the proper token each time.
	void Scanner::PopAllIndents()
	{
		// are we in flow?
341
		if(InFlowContext())
342
343
344
			return;

		// now pop away
345
		while(!m_indents.empty()) {
346
			const IndentMarker& indent = *m_indents.top();
347
348
349
			if(indent.type == IndentMarker::NONE)
				break;
			
350
			PopIndent();
351
		}
352
353
354
355
356
357
	}
	
	// PopIndent
	// . Pops a single indent, pushing the proper token
	void Scanner::PopIndent()
	{
358
		const IndentMarker& indent = *m_indents.top();
359
		m_indents.pop();
360
361

		if(indent.status != IndentMarker::VALID) {
362
			InvalidateSimpleKey();
363
			return;
364
		}
365
		
366
		if(indent.type == IndentMarker::SEQ)
367
			m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
368
		else if(indent.type == IndentMarker::MAP)
369
			m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
370
371
372
373
374
375
376
	}

	// GetTopIndent
	// . Returns the column of the marker on top of the indent stack,
	//   or 0 when the stack is empty.
	int Scanner::GetTopIndent() const
	{
		return m_indents.empty() ? 0 : m_indents.top()->column;
	}
379
380
381
382
383
384
385

	// ThrowParserException
	// . Throws a ParserException with the current token location
	//   (if available).
	// . Does not parse any more tokens.
	void Scanner::ThrowParserException(const std::string& msg) const
	{
386
		Mark mark = Mark::null_mark();
387
388
		if(!m_tokens.empty()) {
			const Token& token = m_tokens.front();
389
			mark = token.mark;
390
		}
391
		throw ParserException(mark, msg);
392
	}
393
}
394