"driver/driver.cpp" did not exist on "3439e4b5b72c0533cd60bb06ff076df6e4b004f5"
scanner.cpp 7.39 KB
Newer Older
beder's avatar
beder committed
1
2
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include <cstdio>
#include <iostream>
#include <typeinfo>
beder's avatar
beder committed
6
7
8
9

namespace YAML
{
	// Scanner
	// . Initializes INPUT from the given stream and resets the scanning
	//   state: the stream is neither started nor ended, no simple key is
	//   allowed yet, and we're at flow level zero (i.e., block context).
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0)
	{
	}

	// ~Scanner
	// . Deletes every token the scanner is still holding on to: the ones
	//   waiting in the queue, and the ones sitting in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each token before it's removed
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens were handed to us but never made it into the queue
		typedef std::set <Token *>::const_iterator LimboIterator;
		const LimboIterator limboEnd = m_limboTokens.end();
		for(LimboIterator pos = m_limboTokens.begin(); pos != limboEnd; ++pos)
			delete *pos;
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
beder's avatar
beder committed
36
	bool Scanner::IsWhitespaceToBeEaten(char ch)
beder's avatar
beder committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
	{
		if(ch == ' ')
			return true;

		if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

	// IsDocumentStart
	bool Scanner::IsDocumentStart()
	{
		// needs to be at the start of a new line
51
		if(INPUT.column != 0)
beder's avatar
beder committed
52
53
			return false;

54
		return Exp::DocStart.Matches(INPUT);
beder's avatar
beder committed
55
56
57
58
59
60
	}

	// IsDocumentEnd
	bool Scanner::IsDocumentEnd()
	{
		// needs to be at the start of a new line
61
		if(INPUT.column != 0)
beder's avatar
beder committed
62
63
			return false;

64
		return Exp::DocEnd.Matches(INPUT);
beder's avatar
beder committed
65
66
	}

beder's avatar
beder committed
67
68
69
	// IsBlockEntry
	bool Scanner::IsBlockEntry()
	{
70
		return Exp::BlockEntry.Matches(INPUT);
beder's avatar
beder committed
71
72
73
74
75
	}

	// IsKey
	bool Scanner::IsKey()
	{
76
		if(m_flowLevel > 0)
77
78
			return Exp::KeyInFlow.Matches(INPUT);
		return Exp::Key.Matches(INPUT);
beder's avatar
beder committed
79
80
81
82
83
	}

	// IsValue
	bool Scanner::IsValue()
	{
84
		if(m_flowLevel > 0)
85
86
			return Exp::ValueInFlow.Matches(INPUT);
		return Exp::Value.Matches(INPUT);
beder's avatar
beder committed
87
88
89
90
91
	}

	// IsPlainScalar
	bool Scanner::IsPlainScalar()
	{
92
		if(m_flowLevel > 0)
93
94
			return Exp::PlainScalarInFlow.Matches(INPUT);
		return Exp::PlainScalar.Matches(INPUT);
beder's avatar
beder committed
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
	}

	// ScanAndEnqueue
	// . Scans the token, then pushes it in the queue.
	// . Note: we also use a set of "limbo tokens", i.e., tokens
	//   that haven't yet been pushed. This way, if ScanToken()
	//   throws an exception, we'll be keeping track of 'pToken'
	//   somewhere, and it will be automatically cleaned up when
	//   the Scanner destructs.
	// . The order of the three statements below matters: the token
	//   has to be in the limbo set for the whole time it's being
	//   scanned, and is only taken out once the queue holds it.
	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
	{
		// register the token first, so the destructor can delete it if the scan throws
		m_limboTokens.insert(pToken);
		// scan it, and queue whatever ScanToken() hands back
		m_tokens.push(ScanToken(pToken));
		// the queue has it now, so it's no longer in limbo
		m_limboTokens.erase(pToken);
	}

beder's avatar
beder committed
111
112
113
	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
beder's avatar
beder committed
114
	void Scanner::ScanNextToken()
beder's avatar
beder committed
115
	{
116
117
118
		if(m_endedStream)
			return;

beder's avatar
beder committed
119
		if(!m_startedStream)
beder's avatar
beder committed
120
			return ScanAndEnqueue(new StreamStartToken);
beder's avatar
beder committed
121

beder's avatar
beder committed
122
		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
beder's avatar
beder committed
123
		ScanToNextToken();
beder's avatar
beder committed
124
125

		// check the latest simple key
126
		ValidateSimpleKey();
beder's avatar
beder committed
127
128

		// maybe need to end some blocks
129
		PopIndentTo(INPUT.column);
beder's avatar
beder committed
130

beder's avatar
beder committed
131
132
133
134
135
		// *****
		// And now branch based on the next few characters!
		// *****

		// end of stream
beder's avatar
beder committed
136
		if(INPUT.peek() == EOF)
beder's avatar
beder committed
137
			return ScanAndEnqueue(new StreamEndToken);
beder's avatar
beder committed
138

beder's avatar
beder committed
139
		// document token
beder's avatar
beder committed
140
		if(IsDocumentStart())
beder's avatar
beder committed
141
			return ScanAndEnqueue(new DocumentStartToken);
beder's avatar
beder committed
142
143

		if(IsDocumentEnd())
beder's avatar
beder committed
144
			return ScanAndEnqueue(new DocumentEndToken);
beder's avatar
beder committed
145

beder's avatar
beder committed
146
		// flow start/end/entry
beder's avatar
beder committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

beder's avatar
beder committed
162
		// block/map stuff
beder's avatar
beder committed
163
164
165
166
167
168
169
170
171
		if(IsBlockEntry())
			return ScanAndEnqueue(new BlockEntryToken);

		if(IsKey())
			return ScanAndEnqueue(new KeyToken);

		if(IsValue())
			return ScanAndEnqueue(new ValueToken);

beder's avatar
beder committed
172
173
174
175
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
			return ScanAndEnqueue(new AnchorToken);

		// TODO: tag
beder's avatar
beder committed
176

beder's avatar
beder committed
177
178
179
		// special scalars
		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
			return ScanAndEnqueue(new BlockScalarToken);
beder's avatar
beder committed
180

beder's avatar
beder committed
181
182
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanAndEnqueue(new QuotedScalarToken);
beder's avatar
beder committed
183
184
185
186
187
188
189

		// plain scalars
		if(IsPlainScalar())
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
beder's avatar
beder committed
190
191
192
193
194
195
196
197
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
beder's avatar
beder committed
198
			while(IsWhitespaceToBeEaten(INPUT.peek()))
199
				INPUT.Eat(1);
beder's avatar
beder committed
200
201

			// then eat a comment
beder's avatar
beder committed
202
			if(Exp::Comment.Matches(INPUT)) {
beder's avatar
beder committed
203
				// eat until line break
beder's avatar
beder committed
204
				while(INPUT && !Exp::Break.Matches(INPUT))
205
					INPUT.Eat(1);
beder's avatar
beder committed
206
207
208
			}

			// if it's NOT a line break, then we're done!
beder's avatar
beder committed
209
			if(!Exp::Break.Matches(INPUT))
beder's avatar
beder committed
210
211
212
				break;

			// otherwise, let's eat the line break and keep going
213
			INPUT.EatLineBreak();
beder's avatar
beder committed
214

beder's avatar
beder committed
215
216
217
			// oh yeah, and let's get rid of that simple key
			ValidateSimpleKey();

beder's avatar
beder committed
218
219
220
221
222
223
			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

beder's avatar
beder committed
224
225
226
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
beder's avatar
beder committed
227
228
	// . Returns the token it generates (if any).
	Token *Scanner::PushIndentTo(int column, bool sequence)
beder's avatar
beder committed
229
230
231
	{
		// are we in flow?
		if(m_flowLevel > 0)
beder's avatar
beder committed
232
			return 0;
beder's avatar
beder committed
233
234
235

		// is this actually an indentation?
		if(column <= m_indents.top())
beder's avatar
beder committed
236
			return 0;
beder's avatar
beder committed
237
238
239
240
241
242
243

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
beder's avatar
beder committed
244
245

		return m_tokens.front();
beder's avatar
beder committed
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
	}

	// PopIndentTo
	// . Pops every indentation deeper than 'column' off the stack,
	//   enqueueing a block end token for each one.
	// . Does nothing in the flow context.
	void Scanner::PopIndentTo(int column)
	{
		// indentation doesn't matter inside a flow collection
		if(m_flowLevel > 0)
			return;

		for(;;) {
			// stop once the stack runs out or we're no deeper than 'column'
			if(m_indents.empty() || m_indents.top() <= column)
				break;

			m_indents.pop();
			m_tokens.push(new BlockEndToken);
		}
	}

264
265
266
	// GetNextToken
	// . Returns the next token on the queue, scanning only if we need to.
	// . The token is removed from the queue before it's returned.
	// . Tokens at the front that are flagged as not possible are deleted
	//   and skipped.
	// . Returns 0 if the stream has ended and there's no valid token
	//   at the front of the queue.
	Token *Scanner::GetNextToken()
	{
		for(;;) {
			if(!m_tokens.empty()) {
				Token *pFront = m_tokens.front();

				// here's where we clean up the impossible tokens
				if(!pFront->isPossible) {
					m_tokens.pop();
					delete pFront;
					continue;
				}

				// possible and valid? then that's the one we want
				if(pFront->isValid) {
					m_tokens.pop();
					return pFront;
				}

				// otherwise it's not valid (yet), so fall through
				// as if there were no token at all
			}

			// nothing to hand out - maybe we've actually finished
			if(m_endedStream)
				return 0;

			// no? then scan some more and try again
			ScanNextToken();
		}
	}

	// Scan
	// . Temporary function for testing: pulls tokens until GetNextToken()
	//   returns 0, printing each one's type name and contents to standard
	//   output, and deleting it afterwards.
	void Scanner::Scan()
	{
		while(Token *pToken = GetNextToken()) {
			std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
			delete pToken;
		}
	}
}