scanner.cpp 6.99 KB
Newer Older
beder's avatar
beder committed
1
2
#include "scanner.h"
#include "token.h"
beder's avatar
beder committed
3
#include "exceptions.h"
4
#include "exp.h"
beder's avatar
beder committed
5
#include <iostream>
beder's avatar
beder committed
6
7
8
9

namespace YAML
{
	// Scanner
	// . Initializes INPUT from 'in' and resets every piece of scanning
	//   state. The constructor itself reads nothing from the stream.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0),
		  m_column(0)
	{
	}

	// ~Scanner
	// . Deletes every token still held by the scanner: the ones waiting
	//   in the token queue and the ones registered in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each token as it comes off the front
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens were registered by ScanAndEnqueue and never removed
		// again, so they still have to be deleted here
		std::set <Token *>::const_iterator it = m_limboTokens.begin();
		while(it != m_limboTokens.end()) {
			delete *it;
			++it;
		}
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// GetChar
	// . Extracts a character from the stream and updates our position
	char Scanner::GetChar()
	{
		m_column++;
34
35
36
37
		char ch = INPUT.get();
		if(ch == '\n')
			m_column = 0;
		return ch;
beder's avatar
beder committed
38
39
	}

beder's avatar
beder committed
40
41
42
43
44
45
46
47
48
49
	// GetChar
	// . Reads 'n' characters one at a time (updating our position for
	//   each) and returns them as a string, in the order read.
	// . A non-positive 'n' yields an empty string.
	std::string Scanner::GetChar(int n)
	{
		std::string chars;
		for(int remaining = n; remaining > 0; --remaining)
			chars.push_back(GetChar());
		return chars;
	}

beder's avatar
beder committed
50
51
52
	// Eat
	// . Eats 'n' characters and updates our position.
	void Scanner::Eat(int n)
beder's avatar
beder committed
53
	{
beder's avatar
beder committed
54
55
56
57
58
59
		for(int i=0;i<n;i++) {
			m_column++;
			char ch = INPUT.get();
			if(ch == '\n')
				m_column = 0;
		}
beder's avatar
beder committed
60
61
	}

beder's avatar
beder committed
62
	// GetLineBreak
beder's avatar
beder committed
63
	// . Eats with no checking
beder's avatar
beder committed
64
	void Scanner::EatLineBreak()
beder's avatar
beder committed
65
	{
beder's avatar
beder committed
66
67
		Eat(1);
		m_column = 0;
beder's avatar
beder committed
68
69
70
71
72
73
74
75
76
	}

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
beder's avatar
beder committed
77
	bool Scanner::IsWhitespaceToBeEaten(char ch)
beder's avatar
beder committed
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
	{
		if(ch == ' ')
			return true;

		if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

	// IsDocumentStart
	bool Scanner::IsDocumentStart()
	{
		// needs to be at the start of a new line
		if(m_column != 0)
			return false;

95
		return Exp::DocStart.Matches(INPUT);
beder's avatar
beder committed
96
97
98
99
100
101
102
103
104
	}

	// IsDocumentEnd
	bool Scanner::IsDocumentEnd()
	{
		// needs to be at the start of a new line
		if(m_column != 0)
			return false;

105
		return Exp::DocEnd.Matches(INPUT);
beder's avatar
beder committed
106
107
	}

beder's avatar
beder committed
108
109
110
	// IsBlockEntry
	bool Scanner::IsBlockEntry()
	{
111
		return Exp::BlockEntry.Matches(INPUT);
beder's avatar
beder committed
112
113
114
115
116
	}

	// IsKey
	bool Scanner::IsKey()
	{
117
		if(m_flowLevel > 0)
118
119
			return Exp::KeyInFlow.Matches(INPUT);
		return Exp::Key.Matches(INPUT);
beder's avatar
beder committed
120
121
122
123
124
	}

	// IsValue
	bool Scanner::IsValue()
	{
125
		if(m_flowLevel > 0)
126
127
			return Exp::ValueInFlow.Matches(INPUT);
		return Exp::Value.Matches(INPUT);
beder's avatar
beder committed
128
129
130
131
132
	}

	// IsPlainScalar
	bool Scanner::IsPlainScalar()
	{
133
		if(m_flowLevel > 0)
134
135
			return Exp::PlainScalarInFlow.Matches(INPUT);
		return Exp::PlainScalar.Matches(INPUT);
beder's avatar
beder committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
	}

	// ScanAndEnqueue
	// . Runs ScanToken() on 'pToken' and pushes the result onto the
	//   token queue.
	// . While that happens, 'pToken' is registered in the set of
	//   "limbo tokens". If ScanToken() throws, the token is still
	//   recorded there, and the Scanner destructor deletes everything
	//   left in that set.
	template <typename T>
	void Scanner::ScanAndEnqueue(T *pToken)
	{
		// register first, so the token is tracked before anything can throw
		m_limboTokens.insert(pToken);

		// scan and queue
		m_tokens.push(ScanToken(pToken));

		// queued successfully: no longer in limbo
		m_limboTokens.erase(pToken);
	}

beder's avatar
beder committed
152
153
154
	///////////////////////////////////////////////////////////////////////
	// The main scanning function

beder's avatar
beder committed
155
	void Scanner::ScanNextToken()
beder's avatar
beder committed
156
	{
157
158
159
		if(m_endedStream)
			return;

beder's avatar
beder committed
160
		if(!m_startedStream)
beder's avatar
beder committed
161
			return ScanAndEnqueue(new StreamStartToken);
beder's avatar
beder committed
162
163
164

		ScanToNextToken();
		// TODO: remove "obsolete potential simple keys"
beder's avatar
beder committed
165
		PopIndentTo(m_column);
beder's avatar
beder committed
166
167

		if(INPUT.peek() == EOF)
beder's avatar
beder committed
168
			return ScanAndEnqueue(new StreamEndToken);
beder's avatar
beder committed
169

beder's avatar
beder committed
170
		// are we at a document token?
beder's avatar
beder committed
171
		if(IsDocumentStart())
beder's avatar
beder committed
172
			return ScanAndEnqueue(new DocumentStartToken);
beder's avatar
beder committed
173
174

		if(IsDocumentEnd())
beder's avatar
beder committed
175
			return ScanAndEnqueue(new DocumentEndToken);
beder's avatar
beder committed
176

beder's avatar
beder committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
		// are we at a flow start/end/entry?
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

		// block/map stuff?
		if(IsBlockEntry())
			return ScanAndEnqueue(new BlockEntryToken);

		if(IsKey())
			return ScanAndEnqueue(new KeyToken);

		if(IsValue())
			return ScanAndEnqueue(new ValueToken);

		// TODO: alias/anchor/tag

		// TODO: special scalars
		if(INPUT.peek() == Keys::LiteralScalar && m_flowLevel == 0)
			return;

		if(INPUT.peek() == Keys::FoldedScalar && m_flowLevel == 0)
			return;

beder's avatar
beder committed
212
213
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanAndEnqueue(new QuotedScalarToken);
beder's avatar
beder committed
214
215
216
217
218
219
220

		// plain scalars
		if(IsPlainScalar())
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
beder's avatar
beder committed
221
222
223
224
225
226
227
228
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
beder's avatar
beder committed
229
230
			while(IsWhitespaceToBeEaten(INPUT.peek()))
				Eat(1);
beder's avatar
beder committed
231
232

			// then eat a comment
233
			if(Exp::Comment.Matches(INPUT.peek())) {
beder's avatar
beder committed
234
				// eat until line break
235
				while(INPUT && !Exp::Break.Matches(INPUT.peek()))
beder's avatar
beder committed
236
					Eat(1);
beder's avatar
beder committed
237
238
239
			}

			// if it's NOT a line break, then we're done!
240
			if(!Exp::Break.Matches(INPUT.peek()))
beder's avatar
beder committed
241
242
243
244
245
246
247
248
249
250
251
				break;

			// otherwise, let's eat the line break and keep going
			EatLineBreak();

			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

beder's avatar
beder committed
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
	// PushIndentTo
	// . Pushes the indentation 'column' onto the stack and enqueues the
	//   matching token: a block sequence start if 'sequence' is true,
	//   a block mapping start otherwise.
	// . Does nothing in flow context, or when 'column' is not deeper
	//   than the indentation currently on top of the stack.
	void Scanner::PushIndentTo(int column, bool sequence)
	{
		// are we in flow?
		if(m_flowLevel > 0)
			return;

		// is this actually an indentation?
		// (calling top() on an empty stack is undefined, so check first, as
		// PopIndentTo does; with nothing on the stack any column counts as
		// a new indentation)
		if(!m_indents.empty() && column <= m_indents.top())
			return;

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
	}

	// PopIndentTo
	// . Unwinds the indentation stack while its top entry is deeper than
	//   'column', enqueueing one block end token per entry removed.
	// . Does nothing in flow context.
	void Scanner::PopIndentTo(int column)
	{
		// only meaningful outside of flow context
		if(m_flowLevel <= 0) {
			while(!m_indents.empty() && m_indents.top() > column) {
				m_indents.pop();
				m_tokens.push(new BlockEndToken);
			}
		}
	}

beder's avatar
beder committed
289
290
291
	// temporary function for testing
	void Scanner::Scan()
	{
292
		while(1) {
beder's avatar
beder committed
293
			ScanNextToken();
294
295
			if(m_tokens.empty())
				break;
beder's avatar
beder committed
296
297
298
299

			while(!m_tokens.empty()) {
				Token *pToken = m_tokens.front();
				m_tokens.pop();
beder's avatar
beder committed
300
				std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
beder's avatar
beder committed
301
302
303
				delete pToken;
			}
		}
beder's avatar
beder committed
304
305
	}
}