scanner.cpp 6.99 KB
Newer Older
Jesse Beder's avatar
Jesse Beder committed
1
2
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include <cstdio>
#include <iostream>
#include <typeinfo>
Jesse Beder's avatar
Jesse Beder committed
6
7
8
9

namespace YAML
{
	// Scanner
	// . Binds the scanner to the input stream 'in'. Both stream flags and the
	//   simple-key flag start out false; the flow level and column start at zero.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0),
		  m_column(0)
	{
	}

	// ~Scanner
	// . Frees every token still held by the scanner: first those waiting in
	//   the queue, then those parked in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each token before it is removed
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens were handed to the scanner but never made it into the queue
		std::set <Token *>::const_iterator limbo = m_limboTokens.begin();
		while(limbo != m_limboTokens.end()) {
			delete *limbo;
			++limbo;
		}
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// GetChar
	// . Extracts a character from the stream and updates our position
	char Scanner::GetChar()
	{
		m_column++;
34
35
36
37
		char ch = INPUT.get();
		if(ch == '\n')
			m_column = 0;
		return ch;
Jesse Beder's avatar
Jesse Beder committed
38
39
	}

40
41
42
43
44
45
46
47
48
49
	// GetChar
	// . Reads 'n' characters one at a time (keeping the column up to date)
	//   and returns them as a string, in the order they were read.
	// . Returns an empty string when 'n' is zero or negative.
	std::string Scanner::GetChar(int n)
	{
		std::string chars;
		for(int left = n; left > 0; --left)
			chars += GetChar();
		return chars;
	}

Jesse Beder's avatar
Jesse Beder committed
50
51
52
	// Eat
	// . Eats 'n' characters and updates our position.
	void Scanner::Eat(int n)
Jesse Beder's avatar
Jesse Beder committed
53
	{
Jesse Beder's avatar
Jesse Beder committed
54
55
56
57
58
59
		for(int i=0;i<n;i++) {
			m_column++;
			char ch = INPUT.get();
			if(ch == '\n')
				m_column = 0;
		}
Jesse Beder's avatar
Jesse Beder committed
60
61
	}

Jesse Beder's avatar
Jesse Beder committed
62
	// GetLineBreak
Jesse Beder's avatar
Jesse Beder committed
63
	// . Eats with no checking
Jesse Beder's avatar
Jesse Beder committed
64
	void Scanner::EatLineBreak()
Jesse Beder's avatar
Jesse Beder committed
65
	{
Jesse Beder's avatar
Jesse Beder committed
66
67
		Eat(1);
		m_column = 0;
Jesse Beder's avatar
Jesse Beder committed
68
69
70
71
72
73
74
75
76
	}

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
Jesse Beder's avatar
Jesse Beder committed
77
	bool Scanner::IsWhitespaceToBeEaten(char ch)
Jesse Beder's avatar
Jesse Beder committed
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
	{
		if(ch == ' ')
			return true;

		if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

	// IsDocumentStart
	bool Scanner::IsDocumentStart()
	{
		// needs to be at the start of a new line
		if(m_column != 0)
			return false;

95
		return Exp::DocStart.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
96
97
98
99
100
101
102
103
104
	}

	// IsDocumentEnd
	bool Scanner::IsDocumentEnd()
	{
		// needs to be at the start of a new line
		if(m_column != 0)
			return false;

105
		return Exp::DocEnd.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
106
107
	}

Jesse Beder's avatar
Jesse Beder committed
108
109
110
	// IsBlockEntry
	bool Scanner::IsBlockEntry()
	{
111
		return Exp::BlockEntry.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
112
113
114
115
116
	}

	// IsKey
	bool Scanner::IsKey()
	{
117
		if(m_flowLevel > 0)
118
119
			return Exp::KeyInFlow.Matches(INPUT);
		return Exp::Key.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
120
121
122
123
124
	}

	// IsValue
	bool Scanner::IsValue()
	{
125
		if(m_flowLevel > 0)
126
127
			return Exp::ValueInFlow.Matches(INPUT);
		return Exp::Value.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
128
129
130
131
132
	}

	// IsPlainScalar
	bool Scanner::IsPlainScalar()
	{
133
		if(m_flowLevel > 0)
134
135
			return Exp::PlainScalarInFlow.Matches(INPUT);
		return Exp::PlainScalar.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
	}

	// ScanAndEnqueue
	// . Runs ScanToken() on 'pToken' and appends the result to the token queue.
	// . While the scan is in progress 'pToken' is parked in the set of
	//   "limbo tokens". Should ScanToken() throw, the token is therefore still
	//   tracked by the Scanner and gets deleted when the Scanner destructs.
	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
	{
		// register first, so the token is tracked for the whole scan
		m_limboTokens.insert(pToken);

		// scan it and queue whatever ScanToken() hands back
		m_tokens.push(ScanToken(pToken));

		// scan and enqueue both succeeded - it no longer needs to sit in limbo
		m_limboTokens.erase(pToken);
	}

Jesse Beder's avatar
Jesse Beder committed
152
153
154
	///////////////////////////////////////////////////////////////////////
	// The main scanning function

Jesse Beder's avatar
Jesse Beder committed
155
	void Scanner::ScanNextToken()
Jesse Beder's avatar
Jesse Beder committed
156
	{
157
158
159
		if(m_endedStream)
			return;

Jesse Beder's avatar
Jesse Beder committed
160
		if(!m_startedStream)
Jesse Beder's avatar
Jesse Beder committed
161
			return ScanAndEnqueue(new StreamStartToken);
Jesse Beder's avatar
Jesse Beder committed
162
163
164

		ScanToNextToken();
		// TODO: remove "obsolete potential simple keys"
Jesse Beder's avatar
Jesse Beder committed
165
		PopIndentTo(m_column);
Jesse Beder's avatar
Jesse Beder committed
166
167

		if(INPUT.peek() == EOF)
Jesse Beder's avatar
Jesse Beder committed
168
			return ScanAndEnqueue(new StreamEndToken);
Jesse Beder's avatar
Jesse Beder committed
169

Jesse Beder's avatar
Jesse Beder committed
170
		// are we at a document token?
Jesse Beder's avatar
Jesse Beder committed
171
		if(IsDocumentStart())
Jesse Beder's avatar
Jesse Beder committed
172
			return ScanAndEnqueue(new DocumentStartToken);
Jesse Beder's avatar
Jesse Beder committed
173
174

		if(IsDocumentEnd())
Jesse Beder's avatar
Jesse Beder committed
175
			return ScanAndEnqueue(new DocumentEndToken);
Jesse Beder's avatar
Jesse Beder committed
176

Jesse Beder's avatar
Jesse Beder committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
		// are we at a flow start/end/entry?
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

		// block/map stuff?
		if(IsBlockEntry())
			return ScanAndEnqueue(new BlockEntryToken);

		if(IsKey())
			return ScanAndEnqueue(new KeyToken);

		if(IsValue())
			return ScanAndEnqueue(new ValueToken);

		// TODO: alias/anchor/tag

		// TODO: special scalars
		if(INPUT.peek() == Keys::LiteralScalar && m_flowLevel == 0)
			return;

		if(INPUT.peek() == Keys::FoldedScalar && m_flowLevel == 0)
			return;

212
213
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanAndEnqueue(new QuotedScalarToken);
Jesse Beder's avatar
Jesse Beder committed
214
215
216
217
218
219
220

		// plain scalars
		if(IsPlainScalar())
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
Jesse Beder's avatar
Jesse Beder committed
221
222
223
224
225
226
227
228
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
Jesse Beder's avatar
Jesse Beder committed
229
230
			while(IsWhitespaceToBeEaten(INPUT.peek()))
				Eat(1);
Jesse Beder's avatar
Jesse Beder committed
231
232

			// then eat a comment
233
			if(Exp::Comment.Matches(INPUT.peek())) {
Jesse Beder's avatar
Jesse Beder committed
234
				// eat until line break
235
				while(INPUT && !Exp::Break.Matches(INPUT.peek()))
Jesse Beder's avatar
Jesse Beder committed
236
					Eat(1);
Jesse Beder's avatar
Jesse Beder committed
237
238
239
			}

			// if it's NOT a line break, then we're done!
240
			if(!Exp::Break.Matches(INPUT.peek()))
Jesse Beder's avatar
Jesse Beder committed
241
242
243
244
245
246
247
248
249
250
251
				break;

			// otherwise, let's eat the line break and keep going
			EatLineBreak();

			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

Jesse Beder's avatar
Jesse Beder committed
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
	// PushIndentTo
	// . Pushes the indentation 'column' onto the stack, and enqueues the
	//   proper token: a sequence start if 'sequence' is true, a mapping
	//   start otherwise.
	// . Does nothing in the flow context, or when 'column' is not deeper
	//   than the indentation currently on top of the stack.
	// . With an empty stack, any column counts as a new indentation.
	void Scanner::PushIndentTo(int column, bool sequence)
	{
		// are we in flow?
		if(m_flowLevel > 0)
			return;

		// is this actually an indentation?
		// (top() on an empty stack is undefined, so check for that first,
		//  the same way PopIndentTo does)
		if(!m_indents.empty() && column <= m_indents.top())
			return;

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
	}

	// PopIndentTo
	// . Removes every indentation deeper than 'column' from the stack,
	//   enqueueing one block end token per indentation removed.
	// . Does nothing in the flow context.
	void Scanner::PopIndentTo(int column)
	{
		// indentation carries no meaning inside a flow collection
		if(m_flowLevel > 0)
			return;

		while(!m_indents.empty()) {
			// stop at the first indentation that is not deeper than 'column'
			if(m_indents.top() <= column)
				break;

			m_indents.pop();
			m_tokens.push(new BlockEndToken);
		}
	}

Jesse Beder's avatar
Jesse Beder committed
289
290
291
	// temporary function for testing
	void Scanner::Scan()
	{
292
		while(1) {
Jesse Beder's avatar
Jesse Beder committed
293
			ScanNextToken();
294
295
			if(m_tokens.empty())
				break;
Jesse Beder's avatar
Jesse Beder committed
296
297
298
299

			while(!m_tokens.empty()) {
				Token *pToken = m_tokens.front();
				m_tokens.pop();
300
				std::cout << typeid(*pToken).name() << ": " << *pToken << std::endl;
Jesse Beder's avatar
Jesse Beder committed
301
302
303
				delete pToken;
			}
		}
Jesse Beder's avatar
Jesse Beder committed
304
305
	}
}