"examples/vscode:/vscode.git/clone" did not exist on "b0cc7c202ba0ad8839c5bd5d89aa14150f220695"
scanner.cpp 6.75 KB
Newer Older
Jesse Beder's avatar
Jesse Beder committed
1
2
#include "scanner.h"
#include "token.h"
Jesse Beder's avatar
Jesse Beder committed
3
#include "exceptions.h"
4
#include "exp.h"
Jesse Beder's avatar
Jesse Beder committed
5
#include <iostream>
Jesse Beder's avatar
Jesse Beder committed
6
7
8
9

namespace YAML
{
	// Scanner
	// . Initializes INPUT from 'in'. The started/ended stream flags and the
	//   simple-key flag all begin as false; the flow level and the column
	//   both begin at zero.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0),
		  m_column(0)
	{
	}

	// ~Scanner
	// . Deletes every token the scanner still holds: the ones waiting in
	//   the queue and the ones registered in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each pending token before removing it
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens were registered by ScanAndEnqueue() but never
		// removed from the set, so they are still ours to delete
		std::set <Token *>::const_iterator pLimbo = m_limboTokens.begin();
		while(pLimbo != m_limboTokens.end()) {
			delete *pLimbo;
			++pLimbo;
		}
	}

	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// GetChar
	// . Extracts a character from the stream and updates our position
	char Scanner::GetChar()
	{
		m_column++;
34
35
36
37
		char ch = INPUT.get();
		if(ch == '\n')
			m_column = 0;
		return ch;
Jesse Beder's avatar
Jesse Beder committed
38
39
	}

Jesse Beder's avatar
Jesse Beder committed
40
41
42
	// Eat
	// . Eats 'n' characters and updates our position.
	void Scanner::Eat(int n)
Jesse Beder's avatar
Jesse Beder committed
43
	{
Jesse Beder's avatar
Jesse Beder committed
44
45
46
47
48
49
		for(int i=0;i<n;i++) {
			m_column++;
			char ch = INPUT.get();
			if(ch == '\n')
				m_column = 0;
		}
Jesse Beder's avatar
Jesse Beder committed
50
51
	}

Jesse Beder's avatar
Jesse Beder committed
52
	// GetLineBreak
Jesse Beder's avatar
Jesse Beder committed
53
	// . Eats with no checking
Jesse Beder's avatar
Jesse Beder committed
54
	void Scanner::EatLineBreak()
Jesse Beder's avatar
Jesse Beder committed
55
	{
Jesse Beder's avatar
Jesse Beder committed
56
57
		Eat(1);
		m_column = 0;
Jesse Beder's avatar
Jesse Beder committed
58
59
60
61
62
63
64
65
66
	}

	// IsWhitespaceToBeEaten
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
Jesse Beder's avatar
Jesse Beder committed
67
	bool Scanner::IsWhitespaceToBeEaten(char ch)
Jesse Beder's avatar
Jesse Beder committed
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
	{
		if(ch == ' ')
			return true;

		if(ch == '\t' && (m_flowLevel >= 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

	// IsDocumentStart
	bool Scanner::IsDocumentStart()
	{
		// needs to be at the start of a new line
		if(m_column != 0)
			return false;

85
		return Exp::DocStart.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
86
87
88
89
90
91
92
93
94
	}

	// IsDocumentEnd
	bool Scanner::IsDocumentEnd()
	{
		// needs to be at the start of a new line
		if(m_column != 0)
			return false;

95
		return Exp::DocEnd.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
96
97
	}

Jesse Beder's avatar
Jesse Beder committed
98
99
100
	// IsBlockEntry
	bool Scanner::IsBlockEntry()
	{
101
		return Exp::BlockEntry.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
102
103
104
105
106
	}

	// IsKey
	bool Scanner::IsKey()
	{
107
		if(m_flowLevel > 0)
108
109
			return Exp::KeyInFlow.Matches(INPUT);
		return Exp::Key.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
110
111
112
113
114
	}

	// IsValue
	bool Scanner::IsValue()
	{
115
		if(m_flowLevel > 0)
116
117
			return Exp::ValueInFlow.Matches(INPUT);
		return Exp::Value.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
118
119
120
121
122
	}

	// IsPlainScalar
	bool Scanner::IsPlainScalar()
	{
123
		if(m_flowLevel > 0)
124
125
			return Exp::PlainScalarInFlow.Matches(INPUT);
		return Exp::PlainScalar.Matches(INPUT);
Jesse Beder's avatar
Jesse Beder committed
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
	}

	// ScanAndEnqueue
	// . Scans the token, then pushes it in the queue.
	// . Note: we also use a set of "limbo tokens", i.e., tokens
	//   that haven't yet been pushed. This way, if ScanToken()
	//   throws an exception, we'll be keeping track of 'pToken'
	//   somewhere, and it will be automatically cleaned up when
	//   the Scanner destructs.
	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
	{
		m_limboTokens.insert(pToken);
		m_tokens.push(ScanToken(pToken));
		m_limboTokens.erase(pToken);
	}

Jesse Beder's avatar
Jesse Beder committed
142
143
144
	///////////////////////////////////////////////////////////////////////
	// The main scanning function

Jesse Beder's avatar
Jesse Beder committed
145
	void Scanner::ScanNextToken()
Jesse Beder's avatar
Jesse Beder committed
146
	{
147
148
149
		if(m_endedStream)
			return;

Jesse Beder's avatar
Jesse Beder committed
150
		if(!m_startedStream)
Jesse Beder's avatar
Jesse Beder committed
151
			return ScanAndEnqueue(new StreamStartToken);
Jesse Beder's avatar
Jesse Beder committed
152
153
154

		ScanToNextToken();
		// TODO: remove "obsolete potential simple keys"
Jesse Beder's avatar
Jesse Beder committed
155
		PopIndentTo(m_column);
Jesse Beder's avatar
Jesse Beder committed
156
157

		if(INPUT.peek() == EOF)
Jesse Beder's avatar
Jesse Beder committed
158
			return ScanAndEnqueue(new StreamEndToken);
Jesse Beder's avatar
Jesse Beder committed
159

Jesse Beder's avatar
Jesse Beder committed
160
		// are we at a document token?
Jesse Beder's avatar
Jesse Beder committed
161
		if(IsDocumentStart())
Jesse Beder's avatar
Jesse Beder committed
162
			return ScanAndEnqueue(new DocumentStartToken);
Jesse Beder's avatar
Jesse Beder committed
163
164

		if(IsDocumentEnd())
Jesse Beder's avatar
Jesse Beder committed
165
			return ScanAndEnqueue(new DocumentEndToken);
Jesse Beder's avatar
Jesse Beder committed
166

Jesse Beder's avatar
Jesse Beder committed
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
		// are we at a flow start/end/entry?
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

		// block/map stuff?
		if(IsBlockEntry())
			return ScanAndEnqueue(new BlockEntryToken);

		if(IsKey())
			return ScanAndEnqueue(new KeyToken);

		if(IsValue())
			return ScanAndEnqueue(new ValueToken);

		// TODO: alias/anchor/tag

		// TODO: special scalars
		if(INPUT.peek() == Keys::LiteralScalar && m_flowLevel == 0)
			return;

		if(INPUT.peek() == Keys::FoldedScalar && m_flowLevel == 0)
			return;

		if(INPUT.peek() == '\'')
			return;

		if(INPUT.peek() == '\"')
			return;

		// plain scalars
		if(IsPlainScalar())
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
Jesse Beder's avatar
Jesse Beder committed
214
215
216
217
218
219
220
221
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
Jesse Beder's avatar
Jesse Beder committed
222
223
			while(IsWhitespaceToBeEaten(INPUT.peek()))
				Eat(1);
Jesse Beder's avatar
Jesse Beder committed
224
225

			// then eat a comment
226
			if(Exp::Comment.Matches(INPUT.peek())) {
Jesse Beder's avatar
Jesse Beder committed
227
				// eat until line break
228
				while(INPUT && !Exp::Break.Matches(INPUT.peek()))
Jesse Beder's avatar
Jesse Beder committed
229
					Eat(1);
Jesse Beder's avatar
Jesse Beder committed
230
231
232
			}

			// if it's NOT a line break, then we're done!
233
			if(!Exp::Break.Matches(INPUT.peek()))
Jesse Beder's avatar
Jesse Beder committed
234
235
236
237
238
239
240
241
242
243
244
				break;

			// otherwise, let's eat the line break and keep going
			EatLineBreak();

			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

Jesse Beder's avatar
Jesse Beder committed
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start if 'sequence' is true, mapping
	//   start otherwise).
	// . Does nothing in the flow context, or if 'column' is not deeper
	//   than the indentation currently on top of the stack.
	void Scanner::PushIndentTo(int column, bool sequence)
	{
		// are we in flow?
		if(m_flowLevel > 0)
			return;

		// is this actually an indentation?
		// (only consult the top if there is one; with an empty stack there
		// is nothing to compare against, so 'column' counts as a new level)
		if(!m_indents.empty() && column <= m_indents.top())
			return;

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
	}

	// PopIndentTo
	// . Unwinds the indentation stack down to 'column': every indentation
	//   deeper than 'column' is popped, and a block-end token is enqueued
	//   for each one.
	// . Does nothing in the flow context.
	void Scanner::PopIndentTo(int column)
	{
		// nothing to unwind while the flow level is positive
		if(m_flowLevel > 0)
			return;

		for(;;) {
			// stop once the stack is exhausted or the top is no deeper than 'column'
			if(m_indents.empty() || m_indents.top() <= column)
				break;

			m_indents.pop();
			m_tokens.push(new BlockEndToken);
		}
	}

Jesse Beder's avatar
Jesse Beder committed
282
283
284
	// temporary function for testing
	void Scanner::Scan()
	{
285
		while(1) {
Jesse Beder's avatar
Jesse Beder committed
286
			ScanNextToken();
287
288
			if(m_tokens.empty())
				break;
Jesse Beder's avatar
Jesse Beder committed
289
290
291
292
293
294
295
296

			while(!m_tokens.empty()) {
				Token *pToken = m_tokens.front();
				m_tokens.pop();
				std::cout << typeid(*pToken).name() << std::endl;
				delete pToken;
			}
		}
Jesse Beder's avatar
Jesse Beder committed
297
298
	}
}