scanner.cpp 6.64 KB
Newer Older
1
#include "crt.h"
Jesse Beder's avatar
Jesse Beder committed
2
3
#include "scanner.h"
#include "token.h"
Jesse Beder's avatar
Jesse Beder committed
4
#include "exceptions.h"
5
#include "exp.h"
6
#include <cassert>
Jesse Beder's avatar
Jesse Beder committed
7
8
9
10

namespace YAML
{
	// Scanner (constructor)
	// . Binds the scanner to the given input stream. The stream is marked
	//   as neither started nor ended, simple keys are not allowed yet, and
	//   the flow level starts at zero.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0)
	{
	}

	// ~Scanner (destructor)
	// . Performs no explicit cleanup of its own.
	Scanner::~Scanner()
	{
		// intentionally empty
	}

19
	// empty
20
	// . Returns true if there are no more tokens to be read
21
	bool Scanner::empty()
22
	{
23
		EnsureTokensInQueue();
24
		return m_tokens.empty();
25
	}
Jesse Beder's avatar
Jesse Beder committed
26

27
	// pop
28
	// . Simply removes the next token on the queue.
29
	void Scanner::pop()
30
	{
31
		EnsureTokensInQueue();
32
33
		if(!m_tokens.empty())
			m_tokens.pop();
34
35
	}

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
	// peek
	// . Returns a reference to the token at the front of the queue
	//   without removing it.
	// . The queue must not be empty after scanning ahead; this is only
	//   enforced by an assert, so callers should test empty() first.
	Token& Scanner::peek()
	{
		EnsureTokensInQueue();

		// should we be asserting here? Callers should really just be
		// checking whether the scanner is empty before peeking.
		assert(!m_tokens.empty());

		Token& next = m_tokens.front();
		return next;
	}

	// EnsureTokensInQueue
	// . Scan until there's a valid token at the front of the queue,
	//   or we're sure the queue is empty.
	void Scanner::EnsureTokensInQueue()
Jesse Beder's avatar
Jesse Beder committed
51
	{
52
		while(1) {
53
54
			if(!m_tokens.empty()) {
				Token& token = m_tokens.front();
Jesse Beder's avatar
Jesse Beder committed
55

56
				// if this guy's valid, then we're done
57
				if(token.status == TS_VALID)
58
					return;
Jesse Beder's avatar
Jesse Beder committed
59

60
61
62
63
64
				// here's where we clean up the impossible tokens
				if(token.status == TS_INVALID) {
					m_tokens.pop();
					continue;
				}
65

66
67
				// note: what's left are the unverified tokens
			}
68
69
70

			// no token? maybe we've actually finished
			if(m_endedStream)
71
				return;
72
73
74
75

			// no? then scan...
			ScanNextToken();
		}
Jesse Beder's avatar
Jesse Beder committed
76
77
	}

Jesse Beder's avatar
Jesse Beder committed
78
79
80
	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
Jesse Beder's avatar
Jesse Beder committed
81
	void Scanner::ScanNextToken()
Jesse Beder's avatar
Jesse Beder committed
82
	{
83
84
85
		if(m_endedStream)
			return;

Jesse Beder's avatar
Jesse Beder committed
86
		if(!m_startedStream)
87
			return StartStream();
Jesse Beder's avatar
Jesse Beder committed
88

Jesse Beder's avatar
Jesse Beder committed
89
		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
Jesse Beder's avatar
Jesse Beder committed
90
		ScanToNextToken();
Jesse Beder's avatar
Jesse Beder committed
91
92

		// check the latest simple key
93
		VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
94
95

		// maybe need to end some blocks
96
		PopIndentTo(INPUT.column);
Jesse Beder's avatar
Jesse Beder committed
97

Jesse Beder's avatar
Jesse Beder committed
98
99
100
101
102
		// *****
		// And now branch based on the next few characters!
		// *****

		// end of stream
Jesse Beder's avatar
Jesse Beder committed
103
		if(INPUT.peek() == EOF)
104
			return EndStream();
Jesse Beder's avatar
Jesse Beder committed
105

Jesse Beder's avatar
Jesse Beder committed
106
		if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)
107
			return ScanDirective();
Jesse Beder's avatar
Jesse Beder committed
108

Jesse Beder's avatar
Jesse Beder committed
109
		// document token
Jesse Beder's avatar
Jesse Beder committed
110
		if(INPUT.column == 0 && Exp::DocStart.Matches(INPUT))
111
			return ScanDocStart();
Jesse Beder's avatar
Jesse Beder committed
112

Jesse Beder's avatar
Jesse Beder committed
113
		if(INPUT.column == 0 && Exp::DocEnd.Matches(INPUT))
114
			return ScanDocEnd();
Jesse Beder's avatar
Jesse Beder committed
115

Jesse Beder's avatar
Jesse Beder committed
116
		// flow start/end/entry
117
118
		if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
			return ScanFlowStart();
Jesse Beder's avatar
Jesse Beder committed
119

120
121
122
		if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
			return ScanFlowEnd();
	
Jesse Beder's avatar
Jesse Beder committed
123
		if(INPUT.peek() == Keys::FlowEntry)
124
			return ScanFlowEntry();
Jesse Beder's avatar
Jesse Beder committed
125

Jesse Beder's avatar
Jesse Beder committed
126
		// block/map stuff
Jesse Beder's avatar
Jesse Beder committed
127
		if(Exp::BlockEntry.Matches(INPUT))
128
			return ScanBlockEntry();
Jesse Beder's avatar
Jesse Beder committed
129

Jesse Beder's avatar
Jesse Beder committed
130
		if((m_flowLevel == 0 ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
131
			return ScanKey();
Jesse Beder's avatar
Jesse Beder committed
132

Jesse Beder's avatar
Jesse Beder committed
133
		if((m_flowLevel == 0 ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
134
			return ScanValue();
Jesse Beder's avatar
Jesse Beder committed
135

Jesse Beder's avatar
Jesse Beder committed
136
		// alias/anchor
Jesse Beder's avatar
Jesse Beder committed
137
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
138
			return ScanAnchorOrAlias();
Jesse Beder's avatar
Jesse Beder committed
139

Jesse Beder's avatar
Jesse Beder committed
140
141
		// tag
		if(INPUT.peek() == Keys::Tag)
142
			return ScanTag();
Jesse Beder's avatar
Jesse Beder committed
143

144
145
		// special scalars
		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
146
			return ScanBlockScalar();
Jesse Beder's avatar
Jesse Beder committed
147

148
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
149
			return ScanQuotedScalar();
Jesse Beder's avatar
Jesse Beder committed
150
151

		// plain scalars
Jesse Beder's avatar
Jesse Beder committed
152
		if((m_flowLevel == 0 ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
153
			return ScanPlainScalar();
Jesse Beder's avatar
Jesse Beder committed
154
155

		// don't know what it is!
156
		throw ParserException(INPUT.line, INPUT.column, ErrorMsg::UNKNOWN_TOKEN);
Jesse Beder's avatar
Jesse Beder committed
157
158
159
160
161
162
163
164
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
Jesse Beder's avatar
Jesse Beder committed
165
			while(IsWhitespaceToBeEaten(INPUT.peek()))
166
				INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
167
168

			// then eat a comment
Jesse Beder's avatar
Jesse Beder committed
169
			if(Exp::Comment.Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
170
				// eat until line break
Jesse Beder's avatar
Jesse Beder committed
171
				while(INPUT && !Exp::Break.Matches(INPUT))
172
					INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
173
174
175
			}

			// if it's NOT a line break, then we're done!
Jesse Beder's avatar
Jesse Beder committed
176
			if(!Exp::Break.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
177
178
179
				break;

			// otherwise, let's eat the line break and keep going
180
			int n = Exp::Break.Match(INPUT);
181
			INPUT.eat(n);
Jesse Beder's avatar
Jesse Beder committed
182

Jesse Beder's avatar
Jesse Beder committed
183
			// oh yeah, and let's get rid of that simple key
184
			VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
185

Jesse Beder's avatar
Jesse Beder committed
186
187
188
189
190
191
			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . Decides whether 'ch' may be skipped as whitespace between tokens.
	//   The intended rule is that we can eat it if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
	// . NOTE(review): the flow test is written as 'm_flowLevel >= 0', which
	//   holds for any non-negative flow level, so tabs appear to be eaten in
	//   block context too. Rule 2a suggests '> 0' may have been meant -
	//   confirm before changing, since that would reject input accepted today.
	bool Scanner::IsWhitespaceToBeEaten(char ch)
	{
		switch(ch) {
			case ' ':
				return true;
			case '\t':
				return m_flowLevel >= 0 || !m_simpleKeyAllowed;
			default:
				return false;
		}
	}

213
214
215
216
217
218
219
220
221
222
223
224
	// StartStream
	// . Puts the scanner into its initial scanning state: the stream is
	//   marked as started, a simple key is allowed, and -1 is pushed as
	//   the bottom entry of the indentation stack.
	void Scanner::StartStream()
	{
		m_startedStream = true;
		m_simpleKeyAllowed = true;

		// base indentation level, below any real column
		m_indents.push(-1);
	}

	// EndStream
	// . Close out the stream, finish up, etc.
	void Scanner::EndStream()
225
	{
226
227
228
229
230
231
232
233
234
		// force newline
		if(INPUT.column > 0)
			INPUT.column = 0;

		PopIndentTo(-1);
		VerifyAllSimpleKeys();

		m_simpleKeyAllowed = false;
		m_endedStream = true;
235
236
	}

Jesse Beder's avatar
Jesse Beder committed
237
238
239
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
Jesse Beder's avatar
Jesse Beder committed
240
241
	// . Returns the token it generates (if any).
	Token *Scanner::PushIndentTo(int column, bool sequence)
Jesse Beder's avatar
Jesse Beder committed
242
243
244
	{
		// are we in flow?
		if(m_flowLevel > 0)
Jesse Beder's avatar
Jesse Beder committed
245
			return 0;
Jesse Beder's avatar
Jesse Beder committed
246
247
248

		// is this actually an indentation?
		if(column <= m_indents.top())
Jesse Beder's avatar
Jesse Beder committed
249
			return 0;
Jesse Beder's avatar
Jesse Beder committed
250
251
252
253

		// now push
		m_indents.push(column);
		if(sequence)
254
			m_tokens.push(Token(TT_BLOCK_SEQ_START, INPUT.line, INPUT.column));
Jesse Beder's avatar
Jesse Beder committed
255
		else
256
			m_tokens.push(Token(TT_BLOCK_MAP_START, INPUT.line, INPUT.column));
Jesse Beder's avatar
Jesse Beder committed
257

258
		return &m_tokens.back();
Jesse Beder's avatar
Jesse Beder committed
259
260
261
262
263
264
265
266
267
268
269
270
271
272
	}

	// PopIndentTo
	// . Pops indentations off the stack until we reach 'column' indentation,
	//   and enqueues the proper token each time.
	void Scanner::PopIndentTo(int column)
	{
		// are we in flow?
		if(m_flowLevel > 0)
			return;

		// now pop away
		while(!m_indents.empty() && m_indents.top() > column) {
			m_indents.pop();
273
			m_tokens.push(Token(TT_BLOCK_END, INPUT.line, INPUT.column));
Jesse Beder's avatar
Jesse Beder committed
274
275
		}
	}
Jesse Beder's avatar
Jesse Beder committed
276
}