scanner.cpp 6.88 KB
Newer Older
Jesse Beder's avatar
Jesse Beder committed
1
2
#include "scanner.h"
#include "token.h"
Jesse Beder's avatar
Jesse Beder committed
3
#include "exceptions.h"
4
#include "exp.h"
Jesse Beder's avatar
Jesse Beder committed
5
6
7
8

namespace YAML
{
	// Scanner (constructor)
	// . Binds the scanner to the input stream 'in'.
	// . Initial state: stream neither started nor ended, simple keys
	//   not allowed, and flow level zero.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0)
	{
	}

	// ~Scanner
	// . Frees every token the scanner still holds: the ones waiting in
	//   the queue and the ones parked in the limbo set.
	Scanner::~Scanner()
	{
		// drain the queue, deleting each token before it is popped
		for(; !m_tokens.empty(); m_tokens.pop())
			delete m_tokens.front();

		// limbo tokens were inserted by ScanAndEnqueue() and never made it
		// into the queue, so they have to be freed here
		std::set <Token *>::const_iterator itLimbo = m_limboTokens.begin();
		while(itLimbo != m_limboTokens.end()) {
			delete *itLimbo;
			++itLimbo;
		}
	}

25
26
27
28
29
30
31
32
33
	// GetNextToken
	// . Removes and returns the next token on the queue (scanning more
	//   input first if PeekNextToken() needs to).
	// . Returns 0 when PeekNextToken() has nothing to hand out, i.e. the
	//   stream has ended and no usable token is at the head of the queue.
	// . The returned token is popped without being deleted, so the
	//   Scanner destructor will no longer free it.
	Token *Scanner::GetNextToken()
	{
		Token *pToken = PeekNextToken();

		// Only pop what we actually return. PeekNextToken() can return 0
		// while the queue is non-empty (head still unverified at end of
		// stream); popping in that case would discard, and leak, a token
		// that was never given to anybody. A non-null result always is
		// the current head of the queue, so the pop is safe.
		if(pToken)
			m_tokens.pop();

		return pToken;
	}
Jesse Beder's avatar
Jesse Beder committed
34

35
36
37
	// PeekNextToken
	// . Returns (but does not remove) the next token on the queue, and scans only if we need to.
	// . Returns 0 once the stream has ended and no usable token is available.
	Token *Scanner::PeekNextToken()
	{
		for(;;) {
			// look at the head of the queue, if there is one
			Token *pHead = m_tokens.empty() ? 0 : m_tokens.front();

			if(pHead) {
				// impossible tokens are cleaned up here, then we look again
				if(pHead->status == TS_INVALID) {
					m_tokens.pop();
					delete pHead;
					continue;
				}

				// an unverified token can't be handed out yet; anything
				// else is exactly what the caller wants
				const bool mustWait = (pHead->status == TS_UNVERIFIED);
				if(!mustWait)
					return pHead;
			}

			// nothing usable: either we've actually finished...
			if(m_endedStream)
				return 0;

			// ...or we need to scan some more
			ScanNextToken();
		}
	}

Jesse Beder's avatar
Jesse Beder committed
72
73
74
	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
Jesse Beder's avatar
Jesse Beder committed
75
	void Scanner::ScanNextToken()
Jesse Beder's avatar
Jesse Beder committed
76
	{
77
78
79
		if(m_endedStream)
			return;

Jesse Beder's avatar
Jesse Beder committed
80
		if(!m_startedStream)
Jesse Beder's avatar
Jesse Beder committed
81
			return ScanAndEnqueue(new StreamStartToken);
Jesse Beder's avatar
Jesse Beder committed
82

Jesse Beder's avatar
Jesse Beder committed
83
		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
Jesse Beder's avatar
Jesse Beder committed
84
		ScanToNextToken();
Jesse Beder's avatar
Jesse Beder committed
85
86

		// check the latest simple key
87
		VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
88
89

		// maybe need to end some blocks
90
		PopIndentTo(INPUT.column);
Jesse Beder's avatar
Jesse Beder committed
91

Jesse Beder's avatar
Jesse Beder committed
92
93
94
95
96
		// *****
		// And now branch based on the next few characters!
		// *****

		// end of stream
Jesse Beder's avatar
Jesse Beder committed
97
		if(INPUT.peek() == EOF)
Jesse Beder's avatar
Jesse Beder committed
98
			return ScanAndEnqueue(new StreamEndToken);
Jesse Beder's avatar
Jesse Beder committed
99

Jesse Beder's avatar
Jesse Beder committed
100
101
102
		if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)
			return ScanAndEnqueue(new DirectiveToken);

Jesse Beder's avatar
Jesse Beder committed
103
		// document token
Jesse Beder's avatar
Jesse Beder committed
104
		if(INPUT.column == 0 && Exp::DocStart.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
105
			return ScanAndEnqueue(new DocumentStartToken);
Jesse Beder's avatar
Jesse Beder committed
106

Jesse Beder's avatar
Jesse Beder committed
107
		if(INPUT.column == 0 && Exp::DocEnd.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
108
			return ScanAndEnqueue(new DocumentEndToken);
Jesse Beder's avatar
Jesse Beder committed
109

Jesse Beder's avatar
Jesse Beder committed
110
		// flow start/end/entry
Jesse Beder's avatar
Jesse Beder committed
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
		if(INPUT.peek() == Keys::FlowSeqStart)
			return ScanAndEnqueue(new FlowSeqStartToken);

		if(INPUT.peek() == Keys::FlowSeqEnd)
			return ScanAndEnqueue(new FlowSeqEndToken);
		
		if(INPUT.peek() == Keys::FlowMapStart)
			return ScanAndEnqueue(new FlowMapStartToken);
		
		if(INPUT.peek() == Keys::FlowMapEnd)
			return ScanAndEnqueue(new FlowMapEndToken);

		if(INPUT.peek() == Keys::FlowEntry)
			return ScanAndEnqueue(new FlowEntryToken);

Jesse Beder's avatar
Jesse Beder committed
126
		// block/map stuff
Jesse Beder's avatar
Jesse Beder committed
127
		if(Exp::BlockEntry.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
128
129
			return ScanAndEnqueue(new BlockEntryToken);

Jesse Beder's avatar
Jesse Beder committed
130
		if((m_flowLevel == 0 ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
131
132
			return ScanAndEnqueue(new KeyToken);

Jesse Beder's avatar
Jesse Beder committed
133
		if((m_flowLevel == 0 ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
134
135
			return ScanAndEnqueue(new ValueToken);

Jesse Beder's avatar
Jesse Beder committed
136
		// alias/anchor
Jesse Beder's avatar
Jesse Beder committed
137
138
139
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
			return ScanAndEnqueue(new AnchorToken);

Jesse Beder's avatar
Jesse Beder committed
140
141
142
		// tag
		if(INPUT.peek() == Keys::Tag)
			return ScanAndEnqueue(new TagToken);
Jesse Beder's avatar
Jesse Beder committed
143

144
145
146
		// special scalars
		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
			return ScanAndEnqueue(new BlockScalarToken);
Jesse Beder's avatar
Jesse Beder committed
147

148
149
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
			return ScanAndEnqueue(new QuotedScalarToken);
Jesse Beder's avatar
Jesse Beder committed
150
151

		// plain scalars
Jesse Beder's avatar
Jesse Beder committed
152
		if((m_flowLevel == 0 ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
153
154
155
156
			return ScanAndEnqueue(new PlainScalarToken);

		// don't know what it is!
		throw UnknownToken();
Jesse Beder's avatar
Jesse Beder committed
157
158
159
160
161
162
163
164
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
Jesse Beder's avatar
Jesse Beder committed
165
			while(IsWhitespaceToBeEaten(INPUT.peek()))
166
				INPUT.Eat(1);
Jesse Beder's avatar
Jesse Beder committed
167
168

			// then eat a comment
Jesse Beder's avatar
Jesse Beder committed
169
			if(Exp::Comment.Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
170
				// eat until line break
Jesse Beder's avatar
Jesse Beder committed
171
				while(INPUT && !Exp::Break.Matches(INPUT))
172
					INPUT.Eat(1);
Jesse Beder's avatar
Jesse Beder committed
173
174
175
			}

			// if it's NOT a line break, then we're done!
Jesse Beder's avatar
Jesse Beder committed
176
			if(!Exp::Break.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
177
178
179
				break;

			// otherwise, let's eat the line break and keep going
180
181
			int n = Exp::Break.Match(INPUT);
			INPUT.Eat(n);
Jesse Beder's avatar
Jesse Beder committed
182

Jesse Beder's avatar
Jesse Beder committed
183
			// oh yeah, and let's get rid of that simple key
184
			VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
185

Jesse Beder's avatar
Jesse Beder committed
186
187
188
189
190
191
			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . Returns true if 'ch' is whitespace that may be skipped between tokens.
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
	// . Any other character is never eaten.
	bool Scanner::IsWhitespaceToBeEaten(char ch)
	{
		if(ch == ' ')
			return true;

		// "In the flow context" means m_flowLevel > 0, which is how the rest
		// of this file tests it. The previous test, m_flowLevel >= 0, holds
		// for every non-negative level, so tabs were eaten unconditionally
		// and rule 2b could never take effect.
		if(ch == '\t' && (m_flowLevel > 0 || !m_simpleKeyAllowed))
			return true;

		return false;
	}

	// ScanAndEnqueue
	// . Scans the token, then pushes it in the queue.
	// . 'pToken' is a freshly allocated token of concrete type T; whatever
	//   ScanToken() returns for it is what gets pushed onto m_tokens.
	// . Note: we also use a set of "limbo tokens", i.e., tokens
	//   that haven't yet been pushed. This way, if ScanToken()
	//   throws an exception, we'll be keeping track of 'pToken'
	//   somewhere, and it will be automatically cleaned up when
	//   the Scanner destructs.
	// . The order of the three statements below is what makes that work;
	//   do not reorder them.
	template <typename T> void Scanner::ScanAndEnqueue(T *pToken)
	{
		// register first, so the destructor can delete the token if scanning throws
		m_limboTokens.insert(pToken);
		m_tokens.push(ScanToken(pToken));
		// the token is in the queue now, so it no longer needs the limbo entry
		m_limboTokens.erase(pToken);
	}

Jesse Beder's avatar
Jesse Beder committed
227
228
229
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
Jesse Beder's avatar
Jesse Beder committed
230
231
	// . Returns the token it generates (if any).
	Token *Scanner::PushIndentTo(int column, bool sequence)
Jesse Beder's avatar
Jesse Beder committed
232
233
234
	{
		// are we in flow?
		if(m_flowLevel > 0)
Jesse Beder's avatar
Jesse Beder committed
235
			return 0;
Jesse Beder's avatar
Jesse Beder committed
236
237
238

		// is this actually an indentation?
		if(column <= m_indents.top())
Jesse Beder's avatar
Jesse Beder committed
239
			return 0;
Jesse Beder's avatar
Jesse Beder committed
240
241
242
243
244
245
246

		// now push
		m_indents.push(column);
		if(sequence)
			m_tokens.push(new BlockSeqStartToken);
		else
			m_tokens.push(new BlockMapStartToken);
Jesse Beder's avatar
Jesse Beder committed
247
248

		return m_tokens.front();
Jesse Beder's avatar
Jesse Beder committed
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
	}

	// PopIndentTo
	// . Unwinds the indentation stack down to 'column': every stored
	//   indentation deeper than 'column' is popped, and a BlockEndToken
	//   is enqueued for each one.
	// . Does nothing inside a flow.
	void Scanner::PopIndentTo(int column)
	{
		// indentation is not tracked while we're in flow
		if(m_flowLevel > 0)
			return;

		for(;;) {
			// stop once the stack is exhausted or the top is no deeper than 'column'
			if(m_indents.empty() || m_indents.top() <= column)
				break;

			m_indents.pop();
			m_tokens.push(new BlockEndToken);
		}
	}
Jesse Beder's avatar
Jesse Beder committed
266
}