scanner.cpp 6.48 KB
Newer Older
1
#include "crt.h"
Jesse Beder's avatar
Jesse Beder committed
2
3
#include "scanner.h"
#include "token.h"
Jesse Beder's avatar
Jesse Beder committed
4
#include "exceptions.h"
5
#include "exp.h"
Jesse Beder's avatar
Jesse Beder committed
6
7
8
9

namespace YAML
{
	// Scanner
	// . Constructs the scanner: INPUT is initialized from 'in', the three
	//   state flags start out false, and the flow level starts at zero.
	Scanner::Scanner(std::istream& in)
		: INPUT(in),
		  m_startedStream(false),
		  m_endedStream(false),
		  m_simpleKeyAllowed(false),
		  m_flowLevel(0)
	{
	}

	// ~Scanner
	// . The body is empty; members are torn down by their own destructors.
	Scanner::~Scanner() {}

18
19
20
	// IsEmpty
	// . Returns true if there are no more tokens to be read
	bool Scanner::IsEmpty()
21
	{
22
23
		PeekToken();  // to ensure that there are tokens in the queue, if possible
		return m_tokens.empty();
24
	}
Jesse Beder's avatar
Jesse Beder committed
25

26
	// PopToken
27
	// . Simply removes the next token on the queue.
28
	void Scanner::PopToken()
29
	{
30
31
32
		PeekToken();  // to ensure that there are tokens in the queue
		if(!m_tokens.empty())
			m_tokens.pop();
33
34
	}

35
	// PeekToken
36
	// . Returns (but does not remove) the next token on the queue, and scans if only we need to.
37
	Token& Scanner::PeekToken()
Jesse Beder's avatar
Jesse Beder committed
38
	{
39
		while(1) {
40
41
			if(!m_tokens.empty()) {
				Token& token = m_tokens.front();
Jesse Beder's avatar
Jesse Beder committed
42

43
44
45
				// return this guy if it's valid
				if(token.status == TS_VALID)
					return token;
Jesse Beder's avatar
Jesse Beder committed
46

47
48
49
50
51
				// here's where we clean up the impossible tokens
				if(token.status == TS_INVALID) {
					m_tokens.pop();
					continue;
				}
52

53
54
				// note: what's left are the unverified tokens
			}
55
56
57
58
59
60
61
62
63

			// no token? maybe we've actually finished
			if(m_endedStream)
				break;

			// no? then scan...
			ScanNextToken();
		}

64
		// TODO: find something to return here, or assert (but can't do that! maybe split into two functions?)
Jesse Beder's avatar
Jesse Beder committed
65
66
	}

Jesse Beder's avatar
Jesse Beder committed
67
68
69
	// ScanNextToken
	// . The main scanning function; here we branch out and
	//   scan whatever the next token should be.
Jesse Beder's avatar
Jesse Beder committed
70
	void Scanner::ScanNextToken()
Jesse Beder's avatar
Jesse Beder committed
71
	{
72
73
74
		if(m_endedStream)
			return;

Jesse Beder's avatar
Jesse Beder committed
75
		if(!m_startedStream)
76
			return StartStream();
Jesse Beder's avatar
Jesse Beder committed
77

Jesse Beder's avatar
Jesse Beder committed
78
		// get rid of whitespace, etc. (in between tokens it should be irrelevent)
Jesse Beder's avatar
Jesse Beder committed
79
		ScanToNextToken();
Jesse Beder's avatar
Jesse Beder committed
80
81

		// check the latest simple key
82
		VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
83
84

		// maybe need to end some blocks
85
		PopIndentTo(INPUT.column);
Jesse Beder's avatar
Jesse Beder committed
86

Jesse Beder's avatar
Jesse Beder committed
87
88
89
90
91
		// *****
		// And now branch based on the next few characters!
		// *****

		// end of stream
Jesse Beder's avatar
Jesse Beder committed
92
		if(INPUT.peek() == EOF)
93
			return EndStream();
Jesse Beder's avatar
Jesse Beder committed
94

Jesse Beder's avatar
Jesse Beder committed
95
		if(INPUT.column == 0 && INPUT.peek() == Keys::Directive)
96
			return ScanDirective();
Jesse Beder's avatar
Jesse Beder committed
97

Jesse Beder's avatar
Jesse Beder committed
98
		// document token
Jesse Beder's avatar
Jesse Beder committed
99
		if(INPUT.column == 0 && Exp::DocStart.Matches(INPUT))
100
			return ScanDocStart();
Jesse Beder's avatar
Jesse Beder committed
101

Jesse Beder's avatar
Jesse Beder committed
102
		if(INPUT.column == 0 && Exp::DocEnd.Matches(INPUT))
103
			return ScanDocEnd();
Jesse Beder's avatar
Jesse Beder committed
104

Jesse Beder's avatar
Jesse Beder committed
105
		// flow start/end/entry
106
107
		if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
			return ScanFlowStart();
Jesse Beder's avatar
Jesse Beder committed
108

109
110
111
		if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
			return ScanFlowEnd();
	
Jesse Beder's avatar
Jesse Beder committed
112
		if(INPUT.peek() == Keys::FlowEntry)
113
			return ScanFlowEntry();
Jesse Beder's avatar
Jesse Beder committed
114

Jesse Beder's avatar
Jesse Beder committed
115
		// block/map stuff
Jesse Beder's avatar
Jesse Beder committed
116
		if(Exp::BlockEntry.Matches(INPUT))
117
			return ScanBlockEntry();
Jesse Beder's avatar
Jesse Beder committed
118

Jesse Beder's avatar
Jesse Beder committed
119
		if((m_flowLevel == 0 ? Exp::Key : Exp::KeyInFlow).Matches(INPUT))
120
			return ScanKey();
Jesse Beder's avatar
Jesse Beder committed
121

Jesse Beder's avatar
Jesse Beder committed
122
		if((m_flowLevel == 0 ? Exp::Value : Exp::ValueInFlow).Matches(INPUT))
123
			return ScanValue();
Jesse Beder's avatar
Jesse Beder committed
124

Jesse Beder's avatar
Jesse Beder committed
125
		// alias/anchor
Jesse Beder's avatar
Jesse Beder committed
126
		if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
127
			return ScanAnchorOrAlias();
Jesse Beder's avatar
Jesse Beder committed
128

Jesse Beder's avatar
Jesse Beder committed
129
130
		// tag
		if(INPUT.peek() == Keys::Tag)
131
			return ScanTag();
Jesse Beder's avatar
Jesse Beder committed
132

133
134
		// special scalars
		if(m_flowLevel == 0 && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
135
			return ScanBlockScalar();
Jesse Beder's avatar
Jesse Beder committed
136

137
		if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
138
			return ScanQuotedScalar();
Jesse Beder's avatar
Jesse Beder committed
139
140

		// plain scalars
Jesse Beder's avatar
Jesse Beder committed
141
		if((m_flowLevel == 0 ? Exp::PlainScalar : Exp::PlainScalarInFlow).Matches(INPUT))
142
			return ScanPlainScalar();
Jesse Beder's avatar
Jesse Beder committed
143
144

		// don't know what it is!
145
		throw ParserException(INPUT.line, INPUT.column, ErrorMsg::UNKNOWN_TOKEN);
Jesse Beder's avatar
Jesse Beder committed
146
147
148
149
150
151
152
153
	}

	// ScanToNextToken
	// . Eats input until we reach the next token-like thing.
	void Scanner::ScanToNextToken()
	{
		while(1) {
			// first eat whitespace
Jesse Beder's avatar
Jesse Beder committed
154
			while(IsWhitespaceToBeEaten(INPUT.peek()))
155
				INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
156
157

			// then eat a comment
Jesse Beder's avatar
Jesse Beder committed
158
			if(Exp::Comment.Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
159
				// eat until line break
Jesse Beder's avatar
Jesse Beder committed
160
				while(INPUT && !Exp::Break.Matches(INPUT))
161
					INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
162
163
164
			}

			// if it's NOT a line break, then we're done!
Jesse Beder's avatar
Jesse Beder committed
165
			if(!Exp::Break.Matches(INPUT))
Jesse Beder's avatar
Jesse Beder committed
166
167
168
				break;

			// otherwise, let's eat the line break and keep going
169
			int n = Exp::Break.Match(INPUT);
170
			INPUT.eat(n);
Jesse Beder's avatar
Jesse Beder committed
171

Jesse Beder's avatar
Jesse Beder committed
172
			// oh yeah, and let's get rid of that simple key
173
			VerifySimpleKey();
Jesse Beder's avatar
Jesse Beder committed
174

Jesse Beder's avatar
Jesse Beder committed
175
176
177
178
179
180
			// new line - we may be able to accept a simple key now
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
        }
	}

181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
	///////////////////////////////////////////////////////////////////////
	// Misc. helpers

	// IsWhitespaceToBeEaten
	// . Decides whether the character 'ch' may be skipped as whitespace.
	// . We can eat whitespace if:
	//   1. It's a space
	//   2. It's a tab, and we're either:
	//      a. In the flow context (flow level above zero)
	//      b. In the block context but not where a simple key could be allowed
	//         (i.e., not at the beginning of a line, or following '-', '?', or ':')
	// . Returns true if 'ch' should be eaten, false otherwise.
	bool Scanner::IsWhitespaceToBeEaten(char ch)
	{
		if(ch == ' ')
			return true;

		// The flow test has to be strictly greater than zero: ">= 0" holds at
		// every nesting level, which made rule 2 meaningless and ate every tab.
		if(ch == '\t')
			return m_flowLevel > 0 || !m_simpleKeyAllowed;

		return false;
	}

202
203
204
205
206
207
208
209
210
211
212
213
	// StartStream
	// . Puts the scanner into its initial scanning state: a simple key is
	//   allowed, the stream is marked as started, and -1 is pushed as the
	//   first entry of the indentation stack.
	void Scanner::StartStream()
	{
		m_simpleKeyAllowed = true;
		m_startedStream = true;

		m_indents.push(-1);
	}

	// EndStream
	// . Close out the stream, finish up, etc.
	void Scanner::EndStream()
214
	{
215
216
217
218
219
220
221
222
223
		// force newline
		if(INPUT.column > 0)
			INPUT.column = 0;

		PopIndentTo(-1);
		VerifyAllSimpleKeys();

		m_simpleKeyAllowed = false;
		m_endedStream = true;
224
225
	}

Jesse Beder's avatar
Jesse Beder committed
226
227
228
	// PushIndentTo
	// . Pushes an indentation onto the stack, and enqueues the
	//   proper token (sequence start or mapping start).
Jesse Beder's avatar
Jesse Beder committed
229
230
	// . Returns the token it generates (if any).
	Token *Scanner::PushIndentTo(int column, bool sequence)
Jesse Beder's avatar
Jesse Beder committed
231
232
233
	{
		// are we in flow?
		if(m_flowLevel > 0)
Jesse Beder's avatar
Jesse Beder committed
234
			return 0;
Jesse Beder's avatar
Jesse Beder committed
235
236
237

		// is this actually an indentation?
		if(column <= m_indents.top())
Jesse Beder's avatar
Jesse Beder committed
238
			return 0;
Jesse Beder's avatar
Jesse Beder committed
239
240
241
242

		// now push
		m_indents.push(column);
		if(sequence)
243
			m_tokens.push(Token(TT_BLOCK_SEQ_START, INPUT.line, INPUT.column));
Jesse Beder's avatar
Jesse Beder committed
244
		else
245
			m_tokens.push(Token(TT_BLOCK_MAP_START, INPUT.line, INPUT.column));
Jesse Beder's avatar
Jesse Beder committed
246

247
		return &m_tokens.back();
Jesse Beder's avatar
Jesse Beder committed
248
249
250
251
252
253
254
255
256
257
258
259
260
261
	}

	// PopIndentTo
	// . Pops indentations off the stack until we reach 'column' indentation,
	//   and enqueues the proper token each time.
	void Scanner::PopIndentTo(int column)
	{
		// are we in flow?
		if(m_flowLevel > 0)
			return;

		// now pop away
		while(!m_indents.empty() && m_indents.top() > column) {
			m_indents.pop();
262
			m_tokens.push(Token(TT_BLOCK_END, INPUT.line, INPUT.column));
Jesse Beder's avatar
Jesse Beder committed
263
264
		}
	}
Jesse Beder's avatar
Jesse Beder committed
265
}