scantoken.cpp 9.11 KB
Newer Older
1
2
3
4
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
5
#include "scanscalar.h"
6
7
8
9
10
11

namespace YAML
{
	///////////////////////////////////////////////////////////////////////
	// Specialization for scanning specific tokens

12
	// Directive
beder's avatar
beder committed
13
	// . Note: no semantic checking is done here (that's for the parser to do)
14
	void Scanner::ScanDirective()
beder's avatar
beder committed
15
	{
16
17
18
		std::string name;
		std::vector <std::string> params;

beder's avatar
beder committed
19
20
21
22
23
24
25
26
27
28
29
		// pop indents and simple keys
		PopIndentTo(-1);
		VerifyAllSimpleKeys();

		m_simpleKeyAllowed = false;

		// eat indicator
		INPUT.Eat(1);

		// read name
		while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
30
			name += INPUT.GetChar();
beder's avatar
beder committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

		// read parameters
		while(1) {
			// first get rid of whitespace
			while(Exp::Blank.Matches(INPUT))
				INPUT.Eat(1);

			// break on newline or comment
			if(INPUT.peek() == EOF || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
				break;

			// now read parameter
			std::string param;
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
				param += INPUT.GetChar();

47
			params.push_back(param);
beder's avatar
beder committed
48
49
		}
		
50
51
52
53
		Token *pToken = new Token(TT_DIRECTIVE);
		pToken->value = name;
		pToken->params = params;
		m_tokens.push(pToken);
beder's avatar
beder committed
54
55
	}

56
57
	// DocStart
	void Scanner::ScanDocStart()
58
	{
59
		PopIndentTo(INPUT.column);
60
		VerifyAllSimpleKeys();
61
62
63
		m_simpleKeyAllowed = false;

		// eat
64
		INPUT.Eat(3);
65
		m_tokens.push(new Token(TT_DOC_START));
66
67
	}

68
69
	// DocEnd
	void Scanner::ScanDocEnd()
70
71
	{
		PopIndentTo(-1);
72
		VerifyAllSimpleKeys();
73
74
75
		m_simpleKeyAllowed = false;

		// eat
76
		INPUT.Eat(3);
77
		m_tokens.push(new Token(TT_DOC_END));
78
79
	}

80
81
	// FlowStart
	void Scanner::ScanFlowStart()
82
	{
83
		// flows can be simple keys
beder's avatar
beder committed
84
		InsertSimpleKey();
beder's avatar
beder committed
85
		m_flowLevel++;
86
87
88
		m_simpleKeyAllowed = true;

		// eat
89
90
91
		char ch = INPUT.GetChar();
		TOKEN_TYPE type = (ch == Keys::FlowSeqStart ? TT_FLOW_SEQ_START : TT_FLOW_MAP_START);
		m_tokens.push(new Token(type));
92
93
	}

94
95
	// FlowEnd
	void Scanner::ScanFlowEnd()
96
	{
beder's avatar
beder committed
97
98
99
100
		if(m_flowLevel == 0)
			throw IllegalFlowEnd();

		m_flowLevel--;
101
102
103
		m_simpleKeyAllowed = false;

		// eat
104
105
106
		char ch = INPUT.GetChar();
		TOKEN_TYPE type = (ch == Keys::FlowSeqEnd ? TT_FLOW_SEQ_END : TT_FLOW_MAP_END);
		m_tokens.push(new Token(type));
107
108
	}

109
110
	// FlowEntry
	void Scanner::ScanFlowEntry()
111
112
113
114
	{
		m_simpleKeyAllowed = true;

		// eat
115
		INPUT.Eat(1);
116
		m_tokens.push(new Token(TT_FLOW_ENTRY));
117
118
	}

119
120
	// BlockEntry
	void Scanner::ScanBlockEntry()
121
122
	{
		// we better be in the block context!
beder's avatar
beder committed
123
124
		if(m_flowLevel > 0)
			throw IllegalBlockEntry();
125

beder's avatar
beder committed
126
127
128
		// can we put it here?
		if(!m_simpleKeyAllowed)
			throw IllegalBlockEntry();
129

130
		PushIndentTo(INPUT.column, true);
131
132
133
		m_simpleKeyAllowed = true;

		// eat
134
		INPUT.Eat(1);
135
		m_tokens.push(new Token(TT_BLOCK_ENTRY));
136
137
	}

138
139
	// Key
	void Scanner::ScanKey()
140
	{
beder's avatar
beder committed
141
		// handle keys diffently in the block context (and manage indents)
142
143
144
145
		if(m_flowLevel == 0) {
			if(!m_simpleKeyAllowed)
				throw IllegalMapKey();

146
			PushIndentTo(INPUT.column, false);
147
148
149
150
151
152
153
154
155
		}

		// can only put a simple key here if we're in block context
		if(m_flowLevel == 0)
			m_simpleKeyAllowed = true;
		else
			m_simpleKeyAllowed = false;

		// eat
156
		INPUT.Eat(1);
157
		m_tokens.push(new Token(TT_KEY));
158
159
	}

160
161
	// Value
	void Scanner::ScanValue()
162
	{
beder's avatar
beder committed
163
		// does this follow a simple key?
164
		if(m_isLastKeyValid) {
beder's avatar
beder committed
165
166
			// can't follow a simple key with another simple key (dunno why, though - it seems fine)
			m_simpleKeyAllowed = false;
167
		} else {
beder's avatar
beder committed
168
			// handle values diffently in the block context (and manage indents)
169
170
171
172
			if(m_flowLevel == 0) {
				if(!m_simpleKeyAllowed)
					throw IllegalMapValue();

173
				PushIndentTo(INPUT.column, false);
174
175
			}

beder's avatar
beder committed
176
177
178
179
180
181
			// can only put a simple key here if we're in block context
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
			else
				m_simpleKeyAllowed = false;
		}
182
183

		// eat
184
		INPUT.Eat(1);
185
		m_tokens.push(new Token(TT_VALUE));
186
187
	}

188
189
	// AnchorOrAlias
	void Scanner::ScanAnchorOrAlias()
beder's avatar
beder committed
190
	{
191
192
193
		bool alias;
		std::string tag;

beder's avatar
beder committed
194
195
196
197
198
199
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
200
		char indicator = INPUT.GetChar();
201
		alias = (indicator == Keys::Alias);
beder's avatar
beder committed
202
203
204

		// now eat the content
		while(Exp::AlphaNumeric.Matches(INPUT))
205
			tag += INPUT.GetChar();
beder's avatar
beder committed
206
207
208
209
210
211
212
213
214
215

		// we need to have read SOMETHING!
		if(tag.empty())
			throw AnchorNotFound();

		// and needs to end correctly
		if(INPUT.peek() != EOF && !Exp::AnchorEnd.Matches(INPUT))
			throw IllegalCharacterInAnchor();

		// and we're done
216
		Token *pToken = new Token(alias ? TT_ALIAS : TT_ANCHOR);
beder's avatar
beder committed
217
		pToken->value = tag;
218
		m_tokens.push(pToken);
beder's avatar
beder committed
219
	}
220

221
222
	// Tag
	void Scanner::ScanTag()
beder's avatar
beder committed
223
	{
224
225
		std::string handle, suffix;

beder's avatar
beder committed
226
227
228
229
230
231
232
233
234
235
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
		INPUT.Eat(1);

		// read the handle
		while(INPUT.peek() != EOF && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
236
			handle += INPUT.GetChar();
beder's avatar
beder committed
237
238
239
240
241
242
243
244

		// is there a suffix?
		if(INPUT.peek() == Keys::Tag) {
			// eat the indicator
			INPUT.Eat(1);

			// then read it
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
245
				suffix += INPUT.GetChar();
beder's avatar
beder committed
246
247
		}

248
249
250
251
		Token *pToken = new Token(TT_TAG);
		pToken->value = handle;
		pToken->params.push_back(suffix);
		m_tokens.push(pToken);
beder's avatar
beder committed
252
253
	}

254
255
	// PlainScalar
	void Scanner::ScanPlainScalar()
256
	{
257
258
		std::string scalar;

259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
		// set up the scanning parameters
		ScanScalarParams params;
		params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
		params.eatEnd = false;
		params.indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = true;
		params.chomp = CLIP;
		params.onDocIndicator = BREAK;
		params.onTabInIndentation = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

275
		scalar = ScanScalar(INPUT, params);
276
277
278
279
280
281
282
283
284

		// can have a simple key only if we ended the scalar by starting a new line
		m_simpleKeyAllowed = params.leadingSpaces;

		// finally, we can't have any colons in a scalar, so if we ended on a colon, there
		// had better be a break after it
		if(Exp::IllegalColonInScalar.Matches(INPUT))
			throw IllegalScalar();

285
286
287
		Token *pToken = new Token(TT_SCALAR);
		pToken->value = scalar;
		m_tokens.push(pToken);
288
289
	}

290
291
	// QuotedScalar
	void Scanner::ScanQuotedScalar()
292
	{
293
294
		std::string scalar;

295
296
		// eat single or double quote
		char quote = INPUT.GetChar();
297
		bool single = (quote == '\'');
298
299
300

		// setup the scanning parameters
		ScanScalarParams params;
301
		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
302
		params.eatEnd = true;
303
		params.escape = (single ? '\'' : '\\');
304
305
306
307
308
309
310
311
312
313
314
		params.indent = 0;
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = false;
		params.chomp = CLIP;
		params.onDocIndicator = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

315
		scalar = ScanScalar(INPUT, params);
316
317
		m_simpleKeyAllowed = false;

318
319
320
		Token *pToken = new Token(TT_SCALAR);
		pToken->value = scalar;
		m_tokens.push(pToken);
321
322
323
324
325
326
	}

	// BlockScalarToken
	// . These need a little extra processing beforehand.
	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
	//   and then we need to figure out what level of indentation we'll be using.
327
	void Scanner::ScanBlockScalar()
328
	{
329
330
		std::string scalar;

331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
		ScanScalarParams params;
		params.indent = 1;
		params.detectIndent = true;

		// eat block indicator ('|' or '>')
		char indicator = INPUT.GetChar();
		params.fold = (indicator == Keys::FoldedScalar);

		// eat chomping/indentation indicators
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++) {
			char ch = INPUT.GetChar();
			if(ch == '+')
				params.chomp = KEEP;
			else if(ch == '-')
				params.chomp = STRIP;
			else if(Exp::Digit.Matches(ch)) {
				if(ch == '0')
					throw ZeroIndentationInBlockScalar();

				params.indent = ch - '0';
				params.detectIndent = false;
			}
		}

		// now eat whitespace
		while(Exp::Blank.Matches(INPUT))
			INPUT.Eat(1);

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT))
			while(INPUT && !Exp::Break.Matches(INPUT))
				INPUT.Eat(1);

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
			throw UnexpectedCharacterInBlockScalar();

		// set the initial indentation
		if(m_indents.top() >= 0)
			params.indent += m_indents.top();

		params.eatLeadingWhitespace = false;
		params.trimTrailingSpaces = false;
		params.onTabInIndentation = THROW;

377
		scalar = ScanScalar(INPUT, params);
378
379
380

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;
381
382
383
384

		Token *pToken = new Token(TT_SCALAR);
		pToken->value = scalar;
		m_tokens.push(pToken);
385
	}
386
}