scantoken.cpp 9.85 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#include "crt.h"
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include "scanscalar.h"
#include <sstream>

namespace YAML
{
	///////////////////////////////////////////////////////////////////////
	// Specialization for scanning specific tokens

	// Directive
	// . Note: no semantic checking is done here (that's for the parser to do)
	void Scanner::ScanDirective()
	{
		std::string name;
		std::vector <std::string> params;

		// pop indents and simple keys
22
		PopAllIndents();
23
		PopAllSimpleKeys();
24
25
26
27

		m_simpleKeyAllowed = false;

		// store pos and eat indicator
28
		Mark mark = INPUT.mark();
29
30
31
		INPUT.eat(1);

		// read name
32
		while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
33
34
35
36
37
38
39
40
41
			name += INPUT.get();

		// read parameters
		while(1) {
			// first get rid of whitespace
			while(Exp::Blank.Matches(INPUT))
				INPUT.eat(1);

			// break on newline or comment
42
			if(!INPUT || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
43
44
45
46
				break;

			// now read parameter
			std::string param;
47
			while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
48
49
50
51
52
				param += INPUT.get();

			params.push_back(param);
		}
		
53
		Token token(Token::DIRECTIVE, mark);
54
55
56
57
58
59
60
61
		token.value = name;
		token.params = params;
		m_tokens.push(token);
	}

	// DocStart
	void Scanner::ScanDocStart()
	{
62
		PopAllIndents();
63
		PopAllSimpleKeys();
64
65
66
		m_simpleKeyAllowed = false;

		// eat
67
		Mark mark = INPUT.mark();
68
		INPUT.eat(3);
69
		m_tokens.push(Token(Token::DOC_START, mark));
70
71
72
73
74
	}

	// DocEnd
	void Scanner::ScanDocEnd()
	{
75
		PopAllIndents();
76
		PopAllSimpleKeys();
77
78
79
		m_simpleKeyAllowed = false;

		// eat
80
		Mark mark = INPUT.mark();
81
		INPUT.eat(3);
82
		m_tokens.push(Token(Token::DOC_END, mark));
83
84
85
86
87
88
	}

	// FlowStart
	void Scanner::ScanFlowStart()
	{
		// flows can be simple keys
89
		InsertPotentialSimpleKey();
90
91
92
93
		m_flowLevel++;
		m_simpleKeyAllowed = true;

		// eat
94
		Mark mark = INPUT.mark();
95
		char ch = INPUT.get();
96
		Token::TYPE type = (ch == Keys::FlowSeqStart ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
97
		m_tokens.push(Token(type, mark));
98
99
100
101
102
103
	}

	// FlowEnd
	void Scanner::ScanFlowEnd()
	{
		if(m_flowLevel == 0)
104
			throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
105

106
		InvalidateSimpleKey();
107
108
109
110
		m_flowLevel--;
		m_simpleKeyAllowed = false;

		// eat
111
		Mark mark = INPUT.mark();
112
		char ch = INPUT.get();
113
		Token::TYPE type = (ch == Keys::FlowSeqEnd ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
114
		m_tokens.push(Token(type, mark));
115
116
117
118
119
120
121
122
	}

	// FlowEntry
	void Scanner::ScanFlowEntry()
	{
		m_simpleKeyAllowed = true;

		// eat
123
		Mark mark = INPUT.mark();
124
		INPUT.eat(1);
125
		m_tokens.push(Token(Token::FLOW_ENTRY, mark));
126
127
128
129
130
131
132
	}

	// BlockEntry
	void Scanner::ScanBlockEntry()
	{
		// we better be in the block context!
		if(m_flowLevel > 0)
133
			throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
134
135
136

		// can we put it here?
		if(!m_simpleKeyAllowed)
137
			throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
138

139
		PushIndentTo(INPUT.column(), IndentMarker::SEQ);
140
141
142
		m_simpleKeyAllowed = true;

		// eat
143
		Mark mark = INPUT.mark();
144
		INPUT.eat(1);
145
		m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
146
147
148
149
150
151
152
153
	}

	// Key
	void Scanner::ScanKey()
	{
		// handle keys diffently in the block context (and manage indents)
		if(m_flowLevel == 0) {
			if(!m_simpleKeyAllowed)
154
				throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
155

156
			PushIndentTo(INPUT.column(), IndentMarker::MAP);
157
158
159
160
161
162
163
164
165
		}

		// can only put a simple key here if we're in block context
		if(m_flowLevel == 0)
			m_simpleKeyAllowed = true;
		else
			m_simpleKeyAllowed = false;

		// eat
166
		Mark mark = INPUT.mark();
167
		INPUT.eat(1);
168
		m_tokens.push(Token(Token::KEY, mark));
169
170
171
172
173
	}

	// Value
	void Scanner::ScanValue()
	{
174
175
176
177
178
179
180
		// just in case we have an empty key
		InsertPotentialSimpleKey();
		
		// and check that simple key
		bool isSimpleKey = VerifySimpleKey();
		
		if(isSimpleKey) {
181
182
183
184
185
186
			// can't follow a simple key with another simple key (dunno why, though - it seems fine)
			m_simpleKeyAllowed = false;
		} else {
			// handle values diffently in the block context (and manage indents)
			if(m_flowLevel == 0) {
				if(!m_simpleKeyAllowed)
187
					throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
188

189
				PushIndentTo(INPUT.column(), IndentMarker::MAP);
190
191
192
			}

			// can only put a simple key here if we're in block context
193
			m_simpleKeyAllowed = (m_flowLevel == 0);
194
195
196
		}

		// eat
197
		Mark mark = INPUT.mark();
198
		INPUT.eat(1);
199
		m_tokens.push(Token(Token::VALUE, mark));
200
201
202
203
204
205
206
207
208
209
	}

	// AnchorOrAlias
	void Scanner::ScanAnchorOrAlias()
	{
		bool alias;
		std::string name;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
210
			InsertPotentialSimpleKey();
211
212
213
		m_simpleKeyAllowed = false;

		// eat the indicator
214
		Mark mark = INPUT.mark();
215
216
217
218
219
220
221
222
223
		char indicator = INPUT.get();
		alias = (indicator == Keys::Alias);

		// now eat the content
		while(Exp::AlphaNumeric.Matches(INPUT))
			name += INPUT.get();

		// we need to have read SOMETHING!
		if(name.empty())
224
			throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
225
226

		// and needs to end correctly
227
		if(INPUT && !Exp::AnchorEnd.Matches(INPUT))
228
			throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
229
230

		// and we're done
231
		Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
232
233
234
235
236
237
238
239
240
241
242
		token.value = name;
		m_tokens.push(token);
	}

	// Tag
	void Scanner::ScanTag()
	{
		std::string handle, suffix;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
243
			InsertPotentialSimpleKey();
244
245
246
		m_simpleKeyAllowed = false;

		// eat the indicator
247
		Mark mark = INPUT.mark();
248
249
250
		handle += INPUT.get();

		// read the handle
251
		while(INPUT && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
252
253
254
255
256
257
258
259
			handle += INPUT.get();

		// is there a suffix?
		if(INPUT.peek() == Keys::Tag) {
			// eat the indicator
			handle += INPUT.get();

			// then read it
260
			while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
261
262
263
264
265
266
267
				suffix += INPUT.get();
		} else {
			// this is a bit weird: we keep just the '!' as the handle and move the rest to the suffix
			suffix = handle.substr(1);
			handle = "!";
		}

268
		Token token(Token::TAG, mark);
269
270
271
272
273
274
275
276
277
278
279
280
281
282
		token.value = handle;
		token.params.push_back(suffix);
		m_tokens.push(token);
	}

	// PlainScalar
	void Scanner::ScanPlainScalar()
	{
		std::string scalar;

		// set up the scanning parameters
		ScanScalarParams params;
		params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (Exp::BlankOrBreak + Exp::Comment);
		params.eatEnd = false;
283
		params.indent = (m_flowLevel > 0 ? 0 : GetTopIndent() + 1);
284
285
286
287
288
289
290
291
292
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = true;
		params.chomp = CLIP;
		params.onDocIndicator = BREAK;
		params.onTabInIndentation = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
293
			InsertPotentialSimpleKey();
294

295
		Mark mark = INPUT.mark();
296
297
298
299
300
301
302
		scalar = ScanScalar(INPUT, params);

		// can have a simple key only if we ended the scalar by starting a new line
		m_simpleKeyAllowed = params.leadingSpaces;

		// finally, check and see if we ended on an illegal character
		//if(Exp::IllegalCharInScalar.Matches(INPUT))
303
		//	throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
304

305
		Token token(Token::SCALAR, mark);
306
307
308
309
310
311
312
313
314
		token.value = scalar;
		m_tokens.push(token);
	}

	// QuotedScalar
	void Scanner::ScanQuotedScalar()
	{
		std::string scalar;

315
316
		// peek at single or double quote (don't eat because we need to preserve (for the time being) the input position)
		char quote = INPUT.peek();
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
		bool single = (quote == '\'');

		// setup the scanning parameters
		ScanScalarParams params;
		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
		params.eatEnd = true;
		params.escape = (single ? '\'' : '\\');
		params.indent = 0;
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = false;
		params.chomp = CLIP;
		params.onDocIndicator = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
333
			InsertPotentialSimpleKey();
334

335
		Mark mark = INPUT.mark();
336
337
338
339
340

		// now eat that opening quote
		INPUT.get();
		
		// and scan
341
342
343
		scalar = ScanScalar(INPUT, params);
		m_simpleKeyAllowed = false;

344
		Token token(Token::SCALAR, mark);
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
		token.value = scalar;
		m_tokens.push(token);
	}

	// BlockScalarToken
	// . These need a little extra processing beforehand.
	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
	//   and then we need to figure out what level of indentation we'll be using.
	void Scanner::ScanBlockScalar()
	{
		std::string scalar;

		ScanScalarParams params;
		params.indent = 1;
		params.detectIndent = true;

		// eat block indicator ('|' or '>')
362
		Mark mark = INPUT.mark();
363
364
365
366
367
368
369
370
371
372
373
374
375
		char indicator = INPUT.get();
		params.fold = (indicator == Keys::FoldedScalar);

		// eat chomping/indentation indicators
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++) {
			char ch = INPUT.get();
			if(ch == '+')
				params.chomp = KEEP;
			else if(ch == '-')
				params.chomp = STRIP;
			else if(Exp::Digit.Matches(ch)) {
				if(ch == '0')
376
					throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393

				params.indent = ch - '0';
				params.detectIndent = false;
			}
		}

		// now eat whitespace
		while(Exp::Blank.Matches(INPUT))
			INPUT.eat(1);

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT))
			while(INPUT && !Exp::Break.Matches(INPUT))
				INPUT.eat(1);

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
394
			throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
395
396

		// set the initial indentation
397
398
		if(GetTopIndent() >= 0)
			params.indent += GetTopIndent();
399
400
401
402
403
404
405
406
407
408

		params.eatLeadingWhitespace = false;
		params.trimTrailingSpaces = false;
		params.onTabInIndentation = THROW;

		scalar = ScanScalar(INPUT, params);

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;

409
		Token token(Token::SCALAR, mark);
410
411
412
413
		token.value = scalar;
		m_tokens.push(token);
	}
}