scantoken.cpp 10.7 KB
Newer Older
1
2
3
4
5
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include "scanscalar.h"
6
7
#include "scantag.h"
#include "tag.h"
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include <sstream>

namespace YAML
{
	///////////////////////////////////////////////////////////////////////
	// Specialization for scanning specific tokens

	// Directive
	// . Note: no semantic checking is done here (that's for the parser to do)
	void Scanner::ScanDirective()
	{
		std::string name;
		std::vector <std::string> params;

		// pop indents and simple keys
23
		PopAllIndents();
24
		PopAllSimpleKeys();
25
26

		m_simpleKeyAllowed = false;
27
		m_canBeJSONFlow = false;
28
29

		// store pos and eat indicator
30
		Token token(Token::DIRECTIVE, INPUT.mark());
31
32
33
		INPUT.eat(1);

		// read name
34
		while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
35
			token.value += INPUT.get();
36
37
38
39
40
41
42
43

		// read parameters
		while(1) {
			// first get rid of whitespace
			while(Exp::Blank.Matches(INPUT))
				INPUT.eat(1);

			// break on newline or comment
44
			if(!INPUT || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
45
46
47
48
				break;

			// now read parameter
			std::string param;
49
			while(INPUT && !Exp::BlankOrBreak.Matches(INPUT))
50
51
				param += INPUT.get();

52
			token.params.push_back(param);
53
54
55
56
57
58
59
60
		}
		
		m_tokens.push(token);
	}

	// DocStart
	void Scanner::ScanDocStart()
	{
61
		PopAllIndents();
62
		PopAllSimpleKeys();
63
		m_simpleKeyAllowed = false;
64
		m_canBeJSONFlow = false;
65
66

		// eat
67
		Mark mark = INPUT.mark();
68
		INPUT.eat(3);
69
		m_tokens.push(Token(Token::DOC_START, mark));
70
71
72
73
74
	}

	// DocEnd
	void Scanner::ScanDocEnd()
	{
75
		PopAllIndents();
76
		PopAllSimpleKeys();
77
		m_simpleKeyAllowed = false;
78
		m_canBeJSONFlow = false;
79
80

		// eat
81
		Mark mark = INPUT.mark();
82
		INPUT.eat(3);
83
		m_tokens.push(Token(Token::DOC_END, mark));
84
85
86
87
88
89
	}

	// FlowStart
	void Scanner::ScanFlowStart()
	{
		// flows can be simple keys
90
		InsertPotentialSimpleKey();
91
		m_simpleKeyAllowed = true;
92
		m_canBeJSONFlow = false;
93
94

		// eat
95
		Mark mark = INPUT.mark();
96
		char ch = INPUT.get();
97
98
99
		FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
		m_flows.push(flowType);
		Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
100
		m_tokens.push(Token(type, mark));
101
102
103
104
105
	}

	// FlowEnd
	void Scanner::ScanFlowEnd()
	{
106
		if(InBlockContext())
107
			throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
108

109
		// we might have a solo entry in the flow context
110
111
112
113
114
115
		if(InFlowContext()) {
			if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
				m_tokens.push(Token(Token::VALUE, INPUT.mark()));
			else if(m_flows.top() == FLOW_SEQ)
				InvalidateSimpleKey();
		}
116

117
		m_simpleKeyAllowed = false;
118
		m_canBeJSONFlow = true;
119
120

		// eat
121
		Mark mark = INPUT.mark();
122
		char ch = INPUT.get();
123
124
125
126
127
128
129
130

		// check that it matches the start
		FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
		if(m_flows.top() != flowType)
			throw ParserException(mark, ErrorMsg::FLOW_END);
		m_flows.pop();
		
		Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
131
		m_tokens.push(Token(type, mark));
132
133
134
135
136
	}

	// FlowEntry
	void Scanner::ScanFlowEntry()
	{
137
138
139
140
141
142
143
		// we might have a solo entry in the flow context
		if(InFlowContext()) {
			if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
				m_tokens.push(Token(Token::VALUE, INPUT.mark()));
			else if(m_flows.top() == FLOW_SEQ)
				InvalidateSimpleKey();
		}
144
		
145
		m_simpleKeyAllowed = true;
146
		m_canBeJSONFlow = false;
147
148

		// eat
149
		Mark mark = INPUT.mark();
150
		INPUT.eat(1);
151
		m_tokens.push(Token(Token::FLOW_ENTRY, mark));
152
153
154
155
156
157
	}

	// BlockEntry
	void Scanner::ScanBlockEntry()
	{
		// we better be in the block context!
158
		if(InFlowContext())
159
			throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
160
161
162

		// can we put it here?
		if(!m_simpleKeyAllowed)
163
			throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
164

165
		PushIndentTo(INPUT.column(), IndentMarker::SEQ);
166
		m_simpleKeyAllowed = true;
167
		m_canBeJSONFlow = false;
168
169

		// eat
170
		Mark mark = INPUT.mark();
171
		INPUT.eat(1);
172
		m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
173
174
175
176
177
178
	}

	// Key
	void Scanner::ScanKey()
	{
		// handle keys diffently in the block context (and manage indents)
179
		if(InBlockContext()) {
180
			if(!m_simpleKeyAllowed)
181
				throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
182

183
			PushIndentTo(INPUT.column(), IndentMarker::MAP);
184
185
186
		}

		// can only put a simple key here if we're in block context
187
		m_simpleKeyAllowed = InBlockContext();
188
189

		// eat
190
		Mark mark = INPUT.mark();
191
		INPUT.eat(1);
192
		m_tokens.push(Token(Token::KEY, mark));
193
194
195
196
197
	}

	// Value
	void Scanner::ScanValue()
	{
198
199
		// and check that simple key
		bool isSimpleKey = VerifySimpleKey();
200
		m_canBeJSONFlow = false;
201
202
		
		if(isSimpleKey) {
203
204
205
206
			// can't follow a simple key with another simple key (dunno why, though - it seems fine)
			m_simpleKeyAllowed = false;
		} else {
			// handle values diffently in the block context (and manage indents)
207
			if(InBlockContext()) {
208
				if(!m_simpleKeyAllowed)
209
					throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
210

211
				PushIndentTo(INPUT.column(), IndentMarker::MAP);
212
213
214
			}

			// can only put a simple key here if we're in block context
215
			m_simpleKeyAllowed = InBlockContext();
216
217
218
		}

		// eat
219
		Mark mark = INPUT.mark();
220
		INPUT.eat(1);
221
		m_tokens.push(Token(Token::VALUE, mark));
222
223
224
225
226
227
228
229
230
	}

	// AnchorOrAlias
	void Scanner::ScanAnchorOrAlias()
	{
		bool alias;
		std::string name;

		// insert a potential simple key
231
		InsertPotentialSimpleKey();
232
		m_simpleKeyAllowed = false;
233
		m_canBeJSONFlow = false;
234
235

		// eat the indicator
236
		Mark mark = INPUT.mark();
237
238
239
240
241
242
243
244
245
		char indicator = INPUT.get();
		alias = (indicator == Keys::Alias);

		// now eat the content
		while(Exp::AlphaNumeric.Matches(INPUT))
			name += INPUT.get();

		// we need to have read SOMETHING!
		if(name.empty())
246
			throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
247
248

		// and needs to end correctly
249
		if(INPUT && !Exp::AnchorEnd.Matches(INPUT))
250
			throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
251
252

		// and we're done
253
		Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
254
255
256
257
258
259
260
261
		token.value = name;
		m_tokens.push(token);
	}

	// Tag
	void Scanner::ScanTag()
	{
		// insert a potential simple key
262
		InsertPotentialSimpleKey();
263
		m_simpleKeyAllowed = false;
264
		m_canBeJSONFlow = false;
265

266
		Token token(Token::TAG, INPUT.mark());
267

268
269
270
271
272
		// eat the indicator
		INPUT.get();
		
		if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){
			std::string tag = ScanVerbatimTag(INPUT);
273

274
275
			token.value = tag;
			token.data = Tag::VERBATIM;
276
		} else {
277
278
279
280
281
282
283
284
285
286
287
			bool canBeHandle;
			token.value = ScanTagHandle(INPUT, canBeHandle);
			token.data = (token.value.empty() ? Tag::SECONDARY_HANDLE : Tag::PRIMARY_HANDLE);
			
			// is there a suffix?
			if(canBeHandle && INPUT.peek() == Keys::Tag) {
				// eat the indicator
				INPUT.get();
				token.params.push_back(ScanTagSuffix(INPUT));
				token.data = Tag::NAMED_HANDLE;
			}
288
289
290
291
292
293
294
295
296
297
298
299
		}

		m_tokens.push(token);
	}

	// PlainScalar
	void Scanner::ScanPlainScalar()
	{
		std::string scalar;

		// set up the scanning parameters
		ScanScalarParams params;
300
		params.end = (InFlowContext() ? Exp::EndScalarInFlow : Exp::EndScalar) || (Exp::BlankOrBreak + Exp::Comment);
301
		params.eatEnd = false;
302
		params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
303
		params.fold = FOLD_FLOW;
304
305
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = true;
306
		params.chomp = STRIP;
307
308
309
310
		params.onDocIndicator = BREAK;
		params.onTabInIndentation = THROW;

		// insert a potential simple key
311
		InsertPotentialSimpleKey();
312

313
		Mark mark = INPUT.mark();
314
315
316
317
		scalar = ScanScalar(INPUT, params);

		// can have a simple key only if we ended the scalar by starting a new line
		m_simpleKeyAllowed = params.leadingSpaces;
318
		m_canBeJSONFlow = false;
319
320
321

		// finally, check and see if we ended on an illegal character
		//if(Exp::IllegalCharInScalar.Matches(INPUT))
322
		//	throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
323

324
		Token token(Token::SCALAR, mark);
325
326
327
328
329
330
331
332
333
		token.value = scalar;
		m_tokens.push(token);
	}

	// QuotedScalar
	void Scanner::ScanQuotedScalar()
	{
		std::string scalar;

334
335
		// peek at single or double quote (don't eat because we need to preserve (for the time being) the input position)
		char quote = INPUT.peek();
336
337
338
339
340
341
342
343
		bool single = (quote == '\'');

		// setup the scanning parameters
		ScanScalarParams params;
		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
		params.eatEnd = true;
		params.escape = (single ? '\'' : '\\');
		params.indent = 0;
344
		params.fold = FOLD_FLOW;
345
346
347
348
349
350
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = false;
		params.chomp = CLIP;
		params.onDocIndicator = THROW;

		// insert a potential simple key
351
		InsertPotentialSimpleKey();
352

353
		Mark mark = INPUT.mark();
354
355
356
357
358

		// now eat that opening quote
		INPUT.get();
		
		// and scan
359
360
		scalar = ScanScalar(INPUT, params);
		m_simpleKeyAllowed = false;
361
		m_canBeJSONFlow = true;
362

363
		Token token(Token::SCALAR, mark);
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
		token.value = scalar;
		m_tokens.push(token);
	}

	// BlockScalarToken
	// . These need a little extra processing beforehand.
	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
	//   and then we need to figure out what level of indentation we'll be using.
	void Scanner::ScanBlockScalar()
	{
		std::string scalar;

		ScanScalarParams params;
		params.indent = 1;
		params.detectIndent = true;

		// eat block indicator ('|' or '>')
381
		Mark mark = INPUT.mark();
382
		char indicator = INPUT.get();
383
		params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
384
385

		// eat chomping/indentation indicators
386
		params.chomp = CLIP;
387
388
389
390
391
392
393
394
395
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++) {
			char ch = INPUT.get();
			if(ch == '+')
				params.chomp = KEEP;
			else if(ch == '-')
				params.chomp = STRIP;
			else if(Exp::Digit.Matches(ch)) {
				if(ch == '0')
396
					throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413

				params.indent = ch - '0';
				params.detectIndent = false;
			}
		}

		// now eat whitespace
		while(Exp::Blank.Matches(INPUT))
			INPUT.eat(1);

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT))
			while(INPUT && !Exp::Break.Matches(INPUT))
				INPUT.eat(1);

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
414
			throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
415
416

		// set the initial indentation
417
418
		if(GetTopIndent() >= 0)
			params.indent += GetTopIndent();
419
420
421
422
423
424
425
426
427

		params.eatLeadingWhitespace = false;
		params.trimTrailingSpaces = false;
		params.onTabInIndentation = THROW;

		scalar = ScanScalar(INPUT, params);

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;
428
		m_canBeJSONFlow = false;
429

430
		Token token(Token::SCALAR, mark);
431
432
433
434
		token.value = scalar;
		m_tokens.push(token);
	}
}