// scantoken.cpp
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
#include "scanscalar.h"
#include <sstream>

namespace YAML
{
	///////////////////////////////////////////////////////////////////////
	// Specialization for scanning specific tokens

	// Directive
Jesse Beder's avatar
Jesse Beder committed
14
	// . Note: no semantic checking is done here (that's for the parser to do)
15
	void Scanner::ScanDirective()
Jesse Beder's avatar
Jesse Beder committed
16
	{
17
18
19
		std::string name;
		std::vector <std::string> params;

Jesse Beder's avatar
Jesse Beder committed
20
21
22
23
24
25
		// pop indents and simple keys
		PopIndentTo(-1);
		VerifyAllSimpleKeys();

		m_simpleKeyAllowed = false;

26
27
		// store pos and eat indicator
		int line = INPUT.line, column = INPUT.column;
28
		INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
29
30
31

		// read name
		while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
32
			name += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
33
34
35
36
37

		// read parameters
		while(1) {
			// first get rid of whitespace
			while(Exp::Blank.Matches(INPUT))
38
				INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
39
40
41
42
43
44
45
46

			// break on newline or comment
			if(INPUT.peek() == EOF || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
				break;

			// now read parameter
			std::string param;
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
47
				param += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
48

49
			params.push_back(param);
Jesse Beder's avatar
Jesse Beder committed
50
51
		}
		
52
		Token *pToken = new Token(TT_DIRECTIVE, line, column);
53
54
55
		pToken->value = name;
		pToken->params = params;
		m_tokens.push(pToken);
Jesse Beder's avatar
Jesse Beder committed
56
57
	}

	// DocStart
	void Scanner::ScanDocStart()
60
	{
61
		PopIndentTo(INPUT.column);
62
		VerifyAllSimpleKeys();
63
64
65
		m_simpleKeyAllowed = false;

		// eat
66
		int line = INPUT.line, column = INPUT.column;
67
		INPUT.eat(3);
68
		m_tokens.push(new Token(TT_DOC_START, line, column));
69
70
	}

	// DocEnd
	void Scanner::ScanDocEnd()
73
74
	{
		PopIndentTo(-1);
75
		VerifyAllSimpleKeys();
76
77
78
		m_simpleKeyAllowed = false;

		// eat
79
		int line = INPUT.line, column = INPUT.column;
80
		INPUT.eat(3);
81
		m_tokens.push(new Token(TT_DOC_END, line, column));
82
83
	}

	// FlowStart
	void Scanner::ScanFlowStart()
86
	{
87
		// flows can be simple keys
Jesse Beder's avatar
Jesse Beder committed
88
		InsertSimpleKey();
Jesse Beder's avatar
Jesse Beder committed
89
		m_flowLevel++;
90
91
92
		m_simpleKeyAllowed = true;

		// eat
93
		int line = INPUT.line, column = INPUT.column;
94
		char ch = INPUT.get();
95
		TOKEN_TYPE type = (ch == Keys::FlowSeqStart ? TT_FLOW_SEQ_START : TT_FLOW_MAP_START);
96
		m_tokens.push(new Token(type, line, column));
97
98
	}

	// FlowEnd
	void Scanner::ScanFlowEnd()
101
	{
Jesse Beder's avatar
Jesse Beder committed
102
		if(m_flowLevel == 0)
103
			throw ParserException(INPUT.line, INPUT.column, "illegal flow end");
Jesse Beder's avatar
Jesse Beder committed
104
105

		m_flowLevel--;
106
107
108
		m_simpleKeyAllowed = false;

		// eat
109
		int line = INPUT.line, column = INPUT.column;
110
		char ch = INPUT.get();
111
		TOKEN_TYPE type = (ch == Keys::FlowSeqEnd ? TT_FLOW_SEQ_END : TT_FLOW_MAP_END);
112
		m_tokens.push(new Token(type, line, column));
113
114
	}

	// FlowEntry
	void Scanner::ScanFlowEntry()
117
118
119
120
	{
		m_simpleKeyAllowed = true;

		// eat
121
		int line = INPUT.line, column = INPUT.column;
122
		INPUT.eat(1);
123
		m_tokens.push(new Token(TT_FLOW_ENTRY, line, column));
124
125
	}

	// BlockEntry
	void Scanner::ScanBlockEntry()
128
129
	{
		// we better be in the block context!
Jesse Beder's avatar
Jesse Beder committed
130
		if(m_flowLevel > 0)
131
			throw ParserException(INPUT.line, INPUT.column, "illegal block entry");
132

Jesse Beder's avatar
Jesse Beder committed
133
134
		// can we put it here?
		if(!m_simpleKeyAllowed)
135
			throw ParserException(INPUT.line, INPUT.column, "illegal block entry");
136

137
		PushIndentTo(INPUT.column, true);
138
139
140
		m_simpleKeyAllowed = true;

		// eat
141
		int line = INPUT.line, column = INPUT.column;
142
		INPUT.eat(1);
143
		m_tokens.push(new Token(TT_BLOCK_ENTRY, line, column));
144
145
	}

	// Key
	void Scanner::ScanKey()
148
	{
Jesse Beder's avatar
Jesse Beder committed
149
		// handle keys diffently in the block context (and manage indents)
150
151
		if(m_flowLevel == 0) {
			if(!m_simpleKeyAllowed)
152
				throw ParserException(INPUT.line, INPUT.column, "illegal map key");
153

154
			PushIndentTo(INPUT.column, false);
155
156
157
158
159
160
161
162
163
		}

		// can only put a simple key here if we're in block context
		if(m_flowLevel == 0)
			m_simpleKeyAllowed = true;
		else
			m_simpleKeyAllowed = false;

		// eat
164
		int line = INPUT.line, column = INPUT.column;
165
		INPUT.eat(1);
166
		m_tokens.push(new Token(TT_KEY, line, column));
167
168
	}

	// Value
	void Scanner::ScanValue()
171
	{
Jesse Beder's avatar
Jesse Beder committed
172
		// does this follow a simple key?
173
		if(m_isLastKeyValid) {
Jesse Beder's avatar
Jesse Beder committed
174
175
			// can't follow a simple key with another simple key (dunno why, though - it seems fine)
			m_simpleKeyAllowed = false;
176
		} else {
Jesse Beder's avatar
Jesse Beder committed
177
			// handle values diffently in the block context (and manage indents)
178
179
			if(m_flowLevel == 0) {
				if(!m_simpleKeyAllowed)
180
					throw ParserException(INPUT.line, INPUT.column, "illegal map value");
181

182
				PushIndentTo(INPUT.column, false);
183
184
			}

Jesse Beder's avatar
Jesse Beder committed
185
186
187
188
189
190
			// can only put a simple key here if we're in block context
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
			else
				m_simpleKeyAllowed = false;
		}
191
192

		// eat
193
		int line = INPUT.line, column = INPUT.column;
194
		INPUT.eat(1);
195
		m_tokens.push(new Token(TT_VALUE, line, column));
196
197
	}

	// AnchorOrAlias
	void Scanner::ScanAnchorOrAlias()
Jesse Beder's avatar
Jesse Beder committed
200
	{
201
		bool alias;
202
		std::string name;
203

Jesse Beder's avatar
Jesse Beder committed
204
205
206
207
208
209
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
210
		int line = INPUT.line, column = INPUT.column;
211
		char indicator = INPUT.get();
212
		alias = (indicator == Keys::Alias);
Jesse Beder's avatar
Jesse Beder committed
213
214
215

		// now eat the content
		while(Exp::AlphaNumeric.Matches(INPUT))
216
			name += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
217
218

		// we need to have read SOMETHING!
219
220
221
222
223
224
225
		if(name.empty()) {
			std::stringstream msg;
			msg << (alias ? "alias" : "anchor");
			msg << " not found after ";
			msg << (alias ? "*" : "&");
			throw ParserException(INPUT.line, INPUT.column, msg.str());
		}
Jesse Beder's avatar
Jesse Beder committed
226
227

		// and needs to end correctly
228
229
230
231
232
233
		if(INPUT.peek() != EOF && !Exp::AnchorEnd.Matches(INPUT)) {
			std::stringstream msg;
			msg << "illegal character found while scanning ";
			msg << (alias ? "alias" : "anchor");
			throw ParserException(INPUT.line, INPUT.column, msg.str());
		}
Jesse Beder's avatar
Jesse Beder committed
234
235

		// and we're done
236
		Token *pToken = new Token(alias ? TT_ALIAS : TT_ANCHOR, line, column);
237
		pToken->value = name;
238
		m_tokens.push(pToken);
Jesse Beder's avatar
Jesse Beder committed
239
	}
	// Tag
	void Scanner::ScanTag()
Jesse Beder's avatar
Jesse Beder committed
243
	{
244
245
		std::string handle, suffix;

Jesse Beder's avatar
Jesse Beder committed
246
247
248
249
250
251
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
252
		int line = INPUT.line, column = INPUT.column;
253
		handle += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
254
255
256

		// read the handle
		while(INPUT.peek() != EOF && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
257
			handle += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
258
259
260
261

		// is there a suffix?
		if(INPUT.peek() == Keys::Tag) {
			// eat the indicator
262
			handle += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
263
264
265

			// then read it
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
266
				suffix += INPUT.get();
267
268
269
270
		} else {
			// this is a bit weird: we keep just the '!' as the handle and move the rest to the suffix
			suffix = handle.substr(1);
			handle = "!";
Jesse Beder's avatar
Jesse Beder committed
271
272
		}

273
		Token *pToken = new Token(TT_TAG, line, column);
274
275
276
		pToken->value = handle;
		pToken->params.push_back(suffix);
		m_tokens.push(pToken);
Jesse Beder's avatar
Jesse Beder committed
277
278
	}

	// PlainScalar
	void Scanner::ScanPlainScalar()
281
	{
282
283
		std::string scalar;

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
		// set up the scanning parameters
		ScanScalarParams params;
		params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
		params.eatEnd = false;
		params.indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = true;
		params.chomp = CLIP;
		params.onDocIndicator = BREAK;
		params.onTabInIndentation = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

300
		int line = INPUT.line, column = INPUT.column;
301
		scalar = ScanScalar(INPUT, params);
302
303
304
305
306
307
308

		// can have a simple key only if we ended the scalar by starting a new line
		m_simpleKeyAllowed = params.leadingSpaces;

		// finally, we can't have any colons in a scalar, so if we ended on a colon, there
		// had better be a break after it
		if(Exp::IllegalColonInScalar.Matches(INPUT))
309
			throw ParserException(INPUT.line, INPUT.column, "illegal character in scalar");
310

311
		Token *pToken = new Token(TT_SCALAR, line, column);
312
313
		pToken->value = scalar;
		m_tokens.push(pToken);
314
315
	}

	// QuotedScalar
	void Scanner::ScanQuotedScalar()
318
	{
319
320
		std::string scalar;

321
		// eat single or double quote
322
		char quote = INPUT.get();
323
		bool single = (quote == '\'');
324
325
326

		// setup the scanning parameters
		ScanScalarParams params;
327
		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
328
		params.eatEnd = true;
329
		params.escape = (single ? '\'' : '\\');
330
331
332
333
334
335
336
337
338
339
340
		params.indent = 0;
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = false;
		params.chomp = CLIP;
		params.onDocIndicator = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

341
		int line = INPUT.line, column = INPUT.column;
342
		scalar = ScanScalar(INPUT, params);
343
344
		m_simpleKeyAllowed = false;

345
		Token *pToken = new Token(TT_SCALAR, line, column);
346
347
		pToken->value = scalar;
		m_tokens.push(pToken);
348
349
350
351
352
353
	}

	// BlockScalarToken
	// . These need a little extra processing beforehand.
	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
	//   and then we need to figure out what level of indentation we'll be using.
354
	void Scanner::ScanBlockScalar()
355
	{
356
357
		std::string scalar;

358
359
360
361
362
		ScanScalarParams params;
		params.indent = 1;
		params.detectIndent = true;

		// eat block indicator ('|' or '>')
363
		int line = INPUT.line, column = INPUT.column;
364
		char indicator = INPUT.get();
365
366
367
368
369
		params.fold = (indicator == Keys::FoldedScalar);

		// eat chomping/indentation indicators
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++) {
370
			char ch = INPUT.get();
371
372
373
374
375
376
			if(ch == '+')
				params.chomp = KEEP;
			else if(ch == '-')
				params.chomp = STRIP;
			else if(Exp::Digit.Matches(ch)) {
				if(ch == '0')
377
					throw ParserException(INPUT.line, INPUT.column, "cannot set zero indentation for a block scalar");
378
379
380
381
382
383
384
385

				params.indent = ch - '0';
				params.detectIndent = false;
			}
		}

		// now eat whitespace
		while(Exp::Blank.Matches(INPUT))
386
			INPUT.eat(1);
387
388
389
390

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT))
			while(INPUT && !Exp::Break.Matches(INPUT))
391
				INPUT.eat(1);
392
393
394

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
395
			throw ParserException(INPUT.line, INPUT.column, "unexpected character in block scalar");
396
397
398
399
400
401
402
403
404

		// set the initial indentation
		if(m_indents.top() >= 0)
			params.indent += m_indents.top();

		params.eatLeadingWhitespace = false;
		params.trimTrailingSpaces = false;
		params.onTabInIndentation = THROW;

405
		scalar = ScanScalar(INPUT, params);
406
407
408

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;
409

410
		Token *pToken = new Token(TT_SCALAR, line, column);
411
412
		pToken->value = scalar;
		m_tokens.push(pToken);
413
	}
}