scantoken.cpp 9.24 KB
Newer Older
1
2
3
4
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
5
#include "scanscalar.h"
6
7
8
9
10
11

namespace YAML
{
	///////////////////////////////////////////////////////////////////////
	// Specialization for scanning specific tokens

12
	// Directive
beder's avatar
beder committed
13
	// . Note: no semantic checking is done here (that's for the parser to do)
14
	void Scanner::ScanDirective()
beder's avatar
beder committed
15
	{
16
17
18
		std::string name;
		std::vector <std::string> params;

beder's avatar
beder committed
19
20
21
22
23
24
25
		// pop indents and simple keys
		PopIndentTo(-1);
		VerifyAllSimpleKeys();

		m_simpleKeyAllowed = false;

		// eat indicator
26
		INPUT.eat(1);
beder's avatar
beder committed
27
28
29

		// read name
		while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
30
			name += INPUT.get();
beder's avatar
beder committed
31
32
33
34
35

		// read parameters
		while(1) {
			// first get rid of whitespace
			while(Exp::Blank.Matches(INPUT))
36
				INPUT.eat(1);
beder's avatar
beder committed
37
38
39
40
41
42
43
44

			// break on newline or comment
			if(INPUT.peek() == EOF || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
				break;

			// now read parameter
			std::string param;
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
45
				param += INPUT.get();
beder's avatar
beder committed
46

47
			params.push_back(param);
beder's avatar
beder committed
48
49
		}
		
50
51
52
53
		Token *pToken = new Token(TT_DIRECTIVE);
		pToken->value = name;
		pToken->params = params;
		m_tokens.push(pToken);
beder's avatar
beder committed
54
55
	}

56
57
	// DocStart
	void Scanner::ScanDocStart()
58
	{
59
		PopIndentTo(INPUT.column);
60
		VerifyAllSimpleKeys();
61
62
63
		m_simpleKeyAllowed = false;

		// eat
64
		INPUT.eat(3);
65
		m_tokens.push(new Token(TT_DOC_START));
66
67
	}

68
69
	// DocEnd
	void Scanner::ScanDocEnd()
70
71
	{
		PopIndentTo(-1);
72
		VerifyAllSimpleKeys();
73
74
75
		m_simpleKeyAllowed = false;

		// eat
76
		INPUT.eat(3);
77
		m_tokens.push(new Token(TT_DOC_END));
78
79
	}

80
81
	// FlowStart
	void Scanner::ScanFlowStart()
82
	{
83
		// flows can be simple keys
beder's avatar
beder committed
84
		InsertSimpleKey();
beder's avatar
beder committed
85
		m_flowLevel++;
86
87
88
		m_simpleKeyAllowed = true;

		// eat
89
		char ch = INPUT.get();
90
91
		TOKEN_TYPE type = (ch == Keys::FlowSeqStart ? TT_FLOW_SEQ_START : TT_FLOW_MAP_START);
		m_tokens.push(new Token(type));
92
93
	}

94
95
	// FlowEnd
	void Scanner::ScanFlowEnd()
96
	{
beder's avatar
beder committed
97
98
99
100
		if(m_flowLevel == 0)
			throw IllegalFlowEnd();

		m_flowLevel--;
101
102
103
		m_simpleKeyAllowed = false;

		// eat
104
		char ch = INPUT.get();
105
106
		TOKEN_TYPE type = (ch == Keys::FlowSeqEnd ? TT_FLOW_SEQ_END : TT_FLOW_MAP_END);
		m_tokens.push(new Token(type));
107
108
	}

109
110
	// FlowEntry
	void Scanner::ScanFlowEntry()
111
112
113
114
	{
		m_simpleKeyAllowed = true;

		// eat
115
		INPUT.eat(1);
116
		m_tokens.push(new Token(TT_FLOW_ENTRY));
117
118
	}

119
120
	// BlockEntry
	void Scanner::ScanBlockEntry()
121
122
	{
		// we better be in the block context!
beder's avatar
beder committed
123
124
		if(m_flowLevel > 0)
			throw IllegalBlockEntry();
125

beder's avatar
beder committed
126
127
128
		// can we put it here?
		if(!m_simpleKeyAllowed)
			throw IllegalBlockEntry();
129

130
		PushIndentTo(INPUT.column, true);
131
132
133
		m_simpleKeyAllowed = true;

		// eat
134
		INPUT.eat(1);
135
		m_tokens.push(new Token(TT_BLOCK_ENTRY));
136
137
	}

138
139
	// Key
	void Scanner::ScanKey()
140
	{
beder's avatar
beder committed
141
		// handle keys diffently in the block context (and manage indents)
142
143
144
145
		if(m_flowLevel == 0) {
			if(!m_simpleKeyAllowed)
				throw IllegalMapKey();

146
			PushIndentTo(INPUT.column, false);
147
148
149
150
151
152
153
154
155
		}

		// can only put a simple key here if we're in block context
		if(m_flowLevel == 0)
			m_simpleKeyAllowed = true;
		else
			m_simpleKeyAllowed = false;

		// eat
156
		INPUT.eat(1);
157
		m_tokens.push(new Token(TT_KEY));
158
159
	}

160
161
	// Value
	void Scanner::ScanValue()
162
	{
beder's avatar
beder committed
163
		// does this follow a simple key?
164
		if(m_isLastKeyValid) {
beder's avatar
beder committed
165
166
			// can't follow a simple key with another simple key (dunno why, though - it seems fine)
			m_simpleKeyAllowed = false;
167
		} else {
beder's avatar
beder committed
168
			// handle values diffently in the block context (and manage indents)
169
170
171
172
			if(m_flowLevel == 0) {
				if(!m_simpleKeyAllowed)
					throw IllegalMapValue();

173
				PushIndentTo(INPUT.column, false);
174
175
			}

beder's avatar
beder committed
176
177
178
179
180
181
			// can only put a simple key here if we're in block context
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
			else
				m_simpleKeyAllowed = false;
		}
182
183

		// eat
184
		INPUT.eat(1);
185
		m_tokens.push(new Token(TT_VALUE));
186
187
	}

188
189
	// AnchorOrAlias
	void Scanner::ScanAnchorOrAlias()
beder's avatar
beder committed
190
	{
191
		bool alias;
192
		std::string name;
193

beder's avatar
beder committed
194
195
196
197
198
199
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
200
		char indicator = INPUT.get();
201
		alias = (indicator == Keys::Alias);
beder's avatar
beder committed
202
203
204

		// now eat the content
		while(Exp::AlphaNumeric.Matches(INPUT))
205
			name += INPUT.get();
beder's avatar
beder committed
206
207

		// we need to have read SOMETHING!
208
		if(name.empty())
beder's avatar
beder committed
209
210
211
212
213
214
215
			throw AnchorNotFound();

		// and needs to end correctly
		if(INPUT.peek() != EOF && !Exp::AnchorEnd.Matches(INPUT))
			throw IllegalCharacterInAnchor();

		// and we're done
216
		Token *pToken = new Token(alias ? TT_ALIAS : TT_ANCHOR);
217
		pToken->value = name;
218
		m_tokens.push(pToken);
beder's avatar
beder committed
219
	}
220

221
222
	// Tag
	void Scanner::ScanTag()
beder's avatar
beder committed
223
	{
224
225
		std::string handle, suffix;

beder's avatar
beder committed
226
227
228
229
230
231
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
232
		handle += INPUT.get();
beder's avatar
beder committed
233
234
235

		// read the handle
		while(INPUT.peek() != EOF && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
236
			handle += INPUT.get();
beder's avatar
beder committed
237
238
239
240

		// is there a suffix?
		if(INPUT.peek() == Keys::Tag) {
			// eat the indicator
241
			handle += INPUT.get();
beder's avatar
beder committed
242
243
244

			// then read it
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
245
				suffix += INPUT.get();
246
247
248
249
		} else {
			// this is a bit weird: we keep just the '!' as the handle and move the rest to the suffix
			suffix = handle.substr(1);
			handle = "!";
beder's avatar
beder committed
250
251
		}

252
253
254
255
		Token *pToken = new Token(TT_TAG);
		pToken->value = handle;
		pToken->params.push_back(suffix);
		m_tokens.push(pToken);
beder's avatar
beder committed
256
257
	}

258
259
	// PlainScalar
	void Scanner::ScanPlainScalar()
260
	{
261
262
		std::string scalar;

263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
		// set up the scanning parameters
		ScanScalarParams params;
		params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
		params.eatEnd = false;
		params.indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = true;
		params.chomp = CLIP;
		params.onDocIndicator = BREAK;
		params.onTabInIndentation = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

279
		scalar = ScanScalar(INPUT, params);
280
281
282
283
284
285
286
287
288

		// can have a simple key only if we ended the scalar by starting a new line
		m_simpleKeyAllowed = params.leadingSpaces;

		// finally, we can't have any colons in a scalar, so if we ended on a colon, there
		// had better be a break after it
		if(Exp::IllegalColonInScalar.Matches(INPUT))
			throw IllegalScalar();

289
290
291
		Token *pToken = new Token(TT_SCALAR);
		pToken->value = scalar;
		m_tokens.push(pToken);
292
293
	}

294
295
	// QuotedScalar
	void Scanner::ScanQuotedScalar()
296
	{
297
298
		std::string scalar;

299
		// eat single or double quote
300
		char quote = INPUT.get();
301
		bool single = (quote == '\'');
302
303
304

		// setup the scanning parameters
		ScanScalarParams params;
305
		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
306
		params.eatEnd = true;
307
		params.escape = (single ? '\'' : '\\');
308
309
310
311
312
313
314
315
316
317
318
		params.indent = 0;
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = false;
		params.chomp = CLIP;
		params.onDocIndicator = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

319
		scalar = ScanScalar(INPUT, params);
320
321
		m_simpleKeyAllowed = false;

322
323
324
		Token *pToken = new Token(TT_SCALAR);
		pToken->value = scalar;
		m_tokens.push(pToken);
325
326
327
328
329
330
	}

	// BlockScalarToken
	// . These need a little extra processing beforehand.
	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
	//   and then we need to figure out what level of indentation we'll be using.
331
	void Scanner::ScanBlockScalar()
332
	{
333
334
		std::string scalar;

335
336
337
338
339
		ScanScalarParams params;
		params.indent = 1;
		params.detectIndent = true;

		// eat block indicator ('|' or '>')
340
		char indicator = INPUT.get();
341
342
343
344
345
		params.fold = (indicator == Keys::FoldedScalar);

		// eat chomping/indentation indicators
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++) {
346
			char ch = INPUT.get();
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
			if(ch == '+')
				params.chomp = KEEP;
			else if(ch == '-')
				params.chomp = STRIP;
			else if(Exp::Digit.Matches(ch)) {
				if(ch == '0')
					throw ZeroIndentationInBlockScalar();

				params.indent = ch - '0';
				params.detectIndent = false;
			}
		}

		// now eat whitespace
		while(Exp::Blank.Matches(INPUT))
362
			INPUT.eat(1);
363
364
365
366

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT))
			while(INPUT && !Exp::Break.Matches(INPUT))
367
				INPUT.eat(1);
368
369
370
371
372
373
374
375
376
377
378
379
380

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
			throw UnexpectedCharacterInBlockScalar();

		// set the initial indentation
		if(m_indents.top() >= 0)
			params.indent += m_indents.top();

		params.eatLeadingWhitespace = false;
		params.trimTrailingSpaces = false;
		params.onTabInIndentation = THROW;

381
		scalar = ScanScalar(INPUT, params);
382
383
384

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;
385
386
387
388

		Token *pToken = new Token(TT_SCALAR);
		pToken->value = scalar;
		m_tokens.push(pToken);
389
	}
390
}