"vscode:/vscode.git/clone" did not exist on "47e21d7d37a9578e5143858726aa2447812c63d4"
scantoken.cpp 10.5 KB
Newer Older
1
#include "crt.h"
2
3
4
5
#include "scanner.h"
#include "token.h"
#include "exceptions.h"
#include "exp.h"
6
#include "scanscalar.h"
7
#include <sstream>
8
9
10
11
12
13

namespace YAML
{
	///////////////////////////////////////////////////////////////////////
	// Specialization for scanning specific tokens

14
	// Directive
Jesse Beder's avatar
Jesse Beder committed
15
	// . Note: no semantic checking is done here (that's for the parser to do)
16
	void Scanner::ScanDirective()
Jesse Beder's avatar
Jesse Beder committed
17
	{
18
19
20
		std::string name;
		std::vector <std::string> params;

Jesse Beder's avatar
Jesse Beder committed
21
22
23
24
25
26
		// pop indents and simple keys
		PopIndentTo(-1);
		VerifyAllSimpleKeys();

		m_simpleKeyAllowed = false;

27
28
		// store pos and eat indicator
		int line = INPUT.line, column = INPUT.column;
29
		INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
30
31
32

		// read name
		while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
33
			name += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
34
35
36
37
38

		// read parameters
		while(1) {
			// first get rid of whitespace
			while(Exp::Blank.Matches(INPUT))
39
				INPUT.eat(1);
Jesse Beder's avatar
Jesse Beder committed
40
41
42
43
44
45
46
47

			// break on newline or comment
			if(INPUT.peek() == EOF || Exp::Break.Matches(INPUT) || Exp::Comment.Matches(INPUT))
				break;

			// now read parameter
			std::string param;
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
48
				param += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
49

50
			params.push_back(param);
Jesse Beder's avatar
Jesse Beder committed
51
52
		}
		
53
54
55
56
		Token token(TT_DIRECTIVE, line, column);
		token.value = name;
		token.params = params;
		m_tokens.push(token);
Jesse Beder's avatar
Jesse Beder committed
57
58
	}

59
60
	// DocStart
	void Scanner::ScanDocStart()
61
	{
62
		PopIndentTo(INPUT.column);
63
		VerifyAllSimpleKeys();
64
65
66
		m_simpleKeyAllowed = false;

		// eat
67
		int line = INPUT.line, column = INPUT.column;
68
		INPUT.eat(3);
69
		m_tokens.push(Token(TT_DOC_START, line, column));
70
71
	}

72
73
	// DocEnd
	void Scanner::ScanDocEnd()
74
75
	{
		PopIndentTo(-1);
76
		VerifyAllSimpleKeys();
77
78
79
		m_simpleKeyAllowed = false;

		// eat
80
		int line = INPUT.line, column = INPUT.column;
81
		INPUT.eat(3);
82
		m_tokens.push(Token(TT_DOC_END, line, column));
83
84
	}

85
86
	// FlowStart
	void Scanner::ScanFlowStart()
87
	{
88
		// flows can be simple keys
Jesse Beder's avatar
Jesse Beder committed
89
		InsertSimpleKey();
Jesse Beder's avatar
Jesse Beder committed
90
		m_flowLevel++;
91
92
93
		m_simpleKeyAllowed = true;

		// eat
94
		int line = INPUT.line, column = INPUT.column;
95
		char ch = INPUT.get();
96
		TOKEN_TYPE type = (ch == Keys::FlowSeqStart ? TT_FLOW_SEQ_START : TT_FLOW_MAP_START);
97
		m_tokens.push(Token(type, line, column));
98
99
	}

100
101
	// FlowEnd
	void Scanner::ScanFlowEnd()
102
	{
Jesse Beder's avatar
Jesse Beder committed
103
		if(m_flowLevel == 0)
104
			throw ParserException(INPUT.line, INPUT.column, ErrorMsg::FLOW_END);
Jesse Beder's avatar
Jesse Beder committed
105
106

		m_flowLevel--;
107
108
109
		m_simpleKeyAllowed = false;

		// eat
110
		int line = INPUT.line, column = INPUT.column;
111
		char ch = INPUT.get();
112
		TOKEN_TYPE type = (ch == Keys::FlowSeqEnd ? TT_FLOW_SEQ_END : TT_FLOW_MAP_END);
113
		m_tokens.push(Token(type, line, column));
114
115
	}

116
117
	// FlowEntry
	void Scanner::ScanFlowEntry()
118
119
120
121
	{
		m_simpleKeyAllowed = true;

		// eat
122
		int line = INPUT.line, column = INPUT.column;
123
		INPUT.eat(1);
124
		m_tokens.push(Token(TT_FLOW_ENTRY, line, column));
125
126
	}

127
128
	// BlockEntry
	void Scanner::ScanBlockEntry()
129
130
	{
		// we better be in the block context!
Jesse Beder's avatar
Jesse Beder committed
131
		if(m_flowLevel > 0)
132
			throw ParserException(INPUT.line, INPUT.column, ErrorMsg::BLOCK_ENTRY);
133

Jesse Beder's avatar
Jesse Beder committed
134
135
		// can we put it here?
		if(!m_simpleKeyAllowed)
136
			throw ParserException(INPUT.line, INPUT.column, ErrorMsg::BLOCK_ENTRY);
137

138
		PushIndentTo(INPUT.column, true);
139
140
141
		m_simpleKeyAllowed = true;

		// eat
142
		int line = INPUT.line, column = INPUT.column;
143
		INPUT.eat(1);
144
		m_tokens.push(Token(TT_BLOCK_ENTRY, line, column));
145
146
	}

147
148
	// Key
	void Scanner::ScanKey()
149
	{
Jesse Beder's avatar
Jesse Beder committed
150
		// handle keys diffently in the block context (and manage indents)
151
152
		if(m_flowLevel == 0) {
			if(!m_simpleKeyAllowed)
153
				throw ParserException(INPUT.line, INPUT.column, ErrorMsg::MAP_KEY);
154

155
			PushIndentTo(INPUT.column, false);
156
157
158
159
160
161
162
163
164
		}

		// can only put a simple key here if we're in block context
		if(m_flowLevel == 0)
			m_simpleKeyAllowed = true;
		else
			m_simpleKeyAllowed = false;

		// eat
165
		int line = INPUT.line, column = INPUT.column;
166
		INPUT.eat(1);
167
		m_tokens.push(Token(TT_KEY, line, column));
168
169
	}

170
171
	// Value
	void Scanner::ScanValue()
172
	{
Jesse Beder's avatar
Jesse Beder committed
173
		// does this follow a simple key?
174
		if(m_isLastKeyValid) {
Jesse Beder's avatar
Jesse Beder committed
175
176
			// can't follow a simple key with another simple key (dunno why, though - it seems fine)
			m_simpleKeyAllowed = false;
177
		} else {
Jesse Beder's avatar
Jesse Beder committed
178
			// handle values diffently in the block context (and manage indents)
179
180
			if(m_flowLevel == 0) {
				if(!m_simpleKeyAllowed)
181
					throw ParserException(INPUT.line, INPUT.column, ErrorMsg::MAP_VALUE);
182

183
				PushIndentTo(INPUT.column, false);
184
185
			}

Jesse Beder's avatar
Jesse Beder committed
186
187
188
189
190
191
			// can only put a simple key here if we're in block context
			if(m_flowLevel == 0)
				m_simpleKeyAllowed = true;
			else
				m_simpleKeyAllowed = false;
		}
192
193

		// eat
194
		int line = INPUT.line, column = INPUT.column;
195
		INPUT.eat(1);
196
		m_tokens.push(Token(TT_VALUE, line, column));
197
198
	}

199
200
	// AnchorOrAlias
	void Scanner::ScanAnchorOrAlias()
Jesse Beder's avatar
Jesse Beder committed
201
	{
202
		bool alias;
203
		std::string name;
204

Jesse Beder's avatar
Jesse Beder committed
205
206
207
208
209
210
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
211
		int line = INPUT.line, column = INPUT.column;
212
		char indicator = INPUT.get();
213
		alias = (indicator == Keys::Alias);
Jesse Beder's avatar
Jesse Beder committed
214
215
216

		// now eat the content
		while(Exp::AlphaNumeric.Matches(INPUT))
217
			name += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
218
219

		// we need to have read SOMETHING!
220
221
		if(name.empty())
			throw ParserException(INPUT.line, INPUT.column, alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
Jesse Beder's avatar
Jesse Beder committed
222
223

		// and needs to end correctly
224
225
		if(INPUT.peek() != EOF && !Exp::AnchorEnd.Matches(INPUT))
			throw ParserException(INPUT.line, INPUT.column, alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
Jesse Beder's avatar
Jesse Beder committed
226
227

		// and we're done
228
229
230
		Token token(alias ? TT_ALIAS : TT_ANCHOR, line, column);
		token.value = name;
		m_tokens.push(token);
Jesse Beder's avatar
Jesse Beder committed
231
	}
232

233
234
	// Tag
	void Scanner::ScanTag()
Jesse Beder's avatar
Jesse Beder committed
235
	{
236
237
		std::string handle, suffix;

Jesse Beder's avatar
Jesse Beder committed
238
239
240
241
242
243
		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();
		m_simpleKeyAllowed = false;

		// eat the indicator
244
		int line = INPUT.line, column = INPUT.column;
245
		handle += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
246
247
248

		// read the handle
		while(INPUT.peek() != EOF && INPUT.peek() != Keys::Tag && !Exp::BlankOrBreak.Matches(INPUT))
249
			handle += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
250
251
252
253

		// is there a suffix?
		if(INPUT.peek() == Keys::Tag) {
			// eat the indicator
254
			handle += INPUT.get();
Jesse Beder's avatar
Jesse Beder committed
255
256
257

			// then read it
			while(INPUT.peek() != EOF && !Exp::BlankOrBreak.Matches(INPUT))
258
				suffix += INPUT.get();
259
260
261
262
		} else {
			// this is a bit weird: we keep just the '!' as the handle and move the rest to the suffix
			suffix = handle.substr(1);
			handle = "!";
Jesse Beder's avatar
Jesse Beder committed
263
264
		}

265
266
267
268
		Token token(TT_TAG, line, column);
		token.value = handle;
		token.params.push_back(suffix);
		m_tokens.push(token);
Jesse Beder's avatar
Jesse Beder committed
269
270
	}

271
272
	// PlainScalar
	void Scanner::ScanPlainScalar()
273
	{
274
275
		std::string scalar;

276
277
		// set up the scanning parameters
		ScanScalarParams params;
278
		params.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (Exp::BlankOrBreak + Exp::Comment);
279
280
281
282
283
284
285
286
287
288
289
290
291
		params.eatEnd = false;
		params.indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = true;
		params.chomp = CLIP;
		params.onDocIndicator = BREAK;
		params.onTabInIndentation = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

292
		int line = INPUT.line, column = INPUT.column;
293
		scalar = ScanScalar(INPUT, params);
294
295
296
297

		// can have a simple key only if we ended the scalar by starting a new line
		m_simpleKeyAllowed = params.leadingSpaces;

298
299
300
		// finally, check and see if we ended on an illegal character
		//if(Exp::IllegalCharInScalar.Matches(INPUT))
		//	throw ParserException(INPUT.line, INPUT.column, ErrorMsg::CHAR_IN_SCALAR);
301

302
303
304
		Token token(TT_SCALAR, line, column);
		token.value = scalar;
		m_tokens.push(token);
305
306
	}

307
308
	// QuotedScalar
	void Scanner::ScanQuotedScalar()
309
	{
310
311
		std::string scalar;

312
		// eat single or double quote
313
		char quote = INPUT.get();
314
		bool single = (quote == '\'');
315
316
317

		// setup the scanning parameters
		ScanScalarParams params;
318
		params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
319
		params.eatEnd = true;
320
		params.escape = (single ? '\'' : '\\');
321
322
323
324
325
326
327
328
329
330
331
		params.indent = 0;
		params.fold = true;
		params.eatLeadingWhitespace = true;
		params.trimTrailingSpaces = false;
		params.chomp = CLIP;
		params.onDocIndicator = THROW;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

332
		int line = INPUT.line, column = INPUT.column;
333
		scalar = ScanScalar(INPUT, params);
334
335
		m_simpleKeyAllowed = false;

336
337
338
		Token token(TT_SCALAR, line, column);
		token.value = scalar;
		m_tokens.push(token);
339
340
341
342
343
344
	}

	// BlockScalarToken
	// . These need a little extra processing beforehand.
	// . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
	//   and then we need to figure out what level of indentation we'll be using.
345
	void Scanner::ScanBlockScalar()
346
	{
347
348
		std::string scalar;

349
350
351
352
353
		ScanScalarParams params;
		params.indent = 1;
		params.detectIndent = true;

		// eat block indicator ('|' or '>')
354
		int line = INPUT.line, column = INPUT.column;
355
		char indicator = INPUT.get();
356
357
358
359
360
		params.fold = (indicator == Keys::FoldedScalar);

		// eat chomping/indentation indicators
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++) {
361
			char ch = INPUT.get();
362
363
364
365
366
367
			if(ch == '+')
				params.chomp = KEEP;
			else if(ch == '-')
				params.chomp = STRIP;
			else if(Exp::Digit.Matches(ch)) {
				if(ch == '0')
368
					throw ParserException(INPUT.line, INPUT.column, ErrorMsg::ZERO_INDENT_IN_BLOCK);
369
370
371
372
373
374
375
376

				params.indent = ch - '0';
				params.detectIndent = false;
			}
		}

		// now eat whitespace
		while(Exp::Blank.Matches(INPUT))
377
			INPUT.eat(1);
378
379
380
381

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT))
			while(INPUT && !Exp::Break.Matches(INPUT))
382
				INPUT.eat(1);
383
384
385

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
386
			throw ParserException(INPUT.line, INPUT.column, ErrorMsg::CHAR_IN_BLOCK);
387
388
389
390
391
392
393
394
395

		// set the initial indentation
		if(m_indents.top() >= 0)
			params.indent += m_indents.top();

		params.eatLeadingWhitespace = false;
		params.trimTrailingSpaces = false;
		params.onTabInIndentation = THROW;

396
		scalar = ScanScalar(INPUT, params);
397
398
399

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;
400

401
402
403
		Token token(TT_SCALAR, line, column);
		token.value = scalar;
		m_tokens.push(token);
404
	}
405
}