// scanscalar.cpp
#include "scanscalar.h"
#include "scanner.h"
#include "exp.h"
#include "exceptions.h"
#include "token.h"

namespace YAML
{
	//////////////////////////////////////////////////////////
	// WhitespaceInfo

	// Initial state: no line break seen yet, folding enabled, and both the
	// chomping indicator and the explicit indentation increment left at zero
	// (SetChompers changes them when the corresponding indicator is present).
	WhitespaceInfo::WhitespaceInfo()
		: leadingBlanks(false)
		, fold(true)
		, chomp(0)
		, increment(0)
	{
	}

	// Interprets one block-scalar header indicator character:
	// . '+' sets chomp to 1, '-' sets chomp to -1
	// . a digit sets the explicit indentation increment (throws
	//   ZeroIndentationInBlockScalar if that digit is zero)
	// . anything else is ignored
	void WhitespaceInfo::SetChompers(char ch)
	{
		// chomping indicators
		if(ch == '+') {
			chomp = 1;
			return;
		}

		if(ch == '-') {
			chomp = -1;
			return;
		}

		// only a digit is meaningful from here on
		if(!Exp::Digit.Matches(ch))
			return;

		// indentation indicator
		increment = ch - '0';
		if(increment == 0)
			throw ZeroIndentationInBlockScalar();
	}

	// Remembers an in-line blank character. Blanks that arrive after a line
	// break has been recorded (leadingBlanks is set) are dropped.
	void WhitespaceInfo::AddBlank(char ch)
	{
		if(leadingBlanks)
			return;

		whitespace += ch;
	}

	// Records a line break. The first break since the last Join() goes into
	// leadingBreaks (and discards any in-line blanks collected so far); every
	// later one is appended to trailingBreaks.
	void WhitespaceInfo::AddBreak(const std::string& line)
	{
		// already past the first break: this one is a trailing break
		if(leadingBlanks) {
			trailingBreaks += line;
			return;
		}

		// first break: blanks before it don't count
		leadingBlanks = true;
		whitespace = "";
		leadingBreaks += line;
	}

	// Returns the text that the whitespace collected since the previous call
	// contributes to the scalar, and resets the collected state.
	// . No line break recorded: the in-line blanks are returned as they are.
	// . Otherwise the recorded breaks are returned, except that with folding on,
	//   leadingBreaks matching Exp::Break and no trailing breaks (and not on the
	//   last line) collapse to a single space.
	// . On the last line, leadingBreaks are dropped when chomp is -1 and
	//   trailingBreaks are kept only when chomp is 1.
	std::string WhitespaceInfo::Join(bool lastLine)
	{
		// no break pending: hand back (and forget) the in-line blanks
		if(!leadingBlanks) {
			std::string blanks = whitespace;
			whitespace = "";
			return blanks;
		}

		const bool midScalar = !lastLine;
		std::string joined;

		// fold line break?
		if(fold && Exp::Break.Matches(leadingBreaks) && trailingBreaks.empty() && midScalar)
			joined = " ";
		else if(midScalar || chomp != -1)
			joined = leadingBreaks;

		if(midScalar || chomp == 1)
			joined += trailingBreaks;

		// start over for the next run of whitespace
		leadingBlanks = false;
		leadingBreaks = "";
		trailingBreaks = "";

		return joined;
	}

	// PlainScalarToken
	// . Scans an unquoted (plain) scalar by configuring a ScanScalarInfo and
	//   delegating the actual reading to ScanScalar.
	// . The scalar ends at the context's end-of-scalar expression (flow or block),
	//   at the ' ' + Exp::Comment expression, or, in block context, at the first
	//   non-empty line whose column is below the current indentation + 1.
	// . Line breaks are folded, leading whitespace on continuation lines is eaten,
	//   trailing spaces are trimmed, and the end marker itself is not consumed.
	// . Returns pToken with its value filled in.
	template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
	{
		// describe to ScanScalar how a plain scalar is read
		ScanScalarInfo info;
		info.end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
		info.eatEnd = false;
		info.indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
		info.fold = true;
		info.eatLeadingWhitespace = true;
		info.trimTrailingSpaces = true;
		info.chomp = CLIP;
		// NOTE(review): info.escape is not assigned here, so this relies on
		// ScanScalarInfo's default being a value that never matches an input
		// character - confirm against its constructor.

		// insert a potential simple key (before any input is consumed)
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

		pToken->value = ScanScalar(INPUT, info);

		// A simple key is allowed after every plain scalar. This used to be guarded
		// by a condition (info.leadingBlanks) that was hard-wired to true.
		// NOTE(review): the hand-rolled loop this replaced also called
		// ValidateSimpleKey() after each line break and threw IllegalTabInScalar
		// for tabs used as indentation; ScanScalar does neither.
		m_simpleKeyAllowed = true;

		return pToken;
	}

	// QuotedScalarToken
	// . Scans a single- or double-quoted scalar by configuring a ScanScalarInfo
	//   and delegating the actual reading to ScanScalar.
	// . pToken->single records whether the opening quote was a single quote.
	// . The closing quote is consumed; ScanScalar throws EOFInQuote if the input
	//   ends before it is found.
	// . Returns pToken with its value filled in.
	template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
	{
		// look at the opening quote but don't eat it yet: the potential simple key
		// below has to be registered while the input still sits at the start of
		// the token, the same way the plain scalar does it
		char quote = static_cast<char>(INPUT.peek());
		pToken->single = (quote == '\'');

		// describe to ScanScalar how a quoted scalar is read
		ScanScalarInfo info;
		info.end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
		info.eatEnd = true;
		info.escape = (pToken->single ? '\'' : '\\');
		info.indent = 0;
		info.fold = true;
		info.eatLeadingWhitespace = true;
		info.trimTrailingSpaces = false;
		info.chomp = CLIP;

		// insert a potential simple key
		if(m_simpleKeyAllowed)
			InsertSimpleKey();

		// now eat the opening quote
		INPUT.Eat(1);

		// NOTE(review): the hand-rolled loop this replaced also threw
		// DocIndicatorInQuote on a document start/end marker inside the quotes and
		// called ValidateSimpleKey() after each line break; ScanScalar does neither.
		pToken->value = ScanScalar(INPUT, info);
		m_simpleKeyAllowed = false;

		return pToken;
	}

	// BlockScalarToken
	// . Scans a block scalar: reads the header line (block indicator, chomping and
	//   indentation indicators, optional trailing comment), determines the content
	//   indentation, and hands the content to ScanScalar.
	// . Throws UnexpectedCharacterInBlockScalar if anything other than blanks and
	//   a comment follows the indicators on the header line; SetChompers throws
	//   ZeroIndentationInBlockScalar for an explicit indentation of zero.
	// . Returns pToken with its value filled in.
	template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken)
	{
		WhitespaceInfo info;

		// eat block indicator ('|' or '>')
		char indicator = INPUT.GetChar();
		info.fold = (indicator == Keys::FoldedScalar);

		// eat chomping/indentation indicators
		int n = Exp::Chomp.Match(INPUT);
		for(int i=0;i<n;i++)
			info.SetChompers(INPUT.GetChar());

		// first eat whitespace
		while(Exp::Blank.Matches(INPUT))
			INPUT.Eat(1);

		// and comments to the end of the line
		if(Exp::Comment.Matches(INPUT)) {
			while(INPUT && !Exp::Break.Matches(INPUT))
				INPUT.Eat(1);
		}

		// if it's not a line break, then we ran into a bad character inline
		if(INPUT && !Exp::Break.Matches(INPUT))
			throw UnexpectedCharacterInBlockScalar();

		// and eat that baby
		INPUT.EatLineBreak();

		// set the initial indentation: an explicit increment is relative to the
		// enclosing indentation (when there is one); zero means "detect it"
		int indent = info.increment;
		if(info.increment && m_indents.top() >= 0)
			indent += m_indents.top();

		// NOTE(review): the line breaks gathered into info.trailingBreaks are not
		// read again below, so blank lines before the first content line never
		// reach pToken->value - confirm whether that is intended.
		GetBlockIndentation(INPUT, indent, info.trailingBreaks, m_indents.top());

		// NOTE(review): end, eatEnd and escape are left at ScanScalarInfo's
		// defaults here - confirm those defaults never match block content.
		ScanScalarInfo sinfo;
		sinfo.indent = indent;
		sinfo.fold = info.fold;
		sinfo.eatLeadingWhitespace = false;
		sinfo.trimTrailingSpaces = false;
		sinfo.chomp = static_cast<CHOMP>(info.chomp);

		pToken->value = ScanScalar(INPUT, sinfo);

		// simple keys always ok after block scalars (since we're gonna start a new line anyways)
		m_simpleKeyAllowed = true;

		return pToken;
	}

	// GetBlockIndentation
	// . Helper to scanning a block scalar.
	// . Eats leading *indentation* zeros (i.e., those that come before 'indent'),
	//   and updates 'indent' (if it hasn't been set yet).
308
	void GetBlockIndentation(Stream& INPUT, int& indent, std::string& breaks, int topIndent)
309
310
311
312
313
	{
		int maxIndent = 0;

		while(1) {
			// eat as many indentation spaces as we can
314
315
			while((indent == 0 || INPUT.column < indent) && INPUT.peek() == ' ')
				INPUT.Eat(1);
316

317
318
			if(INPUT.column > maxIndent)
				maxIndent = INPUT.column;
319
320

			// do we need more indentation, but we've got a tab?
321
			if((indent == 0 || INPUT.column < indent) && INPUT.peek() == '\t')
322
323
324
325
326
327
328
329
				throw IllegalTabInScalar();   // TODO: are literal scalar lines allowed to have tabs here?

			// is this a non-empty line?
			if(!Exp::Break.Matches(INPUT))
				break;

			// otherwise, eat the line break and move on
			int n = Exp::Break.Match(INPUT);
330
			breaks += INPUT.GetChar(n);
331
332
333
334
335
		}

		// finally, set the indentation
		if(indent == 0) {
			indent = maxIndent;
336
337
			if(indent < topIndent + 1)
				indent = topIndent + 1;
338
339
340
341
			if(indent < 1)
				indent = 1;
		}
	}
342
343

	// ScanScalar
344
	std::string ScanScalar(Stream& INPUT, ScanScalarInfo info)
345
346
347
348
349
350
351
	{
		bool emptyLine = false, moreIndented = false;
		std::string scalar;

		while(INPUT) {
			// ********************************
			// Phase #1: scan until line ending
352
			while(!info.end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) {
353
354
355
356
				if(INPUT.peek() == EOF)
					break;

				// escaped newline? (only if we're escaping on slash)
357
				if(info.escape == '\\' && Exp::EscBreak.Matches(INPUT)) {
358
					int n = Exp::EscBreak.Match(INPUT);
359
					INPUT.Eat(n);
360
361
362
363
					continue;
				}

				// escape this?
364
				if(INPUT.peek() == info.escape) {
365
					scalar += Exp::Escape(INPUT);
366
367
368
369
					continue;
				}

				// otherwise, just add the damn character
370
				scalar += INPUT.GetChar();
371
372
373
374
			}

			// eof? if we're looking to eat something, then we throw
			if(INPUT.peek() == EOF) {
375
				if(info.eatEnd)
376
377
378
379
380
					throw EOFInQuote();
				break;
			}

			// are we done via character match?
381
			int n = info.end.Match(INPUT);
382
			if(n >= 0) {
383
				if(info.eatEnd)
384
					INPUT.Eat(n);
385
386
387
388
389
390
				break;
			}

			// ********************************
			// Phase #2: eat line ending
			n = Exp::Break.Match(INPUT);
391
			INPUT.Eat(n);
392
393
394
395
396

			// ********************************
			// Phase #3: scan initial spaces

			// first the required indentation
397
			while(INPUT.peek() == ' ' && INPUT.column < info.indent)
398
				INPUT.Eat(1);
399
400

			// and then the rest of the whitespace
401
			if(info.eatLeadingWhitespace) {
402
				while(Exp::Blank.Matches(INPUT))
403
					INPUT.Eat(1);
404
405
406
407
408
409
			}

			// was this an empty line?
			bool nextEmptyLine = Exp::Break.Matches(INPUT);
			bool nextMoreIndented = (INPUT.peek() == ' ');

410
			if(info.fold && !emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented)
411
412
413
414
415
416
417
418
				scalar += " ";
			else
				scalar += "\n"; 

			emptyLine = nextEmptyLine;
			moreIndented = nextMoreIndented;

			// are we done via indentation?
419
			if(!emptyLine && INPUT.column < info.indent)
420
421
422
423
				break;
		}

		// post-processing
424
		if(info.trimTrailingSpaces) {
425
426
427
428
429
			unsigned pos = scalar.find_last_not_of(' ');
			if(pos < scalar.size())
				scalar.erase(pos + 1);
		}

430
		if(info.chomp <= 0) {
431
			unsigned pos = scalar.find_last_not_of('\n');
432
			if(info.chomp == 0 && pos + 1 < scalar.size())
433
				scalar.erase(pos + 2);
434
			else if(info.chomp == -1 && pos < scalar.size())
435
436
437
438
439
				scalar.erase(pos + 1);
		}

		return scalar;
	}
440
}