scanscalar.cpp 4.31 KB
Newer Older
1
#include "crt.h"
2
3
4
5
6
7
8
9
#include "scanscalar.h"
#include "scanner.h"
#include "exp.h"
#include "exceptions.h"
#include "token.h"

namespace YAML
{
10
	// ScanScalar
11
12
13
14
15
16
17
18
19
20
	// . This is where the scalar magic happens.
	//
	// . We do the scanning in three phases:
	//   1. Scan until newline
	//   2. Eat newline
	//   3. Scan leading blanks.
	//
	// . Depending on the parameters given, we store or stop
	//   and different places in the above flow.
	std::string ScanScalar(Stream& INPUT, ScanScalarParams& params)
21
	{
22
		bool foundNonEmptyLine = false, pastOpeningBreak = false;
23
24
		bool emptyLine = false, moreIndented = false;
		std::string scalar;
25
		params.leadingSpaces = false;
26
27
28
29

		while(INPUT) {
			// ********************************
			// Phase #1: scan until line ending
30
			while(!params.end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) {
31
32
33
				if(INPUT.peek() == EOF)
					break;

34
35
				// document indicator?
				if(INPUT.column == 0 && Exp::DocIndicator.Matches(INPUT)) {
36
					if(params.onDocIndicator == BREAK)
37
						break;
38
					else if(params.onDocIndicator == THROW)
39
						throw ParserException(INPUT.line, INPUT.column, ErrorMsg::DOC_IN_SCALAR);
40
41
42
				}

				foundNonEmptyLine = true;
43
				pastOpeningBreak = true;
44

45
				// escaped newline? (only if we're escaping on slash)
46
				if(params.escape == '\\' && Exp::EscBreak.Matches(INPUT)) {
47
					int n = Exp::EscBreak.Match(INPUT);
48
					INPUT.eat(n);
49
50
51
52
					continue;
				}

				// escape this?
53
				if(INPUT.peek() == params.escape) {
54
					scalar += Exp::Escape(INPUT);
55
56
57
58
					continue;
				}

				// otherwise, just add the damn character
59
				scalar += INPUT.get();
60
61
62
63
			}

			// eof? if we're looking to eat something, then we throw
			if(INPUT.peek() == EOF) {
64
				if(params.eatEnd)
65
					throw ParserException(INPUT.line, INPUT.column, ErrorMsg::EOF_IN_SCALAR);
66
67
68
				break;
			}

69
			// doc indicator?
70
			if(params.onDocIndicator == BREAK && INPUT.column == 0 && Exp::DocIndicator.Matches(INPUT))
71
72
				break;

73
			// are we done via character match?
74
			int n = params.end.Match(INPUT);
75
			if(n >= 0) {
76
				if(params.eatEnd)
77
					INPUT.eat(n);
78
79
80
81
82
83
				break;
			}

			// ********************************
			// Phase #2: eat line ending
			n = Exp::Break.Match(INPUT);
84
			INPUT.eat(n);
85
86
87
88
89

			// ********************************
			// Phase #3: scan initial spaces

			// first the required indentation
90
			while(INPUT.peek() == ' ' && (INPUT.column < params.indent || (params.detectIndent && !foundNonEmptyLine)))
91
				INPUT.eat(1);
92

93
			// update indent if we're auto-detecting
94
95
			if(params.detectIndent && !foundNonEmptyLine)
				params.indent = std::max(params.indent, INPUT.column);
96

97
			// and then the rest of the whitespace
98
99
			while(Exp::Blank.Matches(INPUT)) {
				// we check for tabs that masquerade as indentation
100
				if(INPUT.peek() == '\t'&& INPUT.column < params.indent && params.onTabInIndentation == THROW)
101
					throw ParserException(INPUT.line, INPUT.column, ErrorMsg::TAB_IN_INDENTATION);
102

103
				if(!params.eatLeadingWhitespace)
104
105
					break;

106
				INPUT.eat(1);
107
108
109
110
111
112
			}

			// was this an empty line?
			bool nextEmptyLine = Exp::Break.Matches(INPUT);
			bool nextMoreIndented = (INPUT.peek() == ' ');

113
			// for block scalars, we always start with a newline, so we should ignore it (not fold or keep)
114
115
116
117
118
119
			bool useNewLine = pastOpeningBreak;
			// and for folded scalars, we don't fold the very last newline to a space
			if(params.fold && !emptyLine && INPUT.column < params.indent)
				useNewLine = false;

			if(useNewLine) {
120
121
122
123
124
				if(params.fold && !emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented)
					scalar += " ";
				else
					scalar += "\n";
			}
125
126
127

			emptyLine = nextEmptyLine;
			moreIndented = nextMoreIndented;
128
			pastOpeningBreak = true;
129
130

			// are we done via indentation?
131
132
			if(!emptyLine && INPUT.column < params.indent) {
				params.leadingSpaces = true;
133
				break;
134
			}
135
136
137
		}

		// post-processing
138
		if(params.trimTrailingSpaces) {
139
140
141
142
143
			unsigned pos = scalar.find_last_not_of(' ');
			if(pos < scalar.size())
				scalar.erase(pos + 1);
		}

144
		if(params.chomp <= 0) {
145
			unsigned pos = scalar.find_last_not_of('\n');
146
			if(params.chomp == 0 && pos + 1 < scalar.size())
147
				scalar.erase(pos + 2);
148
			else if(params.chomp == -1 && pos < scalar.size())
149
150
151
152
153
				scalar.erase(pos + 1);
		}

		return scalar;
	}
154
}