// scanscalar.cpp
#include "scanscalar.h"

#include <algorithm>
#include <cstddef>
#include <string>

#include "scanner.h"
#include "exp.h"
#include "yaml-cpp/exceptions.h"
#include "token.h"

namespace YAML
{
	// ScanScalar
	// . This is where the scalar magic happens.
	//
	// . We do the scanning in three phases:
	//   1. Scan until newline
	//   2. Eat newline
	//   3. Scan leading blanks.
	//
	// . Depending on the parameters given, we store or stop
	//   at different places in the above flow.
	//
	// . Parameters read from 'params': end, eatEnd, escape, fold, chomp,
	//   indent, detectIndent, eatLeadingWhitespace, trimTrailingSpaces,
	//   onDocIndicator, onTabInIndentation.
	// . Parameters written to 'params':
	//   - leadingSpaces: reset to false on entry, set to true when the scan
	//     stops because a non-empty line is indented less than params.indent.
	//   - indent: raised to the detected column while params.detectIndent is
	//     set and no non-empty line has been seen yet.
	//
	// . Returns the scanned scalar text after folding, trailing-space
	//   trimming and chomping have been applied.
	//
	// . Throws ParserException with:
	//   - ErrorMsg::DOC_IN_SCALAR      if a document indicator is found at
	//     column 0 and params.onDocIndicator == THROW
	//   - ErrorMsg::EOF_IN_SCALAR      if the input ends while params.eatEnd
	//     is set
	//   - ErrorMsg::TAB_IN_INDENTATION if a tab is found before params.indent
	//     and params.onTabInIndentation == THROW
	std::string ScanScalar(Stream& INPUT, ScanScalarParams& params)
	{
		bool foundNonEmptyLine = false;
		// flow scalars have no opening line break to skip
		bool pastOpeningBreak = (params.fold == FOLD_FLOW);
		bool emptyLine = false, moreIndented = false;
		// number of consecutive empty lines seen while block-folding
		int foldedNewlineCount = 0;
		bool foldedNewlineStartedMoreIndented = false;
		// scalar.size() as it was right after the most recent escape was
		// handled; npos if nothing has been escaped so far
		std::size_t lastEscapedChar = std::string::npos;
		std::string scalar;
		params.leadingSpaces = false;

		while(INPUT) {
			// ********************************
			// Phase #1: scan until line ending

			// length of 'scalar' up to (and including) the last character of
			// this line that must survive flow-folding's whitespace trimming
			std::size_t lastNonWhitespaceChar = scalar.size();
			bool escapedNewline = false;
			while(!params.end.Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
				if(!INPUT)
					break;

				// document indicator?
				if(INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
					if(params.onDocIndicator == BREAK)
						break;
					else if(params.onDocIndicator == THROW)
						throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
				}

				foundNonEmptyLine = true;
				pastOpeningBreak = true;

				// escaped newline? (only if we're escaping on slash)
				if(params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
					// eat escape character and get out (but preserve trailing whitespace!)
					INPUT.get();
					lastNonWhitespaceChar = scalar.size();
					lastEscapedChar = scalar.size();
					escapedNewline = true;
					break;
				}

				// escape this?
				if(INPUT.peek() == params.escape) {
					scalar += Exp::Escape(INPUT);
					lastNonWhitespaceChar = scalar.size();
					lastEscapedChar = scalar.size();
					continue;
				}

				// otherwise, just add the character
				char ch = INPUT.get();
				scalar += ch;
				if(ch != ' ' && ch != '\t')
					lastNonWhitespaceChar = scalar.size();
			}

			// eof? if we're looking to eat something, then we throw
			if(!INPUT) {
				if(params.eatEnd)
					throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
				break;
			}

			// doc indicator?
			if(params.onDocIndicator == BREAK && INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT))
				break;

			// are we done via character match?
			int n = params.end.Match(INPUT);
			if(n >= 0) {
				if(params.eatEnd)
					INPUT.eat(n);
				break;
			}

			// do we remove trailing whitespace?
			if(params.fold == FOLD_FLOW)
				scalar.erase(lastNonWhitespaceChar);

			// ********************************
			// Phase #2: eat line ending
			n = Exp::Break().Match(INPUT);
			INPUT.eat(n);

			// ********************************
			// Phase #3: scan initial spaces

			// first the required indentation
			while(INPUT.peek() == ' ' && (INPUT.column() < params.indent || (params.detectIndent && !foundNonEmptyLine)))
				INPUT.eat(1);

			// update indent if we're auto-detecting
			if(params.detectIndent && !foundNonEmptyLine)
				params.indent = std::max(params.indent, INPUT.column());

			// and then the rest of the whitespace
			while(Exp::Blank().Matches(INPUT)) {
				// we check for tabs that masquerade as indentation
				if(INPUT.peek() == '\t' && INPUT.column() < params.indent && params.onTabInIndentation == THROW)
					throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);

				if(!params.eatLeadingWhitespace)
					break;

				INPUT.eat(1);
			}

			// was this an empty line?
			bool nextEmptyLine = Exp::Break().Matches(INPUT);
			bool nextMoreIndented = Exp::Blank().Matches(INPUT);
			// remember whether the run of empty lines began after a more-indented line
			if(params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
				foldedNewlineStartedMoreIndented = moreIndented;

			// for block scalars, we always start with a newline, so we should ignore it (not fold or keep)
			if(pastOpeningBreak) {
				switch(params.fold) {
					case DONT_FOLD:
						scalar += "\n";
						break;
					case FOLD_BLOCK:
						if(!emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented && INPUT.column() >= params.indent)
							scalar += " ";
						else if(nextEmptyLine)
							foldedNewlineCount++;
						else
							scalar += "\n";

						// flush the pending run of empty lines once a non-empty line follows
						if(!nextEmptyLine && foldedNewlineCount > 0) {
							scalar += std::string(foldedNewlineCount - 1, '\n');
							if(foldedNewlineStartedMoreIndented || nextMoreIndented || !foundNonEmptyLine)
								scalar += "\n";
							foldedNewlineCount = 0;
						}
						break;
					case FOLD_FLOW:
						if(nextEmptyLine)
							scalar += "\n";
						else if(!emptyLine && !nextEmptyLine && !escapedNewline)
							scalar += " ";
						break;
				}
			}

			emptyLine = nextEmptyLine;
			moreIndented = nextMoreIndented;
			pastOpeningBreak = true;

			// are we done via indentation?
			if(!emptyLine && INPUT.column() < params.indent) {
				params.leadingSpaces = true;
				break;
			}
		}

		// post-processing
		if(params.trimTrailingSpaces) {
			std::size_t pos = scalar.find_last_not_of(' ');
			// never trim in front of the position recorded for the last escape
			if(lastEscapedChar != std::string::npos) {
				if(pos < lastEscapedChar || pos == std::string::npos)
					pos = lastEscapedChar;
			}
			if(pos < scalar.size())
				scalar.erase(pos + 1);
		}

		switch(params.chomp) {
			case CLIP: {
				// keep at most one character after 'pos' (i.e. a single trailing newline)
				std::size_t pos = scalar.find_last_not_of('\n');
				if(lastEscapedChar != std::string::npos) {
					if(pos < lastEscapedChar || pos == std::string::npos)
						pos = lastEscapedChar;
				}
				if(pos == std::string::npos)
					scalar.erase();
				else if(pos + 1 < scalar.size())
					scalar.erase(pos + 2);
			} break;
			case STRIP: {
				// drop everything after 'pos' (i.e. all trailing newlines)
				std::size_t pos = scalar.find_last_not_of('\n');
				if(lastEscapedChar != std::string::npos) {
					if(pos < lastEscapedChar || pos == std::string::npos)
						pos = lastEscapedChar;
				}
				if(pos == std::string::npos)
					scalar.erase();
				else if(pos < scalar.size())
					scalar.erase(pos + 1);
			} break;
			default:
				// any other chomp mode leaves the trailing newlines untouched
				break;
		}

		return scalar;
	}
}