"vscode:/vscode.git/clone" did not exist on "a6e310af7e82e484150b9980c17c2a100e601a53"
emitterutils.cpp 9.68 KB
Newer Older
1
2
3
#include "emitterutils.h"
#include "exp.h"
#include "indentation.h"
4
#include "yaml-cpp/binary.h"
5
#include "yaml-cpp/exceptions.h"
6
#include "stringsource.h"
7
8
#include <sstream>
#include <iomanip>
9
10
11
12
13
14

namespace YAML
{
	namespace Utils
	{
		namespace {
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
			// Code point U+FFFD, substituted by the helpers below whenever input
			// cannot be decoded or a code point is outside the encodable range.
			enum {REPLACEMENT_CHARACTER = 0xFFFD};

			// Tests whether a code point is allowed in an anchor/alias name
			// (the ns-anchor-char production).
			//   ch - the code point to classify
			// Returns false for flow indicators, white space, line breaks, the byte
			// order mark, control characters, surrogates, non-characters and values
			// above 0x10FFFF; true for everything else.
			bool IsAnchorChar(int ch) { // test for ns-anchor-char
				switch (ch) {
					case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
					case ' ': case '\t': // s-white
					case 0xFEFF: // c-byte-order-mark
					case 0xA: case 0xD: // b-char
						return false;
					case 0x85:
						return true;
				}

				if (ch < 0x20)
					return false;

				// printable ASCII runs through '~' (0x7E) inclusive
				if (ch <= 0x7E)
					return true;

				// DEL and the 0x80-0x9F range (0x85 was already accepted above)
				if (ch < 0xA0)
					return false;
				if (ch >= 0xD800 && ch <= 0xDFFF)
					return false;
				if ((ch & 0xFFFE) == 0xFFFE)
					return false;
				if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
					return false;
				if (ch > 0x10FFFF)
					return false;

				return true;
			}
			
			// Reports how many bytes a UTF-8 sequence occupies, judging by its first byte.
			//   ch - the first byte of the sequence
			// Returns 1-4 for a usable lead byte, or -1 when ch is a continuation
			// byte (10xxxxxx) or has the form 11111xxx, which no UTF-8 sequence starts with.
			int Utf8BytesIndicated(char ch) {
				int byteVal = static_cast<unsigned char>(ch);
				// 0xF8-0xFF share the high nibble 15 with the real 4-byte leads,
				// so they have to be rejected before the nibble switch
				if (byteVal >= 0xF8)
					return -1;
				switch (byteVal >> 4) {
					case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
						return 1;
					case 12: case 13:
						return 2;
					case 14:
						return 3;
					case 15:
						return 4;
					default:
					  return -1;
				}
			}

			// True when ch carries the bit pattern 10xxxxxx.
			bool IsTrailingByte(char ch) {
				const int topTwoBits = ch & 0xC0;
				return topTwoBits == 0x80;
			}
			
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
			// Decodes one code point from the UTF-8 bytes in [first, last) and moves
			// first past whatever was consumed.
			//   codePoint - receives the decoded value, or REPLACEMENT_CHARACTER when
			//               the bytes are malformed or decode to an illegal code point
			//   first     - current position; advanced by this call
			//   last      - end of the input
			// Returns false only when the input is already exhausted.
			bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
				if (first == last)
					return false;

				const int seqLength = Utf8BytesIndicated(*first);

				// Bad lead byte: skip just that byte
				if (seqLength < 1) {
					codePoint = REPLACEMENT_CHARACTER;
					++first;
					return true;
				}

				// Single byte: the byte is the code point
				if (seqLength == 1) {
					codePoint = *first;
					++first;
					return true;
				}

				// The lead byte contributes its low (7 - seqLength) bits
				const int leadMask = ~(0xFF << (7 - seqLength));
				int value = static_cast<unsigned char>(*first) & leadMask;
				++first;

				// Every trailing byte contributes six more bits
				int remaining = seqLength - 1;
				while (remaining > 0) {
					if (first == last || !IsTrailingByte(*first)) {
						// truncated or malformed; the offending byte is not consumed
						value = REPLACEMENT_CHARACTER;
						break;
					}
					value = (value << 6) | (*first & 0x3F);
					++first;
					--remaining;
				}

				// Check for illegal code points
				const bool outOfRange = value > 0x10FFFF;
				const bool surrogate = value >= 0xD800 && value <= 0xDFFF;
				const bool nonCharacter = ((value & 0xFFFE) == 0xFFFE)
				                          || (value >= 0xFDD0 && value <= 0xFDEF);
				if (outOfRange || surrogate || nonCharacter)
					value = REPLACEMENT_CHARACTER;

				codePoint = value;
				return true;
			}
			
			// Writes codePoint to out as UTF-8, using the shortest encoding.
			//   out       - destination stream
			//   codePoint - value to encode; anything negative or above 0x10FFFF is
			//               replaced by REPLACEMENT_CHARACTER first
			void WriteCodePoint(ostream& out, int codePoint) {
				if (codePoint < 0 || codePoint > 0x10FFFF) {
					codePoint = REPLACEMENT_CHARACTER;
				}
				// The upper bound of each form is inclusive: 0x7F, 0x7FF and 0xFFFF are
				// the largest values that fit in one, two and three bytes. Sending them
				// to the next-longer form would produce an overlong, invalid sequence.
				if (codePoint <= 0x7F) {
					out << static_cast<char>(codePoint);
				} else if (codePoint <= 0x7FF) {
					out << static_cast<char>(0xC0 | (codePoint >> 6))
					    << static_cast<char>(0x80 | (codePoint & 0x3F));
				} else if (codePoint <= 0xFFFF) {
					out << static_cast<char>(0xE0 | (codePoint >> 12))
					    << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
					    << static_cast<char>(0x80 | (codePoint & 0x3F));
				} else {
					out << static_cast<char>(0xF0 | (codePoint >> 18))
					    << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
					    << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
					    << static_cast<char>(0x80 | (codePoint & 0x3F));
				}
			}
			
			bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
132
133
134
				if(str.empty())
					return false;
				
135
				// first check the start
136
				const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
137
138
139
140
141
142
143
144
				if(!start.Matches(str))
					return false;
				
				// and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
				if(!str.empty() && *str.rbegin() == ' ')
					return false;

				// then check until something is disallowed
145
146
147
148
149
150
				const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
				                          || (Exp::BlankOrBreak() + Exp::Comment())
				                          || Exp::NotPrintable()
				                          || Exp::Utf8_ByteOrderMark()
				                          || Exp::Break()
				                          || Exp::Tab();
151
152
				StringCharSource buffer(str.c_str(), str.size());
				while(buffer) {
153
154
					if(disallowed.Matches(buffer))
						return false;
155
156
					if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0]))) 
						return false;
157
158
159
160
161
					++buffer;
				}
				
				return true;
			}
Jesse Beder's avatar
Jesse Beder committed
162

163
164
			// Writes codePoint to out as a backslash escape with lowercase hex digits:
			// "\x" + 2 digits below 0xFF, "\u" + 4 digits below 0xFFFF,
			// and "\U" + 8 digits for everything else.
			void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
				static const char hexDigits[] = "0123456789abcdef";

				// pick the escape letter and how many hex digits follow it
				char marker = 'U';
				int width = 8;
				if (codePoint < 0xFF) {
					marker = 'x';
					width = 2;
				} else if (codePoint < 0xFFFF) {
					marker = 'u';
					width = 4;
				}

				// backslash + marker + at most 8 digits + terminating NUL
				char escSeq[11];
				escSeq[0] = '\\';
				escSeq[1] = marker;

				// most significant nibble first
				for (int pos = 0; pos < width; ++pos) {
					const int shift = 4 * (width - 1 - pos);
					escSeq[2 + pos] = hexDigits[(codePoint >> shift) & 0xF];
				}
				escSeq[2 + width] = 0; // terminate with NUL character

				out << escSeq;
			}

186
187
188
189
190
191
192
193
			// Writes str to out code point by code point, as long as every code point
			// passes IsAnchorChar.
			// Returns false at the first code point that fails the check; whatever
			// preceded it has already been written by then.
			bool WriteAliasName(ostream& out, const std::string& str) {
				std::string::const_iterator cursor = str.begin();
				const std::string::const_iterator stop = str.end();

				int codePoint;
				while (GetNextCodePointAndAdvance(codePoint, cursor, stop)) {
					if (!IsAnchorChar(codePoint))
						return false;

					WriteCodePoint(out, codePoint);
				}
				return true;
			}
199
200
		}
		
201
		// Writes str as a plain scalar when IsValidPlainScalar accepts it, and as a
		// double-quoted string otherwise.
		// escapeNonAscii is passed both to the plain-scalar check (as its ASCII-only
		// flag) and to the double-quoted writer.
		// Returns true for the plain form, else whatever WriteDoubleQuotedString returns.
		bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
		{
			if(!IsValidPlainScalar(str, inFlow, escapeNonAscii))
				return WriteDoubleQuotedString(out, str, escapeNonAscii);

			out << str;
			return true;
		}
		
		// Writes str between single quotes, doubling every embedded single quote.
		// Returns false as soon as a newline is met (the opening quote and any text
		// before the newline have been written by then); true otherwise.
		bool WriteSingleQuotedString(ostream& out, const std::string& str)
		{
			out << "'";

			int codePoint;
			std::string::const_iterator pos = str.begin();
			while (GetNextCodePointAndAdvance(codePoint, pos, str.end())) {
				switch (codePoint) {
					case '\n':
						// We can't handle a new line and the attendant indentation yet
						return false;
					case '\'':
						out << "''";
						break;
					default:
						WriteCodePoint(out, codePoint);
						break;
				}
			}

			out << "'";
			return true;
		}
		
230
		// Writes str between double quotes, decoding it one code point at a time.
		//   out            - destination stream
		//   str            - text to write
		//   escapeNonAscii - when set, every code point above 0x7E is written as a
		//                    hex escape sequence instead of raw UTF-8
		// Double quotes and backslashes are backslash-escaped. Code points below 0x20,
		// the range 0x7F-0xA0 and the byte order mark are always written as hex escape
		// sequences. Always returns true.
		bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
		{
			out << "\"";
			int codePoint;
			for(std::string::const_iterator i = str.begin();
				GetNextCodePointAndAdvance(codePoint, i, str.end());
				) 
			{
				if (codePoint == '\"')
					out << "\\\"";
				else if (codePoint == '\\')
					out << "\\\\";
				// Control characters and non-breaking space. The range starts at DEL
				// (0x7F): it is not printable, so it must not be written raw even
				// when escapeNonAscii is off.
				else if (codePoint < 0x20 || (codePoint >= 0x7F && codePoint <= 0xA0))
					WriteDoubleQuoteEscapeSequence(out, codePoint);
				else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)	
					WriteDoubleQuoteEscapeSequence(out, codePoint);
				else if (escapeNonAscii && codePoint > 0x7E)
					WriteDoubleQuoteEscapeSequence(out, codePoint);
				else
					WriteCodePoint(out, codePoint);
			}
			out << "\"";
			return true;
		}

		// Writes str in literal block form: a "|" line, then the text with
		// IndentTo(indent) emitted at the start and after every newline.
		// Always returns true.
		bool WriteLiteralString(ostream& out, const std::string& str, int indent)
		{
			out << "|\n" << IndentTo(indent);

			int codePoint;
			std::string::const_iterator pos = str.begin();
			while (GetNextCodePointAndAdvance(codePoint, pos, str.end())) {
				if (codePoint != '\n') {
					WriteCodePoint(out, codePoint);
					continue;
				}
				// start the next line at the block's indentation
				out << "\n" << IndentTo(indent);
			}
			return true;
		}
		
Jesse Beder's avatar
Jesse Beder committed
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
		// Writes a single character: ASCII letters bare, every other character in
		// the range 0x20-0x7e inside double quotes, tab/newline/backspace as
		// "\t", "\n", "\b", and anything else as a quoted hex escape sequence.
		// Always returns true.
		bool WriteChar(ostream& out, char ch)
		{
			const bool isLetter = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
			if(isLetter) {
				out << ch;
				return true;
			}

			// remaining printable ASCII, space included
			if(0x20 <= ch && ch <= 0x7e) {
				out << "\"" << ch << "\"";
				return true;
			}

			switch(ch) {
				case '\t':
					out << "\"\\t\"";
					break;
				case '\n':
					out << "\"\\n\"";
					break;
				case '\b':
					out << "\"\\b\"";
					break;
				default:
					out << "\"";
					WriteDoubleQuoteEscapeSequence(out, ch);
					out << "\"";
					break;
			}
			return true;
		}

292
293
		// Writes str as a comment: "#" followed by Indentation(postCommentIndent),
		// then the text. Each newline in str starts a new comment line at the column
		// the stream was on when the call began. Always returns true.
		bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
		{
			// remember the starting column before anything is written
			const unsigned curIndent = out.col();
			out << "#" << Indentation(postCommentIndent);

			int codePoint;
			std::string::const_iterator pos = str.begin();
			while(GetNextCodePointAndAdvance(codePoint, pos, str.end())) {
				if(codePoint != '\n') {
					WriteCodePoint(out, codePoint);
					continue;
				}
				out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
			}
			return true;
		}

		// Writes "*" followed by the alias name.
		// Returns the result of WriteAliasName; the "*" is written even when
		// that result is false.
		bool WriteAlias(ostream& out, const std::string& str)
		{
			out << "*";
			const bool nameWritten = WriteAliasName(out, str);
			return nameWritten;
		}
		
		// Writes "&" followed by the anchor name.
		// Returns the result of WriteAliasName; the "&" is written even when
		// that result is false.
		bool WriteAnchor(ostream& out, const std::string& str)
		{
			out << "&";
			const bool nameWritten = WriteAliasName(out, str);
			return nameWritten;
		}
320

321
		// Writes a tag: "!<" + str + ">" when verbatim is set, "!" + str otherwise.
		// str is consumed in chunks matched by Exp::URI() (verbatim) or Exp::Tag().
		// Returns false at the first position where the expression does not match;
		// everything before that position has already been written.
		bool WriteTag(ostream& out, const std::string& str, bool verbatim)
		{
			if (verbatim)
				out << "!<";
			else
				out << "!";

			const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
			StringCharSource buffer(str.c_str(), str.size());
			while(buffer) {
				const int matched = reValid.Match(buffer);
				if(matched < 1)
					return false;

				// copy out exactly the characters that matched
				for(int k = 0; k < matched; ++k, ++buffer)
					out << buffer[0];
			}

			if (verbatim)
				out << ">";
			return true;
		}
340

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
		// Writes "!" + prefix + "!" + tag.
		// prefix is consumed in chunks matched by Exp::URI(), tag in chunks matched
		// by Exp::Tag(). Returns false at the first position where the relevant
		// expression does not match; output written up to then stays in out.
		bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
		{
			out << "!";
			for(StringCharSource prefixBuffer(prefix.c_str(), prefix.size()); prefixBuffer; ) {
				const int matched = Exp::URI().Match(prefixBuffer);
				if(matched < 1)
					return false;

				for(int k = 0; k < matched; ++k, ++prefixBuffer)
					out << prefixBuffer[0];
			}

			out << "!";
			for(StringCharSource tagBuffer(tag.c_str(), tag.size()); tagBuffer; ) {
				const int matched = Exp::Tag().Match(tagBuffer);
				if(matched < 1)
					return false;

				for(int k = 0; k < matched; ++k, ++tagBuffer)
					out << tagBuffer[0];
			}
			return true;
		}

371
		// Hands the binary's data pointer and size to WriteBase64.
		// Always returns true.
		bool WriteBinary(ostream& out, const Binary& binary)
		{
			WriteBase64(out, binary.data(), binary.size());
			return true;
		}
376
377
378
	}
}