"...composable_kernel_rocm.git" did not exist on "35269cf77a4a4f36a13cbe26fc964edc3384b8a9"
emitterutils.cpp 13 KB
Newer Older
1
#include <algorithm>
2
#include <cstdint>
Jesse Beder's avatar
Jesse Beder committed
3
4
5
#include <iomanip>
#include <sstream>

6
7
8
#include "emitterutils.h"
#include "exp.h"
#include "indentation.h"
Jesse Beder's avatar
Jesse Beder committed
9
10
#include "regex_yaml.h"
#include "regeximpl.h"
11
#include "stringsource.h"
Jesse Beder's avatar
Jesse Beder committed
12
#include "yaml-cpp/binary.h"  // IWYU pragma: keep
13
#include "yaml-cpp/null.h"
14
#include "yaml-cpp/ostream_wrapper.h"
15

Jesse Beder's avatar
Jesse Beder committed
16
17
18
namespace YAML {
namespace Utils {
namespace {
Jesse Beder's avatar
Jesse Beder committed
19
// U+FFFD: the code point substituted by the helpers in this file whenever a
// byte sequence cannot be decoded or a code point is not allowed to be written.
enum { REPLACEMENT_CHARACTER = 0xFFFD };
Jesse Beder's avatar
Jesse Beder committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

// Tests whether the code point `ch` is an ns-anchor-char, i.e. whether it may
// appear inside an anchor or alias name.
//
// Rejected: flow indicators, space and tab, line breaks, the byte order mark,
// control characters (NEL, 0x85, is the one exception), DEL, UTF-16 surrogates,
// Unicode noncharacters and anything above U+10FFFF. Negative values are
// rejected as well.
bool IsAnchorChar(int ch) {  // test for ns-anchor-char
  switch (ch) {
    case ',':
    case '[':
    case ']':
    case '{':
    case '}':  // c-flow-indicator
    case ' ':
    case '\t':    // s-white
    case 0xFEFF:  // c-byte-order-mark
    case 0xA:
    case 0xD:  // b-char
      return false;
    case 0x85:  // NEL is printable even though it sits in the C1 range
      return true;
  }

  if (ch < 0x20) {  // C0 controls (and negative values)
    return false;
  }

  // Printable ASCII runs through 0x7E inclusive, so '~' is a valid anchor
  // character; only DEL (0x7F) falls through to the control range below.
  if (ch <= 0x7E) {
    return true;
  }

  if (ch < 0xA0) {  // DEL and the C1 controls
    return false;
  }
  if (ch >= 0xD800 && ch <= 0xDFFF) {  // UTF-16 surrogates
    return false;
  }
  if ((ch & 0xFFFE) == 0xFFFE) {  // U+nFFFE / U+nFFFF noncharacters
    return false;
  }
  if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) {  // noncharacter block
    return false;
  }
  if (ch > 0x10FFFF) {  // beyond the Unicode range
    return false;
  }

  return true;
}

// Returns the length (1-4) of the UTF-8 sequence introduced by lead byte `ch`,
// or -1 when `ch` cannot start a sequence.
//
// Continuation bytes (0x80..0xBF) and the bytes 0xF8..0xFF, which never occur
// in well-formed UTF-8, are both reported as -1 so that the caller replaces
// them one byte at a time instead of swallowing the bytes that follow.
int Utf8BytesIndicated(char ch) {
  const int byteVal = static_cast<unsigned char>(ch);
  if (byteVal < 0x80) {  // 0xxxxxxx
    return 1;
  }
  if (byteVal < 0xC0) {  // 10xxxxxx: continuation byte, not a lead
    return -1;
  }
  if (byteVal < 0xE0) {  // 110xxxxx
    return 2;
  }
  if (byteVal < 0xF0) {  // 1110xxxx
    return 3;
  }
  if (byteVal < 0xF8) {  // 11110xxx
    return 4;
  }
  return -1;  // 11111xxx: not a valid UTF-8 lead byte
}

// True when `ch` has the bit pattern 10xxxxxx, i.e. is a UTF-8 continuation
// byte.
bool IsTrailingByte(char ch) {
  const unsigned char byte = static_cast<unsigned char>(ch);
  return (byte >> 6) == 0x2;
}

// Decodes one UTF-8 sequence starting at `first` into `codePoint` and moves
// `first` past the bytes that were consumed.
//
// Returns false only when the range is already exhausted (`first == last`);
// `codePoint` is left untouched in that case. Otherwise returns true, with
// `codePoint` set to REPLACEMENT_CHARACTER when the lead byte is invalid, a
// continuation byte is missing, or the decoded value is out of range, a
// surrogate, or a noncharacter.
bool GetNextCodePointAndAdvance(int& codePoint,
                                std::string::const_iterator& first,
                                std::string::const_iterator last) {
  if (first == last) {
    return false;
  }

  const int sequenceLength = Utf8BytesIndicated(*first);

  if (sequenceLength < 1) {
    // Bad lead byte: skip exactly one byte and report a replacement.
    codePoint = REPLACEMENT_CHARACTER;
    ++first;
    return true;
  }

  if (sequenceLength == 1) {
    codePoint = *first;
    ++first;
    return true;
  }

  // The lead byte contributes its low (7 - length) bits.
  const int leadMask = ~(0xFF << (7 - sequenceLength));
  codePoint = static_cast<unsigned char>(*first) & leadMask;
  ++first;

  // Each continuation byte contributes six more bits. A missing or malformed
  // continuation byte stops decoding without consuming that byte.
  int remaining = sequenceLength - 1;
  while (remaining > 0) {
    if ((first == last) || !IsTrailingByte(*first)) {
      codePoint = REPLACEMENT_CHARACTER;
      break;
    }
    codePoint = (codePoint << 6) | (*first & 0x3F);
    ++first;
    --remaining;
  }

  // Reject values that are not legal code points.
  const bool beyondUnicode = codePoint > 0x10FFFF;
  const bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
  const bool isPlaneEndNonchar = (codePoint & 0xFFFE) == 0xFFFE;
  const bool isBlockNonchar = codePoint >= 0xFDD0 && codePoint <= 0xFDEF;
  if (beyondUnicode || isSurrogate || isPlaneEndNonchar || isBlockNonchar) {
    codePoint = REPLACEMENT_CHARACTER;
  }
  return true;
}

// Writes `codePoint` to `out` encoded as UTF-8 (one to four bytes). Values
// that are negative or above U+10FFFF are written as REPLACEMENT_CHARACTER.
void WriteCodePoint(ostream_wrapper& out, int codePoint) {
  const bool encodable = codePoint >= 0 && codePoint <= 0x10FFFF;
  const int cp =
      encodable ? codePoint : static_cast<int>(REPLACEMENT_CHARACTER);

  if (cp <= 0x7F) {
    out << static_cast<char>(cp);
    return;
  }

  if (cp <= 0x7FF) {
    out << static_cast<char>(0xC0 | (cp >> 6));
    out << static_cast<char>(0x80 | (cp & 0x3F));
    return;
  }

  if (cp <= 0xFFFF) {
    out << static_cast<char>(0xE0 | (cp >> 12));
    out << static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    out << static_cast<char>(0x80 | (cp & 0x3F));
    return;
  }

  out << static_cast<char>(0xF0 | (cp >> 18));
  out << static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
  out << static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
  out << static_cast<char>(0x80 | (cp & 0x3F));
}

bool IsValidPlainScalar(const std::string& str, FlowType::value flowType,
                        bool allowOnlyAscii) {
158
  // check against null
159
  if (IsNullString(str)) {
Jesse Beder's avatar
Jesse Beder committed
160
    return false;
161
  }
Jesse Beder's avatar
Jesse Beder committed
162

163
  // check the start
Jesse Beder's avatar
Jesse Beder committed
164
165
  const RegEx& start = (flowType == FlowType::Flow ? Exp::PlainScalarInFlow()
                                                   : Exp::PlainScalar());
166
  if (!start.Matches(str)) {
Jesse Beder's avatar
Jesse Beder committed
167
    return false;
168
  }
Jesse Beder's avatar
Jesse Beder committed
169
170
171

  // and check the end for plain whitespace (which can't be faithfully kept in a
  // plain scalar)
172
  if (!str.empty() && *str.rbegin() == ' ') {
Jesse Beder's avatar
Jesse Beder committed
173
    return false;
174
  }
Jesse Beder's avatar
Jesse Beder committed
175
176

  // then check until something is disallowed
177
  static const RegEx disallowed_flow =
178
179
      Exp::EndScalarInFlow() | (Exp::BlankOrBreak() + Exp::Comment()) |
      Exp::NotPrintable() | Exp::Utf8_ByteOrderMark() | Exp::Break() |
180
      Exp::Tab() | Exp::Ampersand();
181
  static const RegEx disallowed_block =
182
183
      Exp::EndScalar() | (Exp::BlankOrBreak() + Exp::Comment()) |
      Exp::NotPrintable() | Exp::Utf8_ByteOrderMark() | Exp::Break() |
184
      Exp::Tab() | Exp::Ampersand();
185
186
187
  const RegEx& disallowed =
      flowType == FlowType::Flow ? disallowed_flow : disallowed_block;

Jesse Beder's avatar
Jesse Beder committed
188
189
  StringCharSource buffer(str.c_str(), str.size());
  while (buffer) {
190
    if (disallowed.Matches(buffer)) {
Jesse Beder's avatar
Jesse Beder committed
191
      return false;
192
193
    }
    if (allowOnlyAscii && (0x80 <= static_cast<unsigned char>(buffer[0]))) {
Jesse Beder's avatar
Jesse Beder committed
194
      return false;
195
    }
Jesse Beder's avatar
Jesse Beder committed
196
197
198
199
200
201
202
203
    ++buffer;
  }

  return true;
}

// Decides whether `str` can be emitted as a single-quoted scalar: it must not
// contain a newline and, when `escapeNonAscii` is set, must not contain any
// byte >= 0x80 (single-quoted scalars have no escape sequences).
bool IsValidSingleQuotedScalar(const std::string& str, bool escapeNonAscii) {
  // TODO: check for non-printable characters?
  for (const char ch : str) {
    if (ch == '\n') {
      return false;
    }
    if (escapeNonAscii && static_cast<unsigned char>(ch) >= 0x80) {
      return false;
    }
  }
  return true;
}

bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
                          bool escapeNonAscii) {
212
  if (flowType == FlowType::Flow) {
Jesse Beder's avatar
Jesse Beder committed
213
    return false;
214
  }
Jesse Beder's avatar
Jesse Beder committed
215
216

  // TODO: check for non-printable characters?
217
218
219
  return std::none_of(str.begin(), str.end(), [=](char ch) {
    return (escapeNonAscii && (0x80 <= static_cast<unsigned char>(ch)));
  });
Jesse Beder's avatar
Jesse Beder committed
220
221
}

222
223
224
225
226
227
228
229
230
231
// Splits a supplementary-plane code point into its UTF-16 surrogate pair.
//
// codePoint - expected to lie in U+10000..U+10FFFF; other values do not
//             produce a meaningful pair.
//
// Returns {lead, trail}, with lead in 0xD800..0xDBFF and trail in
// 0xDC00..0xDFFF.
std::pair<uint16_t, uint16_t> EncodeUTF16SurrogatePair(int codePoint) {
  // The pair encodes the 20-bit offset from U+10000: the high ten bits go into
  // the lead surrogate and the low ten bits into the trail surrogate. The
  // lead has to be formed by addition; OR-ing the shifted code point into a
  // pre-biased base corrupts it because the bit patterns overlap.
  const uint32_t offset = static_cast<uint32_t>(codePoint) - 0x10000;
  const uint16_t lead = static_cast<uint16_t>(0xD800 + (offset >> 10));
  const uint16_t trail = static_cast<uint16_t>(0xDC00 + (offset & 0x3FF));

  return {lead, trail};
}

// Writes `codePoint` to `out` as an escape sequence suitable for a
// double-quoted scalar, using lowercase hex digits.
//
// Non-JSON styles: \xNN for values below 0xFF, \uNNNN up to 0xFFFF, and
// \UNNNNNNNN above that.
// JSON style: only \uNNNN exists, so values above 0xFFFF are written as a
// UTF-16 surrogate pair (two consecutive \uNNNN escapes).
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint, StringEscaping::value stringEscapingStyle) {
  static const char hexDigits[] = "0123456789abcdef";
  const bool jsonStyle = (stringEscapingStyle == StringEscaping::JSON);

  // Handle the surrogate-pair case before anything is written: each recursive
  // call emits its own backslash, so writing one here first would leave a
  // stray '\' in front of the pair.
  if (jsonStyle && codePoint > 0xFFFF) {
    const auto surrogatePair = EncodeUTF16SurrogatePair(codePoint);
    WriteDoubleQuoteEscapeSequence(out, surrogatePair.first, stringEscapingStyle);
    WriteDoubleQuoteEscapeSequence(out, surrogatePair.second, stringEscapingStyle);
    return;
  }

  out << "\\";
  int digits = 8;
  if (codePoint < 0xFF && !jsonStyle) {
    out << "x";
    digits = 2;
  } else if (codePoint <= 0xFFFF) {
    // Inclusive bound: U+FFFF still fits in four hex digits and must not be
    // routed to the surrogate-pair or eight-digit forms.
    out << "u";
    digits = 4;
  } else {
    out << "U";
    digits = 8;
  }

  // Write digits into the escape sequence, most significant nibble first
  for (; digits > 0; --digits)
    out << hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
}

// Writes `str` to `out` as an anchor/alias name, one code point at a time.
// Returns false at the first code point that is not a valid anchor character;
// the code points before it have already been written by then.
bool WriteAliasName(ostream_wrapper& out, const std::string& str) {
  std::string::const_iterator cursor = str.begin();
  int codePoint = 0;
  while (GetNextCodePointAndAdvance(codePoint, cursor, str.end())) {
    if (!IsAnchorChar(codePoint)) {
      return false;
    }
    WriteCodePoint(out, codePoint);
  }
  return true;
}
269
}  // namespace
Jesse Beder's avatar
Jesse Beder committed
270
271
272
273
274
275
276

// Picks the format in which `str` will be emitted. The format requested via
// `strFormat` (Auto -> plain, SingleQuoted, Literal) is used only when the
// string is valid in that format; every other case, including an explicit
// DoubleQuoted request and unrecognized manipulators, yields DoubleQuoted.
StringFormat::value ComputeStringFormat(const std::string& str,
                                        EMITTER_MANIP strFormat,
                                        FlowType::value flowType,
                                        bool escapeNonAscii) {
  if (strFormat == Auto) {
    if (IsValidPlainScalar(str, flowType, escapeNonAscii)) {
      return StringFormat::Plain;
    }
  } else if (strFormat == SingleQuoted) {
    if (IsValidSingleQuotedScalar(str, escapeNonAscii)) {
      return StringFormat::SingleQuoted;
    }
  } else if (strFormat == Literal) {
    if (IsValidLiteralScalar(str, flowType, escapeNonAscii)) {
      return StringFormat::Literal;
    }
  }

  // Double quoting can represent any string, so it is the universal fallback.
  return StringFormat::DoubleQuoted;
}

// Writes `str` to `out` as a single-quoted scalar, doubling every embedded
// single quote. Returns false as soon as a newline is encountered; the opening
// quote and any preceding characters have already been written at that point.
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
  out << "'";

  std::string::const_iterator cursor = str.begin();
  int codePoint = 0;
  while (GetNextCodePointAndAdvance(codePoint, cursor, str.end())) {
    switch (codePoint) {
      case '\n':
        // We can't handle a new line and the attendant indentation yet
        return false;
      case '\'':
        out << "''";
        break;
      default:
        WriteCodePoint(out, codePoint);
        break;
    }
  }

  out << "'";
  return true;
}

bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
321
                             StringEscaping::value stringEscaping) {
Jesse Beder's avatar
Jesse Beder committed
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
  out << "\"";
  int codePoint;
  for (std::string::const_iterator i = str.begin();
       GetNextCodePointAndAdvance(codePoint, i, str.end());) {
    switch (codePoint) {
      case '\"':
        out << "\\\"";
        break;
      case '\\':
        out << "\\\\";
        break;
      case '\n':
        out << "\\n";
        break;
      case '\t':
        out << "\\t";
        break;
      case '\r':
        out << "\\r";
        break;
      case '\b':
        out << "\\b";
        break;
345
346
347
      case '\f':
        out << "\\f";
        break;
Jesse Beder's avatar
Jesse Beder committed
348
349
350
      default:
        if (codePoint < 0x20 ||
            (codePoint >= 0x80 &&
351
             codePoint <= 0xA0)) {  // Control characters and non-breaking space
352
          WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
353
354
        } else if (codePoint == 0xFEFF) {  // Byte order marks (ZWNS) should be
                                           // escaped (YAML 1.2, sec. 5.2)
355
356
357
          WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
        } else if (stringEscaping == StringEscaping::NonAscii && codePoint > 0x7E) {
          WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
358
        } else {
Jesse Beder's avatar
Jesse Beder committed
359
          WriteCodePoint(out, codePoint);
360
        }
Jesse Beder's avatar
Jesse Beder committed
361
362
363
364
365
366
367
    }
  }
  out << "\"";
  return true;
}

bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
368
                        std::size_t indent) {
Jesse Beder's avatar
Jesse Beder committed
369
370
371
372
  out << "|\n";
  int codePoint;
  for (std::string::const_iterator i = str.begin();
       GetNextCodePointAndAdvance(codePoint, i, str.end());) {
373
    if (codePoint == '\n') {
374
      out << "\n";
375
    } else {
376
      out<< IndentTo(indent);
Jesse Beder's avatar
Jesse Beder committed
377
      WriteCodePoint(out, codePoint);
378
    }
Jesse Beder's avatar
Jesse Beder committed
379
380
381
382
  }
  return true;
}

383
// Writes the single character `ch` to `out` as a scalar.
//
// ASCII letters are written bare. Every other character is wrapped in double
// quotes: quote, backslash and the common control characters use their short
// escapes, other printable ASCII is written as is, and anything else is
// written as a numeric escape in the given escaping style.
//
// Always returns true.
bool WriteChar(ostream_wrapper& out, char ch, StringEscaping::value stringEscapingStyle) {
  if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
    out << ch;
  } else if (ch == '\"') {
    out << R"("\"")";
  } else if (ch == '\t') {
    out << R"("\t")";
  } else if (ch == '\n') {
    out << R"("\n")";
  } else if (ch == '\b') {
    out << R"("\b")";
  } else if (ch == '\r') {
    out << R"("\r")";
  } else if (ch == '\f') {
    out << R"("\f")";
  } else if (ch == '\\') {
    out << R"("\\")";
  } else if (0x20 <= ch && ch <= 0x7e) {
    out << "\"" << ch << "\"";
  } else {
    out << "\"";
    // Convert through unsigned char so that bytes >= 0x80 are passed as
    // 0x80..0xFF. Where plain char is signed they would otherwise reach the
    // escape writer as negative values and, in the four-digit form, come out
    // as \uffXX instead of \u00XX.
    WriteDoubleQuoteEscapeSequence(out, static_cast<unsigned char>(ch),
                                   stringEscapingStyle);
    out << "\"";
  }
  return true;
}

bool WriteComment(ostream_wrapper& out, const std::string& str,
411
                  std::size_t postCommentIndent) {
412
  const std::size_t curIndent = out.col();
Jesse Beder's avatar
Jesse Beder committed
413
414
415
416
417
418
  out << "#" << Indentation(postCommentIndent);
  out.set_comment();
  int codePoint;
  for (std::string::const_iterator i = str.begin();
       GetNextCodePointAndAdvance(codePoint, i, str.end());) {
    if (codePoint == '\n') {
419
420
      out << "\n"
          << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
Jesse Beder's avatar
Jesse Beder committed
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
      out.set_comment();
    } else {
      WriteCodePoint(out, codePoint);
    }
  }
  return true;
}

// Writes an alias reference: "*" followed by the alias name `str`. Returns
// false when the name contains a character that is not allowed in an alias;
// the "*" has already been written in that case.
bool WriteAlias(ostream_wrapper& out, const std::string& str) {
  out << "*";
  const bool nameWritten = WriteAliasName(out, str);
  return nameWritten;
}

// Writes an anchor definition: "&" followed by the anchor name `str`. Returns
// false when the name contains a character that is not allowed in an anchor;
// the "&" has already been written in that case.
bool WriteAnchor(ostream_wrapper& out, const std::string& str) {
  out << "&";
  const bool nameWritten = WriteAliasName(out, str);
  return nameWritten;
}

// Writes `str` to `out` as a tag: "!<...>" when `verbatim` is set, otherwise
// "!...". The content is validated piece by piece against the URI pattern
// (verbatim) or the tag pattern. Returns false at the first position where the
// pattern does not match; output written up to that point is left in place.
bool WriteTag(ostream_wrapper& out, const std::string& str, bool verbatim) {
  if (verbatim) {
    out << "!<";
  } else {
    out << "!";
  }

  const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
  StringCharSource buffer(str.c_str(), str.size());
  while (buffer) {
    const int matched = reValid.Match(buffer);
    if (matched <= 0) {
      return false;
    }

    // Copy exactly the characters covered by this match.
    for (int copied = 0; copied < matched; ++copied) {
      out << buffer[0];
      ++buffer;
    }
  }

  if (verbatim) {
    out << ">";
  }
  return true;
}

Jesse Beder's avatar
Jesse Beder committed
460
461
462
463
464
465
// Writes a tag of the form "!prefix!tag". `prefix` is validated against the
// URI pattern and `tag` against the tag pattern, piece by piece. Returns false
// at the first position where the relevant pattern does not match; output
// written up to that point is left in place.
bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
                        const std::string& tag) {
  out << "!";
  StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
  while (prefixBuffer) {
    const int matched = Exp::URI().Match(prefixBuffer);
    if (matched <= 0) {
      return false;
    }

    // Copy exactly the characters covered by this match.
    for (int copied = 0; copied < matched; ++copied) {
      out << prefixBuffer[0];
      ++prefixBuffer;
    }
  }

  out << "!";
  StringCharSource tagBuffer(tag.c_str(), tag.size());
  while (tagBuffer) {
    const int matched = Exp::Tag().Match(tagBuffer);
    if (matched <= 0) {
      return false;
    }

    // Copy exactly the characters covered by this match.
    for (int copied = 0; copied < matched; ++copied) {
      out << tagBuffer[0];
      ++tagBuffer;
    }
  }
  return true;
}

// Writes `binary` to `out` as a double-quoted scalar holding its base64
// encoding; no extra string escaping style is applied.
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
  const std::string encoded = EncodeBase64(binary.data(), binary.size());
  return WriteDoubleQuotedString(out, encoded, StringEscaping::None);
}
497
498
}  // namespace Utils
}  // namespace YAML