exp.cpp 3.17 KB
Newer Older
1
2
#include <sstream>

Jesse Beder's avatar
Jesse Beder committed
3
4
5
6
7
8
9
10
#include "exp.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

namespace YAML {
// Forward declaration of Mark, which ParseHex below takes by const reference.
struct Mark;
}  // namespace YAML

Jesse Beder's avatar
Jesse Beder committed
11
12
13
14
namespace YAML {
namespace Exp {
unsigned ParseHex(const std::string& str, const Mark& mark) {
  unsigned value = 0;
15
  for (char ch : str) {
Jesse Beder's avatar
Jesse Beder committed
16
17
18
19
20
21
22
23
24
    int digit = 0;
    if ('a' <= ch && ch <= 'f')
      digit = ch - 'a' + 10;
    else if ('A' <= ch && ch <= 'F')
      digit = ch - 'A' + 10;
    else if ('0' <= ch && ch <= '9')
      digit = ch - '0';
    else
      throw ParserException(mark, ErrorMsg::INVALID_HEX);
25

Jesse Beder's avatar
Jesse Beder committed
26
27
    value = (value << 4) + digit;
  }
28

Jesse Beder's avatar
Jesse Beder committed
29
30
  return value;
}
31

Jesse Beder's avatar
Jesse Beder committed
32
// Str
// . Returns a one-character string holding 'ch' converted to char.
std::string Str(unsigned ch) {
  const char byte = static_cast<char>(ch);
  return std::string(&byte, 1);
}
33

Jesse Beder's avatar
Jesse Beder committed
34
35
36
37
38
39
40
41
42
// Escape
// . Reads the next 'codeLength' characters from 'in', parses them as a
//   hexadecimal code point and returns that code point encoded as UTF-8
//   (one to four bytes).
// . Throws if the characters aren't actually hex, or if the value is a
//   surrogate (0xD800-0xDFFF) or lies above 0x10FFFF.
std::string Escape(Stream& in, int codeLength) {
  // collect the raw digits
  std::string digits;
  for (int remaining = codeLength; remaining > 0; --remaining) {
    digits += in.get();
  }

  // turn them into a number
  const unsigned codePoint = ParseHex(digits, in.mark());

  // reject anything that isn't a legal unicode scalar value
  const bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
  const bool isOutOfRange = codePoint > 0x10FFFF;
  if (isSurrogate || isOutOfRange) {
    std::stringstream msg;
    msg << ErrorMsg::INVALID_UNICODE << codePoint;
    throw ParserException(in.mark(), msg.str());
  }

  // continuation byte carrying the six bits that start at bit 'shift'
  const auto tail = [codePoint](unsigned shift) {
    return static_cast<char>(0x80 + ((codePoint >> shift) & 0x3F));
  };

  // lead byte first, then however many continuation bytes the range needs
  std::string utf8;
  if (codePoint <= 0x7F) {
    utf8 += static_cast<char>(codePoint);
  } else if (codePoint <= 0x7FF) {
    utf8 += static_cast<char>(0xC0 + (codePoint >> 6));
    utf8 += tail(0);
  } else if (codePoint <= 0xFFFF) {
    utf8 += static_cast<char>(0xE0 + (codePoint >> 12));
    utf8 += tail(6);
    utf8 += tail(0);
  } else {
    utf8 += static_cast<char>(0xF0 + (codePoint >> 18));
    utf8 += tail(12);
    utf8 += tail(6);
    utf8 += tail(0);
  }
  return utf8;
}
66

Jesse Beder's avatar
Jesse Beder committed
67
68
69
70
71
72
73
74
// Escape
// . Consumes the escape sequence at the front of 'in' (it must begin with a
//   '\' or single quote) and returns the text it stands for, encoded as UTF-8.
// . The introducer itself is not validated; only a doubled single quote is
//   special-cased, everything else is treated as a slash escape.
// . Throws if it's an unknown escape character.
std::string Escape(Stream& in) {
  // eat the introducer (slash or single quote)
  char escape = in.get();

  // the character that selects the escape
  char ch = in.get();

  // first do single quote, since it's easier
  if (escape == '\'' && ch == '\'')
    return "\'";

  // now do the slash (we're not gonna check if it's a slash - you better pass
  // one!)
  switch (ch) {
    case '0':
      // built with an explicit length: a "\x00" literal would convert to an
      // empty string
      return std::string(1, '\x00');
    case 'a':
      return "\x07";
    case 'b':
      return "\x08";
    case 't':
    case '\t':
      return "\x09";
    case 'n':
      return "\x0A";
    case 'v':
      return "\x0B";
    case 'f':
      return "\x0C";
    case 'r':
      return "\x0D";
    case 'e':
      return "\x1B";
    case ' ':
      return "\x20";
    case '\"':
      return "\"";
    case '\'':
      return "\'";
    case '\\':
      return "\\";
    case '/':
      return "/";
    // The code points below are above 0x7F, so they are spelled out as
    // multi-byte UTF-8, matching what Escape(in, codeLength) produces for the
    // same values.
    case 'N':
      return "\xC2\x85";  // NEL (#x85)
    case '_':
      return "\xC2\xA0";  // NBSP (#xA0)
    case 'L':
      return "\xE2\x80\xA8";  // LS (#x2028)
    case 'P':
      return "\xE2\x80\xA9";  // PS (#x2029)
    case 'x':
      return Escape(in, 2);
    case 'u':
      return Escape(in, 4);
    case 'U':
      return Escape(in, 8);
  }

  // anything else is not a recognized escape
  throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
}
134
135
}  // namespace Exp
}  // namespace YAML