exp.cpp 3.18 KB
Newer Older
1
2
#include <sstream>

Jesse Beder's avatar
Jesse Beder committed
3
4
5
6
7
8
9
10
#include "exp.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

namespace YAML {
struct Mark;
}  // namespace YAML

Jesse Beder's avatar
Jesse Beder committed
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
namespace YAML {
namespace Exp {
// ParseHex
// . Interprets 'str' as a sequence of hexadecimal digits (either case) and
//   returns the accumulated value, most significant digit first.
// . Throws a ParserException at 'mark' on the first non-hex character.
unsigned ParseHex(const std::string& str, const Mark& mark) {
  unsigned result = 0;
  for (const char c : str) {
    unsigned nibble = 0;
    if (c >= '0' && c <= '9') {
      nibble = c - '0';
    } else if (c >= 'a' && c <= 'f') {
      nibble = c - 'a' + 10;
    } else if (c >= 'A' && c <= 'F') {
      nibble = c - 'A' + 10;
    } else {
      throw ParserException(mark, ErrorMsg::INVALID_HEX);
    }

    // make room for the new digit, then drop it into the low four bits
    result = (result << 4) | nibble;
  }

  return result;
}
32

Jesse Beder's avatar
Jesse Beder committed
33
// Str
// . Returns a one-character string holding 'ch' narrowed to a char.
std::string Str(unsigned ch) {
  const char narrowed = static_cast<char>(ch);
  return std::string(1, narrowed);
}
34

Jesse Beder's avatar
Jesse Beder committed
35
36
37
38
39
40
41
42
43
// Escape
// . Reads the next 'codeLength' characters from 'in', parses them as a hex
//   number, and returns that code point encoded as UTF-8 (one to four bytes).
// . Throws if the characters aren't hex, or if the value is a surrogate
//   (0xD800-0xDFFF) or lies beyond 0x10FFFF.
std::string Escape(Stream& in, int codeLength) {
  // collect the raw digits
  std::string digits;
  for (int remaining = codeLength; remaining > 0; --remaining)
    digits += in.get();

  // turn them into a number
  const unsigned codePoint = ParseHex(digits, in.mark());

  // reject values that aren't legal unicode
  const bool isSurrogate = 0xD800 <= codePoint && codePoint <= 0xDFFF;
  if (isSurrogate || codePoint > 0x10FFFF) {
    std::stringstream msg;
    msg << ErrorMsg::INVALID_UNICODE << codePoint;
    throw ParserException(in.mark(), msg.str());
  }

  // single byte: the value is emitted unchanged
  if (codePoint <= 0x7F)
    return Str(codePoint);

  // six-bit payloads of the trailing bytes, lowest group first
  const unsigned low = codePoint & 0x3F;
  const unsigned middle = (codePoint >> 6) & 0x3F;
  const unsigned high = (codePoint >> 12) & 0x3F;

  // two bytes
  if (codePoint <= 0x7FF)
    return Str(0xC0 | (codePoint >> 6)) + Str(0x80 | low);

  // three bytes
  if (codePoint <= 0xFFFF)
    return Str(0xE0 | (codePoint >> 12)) + Str(0x80 | middle) +
           Str(0x80 | low);

  // four bytes
  return Str(0xF0 | (codePoint >> 18)) + Str(0x80 | high) +
         Str(0x80 | middle) + Str(0x80 | low);
}
67

Jesse Beder's avatar
Jesse Beder committed
68
69
70
71
72
73
74
75
// Escape
// . Reads an escape sequence from 'in' and returns the text it stands for.
//   Characters outside ASCII are returned UTF-8 encoded, matching what the
//   \x, \u and \U escapes produce.
// . 'in' must be positioned on the introducer: a '\' or a single quote. The
//   introducer is consumed; it is only inspected to recognize the '' escape,
//   so anything else is treated as if it were a slash.
// . Throws a ParserException if the escape character is unknown. The numeric
//   escapes (\x, \u, \U) throw from Escape(in, codeLength) on bad input.
std::string Escape(Stream& in) {
  // eat the introducer (slash or single quote)
  char escape = in.get();

  // the character that selects the escape
  char ch = in.get();

  // first do single quote, since it's easier
  if (escape == '\'' && ch == '\'')
    return "\'";

  // now do the slash (we're not gonna check if it's a slash - you better pass
  // one!)
  switch (ch) {
    case '0':
      // built with an explicit length: a "\x00" literal would be read as an
      // empty C string
      return std::string(1, '\x00');
    case 'a':
      return "\x07";
    case 'b':
      return "\x08";
    case 't':
    case '\t':
      return "\x09";
    case 'n':
      return "\x0A";
    case 'v':
      return "\x0B";
    case 'f':
      return "\x0C";
    case 'r':
      return "\x0D";
    case 'e':
      return "\x1B";
    case ' ':
      return "\x20";
    case '\"':
      return "\"";
    case '\'':
      return "\'";
    case '\\':
      return "\\";
    case '/':
      return "/";
    // the next two are above 0x7F, so they need two bytes in UTF-8; a lone
    // 0x85 or 0xA0 byte would be an invalid sequence
    case 'N':
      return "\xC2\x85";  // NEL (#x85)
    case '_':
      return "\xC2\xA0";  // NBSP (#xA0)
    case 'L':
      return "\xE2\x80\xA8";  // LS (#x2028)
    case 'P':
      return "\xE2\x80\xA9";  // PS (#x2029)
    case 'x':
      return Escape(in, 2);
    case 'u':
      return Escape(in, 4);
    case 'U':
      return Escape(in, 8);
  }

  // anything else is not an escape we know about
  throw ParserException(in.mark(), std::string(ErrorMsg::INVALID_ESCAPE) + ch);
}
}
136
}