exp.cpp 2.99 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include "exp.h"
#include "exceptions.h"

namespace YAML
{
	namespace Exp
	{
		unsigned ParseHex(std::string str)
		{
			unsigned value = 0;
			for(unsigned i=0;i<str.size();i++) {
				char ch = str[i];
				int digit = 0;
				if('a' <= ch && ch <= 'f')
					digit = ch - 'a' + 10;
				else if('A' <= ch && ch <= 'F')
					digit = ch - 'A' + 10;
				else if('0' <= ch && ch <= '9')
					digit = ch - '0';
				else
					throw NonHexNumber(ch);

				value = (value << 4) + digit;
			}

			return value;
		}

		std::string Str(char ch)
		{
			return std::string("") + ch;
		}

		// Escape
		// . Translates the next 'codeLength' characters into a hex number and returns the result.
		// . Throws if it's not actually hex.
		std::string Escape(std::istream& in, int& length, int codeLength)
		{
			// grab string
			length += codeLength;
			std::string str;
			for(int i=0;i<codeLength;i++)
				str += in.get();

			// get the value
			unsigned value = ParseHex(str);

			// legal unicode?
Jesse Beder's avatar
Jesse Beder committed
49
50
51
52
53
54
55
56
57
58
59
60
			if((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
				throw InvalidUnicode(value);

			// now break it up into chars
			if(value <= 0x7F)
				return Str(value);
			else if(value <= 0x7FF)
				return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
			else if(value <= 0xFFFF)
				return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
			else
				return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
61
62
63
64
					Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
		}

		// Escape
65
		// . Escapes the sequence starting 'in' (it must begin with a '\' or single quote)
66
67
68
69
70
71
72
73
74
		//   and returns the result.
		// . Fills 'length' with how many characters we ate.
		// . Throws if it's an unknown escape character.
		std::string Escape(std::istream& in, int& length)
		{
			// slash + character
			length = 2;

			// eat slash
75
			char escape = in.get();
76
77
78

			// switch on escape character
			char ch = in.get();
79
80
81
82
83
84

			// first do single quote, since it's easier
			if(escape == '\'' && ch == '\'')
				return "\'";

			// now do the slash (we're not gonna check if it's a slash - you better pass one!)
Jesse Beder's avatar
Jesse Beder committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
			switch(ch) {
				case '0': return "\0";
				case 'a': return "\x07";
				case 'b': return "\x08";
				case 't':
                case '\t': return "\x09";
				case 'n': return "\x0A";
				case 'v': return "\x0B";
				case 'f': return "\x0C";
				case 'r': return "\x0D";
				case 'e': return "\x1B";
				case ' ': return "\x20";
				case '\"': return "\"";
				case '\'': return "\'";
				case '\\': return "\\";
				case 'N': return "\xC2\x85";  // NEL (#x85)
				case '_': return "\xC2\xA0";  // #xA0
				case 'L': return "\xE2\x80\xA8";  // LS (#x2028)
				case 'P': return "\xE2\x80\xA9";  // PS (#x2029)
				case 'x': return Escape(in, length, 2);
				case 'u': return Escape(in, length, 4);
106
107
108
109
110
111
112
				case 'U': return Escape(in, length, 8);
			}

			throw UnknownEscapeSequence(ch);
		}
	}
}