// regex.cpp - implementation of the YAML::RegEx matching primitives
#include "crt.h"
#include "regex.h"
#include "stream.h"
#include <iostream>

namespace YAML
{
	// RegEx (operator constructor)
	// . Builds an expression of the given operator type with an empty
	//   parameter list; the operator overloads in this file push the
	//   sub-expressions onto m_params afterwards.
	// . m_a/m_z are not meaningful for such an expression, but they are
	//   zero-initialized because the copy constructor and assignment
	//   operator read them unconditionally.
	RegEx::RegEx(REGEX_OP op): m_op(op), m_pOp(0), m_a(0), m_z(0)
	{
		SetOp();
	}

	// RegEx (copy constructor)
	// . Copies the operator type, both character bounds and the parameter
	//   list from rhs.
	// . The operator object is never shared between instances: m_pOp starts
	//   out null and SetOp() allocates a fresh one for this copy.
	RegEx::RegEx(const RegEx& rhs)
		: m_op(rhs.m_op), m_pOp(0), m_a(rhs.m_a), m_z(rhs.m_z), m_params(rhs.m_params)
	{
		SetOp();
	}

	// RegEx (default constructor)
	// . Builds the empty regex (REGEX_EMPTY). SetOp() has no case for that
	//   type, so m_pOp stays null.
	// . m_a/m_z are zero-initialized because the copy constructor and
	//   assignment operator read them unconditionally.
	RegEx::RegEx(): m_op(REGEX_EMPTY), m_pOp(0), m_a(0), m_z(0)
	{
		SetOp();
	}

	// RegEx (single character)
	// . Builds a REGEX_MATCH expression that matches exactly the character ch.
	// . m_z is not used by the match operator, but it is zero-initialized
	//   because the copy constructor and assignment operator read it
	//   unconditionally.
	RegEx::RegEx(char ch): m_op(REGEX_MATCH), m_pOp(0), m_a(ch), m_z(0)
	{
		SetOp();
	}

	// RegEx (character range)
	// . Builds a REGEX_RANGE expression; a is stored as the lower bound and
	//   z as the upper bound (both inclusive, see RangeOperator::Match).
	RegEx::RegEx(char a, char z)
		: m_op(REGEX_RANGE),
		  m_pOp(0),
		  m_a(a),
		  m_z(z)
	{
		SetOp();
	}

	// RegEx (string of characters)
	// . Builds an expression of type op whose parameters are one
	//   single-character RegEx per character of str, in order.
	// . m_a/m_z are not meaningful here, but they are zero-initialized
	//   because the copy constructor and assignment operator read them
	//   unconditionally.
	RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0), m_a(0), m_z(0)
	{
		// index with the string's own size type so the comparison is not mixed-width
		for(std::string::size_type i=0;i<str.size();i++)
			m_params.push_back(RegEx(str[i]));

		SetOp();
	}

	// ~RegEx
	// . Releases the operator object allocated by SetOp(); deleting a null
	//   m_pOp (the empty regex) is a no-op.
	// . NOTE(review): m_pOp is deleted through its declared pointer type, so
	//   that type needs a virtual destructor - confirm in regex.h.
	RegEx::~RegEx()
	{
		delete m_pOp;
	}

	// operator =
	// . Copies the operator type, character bounds and parameter list from
	//   rhs, then rebuilds this object's own operator object.
	// . Self-assignment is detected up front and leaves the object untouched.
	// . Returns *this.
	RegEx& RegEx::operator = (const RegEx& rhs)
	{
		if(this == &rhs)
			return *this;

		m_op = rhs.m_op;
		m_a = rhs.m_a;
		m_z = rhs.m_z;
		m_params = rhs.m_params;

		// SetOp() deletes the previous operator object before allocating the
		// one that matches the new m_op, so no separate delete is needed here
		SetOp();

		return *this;
	}

	// SetOp
	// . Replaces m_pOp with a newly allocated operator object that matches
	//   the current value of m_op.
	// . Any operator object held so far is deleted first.
	// . For operator types without a case below (e.g. REGEX_EMPTY) m_pOp
	//   stays null; the Match() overloads treat a null m_pOp as the empty regex.
	void RegEx::SetOp()
	{
		delete m_pOp;
		m_pOp = 0;
		switch(m_op) {
			case REGEX_MATCH: m_pOp = new MatchOperator; break;
			case REGEX_RANGE: m_pOp = new RangeOperator; break;
			case REGEX_OR: m_pOp = new OrOperator; break;
			case REGEX_AND: m_pOp = new AndOperator; break;
			case REGEX_NOT: m_pOp = new NotOperator; break;
			case REGEX_SEQ: m_pOp = new SeqOperator; break;
			default: break;
		}
	}

	bool RegEx::Matches(char ch) const
	{
		std::string str;
		str += ch;
		return Matches(str);
	}

	// Matches (string)
	// . True when Match(str) reports a match, i.e. anything but a negative
	//   length (a zero-length match counts as success).
	bool RegEx::Matches(const std::string& str) const
	{
		const int matchedLength = Match(str);
		return !(matchedLength < 0);
	}

	bool RegEx::Matches(const Buffer& buffer) const
94
	{
95
		return Match(buffer) >= 0;
96
97
	}
	
	bool RegEx::Matches(const Stream& in) const
99
100
101
102
103
104
105
106
107
108
109
110
111
	{
		return Match(in) >= 0;
	}

	// Match
	// . Matches the given string against this regular expression.
	// . Returns the number of characters matched.
	// . Returns -1 if no characters were matched (the reason for
	//   not returning zero is that we may have an empty regex
	//   which is ALWAYS successful at matching zero characters).
	int RegEx::Match(const std::string& str) const
	{
		if(!m_pOp)
112
			return str.empty() ? 0 : -1;  // the empty regex only is successful on the empty string
113
114
115
116
117

		return m_pOp->Match(str, *this);
	}

	// Match
118
	int RegEx::Match(const Stream& in) const
119
	{
120
		return Match(in.current());
121
122
123
	}
	
	// Match
124
125
126
	// . The buffer version does the same thing as the string version;
	//   REMEMBER that we only match from the start of the buffer!
	int RegEx::Match(const Buffer& buffer) const
127
128
	{
		if(!m_pOp)
129
			return !buffer ? 0 : -1;  // see above
130

131
		return m_pOp->Match(buffer, *this);
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
	}

	// operator !
	// . Returns a REGEX_NOT expression whose single parameter is a copy of ex.
	RegEx operator ! (const RegEx& ex)
	{
		RegEx negated(REGEX_NOT);
		negated.m_params.push_back(ex);
		return negated;
	}

	// operator ||
	// . Returns a REGEX_OR expression holding copies of ex1 and ex2, in that
	//   order (OrOperator tries its parameters in order).
	RegEx operator || (const RegEx& ex1, const RegEx& ex2)
	{
		RegEx alternatives(REGEX_OR);
		alternatives.m_params.push_back(ex1);
		alternatives.m_params.push_back(ex2);
		return alternatives;
	}

	// operator &&
	// . Returns a REGEX_AND expression holding copies of ex1 and ex2, in that
	//   order (AndOperator reports the match length of its first parameter).
	RegEx operator && (const RegEx& ex1, const RegEx& ex2)
	{
		RegEx conjunction(REGEX_AND);
		conjunction.m_params.push_back(ex1);
		conjunction.m_params.push_back(ex2);
		return conjunction;
	}

	// operator +
	// . Returns a REGEX_SEQ expression holding copies of ex1 and ex2, so that
	//   ex1 is matched first and ex2 right after it.
	RegEx operator + (const RegEx& ex1, const RegEx& ex2)
	{
		RegEx sequence(REGEX_SEQ);
		sequence.m_params.push_back(ex1);
		sequence.m_params.push_back(ex2);
		return sequence;
	}

	//////////////////////////////////////////////////////////////////////////////
	// Operators

	// MatchOperator
	// . Succeeds with length 1 when str is non-empty and its first character
	//   equals regex.m_a; otherwise returns -1.
	int RegEx::MatchOperator::Match(const std::string& str, const RegEx& regex) const
	{
		const bool hit = !str.empty() && str[0] == regex.m_a;
		return hit ? 1 : -1;
	}

	
	int RegEx::MatchOperator::Match(const Buffer& buffer, const RegEx& regex) const
178
	{
179
		if(!buffer || buffer[0] != regex.m_a)
180
181
182
183
184
185
186
187
188
189
190
191
			return -1;
		return 1;
	}

	// RangeOperator
	// . Succeeds with length 1 when str is non-empty and its first character
	//   lies in the inclusive range [regex.m_a, regex.m_z]; otherwise -1.
	int RegEx::RangeOperator::Match(const std::string& str, const RegEx& regex) const
	{
		if(str.empty())
			return -1;

		const char first = str[0];
		const bool inRange = !(regex.m_a > first) && !(regex.m_z < first);
		return inRange ? 1 : -1;
	}

	int RegEx::RangeOperator::Match(const Buffer& buffer, const RegEx& regex) const
193
	{
194
		if(!buffer || regex.m_a > buffer[0] || regex.m_z < buffer[0])
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
			return -1;
		return 1;
	}

	// OrOperator
	// . Tries each parameter in order against str and returns the length
	//   reported by the first one that matches.
	// . Returns -1 when no parameter matches (or there are none).
	int RegEx::OrOperator::Match(const std::string& str, const RegEx& regex) const
	{
		unsigned idx = 0;
		while(idx < regex.m_params.size()) {
			const int matched = regex.m_params[idx].Match(str);
			if(matched >= 0)
				return matched;
			++idx;
		}
		return -1;
	}

	int RegEx::OrOperator::Match(const Buffer& buffer, const RegEx& regex) const
211
212
	{
		for(unsigned i=0;i<regex.m_params.size();i++) {
213
			int n = regex.m_params[i].Match(buffer);
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
			if(n >= 0)
				return n;
		}
		return -1;
	}

	// AndOperator
	// . Every parameter must match str (each one is tried from the start).
	// . The parameters may match different lengths; on success the length
	//   reported is the one matched by the FIRST parameter.
	// . Returns -1 as soon as any parameter fails, or when there are no
	//   parameters at all.
	int RegEx::AndOperator::Match(const std::string& str, const RegEx& regex) const
	{
		if(regex.m_params.empty())
			return -1;

		const int firstLength = regex.m_params[0].Match(str);
		if(firstLength == -1)
			return -1;

		for(unsigned idx=1; idx<regex.m_params.size(); ++idx) {
			if(regex.m_params[idx].Match(str) == -1)
				return -1;
		}
		return firstLength;
	}

	int RegEx::AndOperator::Match(const Buffer& buffer, const RegEx& regex) const
238
239
240
	{
		int first = -1;
		for(unsigned i=0;i<regex.m_params.size();i++) {
241
			int n = regex.m_params[i].Match(buffer);
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
			if(n == -1)
				return -1;
			if(i == 0)
				first = n;
		}
		return first;
	}

	// NotOperator
	// . Only the first parameter is consulted; with no parameters the
	//   result is -1.
	// . Returns -1 when that parameter matches str, and 1 when it does not.
	//   The length 1 is reported regardless of how long str is, so an
	//   empty str also yields 1.
	int RegEx::NotOperator::Match(const std::string& str, const RegEx& regex) const
	{
		if(regex.m_params.empty())
			return -1;

		const bool operandMatched = regex.m_params[0].Match(str) >= 0;
		return operandMatched ? -1 : 1;
	}

	int RegEx::NotOperator::Match(const Buffer& buffer, const RegEx& regex) const
261
262
263
	{
		if(regex.m_params.empty())
			return -1;
264
		if(regex.m_params[0].Match(buffer) >= 0)
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
			return -1;
		return 1;
	}

	// SeqOperator
	// . Matches the parameters one after another: each one is tried against
	//   the part of str that follows everything matched so far.
	// . Returns the sum of the matched lengths, or -1 as soon as one
	//   parameter fails.
	// . A sub-match may report more characters than str actually has
	//   (NotOperator reports 1 even for an empty string). The start position
	//   is therefore clamped to str.size(), so the remaining parameters see
	//   an empty string instead of substr() throwing std::out_of_range.
	int RegEx::SeqOperator::Match(const std::string& str, const RegEx& regex) const
	{
		const int length = static_cast<int>(str.size());
		int offset = 0;
		for(unsigned i=0;i<regex.m_params.size();i++) {
			const int start = (offset < length ? offset : length);
			int n = regex.m_params[i].Match(str.substr(start));
			if(n == -1)
				return -1;
			offset += n;
		}
		return offset;
	}
	
	int RegEx::SeqOperator::Match(const Buffer& buffer, const RegEx& regex) const
283
284
285
	{
		int offset = 0;
		for(unsigned i=0;i<regex.m_params.size();i++) {
286
			int n = regex.m_params[i].Match(buffer + offset);
287
288
289
290
291
292
293
294
295
296
			if(n == -1)
				return -1;

			offset += n;
		}

		return offset;
	}
}