scanner.cpp 8.67 KB
Newer Older
1
#include <cassert>
2
#include <memory>
3

Jesse Beder's avatar
Jesse Beder committed
4
5
6
7
8
#include "exp.h"
#include "scanner.h"
#include "token.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

Jesse Beder's avatar
Jesse Beder committed
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
namespace YAML {
Scanner::Scanner(std::istream& in)
    : INPUT(in),
      m_startedStream(false),
      m_endedStream(false),
      m_simpleKeyAllowed(false),
      m_canBeJSONFlow(false) {}

// Nothing to release by hand; members clean up after themselves.
Scanner::~Scanner() = default;

// Reports whether the token queue is empty after giving the scanner a chance
// to refill it (see EnsureTokensInQueue).
bool Scanner::empty() {
  EnsureTokensInQueue();
  const bool queueIsEmpty = m_tokens.empty();
  return queueIsEmpty;
}

// Discards the token at the front of the queue, refilling the queue first.
// Does nothing when no token is available.
void Scanner::pop() {
  EnsureTokensInQueue();
  if (m_tokens.empty()) {
    return;
  }
  m_tokens.pop();
}

// Returns a reference to the token at the front of the queue, refilling the
// queue first (see EnsureTokensInQueue).
//
// Precondition: a token must be available; callers should check empty()
// before peeking. The assert below only enforces this when assertions are
// enabled (i.e. NDEBUG is not defined).
Token& Scanner::peek() {
  EnsureTokensInQueue();
  assert(!m_tokens.empty());  // should we be asserting here? Callers should
                              // really just check empty() before peeking.

#if 0
  // disabled debugging aid: log each new front token once
  static Token *pLast = 0;
  if(pLast != &m_tokens.front())
    std::cerr << "peek: " << m_tokens.front() << "\n";
  pLast = &m_tokens.front();
#endif

  return m_tokens.front();
}

// Returns the mark currently reported by the input stream.
Mark Scanner::mark() const {
  return INPUT.mark();
}

void Scanner::EnsureTokensInQueue() {
  while (1) {
    if (!m_tokens.empty()) {
      Token& token = m_tokens.front();

      // if this guy's valid, then we're done
54
      if (token.status == Token::VALID) {
Jesse Beder's avatar
Jesse Beder committed
55
        return;
56
      }
Jesse Beder's avatar
Jesse Beder committed
57
58
59
60
61
62
63
64
65
66
67

      // here's where we clean up the impossible tokens
      if (token.status == Token::INVALID) {
        m_tokens.pop();
        continue;
      }

      // note: what's left are the unverified tokens
    }

    // no token? maybe we've actually finished
68
      if (m_endedStream) {
Jesse Beder's avatar
Jesse Beder committed
69
      return;
70
      }
Jesse Beder's avatar
Jesse Beder committed
71
72
73
74
75
76
77

    // no? then scan...
    ScanNextToken();
  }
}

void Scanner::ScanNextToken() {
78
  if (m_endedStream) {
Jesse Beder's avatar
Jesse Beder committed
79
    return;
80
  }
Jesse Beder's avatar
Jesse Beder committed
81

82
  if (!m_startedStream) {
Jesse Beder's avatar
Jesse Beder committed
83
    return StartStream();
84
  }
Jesse Beder's avatar
Jesse Beder committed
85
86
87
88
89
90
91
92
93
94
95
96

  // get rid of whitespace, etc. (in between tokens it should be irrelevent)
  ScanToNextToken();

  // maybe need to end some blocks
  PopIndentToHere();

  // *****
  // And now branch based on the next few characters!
  // *****

  // end of stream
97
  if (!INPUT) {
Jesse Beder's avatar
Jesse Beder committed
98
    return EndStream();
99
  }
Jesse Beder's avatar
Jesse Beder committed
100

101
  if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
Jesse Beder's avatar
Jesse Beder committed
102
    return ScanDirective();
103
  }
Jesse Beder's avatar
Jesse Beder committed
104
105

  // document token
106
  if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
107
    return ScanDocStart();
108
  }
Jesse Beder's avatar
Jesse Beder committed
109

110
  if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
111
    return ScanDocEnd();
112
  }
Jesse Beder's avatar
Jesse Beder committed
113
114

  // flow start/end/entry
115
116
  if (INPUT.peek() == Keys::FlowSeqStart ||
      INPUT.peek() == Keys::FlowMapStart) {
Jesse Beder's avatar
Jesse Beder committed
117
    return ScanFlowStart();
118
  }
Jesse Beder's avatar
Jesse Beder committed
119

120
  if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
Jesse Beder's avatar
Jesse Beder committed
121
    return ScanFlowEnd();
122
  }
Jesse Beder's avatar
Jesse Beder committed
123

124
  if (INPUT.peek() == Keys::FlowEntry) {
Jesse Beder's avatar
Jesse Beder committed
125
    return ScanFlowEntry();
126
  }
Jesse Beder's avatar
Jesse Beder committed
127
128

  // block/map stuff
129
  if (Exp::BlockEntry().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
130
    return ScanBlockEntry();
131
  }
Jesse Beder's avatar
Jesse Beder committed
132

133
  if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
134
    return ScanKey();
135
  }
Jesse Beder's avatar
Jesse Beder committed
136

137
  if (GetValueRegex().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
138
    return ScanValue();
139
  }
Jesse Beder's avatar
Jesse Beder committed
140
141

  // alias/anchor
142
  if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
Jesse Beder's avatar
Jesse Beder committed
143
    return ScanAnchorOrAlias();
144
  }
Jesse Beder's avatar
Jesse Beder committed
145
146

  // tag
147
  if (INPUT.peek() == Keys::Tag) {
Jesse Beder's avatar
Jesse Beder committed
148
    return ScanTag();
149
  }
Jesse Beder's avatar
Jesse Beder committed
150
151
152

  // special scalars
  if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
153
                           INPUT.peek() == Keys::FoldedScalar)) {
Jesse Beder's avatar
Jesse Beder committed
154
    return ScanBlockScalar();
155
  }
Jesse Beder's avatar
Jesse Beder committed
156

157
  if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
Jesse Beder's avatar
Jesse Beder committed
158
    return ScanQuotedScalar();
159
  }
Jesse Beder's avatar
Jesse Beder committed
160
161
162

  // plain scalars
  if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
163
          .Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
164
    return ScanPlainScalar();
165
  }
Jesse Beder's avatar
Jesse Beder committed
166
167
168
169
170
171
172
173
174

  // don't know what it is!
  throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}

void Scanner::ScanToNextToken() {
  while (1) {
    // first eat whitespace
    while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
175
      if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
176
        m_simpleKeyAllowed = false;
177
      }
Jesse Beder's avatar
Jesse Beder committed
178
179
      INPUT.eat(1);
    }
180

Jesse Beder's avatar
Jesse Beder committed
181
182
183
    // then eat a comment
    if (Exp::Comment().Matches(INPUT)) {
      // eat until line break
184
      while (INPUT && !Exp::Break().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
185
        INPUT.eat(1);
186
      }
187
188
    }

Jesse Beder's avatar
Jesse Beder committed
189
    // if it's NOT a line break, then we're done!
190
    if (!Exp::Break().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
191
      break;
192
    }
Jesse Beder's avatar
Jesse Beder committed
193
194
195
196
197
198
199
200
201

    // otherwise, let's eat the line break and keep going
    int n = Exp::Break().Match(INPUT);
    INPUT.eat(n);

    // oh yeah, and let's get rid of that simple key
    InvalidateSimpleKey();

    // new line - we may be able to accept a simple key now
202
    if (InBlockContext()) {
Jesse Beder's avatar
Jesse Beder committed
203
      m_simpleKeyAllowed = true;
204
    }
Jesse Beder's avatar
Jesse Beder committed
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  }
}

///////////////////////////////////////////////////////////////////////
// Misc. helpers

// IsWhitespaceToBeEaten
// . Returns true when `ch` is a space or a tab, false for anything else.
// . Note: originally tabs in block context couldn't be eaten
//         "where a simple key could be allowed (i.e., not at the beginning of
//         a line, or following '-', '?', or ':')"
//   I think this is wrong, since tabs can be non-content whitespace; it's just
//   that they can't contribute to indentation, so once you've seen a tab in a
//   line, you can't start a simple key
bool Scanner::IsWhitespaceToBeEaten(char ch) {
  return ch == ' ' || ch == '\t';
}

// Selects the expression used to recognize a value indicator at the current
// position:
// . block context                      -> Exp::Value()
// . flow context, m_canBeJSONFlow set  -> Exp::ValueInJSONFlow()
// . flow context otherwise             -> Exp::ValueInFlow()
const RegEx& Scanner::GetValueRegex() const {
  if (InBlockContext()) {
    return Exp::Value();
  }

  return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
}

void Scanner::StartStream() {
  m_startedStream = true;
  m_simpleKeyAllowed = true;
243
244
  std::unique_ptr<IndentMarker> pIndent(
      new IndentMarker(-1, IndentMarker::NONE));
245
  m_indentRefs.push_back(std::move(pIndent));
Jesse Beder's avatar
Jesse Beder committed
246
247
248
249
250
  m_indents.push(&m_indentRefs.back());
}

void Scanner::EndStream() {
  // force newline
251
  if (INPUT.column() > 0) {
Jesse Beder's avatar
Jesse Beder committed
252
    INPUT.ResetColumn();
253
  }
Jesse Beder's avatar
Jesse Beder committed
254
255
256
257
258
259

  PopAllIndents();
  PopAllSimpleKeys();

  m_simpleKeyAllowed = false;
  m_endedStream = true;
260
}
261

Jesse Beder's avatar
Jesse Beder committed
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
// Appends a token of the given type, stamped with the current input mark, to
// the back of the queue and returns a pointer to the queued token.
Token* Scanner::PushToken(Token::TYPE type) {
  m_tokens.push(Token(type, INPUT.mark()));
  Token& queued = m_tokens.back();
  return &queued;
}

// Maps an indent type to the token type that opens it:
// . SEQ -> Token::BLOCK_SEQ_START
// . MAP -> Token::BLOCK_MAP_START
// Any other value (including NONE) is an internal error: it trips the assert
// when assertions are enabled and otherwise throws std::runtime_error.
Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  if (type == IndentMarker::SEQ) {
    return Token::BLOCK_SEQ_START;
  }
  if (type == IndentMarker::MAP) {
    return Token::BLOCK_MAP_START;
  }

  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
}

// Tries to open a new indentation level of the given type at `column`.
//
// Returns nullptr, pushing nothing, when
// . we are in flow context, or
// . `column` is less than the column of the current top indent, or
// . `column` equals the current top indent's column, unless a SEQ is being
//   pushed on top of a MAP.
// Otherwise queues the matching start token, pushes the new marker onto the
// indent stack, and returns a pointer to it.
//
// Assumes m_indents is non-empty (StartStream pushes a base marker).
Scanner::IndentMarker* Scanner::PushIndentTo(int column,
                                             IndentMarker::INDENT_TYPE type) {
  // are we in flow?
  if (InFlowContext()) {
    return nullptr;
  }

  std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
  IndentMarker& indent = *pIndent;
  const IndentMarker& lastIndent = *m_indents.top();

  // is this actually an indentation?
  if (indent.column < lastIndent.column) {
    return nullptr;
  }
  if (indent.column == lastIndent.column &&
      !(indent.type == IndentMarker::SEQ &&
        lastIndent.type == IndentMarker::MAP)) {
    return nullptr;
  }

  // push a start token
  indent.pStartToken = PushToken(GetStartTokenFor(type));

  // and then the indent; m_indentRefs takes ownership of the marker
  m_indents.push(&indent);
  m_indentRefs.push_back(std::move(pIndent));
  return &m_indentRefs.back();
}

void Scanner::PopIndentToHere() {
  // are we in flow?
313
  if (InFlowContext()) {
Jesse Beder's avatar
Jesse Beder committed
314
    return;
315
  }
Jesse Beder's avatar
Jesse Beder committed
316
317
318
319

  // now pop away
  while (!m_indents.empty()) {
    const IndentMarker& indent = *m_indents.top();
320
    if (indent.column < INPUT.column()) {
Jesse Beder's avatar
Jesse Beder committed
321
      break;
322
    }
323
324
    if (indent.column == INPUT.column() &&
        !(indent.type == IndentMarker::SEQ &&
325
          !Exp::BlockEntry().Matches(INPUT))) {
Jesse Beder's avatar
Jesse Beder committed
326
      break;
327
    }
Jesse Beder's avatar
Jesse Beder committed
328
329
330
331

    PopIndent();
  }

332
333
  while (!m_indents.empty() &&
         m_indents.top()->status == IndentMarker::INVALID) {
Jesse Beder's avatar
Jesse Beder committed
334
    PopIndent();
335
  }
Jesse Beder's avatar
Jesse Beder committed
336
337
338
339
}

void Scanner::PopAllIndents() {
  // are we in flow?
340
  if (InFlowContext()) {
Jesse Beder's avatar
Jesse Beder committed
341
    return;
342
  }
Jesse Beder's avatar
Jesse Beder committed
343
344
345
346

  // now pop away
  while (!m_indents.empty()) {
    const IndentMarker& indent = *m_indents.top();
347
    if (indent.type == IndentMarker::NONE) {
Jesse Beder's avatar
Jesse Beder committed
348
      break;
349
    }
Jesse Beder's avatar
Jesse Beder committed
350
351
352
353
354
355
356
357
358
359
360
361
362
363

    PopIndent();
  }
}

void Scanner::PopIndent() {
  const IndentMarker& indent = *m_indents.top();
  m_indents.pop();

  if (indent.status != IndentMarker::VALID) {
    InvalidateSimpleKey();
    return;
  }

364
  if (indent.type == IndentMarker::SEQ) {
Jesse Beder's avatar
Jesse Beder committed
365
    m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
366
  } else if (indent.type == IndentMarker::MAP) {
Jesse Beder's avatar
Jesse Beder committed
367
    m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
368
  }
Jesse Beder's avatar
Jesse Beder committed
369
370
371
}

int Scanner::GetTopIndent() const {
372
  if (m_indents.empty()) {
Jesse Beder's avatar
Jesse Beder committed
373
    return 0;
374
  }
Jesse Beder's avatar
Jesse Beder committed
375
376
377
378
379
380
381
382
383
384
385
  return m_indents.top()->column;
}

void Scanner::ThrowParserException(const std::string& msg) const {
  Mark mark = Mark::null_mark();
  if (!m_tokens.empty()) {
    const Token& token = m_tokens.front();
    mark = token.mark;
  }
  throw ParserException(mark, msg);
}
386
}  // namespace YAML