#include <cassert>
#include <memory>

#include "exp.h"
#include "scanner.h"
#include "token.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

namespace YAML {
// Constructs a scanner reading from `in`. Every state flag starts out false
// and the token queue, simple-key list, indent stack, indent storage and flow
// stack are value-initialized. The stream is only marked as started later, by
// StartStream().
Scanner::Scanner(std::istream& in)
    : INPUT(in),
      m_tokens{},
      m_startedStream{false},
      m_endedStream{false},
      m_simpleKeyAllowed{false},
      m_scalarValueAllowed{false},
      m_canBeJSONFlow{false},
      m_simpleKeys{},
      m_indents{},
      m_indentRefs{},
      m_flows{} {}
// Compiler-generated destructor, defined out of line in this translation unit
// (presumably so member types only need to be complete here -- TODO confirm
// against scanner.h).
Scanner::~Scanner() = default;

// Reports whether any token is left. Not const: it first calls
// EnsureTokensInQueue(), which may scan more input.
bool Scanner::empty() {
  EnsureTokensInQueue();
  const bool noTokensLeft = m_tokens.empty();
  return noTokensLeft;
}

// Discards the front token, if there is one. Popping an exhausted scanner is
// a silent no-op.
void Scanner::pop() {
  EnsureTokensInQueue();
  if (m_tokens.empty()) {
    return;
  }
  m_tokens.pop();
}

// Returns a reference to the token at the front of the queue, after letting
// EnsureTokensInQueue() scan more input if needed.
// The queue must not be empty afterwards; this is only checked by assert, so
// callers should test empty() before peeking.
Token& Scanner::peek() {
  EnsureTokensInQueue();

  // Open question from the original author: should this be an assertion at
  // all? Callers really ought to check empty() before peeking.
  assert(!m_tokens.empty());

#if 0
		static Token *pLast = 0;
		if(pLast != &m_tokens.front())
			std::cerr << "peek: " << m_tokens.front() << "\n";
		pLast = &m_tokens.front();
#endif

  Token& frontToken = m_tokens.front();
  return frontToken;
}

// Forwards to INPUT.mark(): the mark of the input stream itself, not of any
// token waiting in the queue.
Mark Scanner::mark() const {
  return INPUT.mark();
}

void Scanner::EnsureTokensInQueue() {
55
  while (true) {
Jesse Beder's avatar
Jesse Beder committed
56
57
58
59
    if (!m_tokens.empty()) {
      Token& token = m_tokens.front();

      // if this guy's valid, then we're done
60
      if (token.status == Token::VALID) {
Jesse Beder's avatar
Jesse Beder committed
61
        return;
62
      }
Jesse Beder's avatar
Jesse Beder committed
63
64
65
66
67
68
69
70
71
72
73

      // here's where we clean up the impossible tokens
      if (token.status == Token::INVALID) {
        m_tokens.pop();
        continue;
      }

      // note: what's left are the unverified tokens
    }

    // no token? maybe we've actually finished
Jesse Beder's avatar
Jesse Beder committed
74
    if (m_endedStream) {
Jesse Beder's avatar
Jesse Beder committed
75
      return;
Jesse Beder's avatar
Jesse Beder committed
76
    }
Jesse Beder's avatar
Jesse Beder committed
77
78
79
80
81
82
83

    // no? then scan...
    ScanNextToken();
  }
}

void Scanner::ScanNextToken() {
84
  if (m_endedStream) {
Jesse Beder's avatar
Jesse Beder committed
85
    return;
86
  }
Jesse Beder's avatar
Jesse Beder committed
87

88
  if (!m_startedStream) {
Jesse Beder's avatar
Jesse Beder committed
89
    return StartStream();
90
  }
Jesse Beder's avatar
Jesse Beder committed
91

Josh Soref's avatar
Josh Soref committed
92
  // get rid of whitespace, etc. (in between tokens it should be irrelevant)
Jesse Beder's avatar
Jesse Beder committed
93
94
95
96
97
98
99
100
101
102
  ScanToNextToken();

  // maybe need to end some blocks
  PopIndentToHere();

  // *****
  // And now branch based on the next few characters!
  // *****

  // end of stream
103
  if (!INPUT) {
Jesse Beder's avatar
Jesse Beder committed
104
    return EndStream();
105
  }
Jesse Beder's avatar
Jesse Beder committed
106

107
  if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
Jesse Beder's avatar
Jesse Beder committed
108
    return ScanDirective();
109
  }
Jesse Beder's avatar
Jesse Beder committed
110
111

  // document token
112
  if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
113
    return ScanDocStart();
114
  }
Jesse Beder's avatar
Jesse Beder committed
115

116
  if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
117
    return ScanDocEnd();
118
  }
Jesse Beder's avatar
Jesse Beder committed
119
120

  // flow start/end/entry
121
122
  if (INPUT.peek() == Keys::FlowSeqStart ||
      INPUT.peek() == Keys::FlowMapStart) {
Jesse Beder's avatar
Jesse Beder committed
123
    return ScanFlowStart();
124
  }
Jesse Beder's avatar
Jesse Beder committed
125

126
  if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
Jesse Beder's avatar
Jesse Beder committed
127
    return ScanFlowEnd();
128
  }
Jesse Beder's avatar
Jesse Beder committed
129

130
  if (INPUT.peek() == Keys::FlowEntry) {
131
132
133
134
135
136
137
138
139
140
141
    // values starting with `,` are not allowed.
    // eg: reject `,foo`
    if (INPUT.column() == 0) {
      throw ParserException(INPUT.mark(), ErrorMsg::UNEXPECTED_FLOW);
    }
    // if we already parsed a quoted scalar value and we are not in a flow,
    // then `,` is not a valid character.
    // eg: reject `"foo",`
    if (!m_scalarValueAllowed) {
      throw ParserException(INPUT.mark(), ErrorMsg::UNEXPECTED_SCALAR);
    }
Jesse Beder's avatar
Jesse Beder committed
142
    return ScanFlowEntry();
143
  }
Jesse Beder's avatar
Jesse Beder committed
144
145

  // block/map stuff
146
  if (Exp::BlockEntry().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
147
    return ScanBlockEntry();
148
  }
Jesse Beder's avatar
Jesse Beder committed
149

150
  if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
151
    return ScanKey();
152
  }
Jesse Beder's avatar
Jesse Beder committed
153

154
  if (GetValueRegex().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
155
    return ScanValue();
156
  }
Jesse Beder's avatar
Jesse Beder committed
157
158

  // alias/anchor
159
  if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
Jesse Beder's avatar
Jesse Beder committed
160
    return ScanAnchorOrAlias();
161
  }
Jesse Beder's avatar
Jesse Beder committed
162
163

  // tag
164
  if (INPUT.peek() == Keys::Tag) {
Jesse Beder's avatar
Jesse Beder committed
165
    return ScanTag();
166
  }
Jesse Beder's avatar
Jesse Beder committed
167
168
169

  // special scalars
  if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
170
                           INPUT.peek() == Keys::FoldedScalar)) {
Jesse Beder's avatar
Jesse Beder committed
171
    return ScanBlockScalar();
172
  }
Jesse Beder's avatar
Jesse Beder committed
173

174
175
176
177
178
179
180
  // if we already parsed a quoted scalar value in this line,
  // another scalar value is an error.
  // eg: reject `"foo" "bar"`
  if (!m_scalarValueAllowed) {
    throw ParserException(INPUT.mark(), ErrorMsg::UNEXPECTED_SCALAR);
  }

181
  if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
Jesse Beder's avatar
Jesse Beder committed
182
    return ScanQuotedScalar();
183
  }
Jesse Beder's avatar
Jesse Beder committed
184
185
186

  // plain scalars
  if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
187
          .Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
188
    return ScanPlainScalar();
189
  }
Jesse Beder's avatar
Jesse Beder committed
190
191
192
193
194
195

  // don't know what it is!
  throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}

void Scanner::ScanToNextToken() {
196
  while (true) {
Jesse Beder's avatar
Jesse Beder committed
197
198
    // first eat whitespace
    while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
199
      if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
200
        m_simpleKeyAllowed = false;
201
      }
Jesse Beder's avatar
Jesse Beder committed
202
203
      INPUT.eat(1);
    }
204

Jesse Beder's avatar
Jesse Beder committed
205
206
207
    // then eat a comment
    if (Exp::Comment().Matches(INPUT)) {
      // eat until line break
208
      while (INPUT && !Exp::Break().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
209
        INPUT.eat(1);
210
      }
211
212
    }

Jesse Beder's avatar
Jesse Beder committed
213
    // if it's NOT a line break, then we're done!
214
    if (!Exp::Break().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
215
      break;
216
    }
Jesse Beder's avatar
Jesse Beder committed
217
218
219
220
221
222
223
224

    // otherwise, let's eat the line break and keep going
    int n = Exp::Break().Match(INPUT);
    INPUT.eat(n);

    // oh yeah, and let's get rid of that simple key
    InvalidateSimpleKey();

225
226
227
    // new line - we accept a scalar value now
    m_scalarValueAllowed = true;

Jesse Beder's avatar
Jesse Beder committed
228
    // new line - we may be able to accept a simple key now
229
    if (InBlockContext()) {
Jesse Beder's avatar
Jesse Beder committed
230
      m_simpleKeyAllowed = true;
231
    }
Jesse Beder's avatar
Jesse Beder committed
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
  }
}

///////////////////////////////////////////////////////////////////////
// Misc. helpers

// IsWhitespaceToBeEaten
// . We can eat whitespace if it's a space or tab
// . Note: originally tabs in block context couldn't be eaten
//         "where a simple key could be allowed
//         (i.e., not at the beginning of a line, or following '-', '?', or
// ':')"
//   I think this is wrong, since tabs can be non-content whitespace; it's just
//   that they can't contribute to indentation, so once you've seen a tab in a
//   line, you can't start a simple key
bool Scanner::IsWhitespaceToBeEaten(char ch) {
  // only a plain space or a tab counts; line breaks are handled by the caller
  return ch == ' ' || ch == '\t';
}

// Selects the regex used to recognize a value indicator in the current
// context: Exp::Value() in block context; in flow context
// Exp::ValueInJSONFlow() when m_canBeJSONFlow is set, Exp::ValueInFlow()
// otherwise.
const RegEx& Scanner::GetValueRegex() const {
  if (!InBlockContext()) {
    if (m_canBeJSONFlow) {
      return Exp::ValueInJSONFlow();
    }
    return Exp::ValueInFlow();
  }

  return Exp::Value();
}

void Scanner::StartStream() {
  m_startedStream = true;
  m_simpleKeyAllowed = true;
270
  m_scalarValueAllowed = true;
271
272
  std::unique_ptr<IndentMarker> pIndent(
      new IndentMarker(-1, IndentMarker::NONE));
273
  m_indentRefs.push_back(std::move(pIndent));
Jesse Beder's avatar
Jesse Beder committed
274
275
276
277
278
  m_indents.push(&m_indentRefs.back());
}

void Scanner::EndStream() {
  // force newline
279
  if (INPUT.column() > 0) {
Jesse Beder's avatar
Jesse Beder committed
280
    INPUT.ResetColumn();
281
  }
Jesse Beder's avatar
Jesse Beder committed
282
283
284
285
286

  PopAllIndents();
  PopAllSimpleKeys();

  m_simpleKeyAllowed = false;
287
  m_scalarValueAllowed = false;
Jesse Beder's avatar
Jesse Beder committed
288
  m_endedStream = true;
289
}
// Appends a token of the given type, stamped with the current input mark, to
// the token queue and returns a pointer to the queued token.
Token* Scanner::PushToken(Token::TYPE type) {
  m_tokens.push(Token(type, INPUT.mark()));
  Token& queued = m_tokens.back();
  return &queued;
}

// Maps an indent type to the token type that opens the matching block
// collection: SEQ -> BLOCK_SEQ_START, MAP -> BLOCK_MAP_START.
// Any other value (including NONE) is an internal error: it asserts in debug
// builds and throws std::runtime_error otherwise.
Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  if (type == IndentMarker::SEQ) {
    return Token::BLOCK_SEQ_START;
  }
  if (type == IndentMarker::MAP) {
    return Token::BLOCK_MAP_START;
  }

  // NONE, or a value outside the enumeration
  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
}

// Tries to open a new indentation level at `column` for a collection of the
// given type. On success it queues the matching block-start token, pushes the
// marker onto the indent stack and returns a pointer to it.
// Returns nullptr, changing nothing, in flow context or when `column` does
// not indent relative to the current top of the stack.
Scanner::IndentMarker* Scanner::PushIndentTo(int column,
                                             IndentMarker::INDENT_TYPE type) {
  // are we in flow?
  if (InFlowContext()) {
    return nullptr;
  }

  std::unique_ptr<IndentMarker> candidate(new IndentMarker(column, type));
  const IndentMarker& current = *m_indents.top();

  // a smaller column is never an indentation
  if (candidate->column < current.column) {
    return nullptr;
  }

  // the same column only counts when a sequence starts directly inside a map
  const bool seqInsideMap = candidate->type == IndentMarker::SEQ &&
                            current.type == IndentMarker::MAP;
  if (candidate->column == current.column && !seqInsideMap) {
    return nullptr;
  }

  // push a start token and remember it on the marker
  candidate->pStartToken = PushToken(GetStartTokenFor(type));

  // the stack gets the raw pointer, m_indentRefs takes ownership
  m_indents.push(candidate.get());
  m_indentRefs.push_back(std::move(candidate));
  return &m_indentRefs.back();
}

void Scanner::PopIndentToHere() {
  // are we in flow?
342
  if (InFlowContext()) {
Jesse Beder's avatar
Jesse Beder committed
343
    return;
344
  }
Jesse Beder's avatar
Jesse Beder committed
345
346
347
348

  // now pop away
  while (!m_indents.empty()) {
    const IndentMarker& indent = *m_indents.top();
349
    if (indent.column < INPUT.column()) {
Jesse Beder's avatar
Jesse Beder committed
350
      break;
351
    }
352
353
    if (indent.column == INPUT.column() &&
        !(indent.type == IndentMarker::SEQ &&
354
          !Exp::BlockEntry().Matches(INPUT))) {
Jesse Beder's avatar
Jesse Beder committed
355
      break;
356
    }
Jesse Beder's avatar
Jesse Beder committed
357
358
359
360

    PopIndent();
  }

361
362
  while (!m_indents.empty() &&
         m_indents.top()->status == IndentMarker::INVALID) {
Jesse Beder's avatar
Jesse Beder committed
363
    PopIndent();
364
  }
Jesse Beder's avatar
Jesse Beder committed
365
366
367
368
}

void Scanner::PopAllIndents() {
  // are we in flow?
369
  if (InFlowContext()) {
Jesse Beder's avatar
Jesse Beder committed
370
    return;
371
  }
Jesse Beder's avatar
Jesse Beder committed
372
373
374
375

  // now pop away
  while (!m_indents.empty()) {
    const IndentMarker& indent = *m_indents.top();
376
    if (indent.type == IndentMarker::NONE) {
Jesse Beder's avatar
Jesse Beder committed
377
      break;
378
    }
Jesse Beder's avatar
Jesse Beder committed
379
380
381
382
383
384
385
386
387
388
389
390
391
392

    PopIndent();
  }
}

void Scanner::PopIndent() {
  const IndentMarker& indent = *m_indents.top();
  m_indents.pop();

  if (indent.status != IndentMarker::VALID) {
    InvalidateSimpleKey();
    return;
  }

393
  if (indent.type == IndentMarker::SEQ) {
Jesse Beder's avatar
Jesse Beder committed
394
    m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
395
  } else if (indent.type == IndentMarker::MAP) {
Jesse Beder's avatar
Jesse Beder committed
396
    m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
397
  }
Jesse Beder's avatar
Jesse Beder committed
398
399
400
}

int Scanner::GetTopIndent() const {
401
  if (m_indents.empty()) {
Jesse Beder's avatar
Jesse Beder committed
402
    return 0;
403
  }
Jesse Beder's avatar
Jesse Beder committed
404
405
406
407
408
409
410
411
412
413
414
  return m_indents.top()->column;
}

void Scanner::ThrowParserException(const std::string& msg) const {
  Mark mark = Mark::null_mark();
  if (!m_tokens.empty()) {
    const Token& token = m_tokens.front();
    mark = token.mark;
  }
  throw ParserException(mark, msg);
}
}  // namespace YAML