"git@developer.sourcefind.cn:orangecat/ollama.git" did not exist on "3020d2dc58f788f5de0af7ff0f556f0ac626bc57"
scanner.cpp 8.79 KB
Newer Older
1
#include <cassert>
2
#include <memory>
3

Jesse Beder's avatar
Jesse Beder committed
4
5
6
7
8
#include "exp.h"
#include "scanner.h"
#include "token.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

Jesse Beder's avatar
Jesse Beder committed
9
10
11
namespace YAML {
// Creates a scanner that reads from `in`.
//
// Every stream-state flag starts out false and every token/indent/flow
// container starts out empty; nothing is scanned here.
Scanner::Scanner(std::istream& in)
    : INPUT(in),
      m_tokens{},
      m_startedStream{false},
      m_endedStream{false},
      m_simpleKeyAllowed{false},
      m_canBeJSONFlow{false},
      m_simpleKeys{},
      m_indents{},
      m_indentRefs{},
      m_flows{} {}
Jesse Beder's avatar
Jesse Beder committed
21

22
// Nothing is released by hand: the compiler-generated member-wise destruction
// is used, defined here rather than inline in the class.
Scanner::~Scanner() = default;
Jesse Beder's avatar
Jesse Beder committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

// Returns true when no token is left in the queue.
//
// The queue is topped up first (see EnsureTokensInQueue), so this call may
// scan more input before answering.
bool Scanner::empty() {
  EnsureTokensInQueue();
  const bool queueIsEmpty = m_tokens.empty();
  return queueIsEmpty;
}

// Discards the front token, if there is one.
//
// The queue is topped up first; popping when nothing is left is a no-op.
void Scanner::pop() {
  EnsureTokensInQueue();
  if (m_tokens.empty()) {
    return;
  }
  m_tokens.pop();
}

// Returns a reference to the front token without removing it.
//
// The queue is topped up first. Peeking when no token is left is a caller
// error that is only caught by the assert below, so callers should check
// empty() before peeking.
Token& Scanner::peek() {
  EnsureTokensInQueue();

  // Should we be asserting here? Callers really should just be checking
  // whether the scanner is empty before peeking.
  assert(!m_tokens.empty());

#if 0
  static Token *pLast = 0;
  if(pLast != &m_tokens.front())
    std::cerr << "peek: " << m_tokens.front() << "\n";
  pLast = &m_tokens.front();
#endif

  Token& frontToken = m_tokens.front();
  return frontToken;
}

// Returns the current mark of the underlying input stream.
Mark Scanner::mark() const {
  return INPUT.mark();
}

void Scanner::EnsureTokensInQueue() {
  while (1) {
    if (!m_tokens.empty()) {
      Token& token = m_tokens.front();

      // if this guy's valid, then we're done
59
      if (token.status == Token::VALID) {
Jesse Beder's avatar
Jesse Beder committed
60
        return;
61
      }
Jesse Beder's avatar
Jesse Beder committed
62
63
64
65
66
67
68
69
70
71
72

      // here's where we clean up the impossible tokens
      if (token.status == Token::INVALID) {
        m_tokens.pop();
        continue;
      }

      // note: what's left are the unverified tokens
    }

    // no token? maybe we've actually finished
Jesse Beder's avatar
Jesse Beder committed
73
    if (m_endedStream) {
Jesse Beder's avatar
Jesse Beder committed
74
      return;
Jesse Beder's avatar
Jesse Beder committed
75
    }
Jesse Beder's avatar
Jesse Beder committed
76
77
78
79
80
81
82

    // no? then scan...
    ScanNextToken();
  }
}

void Scanner::ScanNextToken() {
83
  if (m_endedStream) {
Jesse Beder's avatar
Jesse Beder committed
84
    return;
85
  }
Jesse Beder's avatar
Jesse Beder committed
86

87
  if (!m_startedStream) {
Jesse Beder's avatar
Jesse Beder committed
88
    return StartStream();
89
  }
Jesse Beder's avatar
Jesse Beder committed
90
91
92
93
94
95
96
97
98
99
100
101

  // get rid of whitespace, etc. (in between tokens it should be irrelevent)
  ScanToNextToken();

  // maybe need to end some blocks
  PopIndentToHere();

  // *****
  // And now branch based on the next few characters!
  // *****

  // end of stream
102
  if (!INPUT) {
Jesse Beder's avatar
Jesse Beder committed
103
    return EndStream();
104
  }
Jesse Beder's avatar
Jesse Beder committed
105

106
  if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
Jesse Beder's avatar
Jesse Beder committed
107
    return ScanDirective();
108
  }
Jesse Beder's avatar
Jesse Beder committed
109
110

  // document token
111
  if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
112
    return ScanDocStart();
113
  }
Jesse Beder's avatar
Jesse Beder committed
114

115
  if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
116
    return ScanDocEnd();
117
  }
Jesse Beder's avatar
Jesse Beder committed
118
119

  // flow start/end/entry
120
121
  if (INPUT.peek() == Keys::FlowSeqStart ||
      INPUT.peek() == Keys::FlowMapStart) {
Jesse Beder's avatar
Jesse Beder committed
122
    return ScanFlowStart();
123
  }
Jesse Beder's avatar
Jesse Beder committed
124

125
  if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
Jesse Beder's avatar
Jesse Beder committed
126
    return ScanFlowEnd();
127
  }
Jesse Beder's avatar
Jesse Beder committed
128

129
  if (INPUT.peek() == Keys::FlowEntry) {
Jesse Beder's avatar
Jesse Beder committed
130
    return ScanFlowEntry();
131
  }
Jesse Beder's avatar
Jesse Beder committed
132
133

  // block/map stuff
134
  if (Exp::BlockEntry().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
135
    return ScanBlockEntry();
136
  }
Jesse Beder's avatar
Jesse Beder committed
137

138
  if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
139
    return ScanKey();
140
  }
Jesse Beder's avatar
Jesse Beder committed
141

142
  if (GetValueRegex().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
143
    return ScanValue();
144
  }
Jesse Beder's avatar
Jesse Beder committed
145
146

  // alias/anchor
147
  if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
Jesse Beder's avatar
Jesse Beder committed
148
    return ScanAnchorOrAlias();
149
  }
Jesse Beder's avatar
Jesse Beder committed
150
151

  // tag
152
  if (INPUT.peek() == Keys::Tag) {
Jesse Beder's avatar
Jesse Beder committed
153
    return ScanTag();
154
  }
Jesse Beder's avatar
Jesse Beder committed
155
156
157

  // special scalars
  if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
158
                           INPUT.peek() == Keys::FoldedScalar)) {
Jesse Beder's avatar
Jesse Beder committed
159
    return ScanBlockScalar();
160
  }
Jesse Beder's avatar
Jesse Beder committed
161

162
  if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
Jesse Beder's avatar
Jesse Beder committed
163
    return ScanQuotedScalar();
164
  }
Jesse Beder's avatar
Jesse Beder committed
165
166
167

  // plain scalars
  if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
168
          .Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
169
    return ScanPlainScalar();
170
  }
Jesse Beder's avatar
Jesse Beder committed
171
172
173
174
175
176
177
178
179

  // don't know what it is!
  throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}

void Scanner::ScanToNextToken() {
  while (1) {
    // first eat whitespace
    while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
180
      if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
181
        m_simpleKeyAllowed = false;
182
      }
Jesse Beder's avatar
Jesse Beder committed
183
184
      INPUT.eat(1);
    }
185

Jesse Beder's avatar
Jesse Beder committed
186
187
188
    // then eat a comment
    if (Exp::Comment().Matches(INPUT)) {
      // eat until line break
189
      while (INPUT && !Exp::Break().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
190
        INPUT.eat(1);
191
      }
192
193
    }

Jesse Beder's avatar
Jesse Beder committed
194
    // if it's NOT a line break, then we're done!
195
    if (!Exp::Break().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
196
      break;
197
    }
Jesse Beder's avatar
Jesse Beder committed
198
199
200
201
202
203
204
205
206

    // otherwise, let's eat the line break and keep going
    int n = Exp::Break().Match(INPUT);
    INPUT.eat(n);

    // oh yeah, and let's get rid of that simple key
    InvalidateSimpleKey();

    // new line - we may be able to accept a simple key now
207
    if (InBlockContext()) {
Jesse Beder's avatar
Jesse Beder committed
208
      m_simpleKeyAllowed = true;
209
    }
Jesse Beder's avatar
Jesse Beder committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
  }
}

///////////////////////////////////////////////////////////////////////
// Misc. helpers

// IsWhitespaceToBeEaten
// . Returns true for the characters that may be skipped between tokens: a
//   space or a tab.
// . Note: originally tabs in block context couldn't be eaten
//         "where a simple key could be allowed
//         (i.e., not at the beginning of a line, or following '-', '?', or
//         ':')"
//   I think this is wrong, since tabs can be non-content whitespace; it's just
//   that they can't contribute to indentation, so once you've seen a tab in a
//   line, you can't start a simple key.
bool Scanner::IsWhitespaceToBeEaten(char ch) {
  return ch == ' ' || ch == '\t';
}

// Picks the expression used to recognise a value indicator in the current
// context: Exp::Value() in block context; in flow context
// Exp::ValueInJSONFlow() when m_canBeJSONFlow is set, Exp::ValueInFlow()
// otherwise.
const RegEx& Scanner::GetValueRegex() const {
  if (!InBlockContext()) {
    if (m_canBeJSONFlow) {
      return Exp::ValueInJSONFlow();
    }
    return Exp::ValueInFlow();
  }

  return Exp::Value();
}

void Scanner::StartStream() {
  m_startedStream = true;
  m_simpleKeyAllowed = true;
248
249
  std::unique_ptr<IndentMarker> pIndent(
      new IndentMarker(-1, IndentMarker::NONE));
250
  m_indentRefs.push_back(std::move(pIndent));
Jesse Beder's avatar
Jesse Beder committed
251
252
253
254
255
  m_indents.push(&m_indentRefs.back());
}

void Scanner::EndStream() {
  // force newline
256
  if (INPUT.column() > 0) {
Jesse Beder's avatar
Jesse Beder committed
257
    INPUT.ResetColumn();
258
  }
Jesse Beder's avatar
Jesse Beder committed
259
260
261
262
263
264

  PopAllIndents();
  PopAllSimpleKeys();

  m_simpleKeyAllowed = false;
  m_endedStream = true;
265
}
266

Jesse Beder's avatar
Jesse Beder committed
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
// Appends a token of the given type, stamped with the current input mark, to
// the token queue and returns a pointer to the queued copy.
//
// NOTE(review): the returned pointer refers to the element stored inside
// m_tokens; how long it stays valid depends on that container's type, which is
// declared outside this file - confirm before holding on to it.
Token* Scanner::PushToken(Token::TYPE type) {
  m_tokens.push(Token(type, INPUT.mark()));
  Token& queued = m_tokens.back();
  return &queued;
}

// Maps an indent type to the token that opens the corresponding block:
// SEQ -> BLOCK_SEQ_START, MAP -> BLOCK_MAP_START.
//
// Any other value (including NONE) is an internal error: it trips an assert
// and, when asserts are compiled out, throws std::runtime_error.
Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
  if (type == IndentMarker::SEQ) {
    return Token::BLOCK_SEQ_START;
  }
  if (type == IndentMarker::MAP) {
    return Token::BLOCK_MAP_START;
  }

  // NONE has no start token; reaching this point means a caller bug.
  assert(false);
  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
}

// Opens a new block at `column` if that really is a new indentation level.
//
// Returns nullptr (and changes nothing) when:
//  - we are in flow context, where indentation is not tracked;
//  - `column` is less than the current top indent's column;
//  - `column` equals the current top indent's column, unless the new indent is
//    a SEQ and the current top is a MAP (a sequence may start at the same
//    column as the map that contains it).
//
// Otherwise pushes the matching BLOCK_*_START token, records the new indent
// marker, and returns a pointer to it. The marker is owned by m_indentRefs;
// m_indents only observes it.
Scanner::IndentMarker* Scanner::PushIndentTo(int column,
                                             IndentMarker::INDENT_TYPE type) {
  // are we in flow?
  if (InFlowContext()) {
    return nullptr;
  }

  std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
  IndentMarker& indent = *pIndent;
  const IndentMarker& lastIndent = *m_indents.top();

  // is this actually an indentation?
  if (indent.column < lastIndent.column) {
    return nullptr;
  }
  if (indent.column == lastIndent.column &&
      !(indent.type == IndentMarker::SEQ &&
        lastIndent.type == IndentMarker::MAP)) {
    return nullptr;
  }

  // push a start token
  indent.pStartToken = PushToken(GetStartTokenFor(type));

  // Hand ownership to m_indentRefs *before* exposing the raw pointer through
  // m_indents (the same order StartStream uses). If storing the owner throws,
  // m_indents is left untouched instead of holding a pointer to a marker that
  // is destroyed during unwinding. Moving the unique_ptr does not move the
  // marker itself, so `indent` still refers to the same object afterwards.
  m_indentRefs.push_back(std::move(pIndent));
  m_indents.push(&indent);
  return &m_indentRefs.back();
}

void Scanner::PopIndentToHere() {
  // are we in flow?
318
  if (InFlowContext()) {
Jesse Beder's avatar
Jesse Beder committed
319
    return;
320
  }
Jesse Beder's avatar
Jesse Beder committed
321
322
323
324

  // now pop away
  while (!m_indents.empty()) {
    const IndentMarker& indent = *m_indents.top();
325
    if (indent.column < INPUT.column()) {
Jesse Beder's avatar
Jesse Beder committed
326
      break;
327
    }
328
329
    if (indent.column == INPUT.column() &&
        !(indent.type == IndentMarker::SEQ &&
330
          !Exp::BlockEntry().Matches(INPUT))) {
Jesse Beder's avatar
Jesse Beder committed
331
      break;
332
    }
Jesse Beder's avatar
Jesse Beder committed
333
334
335
336

    PopIndent();
  }

337
338
  while (!m_indents.empty() &&
         m_indents.top()->status == IndentMarker::INVALID) {
Jesse Beder's avatar
Jesse Beder committed
339
    PopIndent();
340
  }
Jesse Beder's avatar
Jesse Beder committed
341
342
343
344
}

void Scanner::PopAllIndents() {
  // are we in flow?
345
  if (InFlowContext()) {
Jesse Beder's avatar
Jesse Beder committed
346
    return;
347
  }
Jesse Beder's avatar
Jesse Beder committed
348
349
350
351

  // now pop away
  while (!m_indents.empty()) {
    const IndentMarker& indent = *m_indents.top();
352
    if (indent.type == IndentMarker::NONE) {
Jesse Beder's avatar
Jesse Beder committed
353
      break;
354
    }
Jesse Beder's avatar
Jesse Beder committed
355
356
357
358
359
360
361
362
363
364
365
366
367
368

    PopIndent();
  }
}

void Scanner::PopIndent() {
  const IndentMarker& indent = *m_indents.top();
  m_indents.pop();

  if (indent.status != IndentMarker::VALID) {
    InvalidateSimpleKey();
    return;
  }

369
  if (indent.type == IndentMarker::SEQ) {
Jesse Beder's avatar
Jesse Beder committed
370
    m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
371
  } else if (indent.type == IndentMarker::MAP) {
Jesse Beder's avatar
Jesse Beder committed
372
    m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
373
  }
Jesse Beder's avatar
Jesse Beder committed
374
375
376
}

int Scanner::GetTopIndent() const {
377
  if (m_indents.empty()) {
Jesse Beder's avatar
Jesse Beder committed
378
    return 0;
379
  }
Jesse Beder's avatar
Jesse Beder committed
380
381
382
383
384
385
386
387
388
389
390
  return m_indents.top()->column;
}

void Scanner::ThrowParserException(const std::string& msg) const {
  Mark mark = Mark::null_mark();
  if (!m_tokens.empty()) {
    const Token& token = m_tokens.front();
    mark = token.mark;
  }
  throw ParserException(mark, msg);
}
391
}  // namespace YAML