#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66

#if defined(_MSC_VER) ||                                            \
    (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
     (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
#pragma once
#endif

#include <cstddef>
#include <ios>
#include <queue>
#include <stack>
#include <string>

#include "ptr_vector.h"
#include "stream.h"
#include "token.h"
#include "yaml-cpp/mark.h"

namespace YAML {
class Node;
class RegEx;

25
26
27
/**
 * A scanner transforms a stream of characters into a stream of tokens.
 */
Jesse Beder's avatar
Jesse Beder committed
28
29
class Scanner {
 public:
30
  explicit Scanner(std::istream &in);
Jesse Beder's avatar
Jesse Beder committed
31
32
  ~Scanner();

33
  /** Returns true if there are no more tokens to be read. */
Jesse Beder's avatar
Jesse Beder committed
34
  bool empty();
35
36

  /** Removes the next token in the queue. */
Jesse Beder's avatar
Jesse Beder committed
37
  void pop();
38
39

  /** Returns, but does not remove, the next token in the queue. */
Jesse Beder's avatar
Jesse Beder committed
40
  Token &peek();
41
42

  /** Returns the current mark in the input stream. */
Jesse Beder's avatar
Jesse Beder committed
43
44
45
46
  Mark mark() const;

 private:
  struct IndentMarker {
Jesse Beder's avatar
Jesse Beder committed
47
48
    enum INDENT_TYPE { MAP, SEQ, NONE };
    enum STATUS { VALID, INVALID, UNKNOWN };
Jesse Beder's avatar
Jesse Beder committed
49
    IndentMarker(int column_, INDENT_TYPE type_)
50
        : column(column_), type(type_), status(VALID), pStartToken(nullptr) {}
Jesse Beder's avatar
Jesse Beder committed
51
52
53
54
55
56
57

    int column;
    INDENT_TYPE type;
    STATUS status;
    Token *pStartToken;
  };

Jesse Beder's avatar
Jesse Beder committed
58
  enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
Jesse Beder's avatar
Jesse Beder committed
59
60
61

 private:
  // scanning
62
63
64
65
66
67

  /**
   * Scans until there's a valid token at the front of the queue, or the queue
   * is empty. The state can be checked by {@link #empty}, and the next token
   * retrieved by {@link #peek}.
   */
Jesse Beder's avatar
Jesse Beder committed
68
  void EnsureTokensInQueue();
69
70
71
72
73

  /**
   * The main scanning function; this method branches out to scan whatever the
   * next token should be.
   */
Jesse Beder's avatar
Jesse Beder committed
74
  void ScanNextToken();
75
76

  /** Eats the input stream until it reaches the next token-like thing. */
Jesse Beder's avatar
Jesse Beder committed
77
  void ScanToNextToken();
78
79

  /** Sets the initial conditions for starting a stream. */
Jesse Beder's avatar
Jesse Beder committed
80
  void StartStream();
81
82

  /** Closes out the stream, finish up, etc. */
Jesse Beder's avatar
Jesse Beder committed
83
  void EndStream();
84

Jesse Beder's avatar
Jesse Beder committed
85
86
87
88
  Token *PushToken(Token::TYPE type);

  bool InFlowContext() const { return !m_flows.empty(); }
  bool InBlockContext() const { return m_flows.empty(); }
89
  std::size_t GetFlowLevel() const { return m_flows.size(); }
Jesse Beder's avatar
Jesse Beder committed
90
91

  Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
92
93
94
95
96
97
98

  /**
   * Pushes an indentation onto the stack, and enqueues the proper token
   * (sequence start or mapping start).
   *
   * @return the indent marker it generates (if any).
   */
Jesse Beder's avatar
Jesse Beder committed
99
  IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
100
101
102
103
104
105

  /**
   * Pops indentations off the stack until it reaches the current indentation
   * level, and enqueues the proper token each time. Then pops all invalid
   * indentations off.
   */
Jesse Beder's avatar
Jesse Beder committed
106
  void PopIndentToHere();
107
108
109
110
111

  /**
   * Pops all indentations (except for the base empty one) off the stack, and
   * enqueues the proper token each time.
   */
Jesse Beder's avatar
Jesse Beder committed
112
  void PopAllIndents();
113
114

  /** Pops a single indent, pushing the proper token. */
Jesse Beder's avatar
Jesse Beder committed
115
116
117
118
119
120
121
122
123
124
125
  void PopIndent();
  int GetTopIndent() const;

  // checking input
  bool CanInsertPotentialSimpleKey() const;
  bool ExistsActiveSimpleKey() const;
  void InsertPotentialSimpleKey();
  void InvalidateSimpleKey();
  bool VerifySimpleKey();
  void PopAllSimpleKeys();

126
127
128
129
  /**
   * Throws a ParserException with the current token location (if available),
   * and does not parse any more tokens.
   */
Jesse Beder's avatar
Jesse Beder committed
130
131
132
  void ThrowParserException(const std::string &msg) const;

  bool IsWhitespaceToBeEaten(char ch);
133
134
135
136

  /**
   * Returns the appropriate regex to check if the next token is a value token.
   */
Jesse Beder's avatar
Jesse Beder committed
137
138
139
  const RegEx &GetValueRegex() const;

  struct SimpleKey {
140
    SimpleKey(const Mark &mark_, std::size_t flowLevel_);
Jesse Beder's avatar
Jesse Beder committed
141
142
143
144
145

    void Validate();
    void Invalidate();

    Mark mark;
146
    std::size_t flowLevel;
Jesse Beder's avatar
Jesse Beder committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
    IndentMarker *pIndent;
    Token *pMapStart, *pKey;
  };

  // and the tokens
  void ScanDirective();
  void ScanDocStart();
  void ScanDocEnd();
  void ScanBlockSeqStart();
  void ScanBlockMapSTart();
  void ScanBlockEnd();
  void ScanBlockEntry();
  void ScanFlowStart();
  void ScanFlowEnd();
  void ScanFlowEntry();
  void ScanKey();
  void ScanValue();
  void ScanAnchorOrAlias();
  void ScanTag();
  void ScanPlainScalar();
  void ScanQuotedScalar();
  void ScanBlockScalar();

 private:
  // the stream
  Stream INPUT;

  // the output (tokens)
  std::queue<Token> m_tokens;

  // state info
  bool m_startedStream, m_endedStream;
  bool m_simpleKeyAllowed;
180
  bool m_scalarValueAllowed;
Jesse Beder's avatar
Jesse Beder committed
181
182
183
184
185
186
  bool m_canBeJSONFlow;
  std::stack<SimpleKey> m_simpleKeys;
  std::stack<IndentMarker *> m_indents;
  ptr_vector<IndentMarker> m_indentRefs;  // for "garbage collection"
  std::stack<FLOW_MARKER> m_flows;
};
187
}

#endif  // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66