scanner.h 4.7 KB
Newer Older
1
2
3
#ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
#define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66

Jesse Beder's avatar
Jesse Beder committed
4
5
6
// Use "#pragma once" only on compilers known to handle it correctly: MSVC,
// and GCC 3.4 or newer. Both GCC version tests are grouped under
// defined(__GNUC__) so that __GNUC__ is never evaluated on a compiler that
// does not define it.
#if defined(_MSC_VER) || \
    (defined(__GNUC__) && \
     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)))
#pragma once
#endif

10
#include <cstddef>
11
#include <ios>
Jesse Beder's avatar
Jesse Beder committed
12
#include <map>
13
14
#include <queue>
#include <set>
Jesse Beder's avatar
Jesse Beder committed
15
16
17
#include <stack>
#include <string>

18
#include "ptr_vector.h"
19
20
#include "stream.h"
#include "token.h"
Jesse Beder's avatar
Jesse Beder committed
21
#include "yaml-cpp/mark.h"
22

Jesse Beder's avatar
Jesse Beder committed
23
24
25
26
namespace YAML {
class Node;
class RegEx;

27
28
29
/**
 * A scanner transforms a stream of characters into a stream of tokens.
 */
Jesse Beder's avatar
Jesse Beder committed
30
31
class Scanner {
 public:
32
  explicit Scanner(std::istream &in);
Jesse Beder's avatar
Jesse Beder committed
33
34
  ~Scanner();

35
  /** Returns true if there are no more tokens to be read. */
Jesse Beder's avatar
Jesse Beder committed
36
  bool empty();
37
38

  /** Removes the next token in the queue. */
Jesse Beder's avatar
Jesse Beder committed
39
  void pop();
40
41

  /** Returns, but does not remove, the next token in the queue. */
Jesse Beder's avatar
Jesse Beder committed
42
  Token &peek();
43
44

  /** Returns the current mark in the input stream. */
Jesse Beder's avatar
Jesse Beder committed
45
46
47
48
  Mark mark() const;

 private:
  struct IndentMarker {
Jesse Beder's avatar
Jesse Beder committed
49
50
    enum INDENT_TYPE { MAP, SEQ, NONE };
    enum STATUS { VALID, INVALID, UNKNOWN };
Jesse Beder's avatar
Jesse Beder committed
51
    IndentMarker(int column_, INDENT_TYPE type_)
52
        : column(column_), type(type_), status(VALID), pStartToken(nullptr) {}
Jesse Beder's avatar
Jesse Beder committed
53
54
55
56
57
58
59

    int column;
    INDENT_TYPE type;
    STATUS status;
    Token *pStartToken;
  };

Jesse Beder's avatar
Jesse Beder committed
60
  enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
Jesse Beder's avatar
Jesse Beder committed
61
62
63

 private:
  // scanning
64
65
66
67
68
69

  /**
   * Scans until there's a valid token at the front of the queue, or the queue
   * is empty. The state can be checked by {@link #empty}, and the next token
   * retrieved by {@link #peek}.
   */
Jesse Beder's avatar
Jesse Beder committed
70
  void EnsureTokensInQueue();
71
72
73
74
75

  /**
   * The main scanning function; this method branches out to scan whatever the
   * next token should be.
   */
Jesse Beder's avatar
Jesse Beder committed
76
  void ScanNextToken();
77
78

  /** Eats the input stream until it reaches the next token-like thing. */
Jesse Beder's avatar
Jesse Beder committed
79
  void ScanToNextToken();
80
81

  /** Sets the initial conditions for starting a stream. */
Jesse Beder's avatar
Jesse Beder committed
82
  void StartStream();
83
84

  /** Closes out the stream, finish up, etc. */
Jesse Beder's avatar
Jesse Beder committed
85
  void EndStream();
86

Jesse Beder's avatar
Jesse Beder committed
87
88
89
90
  Token *PushToken(Token::TYPE type);

  bool InFlowContext() const { return !m_flows.empty(); }
  bool InBlockContext() const { return m_flows.empty(); }
91
  std::size_t GetFlowLevel() const { return m_flows.size(); }
Jesse Beder's avatar
Jesse Beder committed
92
93

  Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
94
95
96
97
98
99
100

  /**
   * Pushes an indentation onto the stack, and enqueues the proper token
   * (sequence start or mapping start).
   *
   * @return the indent marker it generates (if any).
   */
Jesse Beder's avatar
Jesse Beder committed
101
  IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
102
103
104
105
106
107

  /**
   * Pops indentations off the stack until it reaches the current indentation
   * level, and enqueues the proper token each time. Then pops all invalid
   * indentations off.
   */
Jesse Beder's avatar
Jesse Beder committed
108
  void PopIndentToHere();
109
110
111
112
113

  /**
   * Pops all indentations (except for the base empty one) off the stack, and
   * enqueues the proper token each time.
   */
Jesse Beder's avatar
Jesse Beder committed
114
  void PopAllIndents();
115
116

  /** Pops a single indent, pushing the proper token. */
Jesse Beder's avatar
Jesse Beder committed
117
118
119
120
121
122
123
124
125
126
127
  void PopIndent();
  int GetTopIndent() const;

  // checking input
  bool CanInsertPotentialSimpleKey() const;
  bool ExistsActiveSimpleKey() const;
  void InsertPotentialSimpleKey();
  void InvalidateSimpleKey();
  bool VerifySimpleKey();
  void PopAllSimpleKeys();

128
129
130
131
  /**
   * Throws a ParserException with the current token location (if available),
   * and does not parse any more tokens.
   */
Jesse Beder's avatar
Jesse Beder committed
132
133
134
  void ThrowParserException(const std::string &msg) const;

  bool IsWhitespaceToBeEaten(char ch);
135
136
137
138

  /**
   * Returns the appropriate regex to check if the next token is a value token.
   */
Jesse Beder's avatar
Jesse Beder committed
139
140
141
  const RegEx &GetValueRegex() const;

  struct SimpleKey {
142
    SimpleKey(const Mark &mark_, std::size_t flowLevel_);
Jesse Beder's avatar
Jesse Beder committed
143
144
145
146
147

    void Validate();
    void Invalidate();

    Mark mark;
148
    std::size_t flowLevel;
Jesse Beder's avatar
Jesse Beder committed
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
    IndentMarker *pIndent;
    Token *pMapStart, *pKey;
  };

  // and the tokens
  void ScanDirective();
  void ScanDocStart();
  void ScanDocEnd();
  void ScanBlockSeqStart();
  void ScanBlockMapSTart();
  void ScanBlockEnd();
  void ScanBlockEntry();
  void ScanFlowStart();
  void ScanFlowEnd();
  void ScanFlowEntry();
  void ScanKey();
  void ScanValue();
  void ScanAnchorOrAlias();
  void ScanTag();
  void ScanPlainScalar();
  void ScanQuotedScalar();
  void ScanBlockScalar();

 private:
  // the stream
  Stream INPUT;

  // the output (tokens)
  std::queue<Token> m_tokens;

  // state info
  bool m_startedStream, m_endedStream;
  bool m_simpleKeyAllowed;
  bool m_canBeJSONFlow;
  std::stack<SimpleKey> m_simpleKeys;
  std::stack<IndentMarker *> m_indents;
  ptr_vector<IndentMarker> m_indentRefs;  // for "garbage collection"
  std::stack<FLOW_MARKER> m_flows;
};
188
}
189

Jesse Beder's avatar
Jesse Beder committed
190
#endif  // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66