#include "scanscalar.h"

#include <algorithm>

#include "exp.h"
#include "regeximpl.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

namespace YAML {
// ScanScalar
// . This is where the scalar magic happens.
//
// . We do the scanning in three phases:
//   1. Scan until newline
//   2. Eat newline
//   3. Scan leading blanks.
//
// . Depending on the parameters given, we store or stop
//   and different places in the above flow.
std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
  bool foundNonEmptyLine = false;
  bool pastOpeningBreak = (params.fold == FOLD_FLOW);
  bool emptyLine = false, moreIndented = false;
  int foldedNewlineCount = 0;
  bool foldedNewlineStartedMoreIndented = false;
  std::size_t lastEscapedChar = std::string::npos;
  std::string scalar;
  params.leadingSpaces = false;

31
32
33
34
  if (!params.end) {
    params.end = &Exp::Empty();
  }

Jesse Beder's avatar
Jesse Beder committed
35
36
37
38
39
40
  while (INPUT) {
    // ********************************
    // Phase #1: scan until line ending

    std::size_t lastNonWhitespaceChar = scalar.size();
    bool escapedNewline = false;
41
    while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
42
      if (!INPUT) {
Jesse Beder's avatar
Jesse Beder committed
43
        break;
44
      }
Jesse Beder's avatar
Jesse Beder committed
45
46
47

      // document indicator?
      if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
48
        if (params.onDocIndicator == BREAK) {
Jesse Beder's avatar
Jesse Beder committed
49
          break;
50
        } else if (params.onDocIndicator == THROW) {
Jesse Beder's avatar
Jesse Beder committed
51
          throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
52
        }
Jesse Beder's avatar
Jesse Beder committed
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
      }

      foundNonEmptyLine = true;
      pastOpeningBreak = true;

      // escaped newline? (only if we're escaping on slash)
      if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
        // eat escape character and get out (but preserve trailing whitespace!)
        INPUT.get();
        lastNonWhitespaceChar = scalar.size();
        lastEscapedChar = scalar.size();
        escapedNewline = true;
        break;
      }

      // escape this?
      if (INPUT.peek() == params.escape) {
        scalar += Exp::Escape(INPUT);
        lastNonWhitespaceChar = scalar.size();
        lastEscapedChar = scalar.size();
        continue;
      }

      // otherwise, just add the damn character
      char ch = INPUT.get();
      scalar += ch;
79
      if (ch != ' ' && ch != '\t') {
Jesse Beder's avatar
Jesse Beder committed
80
        lastNonWhitespaceChar = scalar.size();
81
      }
Jesse Beder's avatar
Jesse Beder committed
82
83
84
85
    }

    // eof? if we're looking to eat something, then we throw
    if (!INPUT) {
86
      if (params.eatEnd) {
Jesse Beder's avatar
Jesse Beder committed
87
        throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
88
      }
Jesse Beder's avatar
Jesse Beder committed
89
90
91
92
93
      break;
    }

    // doc indicator?
    if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
94
        Exp::DocIndicator().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
95
      break;
96
    }
Jesse Beder's avatar
Jesse Beder committed
97
98

    // are we done via character match?
99
    int n = params.end->Match(INPUT);
Jesse Beder's avatar
Jesse Beder committed
100
    if (n >= 0) {
101
      if (params.eatEnd) {
Jesse Beder's avatar
Jesse Beder committed
102
        INPUT.eat(n);
103
      }
Jesse Beder's avatar
Jesse Beder committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
      break;
    }

    // do we remove trailing whitespace?
    if (params.fold == FOLD_FLOW)
      scalar.erase(lastNonWhitespaceChar);

    // ********************************
    // Phase #2: eat line ending
    n = Exp::Break().Match(INPUT);
    INPUT.eat(n);

    // ********************************
    // Phase #3: scan initial spaces

    // first the required indentation
120
121
122
123
    while (INPUT.peek() == ' ' &&
           (INPUT.column() < params.indent ||
            (params.detectIndent && !foundNonEmptyLine)) &&
           !params.end->Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
124
      INPUT.eat(1);
125
    }
Jesse Beder's avatar
Jesse Beder committed
126
127

    // update indent if we're auto-detecting
128
    if (params.detectIndent && !foundNonEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
129
      params.indent = std::max(params.indent, INPUT.column());
130
    }
Jesse Beder's avatar
Jesse Beder committed
131
132
133
134
135

    // and then the rest of the whitespace
    while (Exp::Blank().Matches(INPUT)) {
      // we check for tabs that masquerade as indentation
      if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
136
          params.onTabInIndentation == THROW) {
Jesse Beder's avatar
Jesse Beder committed
137
        throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
138
139
140
141
142
      }

      if (!params.eatLeadingWhitespace) {
        break;
      }
Jesse Beder's avatar
Jesse Beder committed
143

144
      if (params.end->Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
145
        break;
146
      }
Jesse Beder's avatar
Jesse Beder committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165

      INPUT.eat(1);
    }

    // was this an empty line?
    bool nextEmptyLine = Exp::Break().Matches(INPUT);
    bool nextMoreIndented = Exp::Blank().Matches(INPUT);
    if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
      foldedNewlineStartedMoreIndented = moreIndented;

    // for block scalars, we always start with a newline, so we should ignore it
    // (not fold or keep)
    if (pastOpeningBreak) {
      switch (params.fold) {
        case DONT_FOLD:
          scalar += "\n";
          break;
        case FOLD_BLOCK:
          if (!emptyLine && !nextEmptyLine && !moreIndented &&
166
              !nextMoreIndented && INPUT.column() >= params.indent) {
Jesse Beder's avatar
Jesse Beder committed
167
            scalar += " ";
168
          } else if (nextEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
169
            foldedNewlineCount++;
170
          } else {
Jesse Beder's avatar
Jesse Beder committed
171
            scalar += "\n";
172
          }
Jesse Beder's avatar
Jesse Beder committed
173
174
175
176

          if (!nextEmptyLine && foldedNewlineCount > 0) {
            scalar += std::string(foldedNewlineCount - 1, '\n');
            if (foldedNewlineStartedMoreIndented ||
177
                nextMoreIndented | !foundNonEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
178
              scalar += "\n";
179
            }
Jesse Beder's avatar
Jesse Beder committed
180
181
182
183
            foldedNewlineCount = 0;
          }
          break;
        case FOLD_FLOW:
184
          if (nextEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
185
            scalar += "\n";
186
          } else if (!emptyLine && !escapedNewline) {
Jesse Beder's avatar
Jesse Beder committed
187
            scalar += " ";
188
          }
Jesse Beder's avatar
Jesse Beder committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
          break;
      }
    }

    emptyLine = nextEmptyLine;
    moreIndented = nextMoreIndented;
    pastOpeningBreak = true;

    // are we done via indentation?
    if (!emptyLine && INPUT.column() < params.indent) {
      params.leadingSpaces = true;
      break;
    }
  }

  // post-processing
  if (params.trimTrailingSpaces) {
    std::size_t pos = scalar.find_last_not_of(' ');
    if (lastEscapedChar != std::string::npos) {
208
      if (pos < lastEscapedChar || pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
209
        pos = lastEscapedChar;
210
      }
Jesse Beder's avatar
Jesse Beder committed
211
    }
212
    if (pos < scalar.size()) {
Jesse Beder's avatar
Jesse Beder committed
213
      scalar.erase(pos + 1);
214
    }
Jesse Beder's avatar
Jesse Beder committed
215
216
217
218
219
220
  }

  switch (params.chomp) {
    case CLIP: {
      std::size_t pos = scalar.find_last_not_of('\n');
      if (lastEscapedChar != std::string::npos) {
221
        if (pos < lastEscapedChar || pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
222
          pos = lastEscapedChar;
223
        }
Jesse Beder's avatar
Jesse Beder committed
224
      }
225
      if (pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
226
        scalar.erase();
227
      } else if (pos + 1 < scalar.size()) {
Jesse Beder's avatar
Jesse Beder committed
228
        scalar.erase(pos + 2);
229
      }
Jesse Beder's avatar
Jesse Beder committed
230
231
232
233
    } break;
    case STRIP: {
      std::size_t pos = scalar.find_last_not_of('\n');
      if (lastEscapedChar != std::string::npos) {
234
        if (pos < lastEscapedChar || pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
235
          pos = lastEscapedChar;
236
        }
Jesse Beder's avatar
Jesse Beder committed
237
      }
238
      if (pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
239
        scalar.erase();
240
      } else if (pos < scalar.size()) {
Jesse Beder's avatar
Jesse Beder committed
241
        scalar.erase(pos + 1);
242
      }
Jesse Beder's avatar
Jesse Beder committed
243
244
245
246
247
248
249
    } break;
    default:
      break;
  }

  return scalar;
}
250
}