#include "scanscalar.h"

#include <algorithm>

#include "exp.h"
#include "regeximpl.h"
#include "stream.h"
#include "yaml-cpp/exceptions.h"  // IWYU pragma: keep

namespace YAML {
// ScanScalar
// . This is where the scalar magic happens.
//
// . We do the scanning in three phases:
//   1. Scan until newline
//   2. Eat newline
//   3. Scan leading blanks.
//
// . Depending on the parameters given, we store or stop
//   and different places in the above flow.
std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
  bool foundNonEmptyLine = false;
  bool pastOpeningBreak = (params.fold == FOLD_FLOW);
  bool emptyLine = false, moreIndented = false;
  int foldedNewlineCount = 0;
  bool foldedNewlineStartedMoreIndented = false;
  std::size_t lastEscapedChar = std::string::npos;
  std::string scalar;
  params.leadingSpaces = false;

31
32
33
34
  if (!params.end) {
    params.end = &Exp::Empty();
  }

Jesse Beder's avatar
Jesse Beder committed
35
36
37
38
39
40
  while (INPUT) {
    // ********************************
    // Phase #1: scan until line ending

    std::size_t lastNonWhitespaceChar = scalar.size();
    bool escapedNewline = false;
41
    while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
42
      if (!INPUT) {
Jesse Beder's avatar
Jesse Beder committed
43
        break;
44
      }
Jesse Beder's avatar
Jesse Beder committed
45
46
47

      // document indicator?
      if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
48
        if (params.onDocIndicator == BREAK) {
Jesse Beder's avatar
Jesse Beder committed
49
          break;
50
51
        }
        if (params.onDocIndicator == THROW) {
Jesse Beder's avatar
Jesse Beder committed
52
          throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
53
        }
Jesse Beder's avatar
Jesse Beder committed
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
      }

      foundNonEmptyLine = true;
      pastOpeningBreak = true;

      // escaped newline? (only if we're escaping on slash)
      if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
        // eat escape character and get out (but preserve trailing whitespace!)
        INPUT.get();
        lastNonWhitespaceChar = scalar.size();
        lastEscapedChar = scalar.size();
        escapedNewline = true;
        break;
      }

      // escape this?
      if (INPUT.peek() == params.escape) {
        scalar += Exp::Escape(INPUT);
        lastNonWhitespaceChar = scalar.size();
        lastEscapedChar = scalar.size();
        continue;
      }

      // otherwise, just add the damn character
      char ch = INPUT.get();
      scalar += ch;
80
      if (ch != ' ' && ch != '\t') {
Jesse Beder's avatar
Jesse Beder committed
81
        lastNonWhitespaceChar = scalar.size();
82
      }
Jesse Beder's avatar
Jesse Beder committed
83
84
85
86
    }

    // eof? if we're looking to eat something, then we throw
    if (!INPUT) {
87
      if (params.eatEnd) {
Jesse Beder's avatar
Jesse Beder committed
88
        throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
89
      }
Jesse Beder's avatar
Jesse Beder committed
90
91
92
93
94
      break;
    }

    // doc indicator?
    if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
95
        Exp::DocIndicator().Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
96
      break;
97
    }
Jesse Beder's avatar
Jesse Beder committed
98
99

    // are we done via character match?
100
    int n = params.end->Match(INPUT);
Jesse Beder's avatar
Jesse Beder committed
101
    if (n >= 0) {
102
      if (params.eatEnd) {
Jesse Beder's avatar
Jesse Beder committed
103
        INPUT.eat(n);
104
      }
Jesse Beder's avatar
Jesse Beder committed
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
      break;
    }

    // do we remove trailing whitespace?
    if (params.fold == FOLD_FLOW)
      scalar.erase(lastNonWhitespaceChar);

    // ********************************
    // Phase #2: eat line ending
    n = Exp::Break().Match(INPUT);
    INPUT.eat(n);

    // ********************************
    // Phase #3: scan initial spaces

    // first the required indentation
121
122
123
124
    while (INPUT.peek() == ' ' &&
           (INPUT.column() < params.indent ||
            (params.detectIndent && !foundNonEmptyLine)) &&
           !params.end->Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
125
      INPUT.eat(1);
126
    }
Jesse Beder's avatar
Jesse Beder committed
127
128

    // update indent if we're auto-detecting
129
    if (params.detectIndent && !foundNonEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
130
      params.indent = std::max(params.indent, INPUT.column());
131
    }
Jesse Beder's avatar
Jesse Beder committed
132
133
134
135
136

    // and then the rest of the whitespace
    while (Exp::Blank().Matches(INPUT)) {
      // we check for tabs that masquerade as indentation
      if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
137
          params.onTabInIndentation == THROW) {
Jesse Beder's avatar
Jesse Beder committed
138
        throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
139
140
141
142
143
      }

      if (!params.eatLeadingWhitespace) {
        break;
      }
Jesse Beder's avatar
Jesse Beder committed
144

145
      if (params.end->Matches(INPUT)) {
Jesse Beder's avatar
Jesse Beder committed
146
        break;
147
      }
Jesse Beder's avatar
Jesse Beder committed
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166

      INPUT.eat(1);
    }

    // was this an empty line?
    bool nextEmptyLine = Exp::Break().Matches(INPUT);
    bool nextMoreIndented = Exp::Blank().Matches(INPUT);
    if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
      foldedNewlineStartedMoreIndented = moreIndented;

    // for block scalars, we always start with a newline, so we should ignore it
    // (not fold or keep)
    if (pastOpeningBreak) {
      switch (params.fold) {
        case DONT_FOLD:
          scalar += "\n";
          break;
        case FOLD_BLOCK:
          if (!emptyLine && !nextEmptyLine && !moreIndented &&
167
              !nextMoreIndented && INPUT.column() >= params.indent) {
Jesse Beder's avatar
Jesse Beder committed
168
            scalar += " ";
169
          } else if (nextEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
170
            foldedNewlineCount++;
171
          } else {
Jesse Beder's avatar
Jesse Beder committed
172
            scalar += "\n";
173
          }
Jesse Beder's avatar
Jesse Beder committed
174
175
176
177

          if (!nextEmptyLine && foldedNewlineCount > 0) {
            scalar += std::string(foldedNewlineCount - 1, '\n');
            if (foldedNewlineStartedMoreIndented ||
178
                nextMoreIndented | !foundNonEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
179
              scalar += "\n";
180
            }
Jesse Beder's avatar
Jesse Beder committed
181
182
183
184
            foldedNewlineCount = 0;
          }
          break;
        case FOLD_FLOW:
185
          if (nextEmptyLine) {
Jesse Beder's avatar
Jesse Beder committed
186
            scalar += "\n";
187
          } else if (!emptyLine && !escapedNewline) {
Jesse Beder's avatar
Jesse Beder committed
188
            scalar += " ";
189
          }
Jesse Beder's avatar
Jesse Beder committed
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
          break;
      }
    }

    emptyLine = nextEmptyLine;
    moreIndented = nextMoreIndented;
    pastOpeningBreak = true;

    // are we done via indentation?
    if (!emptyLine && INPUT.column() < params.indent) {
      params.leadingSpaces = true;
      break;
    }
  }

  // post-processing
  if (params.trimTrailingSpaces) {
207
    std::size_t pos = scalar.find_last_not_of(" \t");
Jesse Beder's avatar
Jesse Beder committed
208
    if (lastEscapedChar != std::string::npos) {
209
      if (pos < lastEscapedChar || pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
210
        pos = lastEscapedChar;
211
      }
Jesse Beder's avatar
Jesse Beder committed
212
    }
213
    if (pos < scalar.size()) {
Jesse Beder's avatar
Jesse Beder committed
214
      scalar.erase(pos + 1);
215
    }
Jesse Beder's avatar
Jesse Beder committed
216
217
218
219
220
221
  }

  switch (params.chomp) {
    case CLIP: {
      std::size_t pos = scalar.find_last_not_of('\n');
      if (lastEscapedChar != std::string::npos) {
222
        if (pos < lastEscapedChar || pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
223
          pos = lastEscapedChar;
224
        }
Jesse Beder's avatar
Jesse Beder committed
225
      }
226
      if (pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
227
        scalar.erase();
228
      } else if (pos + 1 < scalar.size()) {
Jesse Beder's avatar
Jesse Beder committed
229
        scalar.erase(pos + 2);
230
      }
Jesse Beder's avatar
Jesse Beder committed
231
232
233
234
    } break;
    case STRIP: {
      std::size_t pos = scalar.find_last_not_of('\n');
      if (lastEscapedChar != std::string::npos) {
235
        if (pos < lastEscapedChar || pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
236
          pos = lastEscapedChar;
237
        }
Jesse Beder's avatar
Jesse Beder committed
238
      }
239
      if (pos == std::string::npos) {
Jesse Beder's avatar
Jesse Beder committed
240
        scalar.erase();
241
      } else if (pos < scalar.size()) {
Jesse Beder's avatar
Jesse Beder committed
242
        scalar.erase(pos + 1);
243
      }
Jesse Beder's avatar
Jesse Beder committed
244
245
246
247
248
249
250
    } break;
    default:
      break;
  }

  return scalar;
}
Ian Taylor's avatar
Ian Taylor committed
251
}  // namespace YAML