"examples/community/latent_consistency_interpolate.py" did not exist on "2bfa55f4ed39f9dd6fed82720d87f6d4e60f21e3"
Commit 0d5a97bf authored by Jesse Beder's avatar Jesse Beder
Browse files

Refactored common scalar scanning code (from plain, quoted, and block) to one function.

parent 6c193d6f
......@@ -62,7 +62,7 @@ namespace YAML
}
// Escape
// . Escapes the sequence starting 'in' (it must begin with a '\')
// . Escapes the sequence starting 'in' (it must begin with a '\' or single quote)
// and returns the result.
// . Fills 'length' with how many characters we ate.
// . Throws if it's an unknown escape character.
......@@ -72,10 +72,16 @@ namespace YAML
length = 2;
// eat slash
in.get();
char escape = in.get();
// switch on escape character
char ch = in.get();
// first do single quote, since it's easier
if(escape == '\'' && ch == '\'')
return "\'";
// now do the slash (we're not gonna check if it's a slash - you better pass one!)
switch(ch) {
case '0': return "\0";
case 'a': return "\x07";
......
......@@ -53,6 +53,7 @@ namespace YAML
case REGEX_MATCH: m_pOp = new MatchOperator; break;
case REGEX_RANGE: m_pOp = new RangeOperator; break;
case REGEX_OR: m_pOp = new OrOperator; break;
case REGEX_AND: m_pOp = new AndOperator; break;
case REGEX_NOT: m_pOp = new NotOperator; break;
case REGEX_SEQ: m_pOp = new SeqOperator; break;
}
......@@ -80,19 +81,13 @@ namespace YAML
// . Returns the number of characters matched.
// . Returns -1 if no characters were matched (the reason for
// not returning zero is that we may have an empty regex
// which SHOULD be considered successfully matching nothing,
// but that of course matches zero characters).
// which is ALWAYS successful at matching zero characters).
int RegEx::Match(const std::string& str) const
{
if(!m_pOp)
return -1;
return 0;
return m_pOp->Match(str, *this);
//case REGEX_EMPTY:
// if(str.empty())
// return 0;
// return -1;
}
// Match
......@@ -131,6 +126,14 @@ namespace YAML
return ret;
}
// operator &&
// . Builds a REGEX_AND expression whose operands are, in order, 'ex1' and 'ex2'.
// . Both operands are copied into the new expression.
RegEx operator && (const RegEx& ex1, const RegEx& ex2)
{
	RegEx conjunction(REGEX_AND);

	// order matters: the first operand is the one stored first
	conjunction.m_params.push_back(ex1);
	conjunction.m_params.push_back(ex2);

	return conjunction;
}
RegEx operator + (const RegEx& ex1, const RegEx& ex2)
{
RegEx ret(REGEX_SEQ);
......@@ -194,6 +197,36 @@ namespace YAML
return -1;
}
// AndOperator
// . Succeeds only if EVERY operand matches 'str'.
// . The operands may match different lengths, so there is no single obvious
//   length to report; we report the length matched by the FIRST operand.
// . Returns -1 as soon as any operand fails, and also when there are no
//   operands at all.
int RegEx::AndOperator::Match(const std::string& str, const RegEx& regex) const
{
	// nothing to AND together: report "no match"
	if(regex.m_params.empty())
		return -1;

	// the first operand decides the length we report
	const int length = regex.m_params[0].Match(str);
	if(length == -1)
		return -1;

	// the remaining operands only have to succeed
	for(unsigned k = 1; k < regex.m_params.size(); ++k) {
		if(regex.m_params[k].Match(str) == -1)
			return -1;
	}

	return length;
}
// AndOperator (stream version)
// . Succeeds only if EVERY operand matches at the current position of 'in'.
// . Reports the length matched by the FIRST operand.
// . Returns -1 as soon as any operand fails, and also when there are no
//   operands at all.
int RegEx::AndOperator::Match(std::istream& in, const RegEx& regex) const
{
	// nothing to AND together: report "no match"
	if(regex.m_params.empty())
		return -1;

	// the first operand decides the length we report
	const int length = regex.m_params[0].Match(in);
	if(length == -1)
		return -1;

	// the remaining operands only have to succeed
	for(unsigned k = 1; k < regex.m_params.size(); ++k) {
		if(regex.m_params[k].Match(in) == -1)
			return -1;
	}

	return length;
}
// NotOperator
int RegEx::NotOperator::Match(const std::string& str, const RegEx& regex) const
{
......
......@@ -6,7 +6,7 @@
namespace YAML
{
enum REGEX_OP { REGEX_EMPTY, REGEX_MATCH, REGEX_RANGE, REGEX_OR, REGEX_NOT, REGEX_SEQ };
enum REGEX_OP { REGEX_EMPTY, REGEX_MATCH, REGEX_RANGE, REGEX_OR, REGEX_AND, REGEX_NOT, REGEX_SEQ };
// simplified regular expressions
// . Only straightforward matches (no repeated characters)
......@@ -35,6 +35,11 @@ namespace YAML
virtual int Match(std::istream& in, const RegEx& regex) const;
};
// AndOperator
// . Operator used for REGEX_AND expressions.
// . Provides one Match overload for strings and one for streams.
struct AndOperator: Operator {
	// match against the beginning of 'str'
	virtual int Match(const std::string& str, const RegEx& regex) const;

	// match against the stream 'in'
	virtual int Match(std::istream& in, const RegEx& regex) const;
};
struct NotOperator: public Operator {
virtual int Match(const std::string& str, const RegEx& regex) const;
virtual int Match(std::istream& in, const RegEx& regex) const;
......@@ -63,6 +68,7 @@ namespace YAML
friend RegEx operator ! (const RegEx& ex);
friend RegEx operator || (const RegEx& ex1, const RegEx& ex2);
friend RegEx operator && (const RegEx& ex1, const RegEx& ex2);
friend RegEx operator + (const RegEx& ex1, const RegEx& ex2);
private:
......
......@@ -5,6 +5,7 @@
#include <queue>
#include <stack>
#include <set>
#include "regex.h"
namespace YAML
{
......@@ -44,6 +45,7 @@ namespace YAML
bool IsPlainScalar();
void GetBlockIndentation(int& indent, std::string& breaks);
std::string ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp);
struct SimpleKey {
SimpleKey(int pos_, int line_, int column_, int flowLevel_);
......
......@@ -75,74 +75,77 @@ namespace YAML
// and in-line whitespace (which is kept) separately.
template <> PlainScalarToken *Scanner::ScanToken(PlainScalarToken *pToken)
{
//// now eat and store the scalar
//std::string scalar;
//WhitespaceInfo info;
//while(INPUT) {
// // doc start/end tokens
// if(IsDocumentStart() || IsDocumentEnd())
// break;
// // comment
// if(Exp::Comment.Matches(INPUT))
// break;
// // first eat non-blanks
// while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// // illegal colon in flow context
// if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
// throw IllegalScalar();
// // characters that might end the scalar
// if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
// break;
// if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
// break;
// // finally, read the character!
// scalar += GetChar();
// }
// // did we hit a non-blank character that ended us?
// if(!Exp::BlankOrBreak.Matches(INPUT))
// break;
// // now eat blanks
// while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
// if(Exp::Blank.Matches(INPUT)) {
// // can't use tabs as indentation! only spaces!
// if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
// throw IllegalTabInScalar();
// info.AddBlank(GetChar());
// } else {
// // we know it's a line break; see how many characters to read
// int n = Exp::Break.Match(INPUT);
// std::string line = GetChar(n);
// info.AddBreak(line);
// // and we can't continue a simple key to the next line
// ValidateSimpleKey();
// }
// }
// // break if we're below the indentation level
// if(m_flowLevel == 0 && m_column <= m_indents.top())
// break;
// // finally join whitespace
// scalar += info.Join();
//}
RegEx end = (m_flowLevel > 0 ? Exp::EndScalarInFlow : Exp::EndScalar) || (RegEx(' ') + Exp::Comment);
int indent = (m_flowLevel > 0 ? 0 : m_indents.top() + 1);
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
m_simpleKeyAllowed = false;
// now eat and store the scalar
std::string scalar;
WhitespaceInfo info;
while(INPUT) {
// doc start/end tokens
if(IsDocumentStart() || IsDocumentEnd())
break;
// comment
if(Exp::Comment.Matches(INPUT))
break;
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// illegal colon in flow context
if(m_flowLevel > 0 && Exp::IllegalColonInScalar.Matches(INPUT))
throw IllegalScalar();
// characters that might end the scalar
if(m_flowLevel > 0 && Exp::EndScalarInFlow.Matches(INPUT))
break;
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break;
// finally, read the character!
scalar += GetChar();
}
// did we hit a non-blank character that ended us?
if(!Exp::BlankOrBreak.Matches(INPUT))
break;
// now eat blanks
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
// can't use tabs as indentation! only spaces!
if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar();
info.AddBlank(GetChar());
} else {
// we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n);
info.AddBreak(line);
// and we can't continue a simple key to the next line
ValidateSimpleKey();
}
}
// break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top())
break;
pToken->value = ScanScalar(end, false, indent, 0, true, true, true, 0);
// finally join whitespace
scalar += info.Join();
}
// now modify our token
pToken->value = scalar;
if(info.leadingBlanks)
m_simpleKeyAllowed = false;
if(true/*info.leadingBlanks*/)
m_simpleKeyAllowed = true;
return pToken;
......@@ -151,91 +154,92 @@ namespace YAML
// QuotedScalarToken
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
{
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
m_simpleKeyAllowed = false;
//// now eat and store the scalar
//std::string scalar;
//WhitespaceInfo info;
//while(INPUT) {
// if(IsDocumentStart() || IsDocumentEnd())
// throw DocIndicatorInQuote();
// if(INPUT.peek() == EOF)
// throw EOFInQuote();
// // first eat non-blanks
// while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// // escaped single quote?
// if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) {
// int n = Exp::EscSingleQuote.Match(INPUT);
// scalar += GetChar(n);
// continue;
// }
// // is the quote ending?
// if(INPUT.peek() == quote)
// break;
// // escaped newline?
// if(Exp::EscBreak.Matches(INPUT))
// break;
// // other escape sequence
// if(INPUT.peek() == '\\') {
// int length = 0;
// scalar += Exp::Escape(INPUT, length);
// m_column += length;
// continue;
// }
// // and finally, just add the damn character
// scalar += GetChar();
// }
// // is the quote ending?
// if(INPUT.peek() == quote) {
// // eat and go
// GetChar();
// break;
// }
// // now we eat blanks
// while(Exp::BlankOrBreak.Matches(INPUT)) {
// if(Exp::Blank.Matches(INPUT)) {
// info.AddBlank(GetChar());
// } else {
// // we know it's a line break; see how many characters to read
// int n = Exp::Break.Match(INPUT);
// std::string line = GetChar(n);
// info.AddBreak(line);
// // and we can't continue a simple key to the next line
// ValidateSimpleKey();
// }
// }
// // and finally join the whitespace
// scalar += info.Join();
//}
// eat single or double quote
char quote = GetChar();
pToken->single = (quote == '\'');
// now eat and store the scalar
std::string scalar;
WhitespaceInfo info;
while(INPUT) {
if(IsDocumentStart() || IsDocumentEnd())
throw DocIndicatorInQuote();
if(INPUT.peek() == EOF)
throw EOFInQuote();
// first eat non-blanks
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
// escaped single quote?
if(pToken->single && Exp::EscSingleQuote.Matches(INPUT)) {
int n = Exp::EscSingleQuote.Match(INPUT);
scalar += GetChar(n);
continue;
}
// is the quote ending?
if(INPUT.peek() == quote)
break;
RegEx end = (pToken->single ? RegEx(quote) && !Exp::EscSingleQuote : RegEx(quote));
char escape = (pToken->single ? '\'' : '\\');
// escaped newline?
if(Exp::EscBreak.Matches(INPUT))
break;
// other escape sequence
if(INPUT.peek() == '\\') {
int length = 0;
scalar += Exp::Escape(INPUT, length);
m_column += length;
continue;
}
// and finally, just add the damn character
scalar += GetChar();
}
// is the quote ending?
if(INPUT.peek() == quote) {
// eat and go
GetChar();
break;
}
// now we eat blanks
while(Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) {
info.AddBlank(GetChar());
} else {
// we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n);
info.AddBreak(line);
// and we can't continue a simple key to the next line
ValidateSimpleKey();
}
}
// insert a potential simple key
if(m_simpleKeyAllowed)
InsertSimpleKey();
// and finally join the whitespace
scalar += info.Join();
}
pToken->value = ScanScalar(end, true, 0, escape, true, true, false, 0);
m_simpleKeyAllowed = false;
pToken->value = scalar;
return pToken;
}
// BlockScalarToken
template <> BlockScalarToken *Scanner::ScanToken(BlockScalarToken *pToken)
{
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
WhitespaceInfo info;
// eat block indicator ('|' or '>')
......@@ -268,37 +272,13 @@ namespace YAML
if(info.increment && m_indents.top() >= 0)
indent += m_indents.top();
// finally, grab that scalar
std::string scalar;
while(INPUT) {
// initialize indentation
GetBlockIndentation(indent, info.trailingBreaks);
// are we done with this guy (i.e. at a lower indentation?)
if(m_column != indent)
break;
bool trailingBlank = Exp::Blank.Matches(INPUT);
scalar += info.Join();
bool leadingBlank = Exp::Blank.Matches(INPUT);
// now eat and save the line
while(INPUT.peek() != EOF && !Exp::Break.Matches(INPUT))
scalar += GetChar();
// we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n);
info.AddBreak(line);
}
// one last whitespace join (with chompers this time)
scalar += info.Join(true);
GetBlockIndentation(indent, info.trailingBreaks);
// finally set the scalar
pToken->value = scalar;
bool eatLeadingWhitespace = false;
pToken->value = ScanScalar(RegEx(), false, indent, 0, info.fold, eatLeadingWhitespace, false, info.chomp);
// simple keys always ok after block scalars (since we're gonna start a new line anyways)
m_simpleKeyAllowed = true;
return pToken;
}
......@@ -340,4 +320,104 @@ namespace YAML
indent = 1;
}
}
// ScanScalar
// . The one scanning loop shared by plain, quoted, and block scalars.
// . Reads from INPUT until 'end' matches, the input runs out, or a non-empty
//   line starts to the left of column 'indent', and returns the text gathered.
// . end: regex that terminates the scalar when it matches at the current position.
// . eatEnd: if true, the terminator is consumed, and running out of input
//   before finding it throws EOFInQuote.
// . indent: after each line break, leading spaces are eaten up to this column.
// . escape: character that introduces an escape sequence (handed to Exp::Escape);
//   if it is a backslash, an escaped line break is swallowed entirely.
// . fold: if true, a line break between two non-empty lines (neither of which
//   still begins with a space after the indentation was eaten) becomes a single
//   space; every other line break is kept as '\n'.
// . eatLeadingWhitespace: if true, all blanks following the indentation are eaten too.
// . trimTrailingSpaces: if true, trailing spaces are removed from the result.
// . chomp: 0 keeps exactly one trailing newline, -1 removes them all, and
//   any other value leaves the trailing newlines untouched.
std::string Scanner::ScanScalar(RegEx end, bool eatEnd, int indent, char escape, bool fold, bool eatLeadingWhitespace, bool trimTrailingSpaces, int chomp)
{
	// state carried from one line to the next, needed to decide how to join them
	bool emptyLine = false, moreIndented = false;
	std::string scalar;

	while(INPUT) {
		// ********************************
		// Phase #1: scan until line ending
		while(!end.Matches(INPUT) && !Exp::Break.Matches(INPUT)) {
			if(INPUT.peek() == EOF)
				break;

			// escaped newline? (only if we're escaping on slash)
			if(escape == '\\' && Exp::EscBreak.Matches(INPUT)) {
				int n = Exp::EscBreak.Match(INPUT);
				Eat(n);
				continue;
			}

			// escape this?
			if(INPUT.peek() == escape) {
				int length = 0;
				scalar += Exp::Escape(INPUT, length);
				// Exp::Escape reads from the stream directly, so account for the column ourselves
				m_column += length;
				continue;
			}

			// otherwise, just add the character
			scalar += GetChar();
		}

		// eof? if we're looking to eat something, then we throw
		if(INPUT.peek() == EOF) {
			if(eatEnd)
				throw EOFInQuote();
			break;
		}

		// are we done via character match?
		int n = end.Match(INPUT);
		if(n >= 0) {
			if(eatEnd)
				Eat(n);
			break;
		}

		// ********************************
		// Phase #2: eat line ending
		n = Exp::Break.Match(INPUT);
		Eat(n);

		// ********************************
		// Phase #3: scan initial spaces

		// first the required indentation
		while(INPUT.peek() == ' ' && m_column < indent)
			Eat(1);

		// and then the rest of the whitespace
		if(eatLeadingWhitespace) {
			while(Exp::Blank.Matches(INPUT))
				Eat(1);
		}

		// was this an empty line?
		bool nextEmptyLine = Exp::Break.Matches(INPUT);
		bool nextMoreIndented = (INPUT.peek() == ' ');

		// join the line we just finished with the one we're about to read
		if(fold && !emptyLine && !nextEmptyLine && !moreIndented && !nextMoreIndented)
			scalar += " ";
		else
			scalar += "\n";

		emptyLine = nextEmptyLine;
		moreIndented = nextMoreIndented;

		// are we done via indentation?
		if(!emptyLine && m_column < indent)
			break;
	}

	// post-processing
	// Note: find_last_not_of returns std::string::npos when EVERY character matches,
	//       so the results are kept in size_type (not narrowed to unsigned) and the
	//       npos case is mapped to "everything from index 0 on is trailing".
	if(trimTrailingSpaces) {
		const std::string::size_type lastNonSpace = scalar.find_last_not_of(' ');
		scalar.erase(lastNonSpace == std::string::npos ? 0 : lastNonSpace + 1);
	}

	if(chomp == 0 || chomp == -1) {
		const std::string::size_type lastNonBreak = scalar.find_last_not_of('\n');

		// index of the first trailing newline (== size() if there are none)
		const std::string::size_type firstTrailingBreak = (lastNonBreak == std::string::npos ? 0 : lastNonBreak + 1);

		if(chomp == 0) {
			// clip: keep exactly one of the trailing newlines
			if(firstTrailingBreak < scalar.size())
				scalar.erase(firstTrailingBreak + 1);
		} else {
			// strip: drop every trailing newline (even if that's the whole scalar)
			scalar.erase(firstTrailingBreak);
		}
	}

	return scalar;
}
}
people:
- &jsb
name: Jesse
age: 23
- &dab
name: 'Daniel'
age: 25
- &ncb
name: "Naftali"
age: 21
students:
- *jsb
- *ncb
\ No newline at end of file
---
- "quoted scalar that contains
---
the document start!"
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment