Unverified Commit c82d3129 authored by Oliver Hamlet's avatar Oliver Hamlet Committed by GitHub
Browse files

Add support for JSON-compatible string escapes (#485)

For completeness I've implemented escaping for characters outside the
basic multilingual plane, but it doesn't get used (as there's no
EscapeAsAsciiJson emitter option implemented).
parent 370aceea
......@@ -19,6 +19,7 @@ enum EMITTER_MANIP {
// output character set
EmitNonAscii,
EscapeNonAscii,
EscapeAsJson,
// string manipulators
// Auto, // duplicate
......
......@@ -686,14 +686,27 @@ void Emitter::StartedScalar() { m_pState->StartedScalar(); }
// *******************************************************************************************
// overloads of Write
StringEscaping::value GetStringEscapingStyle(const EMITTER_MANIP emitterManip) {
switch (emitterManip) {
case EscapeNonAscii:
return StringEscaping::NonAscii;
case EscapeAsJson:
return StringEscaping::JSON;
default:
return StringEscaping::None;
break;
}
}
Emitter& Emitter::Write(const std::string& str) {
if (!good())
return *this;
const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
StringEscaping::value stringEscaping = GetStringEscapingStyle(m_pState->GetOutputCharset());
const StringFormat::value strFormat =
Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
m_pState->CurGroupFlowType(), escapeNonAscii);
m_pState->CurGroupFlowType(), stringEscaping == StringEscaping::NonAscii);
if (strFormat == StringFormat::Literal)
m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
......@@ -708,7 +721,7 @@ Emitter& Emitter::Write(const std::string& str) {
Utils::WriteSingleQuotedString(m_stream, str);
break;
case StringFormat::DoubleQuoted:
Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
Utils::WriteDoubleQuotedString(m_stream, str, stringEscaping);
break;
case StringFormat::Literal:
Utils::WriteLiteralString(m_stream, str,
......@@ -814,8 +827,10 @@ Emitter& Emitter::Write(char ch) {
if (!good())
return *this;
PrepareNode(EmitterNodeType::Scalar);
Utils::WriteChar(m_stream, ch);
Utils::WriteChar(m_stream, ch, GetStringEscapingStyle(m_pState->GetOutputCharset()));
StartedScalar();
return *this;
......
......@@ -231,6 +231,7 @@ bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
switch (value) {
case EmitNonAscii:
case EscapeNonAscii:
case EscapeAsJson:
_Set(m_charset, value, scope);
return true;
default:
......
......@@ -218,20 +218,34 @@ bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
});
}
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
std::pair<uint16_t, uint16_t> EncodeUTF16SurrogatePair(int codePoint) {
const uint32_t leadOffset = 0xD800 - (0x10000 >> 10);
return {
leadOffset | (codePoint >> 10),
0xDC00 | (codePoint & 0x3FF),
};
}
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint, StringEscaping::value stringEscapingStyle) {
static const char hexDigits[] = "0123456789abcdef";
out << "\\";
int digits = 8;
if (codePoint < 0xFF) {
if (codePoint < 0xFF && stringEscapingStyle != StringEscaping::JSON) {
out << "x";
digits = 2;
} else if (codePoint < 0xFFFF) {
out << "u";
digits = 4;
} else {
} else if (stringEscapingStyle != StringEscaping::JSON) {
out << "U";
digits = 8;
} else {
auto surrogatePair = EncodeUTF16SurrogatePair(codePoint);
WriteDoubleQuoteEscapeSequence(out, surrogatePair.first, stringEscapingStyle);
WriteDoubleQuoteEscapeSequence(out, surrogatePair.second, stringEscapingStyle);
return;
}
// Write digits into the escape sequence
......@@ -303,7 +317,7 @@ bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
}
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
bool escapeNonAscii) {
StringEscaping::value stringEscaping) {
out << "\"";
int codePoint;
for (std::string::const_iterator i = str.begin();
......@@ -327,16 +341,19 @@ bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
case '\b':
out << "\\b";
break;
case '\f':
out << "\\f";
break;
default:
if (codePoint < 0x20 ||
(codePoint >= 0x80 &&
codePoint <= 0xA0)) { // Control characters and non-breaking space
WriteDoubleQuoteEscapeSequence(out, codePoint);
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
} else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
// escaped (YAML 1.2, sec. 5.2)
WriteDoubleQuoteEscapeSequence(out, codePoint);
} else if (escapeNonAscii && codePoint > 0x7E) {
WriteDoubleQuoteEscapeSequence(out, codePoint);
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
} else if (stringEscaping == StringEscaping::NonAscii && codePoint > 0x7E) {
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
} else {
WriteCodePoint(out, codePoint);
}
......@@ -362,7 +379,7 @@ bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
return true;
}
bool WriteChar(ostream_wrapper& out, char ch) {
bool WriteChar(ostream_wrapper& out, char ch, StringEscaping::value stringEscapingStyle) {
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
out << ch;
} else if (ch == '\"') {
......@@ -373,13 +390,17 @@ bool WriteChar(ostream_wrapper& out, char ch) {
out << R"("\n")";
} else if (ch == '\b') {
out << R"("\b")";
} else if (ch == '\r') {
out << R"("\r")";
} else if (ch == '\f') {
out << R"("\f")";
} else if (ch == '\\') {
out << R"("\\")";
} else if (0x20 <= ch && ch <= 0x7e) {
out << "\"" << ch << "\"";
} else {
out << "\"";
WriteDoubleQuoteEscapeSequence(out, ch);
WriteDoubleQuoteEscapeSequence(out, ch, stringEscapingStyle);
out << "\"";
}
return true;
......@@ -469,7 +490,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
false);
StringEscaping::None);
return true;
}
} // namespace Utils
......
......@@ -24,6 +24,10 @@ struct StringFormat {
enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
};
struct StringEscaping {
enum value { None, NonAscii, JSON };
};
namespace Utils {
StringFormat::value ComputeStringFormat(const std::string& str,
EMITTER_MANIP strFormat,
......@@ -32,10 +36,11 @@ StringFormat::value ComputeStringFormat(const std::string& str,
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
bool escapeNonAscii);
StringEscaping::value stringEscaping);
bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
std::size_t indent);
bool WriteChar(ostream_wrapper& out, char ch);
bool WriteChar(ostream_wrapper& out, char ch,
StringEscaping::value stringEscapingStyle);
bool WriteComment(ostream_wrapper& out, const std::string& str,
std::size_t postCommentIndent);
bool WriteAlias(ostream_wrapper& out, const std::string& str);
......
......@@ -816,6 +816,42 @@ TEST_F(EmitterTest, DoubleQuotedUnicode) {
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
}
TEST_F(EmitterTest, EscapedJsonString) {
out.SetStringFormat(DoubleQuoted);
out.SetOutputCharset(EscapeAsJson);
out << "\" \\ "
"\x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0A \x0B \x0C \x0D \x0E \x0F "
"\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1A \x1B \x1C \x1D \x1E \x1F "
"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
ExpectEmit(R"("\" \\ \u0001 \u0002 \u0003 \u0004 \u0005 \u0006 \u0007 \b \t )"
R"(\n \u000b \f \r \u000e \u000f \u0010 \u0011 \u0012 \u0013 )"
R"(\u0014 \u0015 \u0016 \u0017 \u0018 \u0019 \u001a \u001b )"
R"(\u001c \u001d \u001e \u001f )"
"$ \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
}
TEST_F(EmitterTest, EscapedCharacters) {
out << BeginSeq
<< '\x00'
<< '\x0C'
<< '\x0D'
<< EndSeq;
ExpectEmit("- \"\\x00\"\n- \"\\f\"\n- \"\\r\"");
}
TEST_F(EmitterTest, CharactersEscapedAsJson) {
out.SetOutputCharset(EscapeAsJson);
out << BeginSeq
<< '\x00'
<< '\x0C'
<< '\x0D'
<< EndSeq;
ExpectEmit("- \"\\u0000\"\n- \"\\f\"\n- \"\\r\"");
}
TEST_F(EmitterTest, DoubleQuotedString) {
out << DoubleQuoted << "\" \\ \n \t \r \b \x15 \xEF\xBB\xBF \x24";
ExpectEmit("\"\\\" \\\\ \\n \\t \\r \\b \\x15 \\ufeff $\"");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment