Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
yaml-cpp
Commits
fa0af88d
Commit
fa0af88d
authored
Oct 19, 2009
by
Jesse Beder
Browse files
Merged r270:HEAD of the emitting-unicode branch
parent
bce845bb
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
253 additions
and
165 deletions
+253
-165
include/emitter.h
include/emitter.h
+1
-0
include/emittermanip.h
include/emittermanip.h
+4
-0
src/emitter.cpp
src/emitter.cpp
+9
-3
src/emitterstate.cpp
src/emitterstate.cpp
+14
-0
src/emitterstate.h
src/emitterstate.h
+4
-0
src/emitterutils.cpp
src/emitterutils.cpp
+194
-155
src/emitterutils.h
src/emitterutils.h
+2
-2
src/exp.cpp
src/exp.cpp
+2
-2
src/exp.h
src/exp.h
+6
-1
yaml-reader/emittertests.cpp
yaml-reader/emittertests.cpp
+17
-2
No files found.
include/emitter.h
View file @
fa0af88d
...
@@ -29,6 +29,7 @@ namespace YAML
...
@@ -29,6 +29,7 @@ namespace YAML
const
std
::
string
GetLastError
()
const
;
const
std
::
string
GetLastError
()
const
;
// global setters
// global setters
bool
SetOutputCharset
(
EMITTER_MANIP
value
);
bool
SetStringFormat
(
EMITTER_MANIP
value
);
bool
SetStringFormat
(
EMITTER_MANIP
value
);
bool
SetBoolFormat
(
EMITTER_MANIP
value
);
bool
SetBoolFormat
(
EMITTER_MANIP
value
);
bool
SetIntBase
(
EMITTER_MANIP
value
);
bool
SetIntBase
(
EMITTER_MANIP
value
);
...
...
include/emittermanip.h
View file @
fa0af88d
...
@@ -11,6 +11,10 @@ namespace YAML
...
@@ -11,6 +11,10 @@ namespace YAML
enum
EMITTER_MANIP
{
enum
EMITTER_MANIP
{
// general manipulators
// general manipulators
Auto
,
Auto
,
// output character set
EmitNonAscii
,
EscapeNonAscii
,
// string manipulators
// string manipulators
// Auto, // duplicate
// Auto, // duplicate
...
...
src/emitter.cpp
View file @
fa0af88d
...
@@ -37,6 +37,11 @@ namespace YAML
...
@@ -37,6 +37,11 @@ namespace YAML
}
}
// global setters
// global setters
bool
Emitter
::
SetOutputCharset
(
EMITTER_MANIP
value
)
{
return
m_pState
->
SetOutputCharset
(
value
,
GLOBAL
);
}
bool
Emitter
::
SetStringFormat
(
EMITTER_MANIP
value
)
bool
Emitter
::
SetStringFormat
(
EMITTER_MANIP
value
)
{
{
return
m_pState
->
SetStringFormat
(
value
,
GLOBAL
);
return
m_pState
->
SetStringFormat
(
value
,
GLOBAL
);
...
@@ -485,13 +490,14 @@ namespace YAML
...
@@ -485,13 +490,14 @@ namespace YAML
PreAtomicWrite
();
PreAtomicWrite
();
EmitSeparationIfNecessary
();
EmitSeparationIfNecessary
();
bool
escapeNonAscii
=
m_pState
->
GetOutputCharset
()
==
EscapeNonAscii
;
EMITTER_MANIP
strFmt
=
m_pState
->
GetStringFormat
();
EMITTER_MANIP
strFmt
=
m_pState
->
GetStringFormat
();
FLOW_TYPE
flowType
=
m_pState
->
GetCurGroupFlowType
();
FLOW_TYPE
flowType
=
m_pState
->
GetCurGroupFlowType
();
unsigned
curIndent
=
m_pState
->
GetCurIndent
();
unsigned
curIndent
=
m_pState
->
GetCurIndent
();
switch
(
strFmt
)
{
switch
(
strFmt
)
{
case
Auto
:
case
Auto
:
Utils
::
WriteString
(
m_stream
,
str
,
flowType
==
FT_FLOW
);
Utils
::
WriteString
(
m_stream
,
str
,
flowType
==
FT_FLOW
,
escapeNonAscii
);
break
;
break
;
case
SingleQuoted
:
case
SingleQuoted
:
if
(
!
Utils
::
WriteSingleQuotedString
(
m_stream
,
str
))
{
if
(
!
Utils
::
WriteSingleQuotedString
(
m_stream
,
str
))
{
...
@@ -500,11 +506,11 @@ namespace YAML
...
@@ -500,11 +506,11 @@ namespace YAML
}
}
break
;
break
;
case
DoubleQuoted
:
case
DoubleQuoted
:
Utils
::
WriteDoubleQuotedString
(
m_stream
,
str
);
Utils
::
WriteDoubleQuotedString
(
m_stream
,
str
,
escapeNonAscii
);
break
;
break
;
case
Literal
:
case
Literal
:
if
(
flowType
==
FT_FLOW
)
if
(
flowType
==
FT_FLOW
)
Utils
::
WriteString
(
m_stream
,
str
,
flowType
==
FT_FLOW
);
Utils
::
WriteString
(
m_stream
,
str
,
flowType
==
FT_FLOW
,
escapeNonAscii
);
else
else
Utils
::
WriteLiteralString
(
m_stream
,
str
,
curIndent
+
m_pState
->
GetIndent
());
Utils
::
WriteLiteralString
(
m_stream
,
str
,
curIndent
+
m_pState
->
GetIndent
());
break
;
break
;
...
...
src/emitterstate.cpp
View file @
fa0af88d
...
@@ -9,6 +9,7 @@ namespace YAML
...
@@ -9,6 +9,7 @@ namespace YAML
m_stateStack
.
push
(
ES_WAITING_FOR_DOC
);
m_stateStack
.
push
(
ES_WAITING_FOR_DOC
);
// set default global manipulators
// set default global manipulators
m_charset
.
set
(
EmitNonAscii
);
m_strFmt
.
set
(
Auto
);
m_strFmt
.
set
(
Auto
);
m_boolFmt
.
set
(
TrueFalseBool
);
m_boolFmt
.
set
(
TrueFalseBool
);
m_boolLengthFmt
.
set
(
LongBool
);
m_boolLengthFmt
.
set
(
LongBool
);
...
@@ -43,6 +44,7 @@ namespace YAML
...
@@ -43,6 +44,7 @@ namespace YAML
// . Only the ones that make sense will be accepted
// . Only the ones that make sense will be accepted
void
EmitterState
::
SetLocalValue
(
EMITTER_MANIP
value
)
void
EmitterState
::
SetLocalValue
(
EMITTER_MANIP
value
)
{
{
SetOutputCharset
(
value
,
LOCAL
);
SetStringFormat
(
value
,
LOCAL
);
SetStringFormat
(
value
,
LOCAL
);
SetBoolFormat
(
value
,
LOCAL
);
SetBoolFormat
(
value
,
LOCAL
);
SetBoolCaseFormat
(
value
,
LOCAL
);
SetBoolCaseFormat
(
value
,
LOCAL
);
...
@@ -132,6 +134,18 @@ namespace YAML
...
@@ -132,6 +134,18 @@ namespace YAML
{
{
m_modifiedSettings
.
clear
();
m_modifiedSettings
.
clear
();
}
}
bool
EmitterState
::
SetOutputCharset
(
EMITTER_MANIP
value
,
FMT_SCOPE
scope
)
{
switch
(
value
)
{
case
EmitNonAscii
:
case
EscapeNonAscii
:
_Set
(
m_charset
,
value
,
scope
);
return
true
;
default:
return
false
;
}
}
bool
EmitterState
::
SetStringFormat
(
EMITTER_MANIP
value
,
FMT_SCOPE
scope
)
bool
EmitterState
::
SetStringFormat
(
EMITTER_MANIP
value
,
FMT_SCOPE
scope
)
{
{
...
...
src/emitterstate.h
View file @
fa0af88d
...
@@ -108,6 +108,9 @@ namespace YAML
...
@@ -108,6 +108,9 @@ namespace YAML
void
ClearModifiedSettings
();
void
ClearModifiedSettings
();
// formatters
// formatters
bool
SetOutputCharset
(
EMITTER_MANIP
value
,
FMT_SCOPE
scope
);
EMITTER_MANIP
GetOutputCharset
()
const
{
return
m_charset
.
get
();
}
bool
SetStringFormat
(
EMITTER_MANIP
value
,
FMT_SCOPE
scope
);
bool
SetStringFormat
(
EMITTER_MANIP
value
,
FMT_SCOPE
scope
);
EMITTER_MANIP
GetStringFormat
()
const
{
return
m_strFmt
.
get
();
}
EMITTER_MANIP
GetStringFormat
()
const
{
return
m_strFmt
.
get
();
}
...
@@ -149,6 +152,7 @@ namespace YAML
...
@@ -149,6 +152,7 @@ namespace YAML
// other state
// other state
std
::
stack
<
EMITTER_STATE
>
m_stateStack
;
std
::
stack
<
EMITTER_STATE
>
m_stateStack
;
Setting
<
EMITTER_MANIP
>
m_charset
;
Setting
<
EMITTER_MANIP
>
m_strFmt
;
Setting
<
EMITTER_MANIP
>
m_strFmt
;
Setting
<
EMITTER_MANIP
>
m_boolFmt
;
Setting
<
EMITTER_MANIP
>
m_boolFmt
;
Setting
<
EMITTER_MANIP
>
m_boolLengthFmt
;
Setting
<
EMITTER_MANIP
>
m_boolLengthFmt
;
...
...
src/emitterutils.cpp
View file @
fa0af88d
...
@@ -5,18 +5,129 @@
...
@@ -5,18 +5,129 @@
#include "stringsource.h"
#include "stringsource.h"
#include <sstream>
#include <sstream>
#include <iomanip>
#include <iomanip>
#include <cassert>
namespace
YAML
namespace
YAML
{
{
namespace
Utils
namespace
Utils
{
{
namespace
{
namespace
{
bool
IsPrintable
(
char
ch
)
{
enum
{
REPLACEMENT_CHARACTER
=
0xFFFD
};
return
(
0x20
<=
ch
&&
ch
<=
0x7E
);
bool
IsAnchorChar
(
int
ch
)
{
// test for ns-anchor-char
switch
(
ch
)
{
case
','
:
case
'['
:
case
']'
:
case
'{'
:
case
'}'
:
// c-flow-indicator
case
' '
:
case
'\t'
:
// s-white
case
0xFEFF
:
// c-byte-order-mark
case
0xA
:
case
0xD
:
// b-char
return
false
;
case
0x85
:
return
true
;
}
if
(
ch
<
0x20
)
return
false
;
if
(
ch
<
0x7E
)
return
true
;
if
(
ch
<
0xA0
)
return
false
;
if
(
ch
>=
0xD800
&&
ch
<=
0xDFFF
)
return
false
;
if
((
ch
&
0xFFFE
)
==
0xFFFE
)
return
false
;
if
((
ch
>=
0xFDD0
)
&&
(
ch
<=
0xFDEF
))
return
false
;
if
(
ch
>
0x10FFFF
)
return
false
;
return
true
;
}
int
Utf8BytesIndicated
(
char
ch
)
{
int
byteVal
=
static_cast
<
unsigned
char
>
(
ch
);
switch
(
byteVal
>>
4
)
{
case
0
:
case
1
:
case
2
:
case
3
:
case
4
:
case
5
:
case
6
:
case
7
:
return
1
;
case
12
:
case
13
:
return
2
;
case
14
:
return
3
;
case
15
:
return
4
;
default:
return
-
1
;
}
}
bool
IsTrailingByte
(
char
ch
)
{
return
(
ch
&
0xC0
)
==
0x80
;
}
}
bool
IsValidPlainScalar
(
const
std
::
string
&
str
,
bool
inFlow
)
{
bool
GetNextCodePointAndAdvance
(
int
&
codePoint
,
std
::
string
::
const_iterator
&
first
,
std
::
string
::
const_iterator
last
)
{
if
(
first
==
last
)
return
false
;
int
nBytes
=
Utf8BytesIndicated
(
*
first
);
if
(
nBytes
<
1
)
{
// Bad lead byte
++
first
;
codePoint
=
REPLACEMENT_CHARACTER
;
return
true
;
}
if
(
nBytes
==
1
)
{
codePoint
=
*
first
++
;
return
true
;
}
// Gather bits from trailing bytes
codePoint
=
static_cast
<
unsigned
char
>
(
*
first
)
&
~
(
0xFF
<<
(
7
-
nBytes
));
++
first
;
--
nBytes
;
for
(;
nBytes
>
0
;
++
first
,
--
nBytes
)
{
if
((
first
==
last
)
||
!
IsTrailingByte
(
*
first
))
{
codePoint
=
REPLACEMENT_CHARACTER
;
break
;
}
codePoint
<<=
6
;
codePoint
|=
*
first
&
0x3F
;
}
// Check for illegal code points
if
(
codePoint
>
0x10FFFF
)
codePoint
=
REPLACEMENT_CHARACTER
;
else
if
(
codePoint
>=
0xD800
&&
codePoint
<=
0xDFFF
)
codePoint
=
REPLACEMENT_CHARACTER
;
else
if
((
codePoint
&
0xFFFE
)
==
0xFFFE
)
codePoint
=
REPLACEMENT_CHARACTER
;
else
if
(
codePoint
>=
0xFDD0
&&
codePoint
<=
0xFDEF
)
codePoint
=
REPLACEMENT_CHARACTER
;
return
true
;
}
void
WriteCodePoint
(
ostream
&
out
,
int
codePoint
)
{
if
(
codePoint
<
0
||
codePoint
>
0x10FFFF
)
{
codePoint
=
REPLACEMENT_CHARACTER
;
}
if
(
codePoint
<
0x7F
)
{
out
<<
static_cast
<
char
>
(
codePoint
);
}
else
if
(
codePoint
<
0x7FF
)
{
out
<<
static_cast
<
char
>
(
0xC0
|
(
codePoint
>>
6
))
<<
static_cast
<
char
>
(
0x80
|
(
codePoint
&
0x3F
));
}
else
if
(
codePoint
<
0xFFFF
)
{
out
<<
static_cast
<
char
>
(
0xE0
|
(
codePoint
>>
12
))
<<
static_cast
<
char
>
(
0x80
|
((
codePoint
>>
6
)
&
0x3F
))
<<
static_cast
<
char
>
(
0x80
|
(
codePoint
&
0x3F
));
}
else
{
out
<<
static_cast
<
char
>
(
0xF0
|
(
codePoint
>>
18
))
<<
static_cast
<
char
>
(
0x80
|
((
codePoint
>>
12
)
&
0x3F
))
<<
static_cast
<
char
>
(
0x80
|
((
codePoint
>>
6
)
&
0x3F
))
<<
static_cast
<
char
>
(
0x80
|
(
codePoint
&
0x3F
));
}
}
bool
IsValidPlainScalar
(
const
std
::
string
&
str
,
bool
inFlow
,
bool
allowOnlyAscii
)
{
// first check the start
// first check the start
const
RegEx
&
start
=
(
inFlow
?
Exp
::
PlainScalarInFlow
:
Exp
::
PlainScalar
);
const
RegEx
&
start
=
(
inFlow
?
Exp
::
PlainScalarInFlow
:
Exp
::
PlainScalar
);
if
(
!
start
.
Matches
(
str
))
if
(
!
start
.
Matches
(
str
))
...
@@ -29,177 +140,109 @@ namespace YAML
...
@@ -29,177 +140,109 @@ namespace YAML
// then check until something is disallowed
// then check until something is disallowed
const
RegEx
&
disallowed
=
(
inFlow
?
Exp
::
EndScalarInFlow
:
Exp
::
EndScalar
)
const
RegEx
&
disallowed
=
(
inFlow
?
Exp
::
EndScalarInFlow
:
Exp
::
EndScalar
)
||
(
Exp
::
BlankOrBreak
+
Exp
::
Comment
)
||
(
Exp
::
BlankOrBreak
+
Exp
::
Comment
)
||
(
!
Exp
::
Printable
)
||
Exp
::
NotPrintable
||
Exp
::
Utf8_ByteOrderMark
||
Exp
::
Break
||
Exp
::
Break
||
Exp
::
Tab
;
||
Exp
::
Tab
;
StringCharSource
buffer
(
str
.
c_str
(),
str
.
size
());
StringCharSource
buffer
(
str
.
c_str
(),
str
.
size
());
while
(
buffer
)
{
while
(
buffer
)
{
if
(
disallowed
.
Matches
(
buffer
))
if
(
disallowed
.
Matches
(
buffer
))
return
false
;
return
false
;
if
(
allowOnlyAscii
&&
(
0x7F
<
static_cast
<
unsigned
char
>
(
buffer
[
0
])))
return
false
;
++
buffer
;
++
buffer
;
}
}
return
true
;
return
true
;
}
}
typedef
unsigned
char
byte
;
byte
ToByte
(
char
ch
)
{
return
static_cast
<
byte
>
(
ch
);
}
typedef
std
::
string
::
const_iterator
StrIter
;
std
::
string
WriteUnicode
(
unsigned
value
)
{
void
WriteDoubleQuoteEscapeSequence
(
ostream
&
out
,
int
codePoint
)
{
std
::
stringstream
str
;
static
const
char
hexDigits
[]
=
"0123456789abcdef"
;
// TODO: for the common escaped characters, give their usual symbol
if
(
value
<=
0xFF
)
str
<<
"
\\
x"
<<
std
::
hex
<<
std
::
setfill
(
'0'
)
<<
std
::
setw
(
2
)
<<
value
;
else
if
(
value
<=
0xFFFF
)
str
<<
"
\\
u"
<<
std
::
hex
<<
std
::
setfill
(
'0'
)
<<
std
::
setw
(
4
)
<<
value
;
else
str
<<
"
\\
U"
<<
std
::
hex
<<
std
::
setfill
(
'0'
)
<<
std
::
setw
(
8
)
<<
value
;
return
str
.
str
();
}
// GetBytesToRead
// . Returns the length of the UTF-8 sequence starting with 'signal'
int
GetBytesToRead
(
byte
signal
)
{
if
(
signal
<=
0x7F
)
// ASCII
return
1
;
else
if
(
signal
<=
0xBF
)
// invalid first characters
return
0
;
else
if
(
signal
<=
0xDF
)
// Note: this allows "overlong" UTF8 (0xC0 - 0xC1) to pass unscathed. OK?
return
2
;
else
if
(
signal
<=
0xEF
)
return
3
;
else
return
4
;
}
// ReadBytes
// . Reads the next 'bytesToRead', if we can.
// . Returns zero if we fail, otherwise fills the byte buffer with
// the data and returns the number of bytes read.
int
ReadBytes
(
byte
bytes
[
4
],
StrIter
start
,
StrIter
end
,
int
bytesToRead
)
{
for
(
int
i
=
0
;
i
<
bytesToRead
;
i
++
)
{
if
(
start
==
end
)
return
0
;
bytes
[
i
]
=
ToByte
(
*
start
);
++
start
;
}
return
bytesToRead
;
}
// IsValidUTF8
// . Assumes bytes[0] is a valid signal byte with the right size passed
bool
IsValidUTF8
(
byte
bytes
[
4
],
int
size
)
{
for
(
int
i
=
1
;
i
<
size
;
i
++
)
if
(
bytes
[
i
]
&
0x80
!=
0x80
)
return
false
;
return
true
;
}
byte
UTF8SignalPrefix
(
int
size
)
{
switch
(
size
)
{
case
1
:
return
0
;
case
2
:
return
0xC0
;
case
3
:
return
0xE0
;
case
4
:
return
0xF0
;
}
assert
(
false
);
return
0
;
}
unsigned
UTF8ToUnicode
(
byte
bytes
[
4
],
int
size
)
{
unsigned
value
=
bytes
[
0
]
-
UTF8SignalPrefix
(
size
);
for
(
int
i
=
1
;
i
<
size
;
i
++
)
value
=
(
value
<<
6
)
+
(
bytes
[
i
]
-
0x80
);
return
value
;
}
// ReadUTF8
char
escSeq
[]
=
"
\\
U00000000"
;
// . Returns the Unicode code point starting at 'start',
int
digits
=
8
;
// and sets 'bytesRead' to the length of the UTF-8 Sequence
if
(
codePoint
<
0xFF
)
{
// . If it's invalid UTF8, we set 'bytesRead' to zero.
escSeq
[
1
]
=
'x'
;
unsigned
ReadUTF8
(
StrIter
start
,
StrIter
end
,
int
&
bytesRead
)
{
digits
=
2
;
int
bytesToRead
=
GetBytesToRead
(
ToByte
(
*
start
));
}
else
if
(
codePoint
<
0xFFFF
)
{
if
(
!
bytesToRead
)
{
escSeq
[
1
]
=
'u'
;
bytesRead
=
0
;
digits
=
4
;
return
0
;
}
}
byte
bytes
[
4
];
// Write digits into the escape sequence
bytesRead
=
ReadBytes
(
bytes
,
start
,
end
,
bytesToRead
);
int
i
=
2
;
if
(
!
bytesRead
)
for
(;
digits
>
0
;
--
digits
,
++
i
)
{
return
0
;
escSeq
[
i
]
=
hexDigits
[(
codePoint
>>
(
4
*
(
digits
-
1
)))
&
0xF
];
if
(
!
IsValidUTF8
(
bytes
,
bytesRead
))
{
bytesRead
=
0
;
return
0
;
}
}
return
UTF8ToUnicode
(
bytes
,
bytesRead
);
escSeq
[
i
]
=
0
;
// terminate with NUL character
out
<<
escSeq
;
}
}
// WriteNonPrintable
bool
WriteAliasName
(
ostream
&
out
,
const
std
::
string
&
str
)
{
// . Writes the next UTF-8 code point to the stream
int
codePoint
;
int
WriteNonPrintable
(
ostream
&
out
,
StrIter
start
,
StrIter
end
)
{
for
(
std
::
string
::
const_iterator
i
=
str
.
begin
();
int
bytesRead
=
0
;
GetNextCodePointAndAdvance
(
codePoint
,
i
,
str
.
end
());
unsigned
value
=
ReadUTF8
(
start
,
end
,
bytesRead
);
)
{
if
(
!
IsAnchorChar
(
codePoint
))
return
false
;
if
(
bytesRead
==
0
)
{
WriteCodePoint
(
out
,
codePoint
);
// TODO: is it ok to just write the replacement character here,
// or should we instead write the invalid byte (as \xNN)?
out
<<
WriteUnicode
(
0xFFFD
);
return
1
;
}
}
return
true
;
out
<<
WriteUnicode
(
value
);
return
bytesRead
;
}
}
}
}
bool
WriteString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
inFlow
)
bool
WriteString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
inFlow
,
bool
escapeNonAscii
)
{
{
if
(
IsValidPlainScalar
(
str
,
inFlow
))
{
if
(
IsValidPlainScalar
(
str
,
inFlow
,
escapeNonAscii
))
{
out
<<
str
;
out
<<
str
;
return
true
;
return
true
;
}
else
}
else
return
WriteDoubleQuotedString
(
out
,
str
);
return
WriteDoubleQuotedString
(
out
,
str
,
escapeNonAscii
);
}
}
bool
WriteSingleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
)
bool
WriteSingleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
)
{
{
out
<<
"'"
;
out
<<
"'"
;
for
(
std
::
size_t
i
=
0
;
i
<
str
.
size
();
i
++
)
{
int
codePoint
;
char
ch
=
str
[
i
];
for
(
std
::
string
::
const_iterator
i
=
str
.
begin
();
if
(
!
IsPrintable
(
ch
))
GetNextCodePointAndAdvance
(
codePoint
,
i
,
str
.
end
());
return
false
;
)
{
if
(
ch
==
'\''
)
if
(
codePoint
==
'\n'
)
return
false
;
// We can't handle a new line and the attendant indentation yet
if
(
codePoint
==
'\''
)
out
<<
"''"
;
out
<<
"''"
;
else
else
out
<<
ch
;
WriteCodePoint
(
out
,
codePoint
)
;
}
}
out
<<
"'"
;
out
<<
"'"
;
return
true
;
return
true
;
}
}
bool
WriteDoubleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
)
bool
WriteDoubleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
escapeNonAscii
)
{
{
out
<<
"
\"
"
;
out
<<
"
\"
"
;
for
(
StrIter
it
=
str
.
begin
();
it
!=
str
.
end
();
++
it
)
{
int
codePoint
;
char
ch
=
*
it
;
for
(
std
::
string
::
const_iterator
i
=
str
.
begin
();
if
(
IsPrintable
(
ch
))
{
GetNextCodePointAndAdvance
(
codePoint
,
i
,
str
.
end
());
if
(
ch
==
'\"'
)
)
out
<<
"
\\\"
"
;
{
else
if
(
ch
==
'\\'
)
if
(
codePoint
==
'\"'
)
out
<<
"
\\\\
"
;
out
<<
"
\\\"
"
;
else
else
if
(
codePoint
==
'\\'
)
out
<<
ch
;
out
<<
"
\\\\
"
;
}
else
{
else
if
(
codePoint
<
0x20
||
(
codePoint
>=
0x80
&&
codePoint
<=
0xA0
))
// Control characters and non-breaking space
int
bytesRead
=
WriteNonPrintable
(
out
,
it
,
str
.
end
());
WriteDoubleQuoteEscapeSequence
(
out
,
codePoint
);
if
(
bytesRead
>=
1
)
else
if
(
codePoint
==
0xFEFF
)
// Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
it
+=
(
bytesRead
-
1
);
WriteDoubleQuoteEscapeSequence
(
out
,
codePoint
);
}
else
if
(
escapeNonAscii
&&
codePoint
>
0x7E
)
WriteDoubleQuoteEscapeSequence
(
out
,
codePoint
);
else
WriteCodePoint
(
out
,
codePoint
);
}
}
out
<<
"
\"
"
;
out
<<
"
\"
"
;
return
true
;
return
true
;
...
@@ -209,11 +252,15 @@ namespace YAML
...
@@ -209,11 +252,15 @@ namespace YAML
{
{
out
<<
"|
\n
"
;
out
<<
"|
\n
"
;
out
<<
IndentTo
(
indent
);
out
<<
IndentTo
(
indent
);
for
(
std
::
size_t
i
=
0
;
i
<
str
.
size
();
i
++
)
{
int
codePoint
;
if
(
str
[
i
]
==
'\n'
)
for
(
std
::
string
::
const_iterator
i
=
str
.
begin
();
out
<<
"
\n
"
<<
IndentTo
(
indent
);
GetNextCodePointAndAdvance
(
codePoint
,
i
,
str
.
end
());
)
{
if
(
codePoint
==
'\n'
)
out
<<
"
\n
"
<<
IndentTo
(
indent
);
else
else
out
<<
str
[
i
]
;
WriteCodePoint
(
out
,
codePoint
)
;
}
}
return
true
;
return
true
;
}
}
...
@@ -222,11 +269,15 @@ namespace YAML
...
@@ -222,11 +269,15 @@ namespace YAML
{
{
unsigned
curIndent
=
out
.
col
();
unsigned
curIndent
=
out
.
col
();
out
<<
"#"
<<
Indentation
(
postCommentIndent
);
out
<<
"#"
<<
Indentation
(
postCommentIndent
);
for
(
std
::
size_t
i
=
0
;
i
<
str
.
size
();
i
++
)
{
int
codePoint
;
if
(
str
[
i
]
==
'\n'
)
for
(
std
::
string
::
const_iterator
i
=
str
.
begin
();
GetNextCodePointAndAdvance
(
codePoint
,
i
,
str
.
end
());
)
{
if
(
codePoint
==
'\n'
)
out
<<
"
\n
"
<<
IndentTo
(
curIndent
)
<<
"#"
<<
Indentation
(
postCommentIndent
);
out
<<
"
\n
"
<<
IndentTo
(
curIndent
)
<<
"#"
<<
Indentation
(
postCommentIndent
);
else
else
out
<<
str
[
i
]
;
WriteCodePoint
(
out
,
codePoint
)
;
}
}
return
true
;
return
true
;
}
}
...
@@ -234,25 +285,13 @@ namespace YAML
...
@@ -234,25 +285,13 @@ namespace YAML
bool
WriteAlias
(
ostream
&
out
,
const
std
::
string
&
str
)
bool
WriteAlias
(
ostream
&
out
,
const
std
::
string
&
str
)
{
{
out
<<
"*"
;
out
<<
"*"
;
for
(
std
::
size_t
i
=
0
;
i
<
str
.
size
();
i
++
)
{
return
WriteAliasName
(
out
,
str
);
if
(
!
IsPrintable
(
str
[
i
])
||
str
[
i
]
==
' '
||
str
[
i
]
==
'\t'
||
str
[
i
]
==
'\n'
||
str
[
i
]
==
'\r'
)
return
false
;
out
<<
str
[
i
];
}
return
true
;
}
}
bool
WriteAnchor
(
ostream
&
out
,
const
std
::
string
&
str
)
bool
WriteAnchor
(
ostream
&
out
,
const
std
::
string
&
str
)
{
{
out
<<
"&"
;
out
<<
"&"
;
for
(
std
::
size_t
i
=
0
;
i
<
str
.
size
();
i
++
)
{
return
WriteAliasName
(
out
,
str
);
if
(
!
IsPrintable
(
str
[
i
])
||
str
[
i
]
==
' '
||
str
[
i
]
==
'\t'
||
str
[
i
]
==
'\n'
||
str
[
i
]
==
'\r'
)
return
false
;
out
<<
str
[
i
];
}
return
true
;
}
}
}
}
}
}
...
...
src/emitterutils.h
View file @
fa0af88d
...
@@ -11,9 +11,9 @@ namespace YAML
...
@@ -11,9 +11,9 @@ namespace YAML
{
{
namespace
Utils
namespace
Utils
{
{
bool
WriteString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
inFlow
);
bool
WriteString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
inFlow
,
bool
escapeNonAscii
);
bool
WriteSingleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
);
bool
WriteSingleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
);
bool
WriteDoubleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
);
bool
WriteDoubleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
escapeNonAscii
);
bool
WriteLiteralString
(
ostream
&
out
,
const
std
::
string
&
str
,
int
indent
);
bool
WriteLiteralString
(
ostream
&
out
,
const
std
::
string
&
str
,
int
indent
);
bool
WriteComment
(
ostream
&
out
,
const
std
::
string
&
str
,
int
postCommentIndent
);
bool
WriteComment
(
ostream
&
out
,
const
std
::
string
&
str
,
int
postCommentIndent
);
bool
WriteAlias
(
ostream
&
out
,
const
std
::
string
&
str
);
bool
WriteAlias
(
ostream
&
out
,
const
std
::
string
&
str
);
...
...
src/exp.cpp
View file @
fa0af88d
...
@@ -28,9 +28,9 @@ namespace YAML
...
@@ -28,9 +28,9 @@ namespace YAML
return
value
;
return
value
;
}
}
std
::
string
Str
(
char
ch
)
std
::
string
Str
(
unsigned
ch
)
{
{
return
std
::
string
(
""
)
+
ch
;
return
std
::
string
(
""
)
+
static_cast
<
char
>
(
ch
)
;
}
}
// Escape
// Escape
...
...
src/exp.h
View file @
fa0af88d
...
@@ -26,7 +26,12 @@ namespace YAML
...
@@ -26,7 +26,12 @@ namespace YAML
const
RegEx
Alpha
=
RegEx
(
'a'
,
'z'
)
||
RegEx
(
'A'
,
'Z'
);
const
RegEx
Alpha
=
RegEx
(
'a'
,
'z'
)
||
RegEx
(
'A'
,
'Z'
);
const
RegEx
AlphaNumeric
=
Alpha
||
Digit
;
const
RegEx
AlphaNumeric
=
Alpha
||
Digit
;
const
RegEx
Hex
=
Digit
||
RegEx
(
'A'
,
'F'
)
||
RegEx
(
'a'
,
'f'
);
const
RegEx
Hex
=
Digit
||
RegEx
(
'A'
,
'F'
)
||
RegEx
(
'a'
,
'f'
);
const
RegEx
Printable
=
RegEx
(
0x20
,
0x7E
);
// Valid Unicode code points that are not part of c-printable (YAML 1.2, sec. 5.1)
const
RegEx
NotPrintable
=
RegEx
(
0
)
||
RegEx
(
"
\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x7F
"
,
REGEX_OR
)
||
RegEx
(
0x0E
,
0x1F
)
||
(
RegEx
(
'\xC2'
)
+
(
RegEx
(
'\x80'
,
'\x84'
)
||
RegEx
(
'\x86'
,
'\x9F'
)));
const
RegEx
Utf8_ByteOrderMark
=
RegEx
(
"
\xEF\xBB\xBF
"
);
// actual tags
// actual tags
...
...
yaml-reader/emittertests.cpp
View file @
fa0af88d
...
@@ -448,12 +448,25 @@ namespace Test
...
@@ -448,12 +448,25 @@ namespace Test
desiredOutput
=
"- ~
\n
-
\n
null value: ~
\n
~: null key"
;
desiredOutput
=
"- ~
\n
-
\n
null value: ~
\n
~: null key"
;
}
}
void
Unicode
(
YAML
::
Emitter
&
out
,
std
::
string
&
desiredOutput
)
void
Escaped
Unicode
(
YAML
::
Emitter
&
out
,
std
::
string
&
desiredOutput
)
{
{
out
<<
"
\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2
"
;
out
<<
YAML
::
EscapeNonAscii
<<
"
\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2
"
;
desiredOutput
=
"
\"
$
\\
xa2
\\
u20ac
\\
U00024b62
\"
"
;
desiredOutput
=
"
\"
$
\\
xa2
\\
u20ac
\\
U00024b62
\"
"
;
}
}
void
Unicode
(
YAML
::
Emitter
&
out
,
std
::
string
&
desiredOutput
)
{
out
<<
"
\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2
"
;
desiredOutput
=
"
\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2
"
;
}
void
DoubleQuotedUnicode
(
YAML
::
Emitter
&
out
,
std
::
string
&
desiredOutput
)
{
out
<<
YAML
::
DoubleQuoted
<<
"
\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2
"
;
desiredOutput
=
"
\"\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2\"
"
;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
// incorrect emitting
// incorrect emitting
...
@@ -616,7 +629,9 @@ namespace Test
...
@@ -616,7 +629,9 @@ namespace Test
RunEmitterTest
(
&
Emitter
::
SimpleGlobalSettings
,
"simple global settings"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
SimpleGlobalSettings
,
"simple global settings"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
ComplexGlobalSettings
,
"complex global settings"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
ComplexGlobalSettings
,
"complex global settings"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
Null
,
"null"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
Null
,
"null"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
EscapedUnicode
,
"escaped unicode"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
Unicode
,
"unicode"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
Unicode
,
"unicode"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
DoubleQuotedUnicode
,
"double quoted unicode"
,
passed
,
total
);
RunEmitterErrorTest
(
&
Emitter
::
ExtraEndSeq
,
"extra EndSeq"
,
passed
,
total
);
RunEmitterErrorTest
(
&
Emitter
::
ExtraEndSeq
,
"extra EndSeq"
,
passed
,
total
);
RunEmitterErrorTest
(
&
Emitter
::
ExtraEndMap
,
"extra EndMap"
,
passed
,
total
);
RunEmitterErrorTest
(
&
Emitter
::
ExtraEndMap
,
"extra EndMap"
,
passed
,
total
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment