Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
yaml-cpp
Commits
d0b5bf4b
Commit
d0b5bf4b
authored
Oct 07, 2009
by
Jesse Beder
Browse files
Fixed the emitter unicode output
parent
7db39e66
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
104 additions
and
6 deletions
+104
-6
src/emitterutils.cpp
src/emitterutils.cpp
+96
-6
yaml-reader/emittertests.cpp
yaml-reader/emittertests.cpp
+8
-0
No files found.
src/emitterutils.cpp
View file @
d0b5bf4b
...
...
@@ -40,6 +40,99 @@ namespace YAML
return
true
;
}
// ToUnsigned
// . Returns the value of 'ch' as a byte in the range 0..255.
// . The conversion goes through unsigned char first so that a char with the
//   high bit set does not sign-extend into a huge unsigned value.
unsigned ToUnsigned(char ch)
{
	const unsigned char byte = static_cast<unsigned char>(ch);
	return byte;
}
// AdvanceAndGetNextChar
// . Moves 'it' forward by one position and returns the byte it then refers to.
// . If there is no following byte ('it' is already 'end', or 'it' is the last
//   position before 'end'), 'it' is left untouched and 0 is returned.
// . Note: 0 doubles as the "no more input" signal, so a real NUL byte in the
//   input cannot be told apart from running out of input.
unsigned AdvanceAndGetNextChar(std::string::const_iterator& it, std::string::const_iterator end)
{
	// never step past 'end': incrementing an end iterator is undefined behavior
	if(it == end)
		return 0;

	std::string::const_iterator next = it;
	++next;
	if(next == end)
		return 0;

	it = next;
	return ToUnsigned(*it);
}
// WriteUnicode
// . Formats 'value' as an escape sequence with lowercase, zero-padded hex digits:
//   \xNN for values up to 0xFF, \uNNNN for values up to 0xFFFF,
//   and \UNNNNNNNN for everything larger.
std::string WriteUnicode(unsigned value)
{
	// TODO: for the common escaped characters, give their usual symbol

	// start from the widest form and narrow it down for smaller values
	const char *prefix = "\\U";
	int digits = 8;
	if(value <= 0xFF) {
		prefix = "\\x";
		digits = 2;
	} else if(value <= 0xFFFF) {
		prefix = "\\u";
		digits = 4;
	}

	std::stringstream escaped;
	escaped << prefix << std::hex << std::setfill('0') << std::setw(digits) << value;
	return escaped.str();
}
// WriteSingleByte
// . Escapes one byte on its own by passing its value straight to WriteUnicode.
std::string WriteSingleByte(unsigned ch)
{
	std::string escaped = WriteUnicode(ch);
	return escaped;
}
// WriteTwoBytes
// . Escapes the code point encoded by the two-byte UTF-8 sequence 'ch' 'ch1'.
// . Note: if no second byte is provided (signalled by ch1 == 0)
//   then we just write the first one as a single byte.
//   Should we throw an error instead? Or write something else?
//   (The same question goes for the other WriteNBytes functions)
// . If 'ch1' is not a continuation byte (bit pattern 10xxxxxx) the sequence is
//   malformed; both bytes are then escaped one by one instead of being
//   combined into a meaningless code point.
std::string WriteTwoBytes(unsigned ch, unsigned ch1)
{
	if(ch1 == 0)
		return WriteSingleByte(ch);

	// subtracting 0x80 from a non-continuation byte would wrap around (unsigned),
	// so reject it up front
	if((ch1 & 0xC0) != 0x80)
		return WriteSingleByte(ch) + WriteSingleByte(ch1);

	// 110xxxxx 10yyyyyy -> xxxxxyyyyyy
	const unsigned value = ((ch & 0x1F) << 6) | (ch1 & 0x3F);
	return WriteUnicode(value);
}
// WriteThreeBytes
// . Escapes the code point encoded by the three-byte UTF-8 sequence
//   'ch' 'ch1' 'ch2'.
// . A trailing byte equal to 0 signals that the input ended early; the bytes
//   that are present are then escaped one by one.
// . If 'ch1' or 'ch2' is not a continuation byte (bit pattern 10xxxxxx) the
//   sequence is malformed; all three bytes are then escaped one by one instead
//   of being combined into a meaningless code point.
std::string WriteThreeBytes(unsigned ch, unsigned ch1, unsigned ch2)
{
	if(ch1 == 0)
		return WriteSingleByte(ch);
	if(ch2 == 0)
		return WriteSingleByte(ch) + WriteSingleByte(ch1);

	// subtracting 0x80 from a non-continuation byte would wrap around (unsigned),
	// so reject it up front
	if((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80)
		return WriteSingleByte(ch) + WriteSingleByte(ch1) + WriteSingleByte(ch2);

	// 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz
	const unsigned value = ((ch & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | (ch2 & 0x3F);
	return WriteUnicode(value);
}
// WriteFourBytes
// . Escapes the code point encoded by the four-byte UTF-8 sequence
//   'ch' 'ch1' 'ch2' 'ch3'.
// . A trailing byte equal to 0 signals that the input ended early; the bytes
//   that are present are then escaped one by one.
// . The sequence is treated as malformed when the lead byte is not of the form
//   11110xxx, when any trailing byte is not a continuation byte (10xxxxxx), or
//   when the decoded value lies beyond U+10FFFF. In that case all four bytes
//   are escaped one by one instead of emitting an invalid code point.
std::string WriteFourBytes(unsigned ch, unsigned ch1, unsigned ch2, unsigned ch3)
{
	if(ch1 == 0)
		return WriteSingleByte(ch);
	if(ch2 == 0)
		return WriteSingleByte(ch) + WriteSingleByte(ch1);
	if(ch3 == 0)
		return WriteSingleByte(ch) + WriteSingleByte(ch1) + WriteSingleByte(ch2);

	const bool wellFormed =
		(ch & 0xF8) == 0xF0 &&
		(ch1 & 0xC0) == 0x80 &&
		(ch2 & 0xC0) == 0x80 &&
		(ch3 & 0xC0) == 0x80;

	// 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> wwwxxxxxxyyyyyyzzzzzz
	// (masking keeps the arithmetic in range even when the bytes are malformed)
	const unsigned value =
		((ch & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | (ch3 & 0x3F);

	if(!wellFormed || value > 0x10FFFF)
		return WriteSingleByte(ch) + WriteSingleByte(ch1) + WriteSingleByte(ch2) + WriteSingleByte(ch3);

	return WriteUnicode(value);
}
// WriteNonPrintable
// . Escapes the UTF-8 sequence that begins at 'start' and writes the result to 'out'.
// . The first byte decides how many bytes are read (one to four); reading stops
//   early if 'end' is reached.
// . Returns an iterator to the last byte that was consumed ('start' itself when
//   only a single byte was written).
std::string::const_iterator WriteNonPrintable(ostream& out, std::string::const_iterator start, std::string::const_iterator end)
{
	std::string::const_iterator cursor = start;
	const unsigned lead = ToUnsigned(*cursor);

	// this may include invalid first characters (0x80 - 0xBF)
	// or "overlong" UTF-8 (0xC0 - 0xC1)
	// We just copy them as bytes
	// TODO: should we do something else? throw an error?
	if(lead <= 0xC1) {
		out << WriteSingleByte(lead);
		return start;
	}

	// every remaining form has at least one trailing byte
	const unsigned second = AdvanceAndGetNextChar(cursor, end);
	if(lead <= 0xDF) {
		out << WriteTwoBytes(lead, second);
		return cursor;
	}

	const unsigned third = AdvanceAndGetNextChar(cursor, end);
	if(lead <= 0xEF) {
		out << WriteThreeBytes(lead, second, third);
		return cursor;
	}

	const unsigned fourth = AdvanceAndGetNextChar(cursor, end);
	out << WriteFourBytes(lead, second, third, fourth);
	return cursor;
}
}
bool
WriteString
(
ostream
&
out
,
const
std
::
string
&
str
,
bool
inFlow
)
...
...
@@ -71,8 +164,8 @@ namespace YAML
bool
WriteDoubleQuotedString
(
ostream
&
out
,
const
std
::
string
&
str
)
{
out
<<
"
\"
"
;
for
(
std
::
s
ize_t
i
=
0
;
i
<
str
.
size
();
i
++
)
{
char
ch
=
str
[
i
]
;
for
(
std
::
s
tring
::
const_iterator
it
=
str
.
begin
();
it
!=
str
.
end
();
++
it
)
{
char
ch
=
*
it
;
if
(
IsPrintable
(
ch
))
{
if
(
ch
==
'\"'
)
out
<<
"
\\\"
"
;
...
...
@@ -81,10 +174,7 @@ namespace YAML
else
out
<<
ch
;
}
else
{
// TODO: for the common escaped characters, give their usual symbol
std
::
stringstream
str
;
str
<<
"
\\
x"
<<
std
::
hex
<<
std
::
setfill
(
'0'
)
<<
std
::
setw
(
2
)
<<
static_cast
<
unsigned
int
>
(
static_cast
<
unsigned
char
>
(
ch
));
out
<<
str
.
str
();
it
=
WriteNonPrintable
(
out
,
it
,
str
.
end
());
}
}
out
<<
"
\"
"
;
...
...
yaml-reader/emittertests.cpp
View file @
d0b5bf4b
...
...
@@ -447,6 +447,13 @@ namespace Test
desiredOutput
=
"- ~
\n
-
\n
null value: ~
\n
~: null key"
;
}
// Unicode
// . Emitter test case: streams one string made of '$' (\x24) followed by a
//   two-byte, a three-byte and a four-byte UTF-8 sequence into 'out'.
// . 'desiredOutput' is set to the expected emitter output: a double-quoted
//   string in which the three multi-byte sequences appear as the escapes
//   \xa2, \u20ac and \U00024b62.
void
Unicode
(
YAML
::
Emitter
&
out
,
std
::
string
&
desiredOutput
)
{
out
<<
"
\x24
\xC2\xA2
\xE2\x82\xAC
\xF0\xA4\xAD\xA2
"
;
desiredOutput
=
"
\"
$
\\
xa2
\\
u20ac
\\
U00024b62
\"
"
;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
// incorrect emitting
...
...
@@ -609,6 +616,7 @@ namespace Test
RunEmitterTest
(
&
Emitter
::
SimpleGlobalSettings
,
"simple global settings"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
ComplexGlobalSettings
,
"complex global settings"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
Null
,
"null"
,
passed
,
total
);
RunEmitterTest
(
&
Emitter
::
Unicode
,
"unicode"
,
passed
,
total
);
RunEmitterErrorTest
(
&
Emitter
::
ExtraEndSeq
,
"extra EndSeq"
,
passed
,
total
);
RunEmitterErrorTest
(
&
Emitter
::
ExtraEndMap
,
"extra EndMap"
,
passed
,
total
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment