Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
pybind11
Commits
6e39b765
Commit
6e39b765
authored
Dec 19, 2019
by
Vemund Handeland
Committed by
Wenzel Jakob
Dec 19, 2019
Browse files
Add C++20 char8_t/u8string support (#2026)
* Fix test build in C++20 * Add C++20 char8_t/u8string support
parent
37d04abd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
5 deletions
+72
-5
include/pybind11/cast.h
include/pybind11/cast.h
+13
-3
tests/test_builtin_casters.cpp
tests/test_builtin_casters.cpp
+20
-2
tests/test_builtin_casters.py
tests/test_builtin_casters.py
+39
-0
No files found.
include/pybind11/cast.h
View file @
6e39b765
...
@@ -32,6 +32,10 @@
...
@@ -32,6 +32,10 @@
#include <string_view>
#include <string_view>
#endif
#endif
#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
# define PYBIND11_HAS_U8STRING
#endif
NAMESPACE_BEGIN
(
PYBIND11_NAMESPACE
)
NAMESPACE_BEGIN
(
PYBIND11_NAMESPACE
)
NAMESPACE_BEGIN
(
detail
)
NAMESPACE_BEGIN
(
detail
)
...
@@ -988,6 +992,9 @@ public:
...
@@ -988,6 +992,9 @@ public:
template
<
typename
CharT
>
using
is_std_char_type
=
any_of
<
template
<
typename
CharT
>
using
is_std_char_type
=
any_of
<
std
::
is_same
<
CharT
,
char
>
,
/* std::string */
std
::
is_same
<
CharT
,
char
>
,
/* std::string */
#if defined(PYBIND11_HAS_U8STRING)
std
::
is_same
<
CharT
,
char8_t
>
,
/* std::u8string */
#endif
std
::
is_same
<
CharT
,
char16_t
>
,
/* std::u16string */
std
::
is_same
<
CharT
,
char16_t
>
,
/* std::u16string */
std
::
is_same
<
CharT
,
char32_t
>
,
/* std::u32string */
std
::
is_same
<
CharT
,
char32_t
>
,
/* std::u32string */
std
::
is_same
<
CharT
,
wchar_t
>
/* std::wstring */
std
::
is_same
<
CharT
,
wchar_t
>
/* std::wstring */
...
@@ -1191,6 +1198,9 @@ template <typename StringType, bool IsView = false> struct string_caster {
...
@@ -1191,6 +1198,9 @@ template <typename StringType, bool IsView = false> struct string_caster {
// Simplify life by being able to assume standard char sizes (the standard only guarantees
// Simplify life by being able to assume standard char sizes (the standard only guarantees
// minimums, but Python requires exact sizes)
// minimums, but Python requires exact sizes)
static_assert
(
!
std
::
is_same
<
CharT
,
char
>::
value
||
sizeof
(
CharT
)
==
1
,
"Unsupported char size != 1"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char
>::
value
||
sizeof
(
CharT
)
==
1
,
"Unsupported char size != 1"
);
#if defined(PYBIND11_HAS_U8STRING)
static_assert
(
!
std
::
is_same
<
CharT
,
char8_t
>::
value
||
sizeof
(
CharT
)
==
1
,
"Unsupported char8_t size != 1"
);
#endif
static_assert
(
!
std
::
is_same
<
CharT
,
char16_t
>::
value
||
sizeof
(
CharT
)
==
2
,
"Unsupported char16_t size != 2"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char16_t
>::
value
||
sizeof
(
CharT
)
==
2
,
"Unsupported char16_t size != 2"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char32_t
>::
value
||
sizeof
(
CharT
)
==
4
,
"Unsupported char32_t size != 4"
);
static_assert
(
!
std
::
is_same
<
CharT
,
char32_t
>::
value
||
sizeof
(
CharT
)
==
4
,
"Unsupported char32_t size != 4"
);
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
// wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
...
@@ -1209,7 +1219,7 @@ template <typename StringType, bool IsView = false> struct string_caster {
...
@@ -1209,7 +1219,7 @@ template <typename StringType, bool IsView = false> struct string_caster {
#if PY_MAJOR_VERSION >= 3
#if PY_MAJOR_VERSION >= 3
return
load_bytes
(
load_src
);
return
load_bytes
(
load_src
);
#else
#else
if
(
s
izeof
(
CharT
)
==
1
)
{
if
(
s
td
::
is_same
<
CharT
,
char
>::
value
)
{
return
load_bytes
(
load_src
);
return
load_bytes
(
load_src
);
}
}
...
@@ -1269,7 +1279,7 @@ private:
...
@@ -1269,7 +1279,7 @@ private:
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// which supports loading a unicode from a str, doesn't take this path.
// which supports loading a unicode from a str, doesn't take this path.
template
<
typename
C
=
CharT
>
template
<
typename
C
=
CharT
>
bool
load_bytes
(
enable_if_t
<
s
izeof
(
C
)
==
1
,
handle
>
src
)
{
bool
load_bytes
(
enable_if_t
<
s
td
::
is_same
<
C
,
char
>::
value
,
handle
>
src
)
{
if
(
PYBIND11_BYTES_CHECK
(
src
.
ptr
()))
{
if
(
PYBIND11_BYTES_CHECK
(
src
.
ptr
()))
{
// We were passed a Python 3 raw bytes; accept it into a std::string or char*
// We were passed a Python 3 raw bytes; accept it into a std::string or char*
// without any encoding attempt.
// without any encoding attempt.
...
@@ -1284,7 +1294,7 @@ private:
...
@@ -1284,7 +1294,7 @@ private:
}
}
template
<
typename
C
=
CharT
>
template
<
typename
C
=
CharT
>
bool
load_bytes
(
enable_if_t
<
sizeof
(
C
)
!=
1
,
handle
>
)
{
return
false
;
}
bool
load_bytes
(
enable_if_t
<
!
std
::
is_same
<
C
,
char
>::
value
,
handle
>
)
{
return
false
;
}
};
};
template
<
typename
CharT
,
class
Traits
,
class
Allocator
>
template
<
typename
CharT
,
class
Traits
,
class
Allocator
>
...
...
tests/test_builtin_casters.cpp
View file @
6e39b765
...
@@ -30,7 +30,7 @@ TEST_SUBMODULE(builtin_casters, m) {
...
@@ -30,7 +30,7 @@ TEST_SUBMODULE(builtin_casters, m) {
else
{
wstr
.
push_back
((
wchar_t
)
mathbfA32
);
}
// 𝐀, utf32
else
{
wstr
.
push_back
((
wchar_t
)
mathbfA32
);
}
// 𝐀, utf32
wstr
.
push_back
(
0x7a
);
// z
wstr
.
push_back
(
0x7a
);
// z
m
.
def
(
"good_utf8_string"
,
[]()
{
return
std
::
string
(
u8"Say utf8\u203d \U0001f382 \U0001d400"
);
});
// Say utf8‽ 🎂 𝐀
m
.
def
(
"good_utf8_string"
,
[]()
{
return
std
::
string
(
(
const
char
*
)
u8"Say utf8\u203d \U0001f382 \U0001d400"
);
});
// Say utf8‽ 🎂 𝐀
m
.
def
(
"good_utf16_string"
,
[
=
]()
{
return
std
::
u16string
({
b16
,
ib16
,
cake16_1
,
cake16_2
,
mathbfA16_1
,
mathbfA16_2
,
z16
});
});
// b‽🎂𝐀z
m
.
def
(
"good_utf16_string"
,
[
=
]()
{
return
std
::
u16string
({
b16
,
ib16
,
cake16_1
,
cake16_2
,
mathbfA16_1
,
mathbfA16_2
,
z16
});
});
// b‽🎂𝐀z
m
.
def
(
"good_utf32_string"
,
[
=
]()
{
return
std
::
u32string
({
a32
,
mathbfA32
,
cake32
,
ib32
,
z32
});
});
// a𝐀🎂‽z
m
.
def
(
"good_utf32_string"
,
[
=
]()
{
return
std
::
u32string
({
a32
,
mathbfA32
,
cake32
,
ib32
,
z32
});
});
// a𝐀🎂‽z
m
.
def
(
"good_wchar_string"
,
[
=
]()
{
return
wstr
;
});
// a‽𝐀z
m
.
def
(
"good_wchar_string"
,
[
=
]()
{
return
wstr
;
});
// a‽𝐀z
...
@@ -60,6 +60,18 @@ TEST_SUBMODULE(builtin_casters, m) {
...
@@ -60,6 +60,18 @@ TEST_SUBMODULE(builtin_casters, m) {
m
.
def
(
"strlen"
,
[](
char
*
s
)
{
return
strlen
(
s
);
});
m
.
def
(
"strlen"
,
[](
char
*
s
)
{
return
strlen
(
s
);
});
m
.
def
(
"string_length"
,
[](
std
::
string
s
)
{
return
s
.
length
();
});
m
.
def
(
"string_length"
,
[](
std
::
string
s
)
{
return
s
.
length
();
});
#ifdef PYBIND11_HAS_U8STRING
m
.
attr
(
"has_u8string"
)
=
true
;
m
.
def
(
"good_utf8_u8string"
,
[]()
{
return
std
::
u8string
(
u8"Say utf8\u203d \U0001f382 \U0001d400"
);
});
// Say utf8‽ 🎂 𝐀
m
.
def
(
"bad_utf8_u8string"
,
[]()
{
return
std
::
u8string
((
const
char8_t
*
)
"abc
\xd0
"
"def"
);
});
m
.
def
(
"u8_char8_Z"
,
[]()
->
char8_t
{
return
u8'Z'
;
});
// test_single_char_arguments
m
.
def
(
"ord_char8"
,
[](
char8_t
c
)
->
int
{
return
static_cast
<
unsigned
char
>
(
c
);
});
m
.
def
(
"ord_char8_lv"
,
[](
char8_t
&
c
)
->
int
{
return
static_cast
<
unsigned
char
>
(
c
);
});
#endif
// test_string_view
// test_string_view
#ifdef PYBIND11_HAS_STRING_VIEW
#ifdef PYBIND11_HAS_STRING_VIEW
m
.
attr
(
"has_string_view"
)
=
true
;
m
.
attr
(
"has_string_view"
)
=
true
;
...
@@ -69,9 +81,15 @@ TEST_SUBMODULE(builtin_casters, m) {
...
@@ -69,9 +81,15 @@ TEST_SUBMODULE(builtin_casters, m) {
m
.
def
(
"string_view_chars"
,
[](
std
::
string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
std
::
uint8_t
)
c
);
return
l
;
});
m
.
def
(
"string_view_chars"
,
[](
std
::
string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
std
::
uint8_t
)
c
);
return
l
;
});
m
.
def
(
"string_view16_chars"
,
[](
std
::
u16string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view16_chars"
,
[](
std
::
u16string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view32_chars"
,
[](
std
::
u32string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view32_chars"
,
[](
std
::
u32string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
int
)
c
);
return
l
;
});
m
.
def
(
"string_view_return"
,
[]()
{
return
std
::
string_view
(
u8"utf8 secret \U0001f382"
);
});
m
.
def
(
"string_view_return"
,
[]()
{
return
std
::
string_view
(
(
const
char
*
)
u8"utf8 secret \U0001f382"
);
});
m
.
def
(
"string_view16_return"
,
[]()
{
return
std
::
u16string_view
(
u"utf16 secret \U0001f382"
);
});
m
.
def
(
"string_view16_return"
,
[]()
{
return
std
::
u16string_view
(
u"utf16 secret \U0001f382"
);
});
m
.
def
(
"string_view32_return"
,
[]()
{
return
std
::
u32string_view
(
U"utf32 secret \U0001f382"
);
});
m
.
def
(
"string_view32_return"
,
[]()
{
return
std
::
u32string_view
(
U"utf32 secret \U0001f382"
);
});
# ifdef PYBIND11_HAS_U8STRING
m
.
def
(
"string_view8_print"
,
[](
std
::
u8string_view
s
)
{
py
::
print
(
s
,
s
.
size
());
});
m
.
def
(
"string_view8_chars"
,
[](
std
::
u8string_view
s
)
{
py
::
list
l
;
for
(
auto
c
:
s
)
l
.
append
((
std
::
uint8_t
)
c
);
return
l
;
});
m
.
def
(
"string_view8_return"
,
[]()
{
return
std
::
u8string_view
(
u8"utf8 secret \U0001f382"
);
});
# endif
#endif
#endif
// test_integer_casting
// test_integer_casting
...
...
tests/test_builtin_casters.py
View file @
6e39b765
...
@@ -15,6 +15,8 @@ def test_unicode_conversion():
...
@@ -15,6 +15,8 @@ def test_unicode_conversion():
assert
m
.
good_utf16_string
()
==
u
"b‽🎂𝐀z"
assert
m
.
good_utf16_string
()
==
u
"b‽🎂𝐀z"
assert
m
.
good_utf32_string
()
==
u
"a𝐀🎂‽z"
assert
m
.
good_utf32_string
()
==
u
"a𝐀🎂‽z"
assert
m
.
good_wchar_string
()
==
u
"a⸘𝐀z"
assert
m
.
good_wchar_string
()
==
u
"a⸘𝐀z"
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
good_utf8_u8string
()
==
u
"Say utf8‽ 🎂 𝐀"
with
pytest
.
raises
(
UnicodeDecodeError
):
with
pytest
.
raises
(
UnicodeDecodeError
):
m
.
bad_utf8_string
()
m
.
bad_utf8_string
()
...
@@ -29,12 +31,17 @@ def test_unicode_conversion():
...
@@ -29,12 +31,17 @@ def test_unicode_conversion():
if
hasattr
(
m
,
"bad_wchar_string"
):
if
hasattr
(
m
,
"bad_wchar_string"
):
with
pytest
.
raises
(
UnicodeDecodeError
):
with
pytest
.
raises
(
UnicodeDecodeError
):
m
.
bad_wchar_string
()
m
.
bad_wchar_string
()
if
hasattr
(
m
,
"has_u8string"
):
with
pytest
.
raises
(
UnicodeDecodeError
):
m
.
bad_utf8_u8string
()
assert
m
.
u8_Z
()
==
'Z'
assert
m
.
u8_Z
()
==
'Z'
assert
m
.
u8_eacute
()
==
u
'é'
assert
m
.
u8_eacute
()
==
u
'é'
assert
m
.
u16_ibang
()
==
u
'‽'
assert
m
.
u16_ibang
()
==
u
'‽'
assert
m
.
u32_mathbfA
()
==
u
'𝐀'
assert
m
.
u32_mathbfA
()
==
u
'𝐀'
assert
m
.
wchar_heart
()
==
u
'♥'
assert
m
.
wchar_heart
()
==
u
'♥'
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
u8_char8_Z
()
==
'Z'
def
test_single_char_arguments
():
def
test_single_char_arguments
():
...
@@ -92,6 +99,17 @@ def test_single_char_arguments():
...
@@ -92,6 +99,17 @@ def test_single_char_arguments():
assert
m
.
ord_wchar
(
u
'aa'
)
assert
m
.
ord_wchar
(
u
'aa'
)
assert
str
(
excinfo
.
value
)
==
toolong_message
assert
str
(
excinfo
.
value
)
==
toolong_message
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
ord_char8
(
u
'a'
)
==
0x61
# simple ASCII
assert
m
.
ord_char8_lv
(
u
'b'
)
==
0x62
assert
m
.
ord_char8
(
u
'é'
)
==
0xE9
# requires 2 bytes in utf-8, but can be stuffed in a char
with
pytest
.
raises
(
ValueError
)
as
excinfo
:
assert
m
.
ord_char8
(
u
'Ā'
)
==
0x100
# requires 2 bytes, doesn't fit in a char
assert
str
(
excinfo
.
value
)
==
toobig_message
(
0x100
)
with
pytest
.
raises
(
ValueError
)
as
excinfo
:
assert
m
.
ord_char8
(
u
'ab'
)
assert
str
(
excinfo
.
value
)
==
toolong_message
def
test_bytes_to_string
():
def
test_bytes_to_string
():
"""Tests the ability to pass bytes to C++ string-accepting functions. Note that this is
"""Tests the ability to pass bytes to C++ string-accepting functions. Note that this is
...
@@ -116,10 +134,15 @@ def test_string_view(capture):
...
@@ -116,10 +134,15 @@ def test_string_view(capture):
assert
m
.
string_view_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xf0
,
0x9f
,
0x8e
,
0x82
]
assert
m
.
string_view_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xf0
,
0x9f
,
0x8e
,
0x82
]
assert
m
.
string_view16_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xd83c
,
0xdf82
]
assert
m
.
string_view16_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xd83c
,
0xdf82
]
assert
m
.
string_view32_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
127874
]
assert
m
.
string_view32_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
127874
]
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
string_view8_chars
(
"Hi"
)
==
[
72
,
105
]
assert
m
.
string_view8_chars
(
"Hi 🎂"
)
==
[
72
,
105
,
32
,
0xf0
,
0x9f
,
0x8e
,
0x82
]
assert
m
.
string_view_return
()
==
"utf8 secret 🎂"
assert
m
.
string_view_return
()
==
"utf8 secret 🎂"
assert
m
.
string_view16_return
()
==
"utf16 secret 🎂"
assert
m
.
string_view16_return
()
==
"utf16 secret 🎂"
assert
m
.
string_view32_return
()
==
"utf32 secret 🎂"
assert
m
.
string_view32_return
()
==
"utf32 secret 🎂"
if
hasattr
(
m
,
"has_u8string"
):
assert
m
.
string_view8_return
()
==
"utf8 secret 🎂"
with
capture
:
with
capture
:
m
.
string_view_print
(
"Hi"
)
m
.
string_view_print
(
"Hi"
)
...
@@ -132,6 +155,14 @@ def test_string_view(capture):
...
@@ -132,6 +155,14 @@ def test_string_view(capture):
utf16 🎂 8
utf16 🎂 8
utf32 🎂 7
utf32 🎂 7
"""
"""
if
hasattr
(
m
,
"has_u8string"
):
with
capture
:
m
.
string_view8_print
(
"Hi"
)
m
.
string_view8_print
(
"utf8 🎂"
)
assert
capture
==
"""
Hi 2
utf8 🎂 9
"""
with
capture
:
with
capture
:
m
.
string_view_print
(
"Hi, ascii"
)
m
.
string_view_print
(
"Hi, ascii"
)
...
@@ -144,6 +175,14 @@ def test_string_view(capture):
...
@@ -144,6 +175,14 @@ def test_string_view(capture):
Hi, utf16 🎂 12
Hi, utf16 🎂 12
Hi, utf32 🎂 11
Hi, utf32 🎂 11
"""
"""
if
hasattr
(
m
,
"has_u8string"
):
with
capture
:
m
.
string_view8_print
(
"Hi, ascii"
)
m
.
string_view8_print
(
"Hi, utf8 🎂"
)
assert
capture
==
"""
Hi, ascii 9
Hi, utf8 🎂 13
"""
def
test_integer_casting
():
def
test_integer_casting
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment