Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ycai
simbricks
Commits
c72d8fc2
Commit
c72d8fc2
authored
Sep 16, 2020
by
Antoine Kaufmann
Browse files
i40e: TSO support
parent
e19aba65
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
246 additions
and
39 deletions
+246
-39
experiments/guests/gem5-pair-i40e-client/run.sh
experiments/guests/gem5-pair-i40e-client/run.sh
+1
-1
experiments/guests/gem5-pair-i40e-server/run.sh
experiments/guests/gem5-pair-i40e-server/run.sh
+1
-1
experiments/guests/qemu-pair-i40e-client/run.sh
experiments/guests/qemu-pair-i40e-client/run.sh
+1
-1
experiments/guests/qemu-pair-i40e-server/run.sh
experiments/guests/qemu-pair-i40e-server/run.sh
+1
-1
i40e_bm/i40e_bm.h
i40e_bm/i40e_bm.h
+10
-0
i40e_bm/i40e_lan.cc
i40e_bm/i40e_lan.cc
+161
-35
i40e_bm/xsums.cc
i40e_bm/xsums.cc
+71
-0
No files found.
experiments/guests/gem5-pair-i40e-client/run.sh
View file @
c72d8fc2
...
...
@@ -3,7 +3,7 @@ m5 checkpoint
modprobe i40e
ip
link set
dev eth0 up
ip addr add 192.168.64.2/24 dev eth0
ethtool
-K
eth0 tso off
#
ethtool -K eth0 tso off
sleep
2
iperf
-l
1M
-w
1M
-c
192.168.64.1
-i
1
-P
4
m5
exit
experiments/guests/gem5-pair-i40e-server/run.sh
View file @
c72d8fc2
...
...
@@ -3,6 +3,6 @@ m5 checkpoint
modprobe i40e
ip
link set
dev eth0 up
ip addr add 192.168.64.1/24 dev eth0
ethtool
-K
eth0 tso off
#
ethtool -K eth0 tso off
iperf
-s
-l
1M
-w
1M
-P
4
m5
exit
experiments/guests/qemu-pair-i40e-client/run.sh
View file @
c72d8fc2
...
...
@@ -6,7 +6,7 @@ sysctl -w net.core.busy_poll=50
sysctl
-w
net.core.busy_read
=
50
ip
link set
dev eth0 up
ip addr add 192.168.64.2/24 dev eth0
ethtool
-K
eth0 tso off
#
ethtool -K eth0 tso off
sleep
2
iperf
-l
1M
-w
1M
-c
192.168.64.1
-i
1
-P
4
poweroff
-f
experiments/guests/qemu-pair-i40e-server/run.sh
View file @
c72d8fc2
...
...
@@ -6,6 +6,6 @@ sysctl -w net.core.busy_poll=50
sysctl
-w
net.core.busy_read
=
50
ip
link set
dev eth0 up
ip addr add 192.168.64.1/24 dev eth0
ethtool
-K
eth0 tso off
#
ethtool -K eth0 tso off
iperf
-s
-l
1M
-w
1M
-P
4
poweroff
-f
i40e_bm/i40e_bm.h
View file @
c72d8fc2
...
...
@@ -355,6 +355,8 @@ class lan_queue_tx : public lan_queue_base {
};
uint8_t
pktbuf
[
MTU
];
uint32_t
tso_off
;
uint32_t
tso_len
;
std
::
deque
<
tx_desc_ctx
*>
ready_segments
;
bool
hwb
;
...
...
@@ -550,4 +552,12 @@ protected:
// places the tcp checksum in the packet (assuming ipv4)
void
xsum_tcp
(
void
*
tcphdr
,
size_t
l4len
);
// calculates the full ipv4 & tcp checksum without assuming any pseudo header
// xsums
void
xsum_tcpip_tso
(
void
*
iphdr
,
uint8_t
iplen
,
uint8_t
l4len
,
uint16_t
paylen
);
void
tso_postupdate_header
(
void
*
iphdr
,
uint8_t
iplen
,
uint8_t
l4len
,
uint16_t
paylen
);
}
// namespace corundum
i40e_bm/i40e_lan.cc
View file @
c72d8fc2
...
...
@@ -319,6 +319,8 @@ lan_queue_tx::lan_queue_tx(lan &lanmgr_, uint32_t ®_tail_, size_t idx_,
/**
 * Reset this transmit queue.
 *
 * Zeroes the TSO progress counters (tso_off / tso_len), drops every
 * descriptor still waiting in ready_segments, and then delegates to
 * queue_base::reset().
 */
void lan_queue_tx::reset() {
  // forget any progress made on a partially transmitted TSO unit
  tso_len = 0;
  tso_off = 0;

  // discard the descriptors that were queued up for transmission
  ready_segments.clear();

  queue_base::reset();
}
...
...
@@ -377,74 +379,201 @@ void lan_queue_tx::do_writeback(uint32_t first_idx, uint32_t first_pos,
bool
lan_queue_tx
::
trigger_tx_packet
()
{
size_t
n
=
ready_segments
.
size
();
size_t
d_skip
=
0
,
dcnt
;
bool
eop
=
false
;
uint64_t
d1
;
uint32_t
iipt
,
l4t
,
pkt_len
,
total_len
=
0
,
data_limit
;
bool
tso
=
false
;
uint32_t
tso_mss
=
0
,
tso_paylen
=
0
;
uint16_t
maclen
=
0
,
iplen
=
0
,
l4len
=
0
;
// abort if no queued up descriptors
if
(
n
==
0
)
return
false
;
size_t
dcnt
;
bool
eop
=
false
;
uint64_t
d1
;
uint16_t
iipt
,
l4t
,
total_len
=
0
;
for
(
dcnt
=
0
;
dcnt
<
n
&&
!
eop
;
dcnt
++
)
{
tx_desc_ctx
*
rd
=
ready_segments
.
at
(
dcnt
);
#ifdef DEBUG_LAN
log
<<
"trigger_tx_packet(n="
<<
n
<<
", firstidx="
<<
ready_segments
.
at
(
0
)
->
index
<<
")"
<<
logger
::
endl
;
log
<<
" tso_off="
<<
tso_off
<<
" tso_len="
<<
tso_len
<<
logger
::
endl
;
#endif
// check if we have a context descriptor first
tx_desc_ctx
*
rd
=
ready_segments
.
at
(
0
);
uint8_t
dtype
=
(
rd
->
d
->
cmd_type_offset_bsz
&
I40E_TXD_QW1_DTYPE_MASK
)
>>
I40E_TXD_QW1_DTYPE_SHIFT
;
if
(
dtype
==
I40E_TX_DESC_DTYPE_CONTEXT
)
{
struct
i40e_tx_context_desc
*
ctxd
=
reinterpret_cast
<
struct
i40e_tx_context_desc
*>
(
rd
->
d
);
d1
=
ctxd
->
type_cmd_tso_mss
;
uint16_t
cmd
=
((
d1
&
I40E_TXD_CTX_QW1_CMD_MASK
)
>>
I40E_TXD_CTX_QW1_CMD_SHIFT
);
tso
=
!!
(
cmd
&
I40E_TX_CTX_DESC_TSO
);
tso_mss
=
(
d1
&
I40E_TXD_CTX_QW1_MSS_MASK
)
>>
I40E_TXD_CTX_QW1_MSS_SHIFT
;
#ifdef DEBUG_LAN
log
<<
" tso="
<<
tso
<<
" mss="
<<
tso_mss
<<
logger
::
endl
;
#endif
d_skip
=
1
;
}
// find EOP descriptor
for
(
dcnt
=
d_skip
;
dcnt
<
n
&&
!
eop
;
dcnt
++
)
{
tx_desc_ctx
*
rd
=
ready_segments
.
at
(
dcnt
);
d1
=
rd
->
d
->
cmd_type_offset_bsz
;
#ifdef DEBUG_LAN
log
<<
" data fetched didx="
<<
rd
->
index
<<
" d1="
<<
d1
<<
logger
::
endl
;
#endif
uint16_t
pkt_len
=
(
d1
&
I40E_TXD_QW1_
TX_BUF_SZ_MASK
)
>>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT
;
if
(
total_len
+
pkt_len
>
MTU
)
{
log
<<
"
txq: trigger_tx_packet too large"
<<
logger
::
endl
;
dtype
=
(
d1
&
I40E_TXD_QW1_
DTYPE_MASK
)
>>
I40E_TXD_QW1_DTYPE_SHIFT
;
if
(
dtype
!=
I40E_TX_DESC_DTYPE_DATA
)
{
log
<<
"trigger tx desc is not a data descriptor idx="
<<
rd
->
index
<<
"
d1="
<<
d1
<<
logger
::
endl
;
abort
();
}
memcpy
(
pktbuf
+
total_len
,
rd
->
data
,
pkt_len
);
uint16_t
cmd
=
(
d1
&
I40E_TXD_QW1_CMD_MASK
)
>>
I40E_TXD_QW1_CMD_SHIFT
;
eop
=
(
cmd
&
I40E_TX_DESC_CMD_EOP
);
iipt
=
cmd
&
(
I40E_TX_DESC_CMD_IIPT_MASK
);
l4t
=
(
cmd
&
I40E_TX_DESC_CMD_L4T_EOFT_MASK
);
#ifdef DEBUG_LAN
log
<<
" eop="
<<
eop
<<
" len="
<<
pkt_len
<<
logger
::
endl
;
#endif
if
(
eop
)
{
uint32_t
off
=
(
d1
&
I40E_TXD_QW1_OFFSET_MASK
)
>>
I40E_TXD_QW1_OFFSET_SHIFT
;
maclen
=
((
off
&
I40E_TXD_QW1_MACLEN_MASK
)
>>
I40E_TX_DESC_LENGTH_MACLEN_SHIFT
)
*
2
;
iplen
=
((
off
&
I40E_TXD_QW1_IPLEN_MASK
)
>>
I40E_TX_DESC_LENGTH_IPLEN_SHIFT
)
*
4
;
l4len
=
((
off
&
I40E_TXD_QW1_L4LEN_MASK
)
>>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT
)
*
4
;
}
pkt_len
=
(
d1
&
I40E_TXD_QW1_TX_BUF_SZ_MASK
)
>>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT
;
total_len
+=
pkt_len
;
#ifdef DEBUG_LAN
log
<<
" eop="
<<
eop
<<
" len="
<<
pkt_len
<<
logger
::
endl
;
#endif
}
// Unit not completely fetched yet
if
(
!
eop
)
return
false
;
uint32_t
off
=
(
d1
&
I40E_TXD_QW1_OFFSET_MASK
)
>>
I40E_TXD_QW1_OFFSET_SHIFT
;
uint16_t
maclen
=
((
off
&
I40E_TXD_QW1_MACLEN_MASK
)
>>
I40E_TX_DESC_LENGTH_MACLEN_SHIFT
)
*
2
;
uint16_t
iplen
=
((
off
&
I40E_TXD_QW1_IPLEN_MASK
)
>>
I40E_TX_DESC_LENGTH_IPLEN_SHIFT
)
*
4
;
/*uint16_t l4len = (off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;*/
if
(
tso
)
{
if
(
tso_off
==
0
)
data_limit
=
maclen
+
iplen
+
l4len
+
tso_mss
;
else
data_limit
=
tso_off
+
tso_mss
;
if
(
l4t
==
I40E_TX_DESC_CMD_L4T_EOFT_TCP
)
{
uint16_t
tcp_off
=
maclen
+
iplen
;
xsum_tcp
(
pktbuf
+
tcp_off
,
total_len
-
tcp_off
);
if
(
data_limit
>
total_len
)
{
data_limit
=
total_len
;
}
}
else
{
if
(
total_len
>
MTU
)
{
log
<<
" packet is longer ("
<<
total_len
<<
") than MTU ("
<<
MTU
<<
")"
<<
logger
::
endl
;
abort
();
}
data_limit
=
total_len
;
}
#ifdef DEBUG_LAN
log
<<
" iipt="
<<
iipt
<<
" l4t="
<<
l4t
<<
" maclen="
<<
maclen
<<
" iplen="
<<
iplen
<<
logger
::
endl
;
" maclen="
<<
maclen
<<
" iplen="
<<
iplen
<<
" l4len="
<<
l4len
<<
" total_len="
<<
total_len
<<
" data_limit="
<<
data_limit
<<
logger
::
endl
;
#else
(
void
)
iipt
;
#endif
runner
->
eth_send
(
pktbuf
,
total_len
);
// copy data for this segment
uint32_t
off
=
0
;
for
(
dcnt
=
d_skip
;
dcnt
<
n
&&
off
<
data_limit
;
dcnt
++
)
{
tx_desc_ctx
*
rd
=
ready_segments
.
at
(
dcnt
);
d1
=
rd
->
d
->
cmd_type_offset_bsz
;
uint16_t
pkt_len
=
(
d1
&
I40E_TXD_QW1_TX_BUF_SZ_MASK
)
>>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT
;
if
(
off
<=
tso_off
&&
off
+
pkt_len
>
tso_off
)
{
uint32_t
start
=
tso_off
;
uint32_t
end
=
off
+
pkt_len
;
if
(
end
>
data_limit
)
end
=
data_limit
;
#ifdef DEBUG_LAN
log
<<
" copying data from off="
<<
off
<<
" idx="
<<
rd
->
index
<<
" start="
<<
start
<<
" end="
<<
end
<<
" tso_len="
<<
tso_len
<<
logger
::
endl
;
#endif
memcpy
(
pktbuf
+
tso_len
,
(
uint8_t
*
)
rd
->
data
+
(
start
-
off
),
end
-
start
);
tso_off
=
end
;
tso_len
+=
end
-
start
;
}
off
+=
pkt_len
;
}
assert
(
tso_len
<=
MTU
);
if
(
!
tso
)
{
#ifdef DEBUG_LAN
log
<<
" normal non-tso packet"
<<
logger
::
endl
;
#endif
if
(
l4t
==
I40E_TX_DESC_CMD_L4T_EOFT_TCP
)
{
uint16_t
tcp_off
=
maclen
+
iplen
;
xsum_tcp
(
pktbuf
+
tcp_off
,
tso_len
-
tcp_off
);
}
runner
->
eth_send
(
pktbuf
,
tso_len
);
}
else
{
#ifdef DEBUG_LAN
log
<<
" tso packet off="
<<
tso_off
<<
" len="
<<
tso_len
<<
logger
::
endl
;
#endif
// TSO gets hairier
uint16_t
hdrlen
=
maclen
+
iplen
+
l4len
;
// calculate payload size
tso_paylen
=
tso_len
-
hdrlen
;
if
(
tso_paylen
>
tso_mss
)
tso_paylen
=
tso_mss
;
xsum_tcpip_tso
(
pktbuf
+
maclen
,
iplen
,
l4len
,
tso_paylen
);
runner
->
eth_send
(
pktbuf
,
tso_len
);
tso_postupdate_header
(
pktbuf
+
maclen
,
iplen
,
l4len
,
tso_paylen
);
// not done yet with this TSO unit
if
(
tso
&&
tso_off
<
total_len
)
{
tso_len
=
hdrlen
;
return
true
;
}
}
#ifdef DEBUG_LAN
log
<<
" unit done"
<<
logger
::
endl
;
#endif
while
(
dcnt
--
>
0
)
{
ready_segments
.
front
()
->
processed
();
ready_segments
.
pop_front
();
}
tso_len
=
0
;
tso_off
=
0
;
return
true
;
}
...
...
@@ -480,17 +609,14 @@ void lan_queue_tx::tx_desc_ctx::prepare()
data_fetch
(
d
->
buffer_addr
,
len
);
}
else
if
(
dtype
==
I40E_TX_DESC_DTYPE_CONTEXT
)
{
#ifdef DEBUG_LAN
struct
i40e_tx_context_desc
*
ctxd
=
reinterpret_cast
<
struct
i40e_tx_context_desc
*>
(
d
);
queue
.
log
<<
" context descriptor: tp="
<<
ctxd
->
tunneling_params
<<
" l2t="
<<
ctxd
->
l2tag2
<<
" tctm="
<<
ctxd
->
type_cmd_tso_mss
<<
logger
::
endl
;
abort
();
/*desc->buffer_addr = 0;
desc->cmd_type_offset_bsz = I40E_TX_DESC_DTYPE_DESC_DONE <<
I40E_TXD_QW1_DTYPE_SHIFT;
#endif
desc_writeback(desc_buf, didx);*/
prepared
();
}
else
{
queue
.
log
<<
"txq: only support context & data descriptors"
<<
logger
::
endl
;
abort
();
...
...
i40e_bm/xsums.cc
View file @
c72d8fc2
...
...
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <cassert>
#include <iostream>
...
...
@@ -30,6 +31,19 @@ struct rte_tcp_hdr {
/* from dpdk/lib/librte_net/rte_ip.h */
/**
 * IPv4 header (fixed 20-byte part, without options).
 *
 * The struct is packed so it can be overlaid directly on packet bytes. The
 * multi-byte fields are read and written through ntohs()/htons() by the
 * checksum helpers in this file.
 */
struct ipv4_hdr {
  /** version and header length */
  uint8_t version_ihl;
  /** type of service */
  uint8_t type_of_service;
  /** length of packet */
  uint16_t total_length;
  /** packet ID */
  uint16_t packet_id;
  /** fragmentation offset */
  uint16_t fragment_offset;
  /** time to live */
  uint8_t time_to_live;
  /** protocol ID */
  uint8_t next_proto_id;
  /** header checksum */
  uint16_t hdr_checksum;
  /** source address */
  uint32_t src_addr;
  /** destination address */
  uint32_t dst_addr;
} __attribute__((packed));
static
inline
uint32_t
__rte_raw_cksum
(
const
void
*
buf
,
size_t
len
,
uint32_t
sum
)
{
/* workaround gcc strict-aliasing warning */
...
...
@@ -76,6 +90,27 @@ static inline uint16_t rte_raw_cksum(const void *buf, size_t len)
return
__rte_raw_cksum_reduce
(
sum
);
}
/**
 * Checksum the IPv4 pseudo header that belongs to the given IP header.
 *
 * The pseudo header consists of source address, destination address, a zero
 * byte, the L4 protocol number and the L4 length. The L4 length is the IP
 * total length minus the IP header length, where the header length is taken
 * from the IHL field (low nibble of version_ihl, counted in 32-bit words), so
 * headers carrying IP options are handled as well.
 *
 * @param ipv4_hdr  IPv4 header; total_length must already be set (network
 *                  byte order).
 * @return the value rte_raw_cksum() yields for the pseudo header. The caller
 *         in this file adds it to the L4 sum and complements the result
 *         afterwards.
 */
static inline uint16_t rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr) {
  struct ipv4_psd_header {
    uint32_t src_addr; /* IP address of source host. */
    uint32_t dst_addr; /* IP address of destination host. */
    uint8_t zero;      /* zero. */
    uint8_t proto;     /* L4 protocol type. */
    uint16_t len;      /* L4 length. */
  } psd_hdr;

  // Actual IP header length in bytes. Using a fixed sizeof(struct ipv4_hdr)
  // here would overstate the L4 length whenever IP options are present.
  const uint16_t ip_hdr_len =
      static_cast<uint16_t>((ipv4_hdr->version_ihl & 0x0f) * 4);

  psd_hdr.src_addr = ipv4_hdr->src_addr;
  psd_hdr.dst_addr = ipv4_hdr->dst_addr;
  psd_hdr.zero = 0;
  psd_hdr.proto = ipv4_hdr->next_proto_id;
  psd_hdr.len = htons(
      static_cast<uint16_t>(ntohs(ipv4_hdr->total_length) - ip_hdr_len));

  return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}
void
xsum_tcp
(
void
*
tcphdr
,
size_t
l4_len
)
{
struct
rte_tcp_hdr
*
tcph
=
reinterpret_cast
<
struct
rte_tcp_hdr
*>
(
tcphdr
);
...
...
@@ -85,4 +120,40 @@ void xsum_tcp(void *tcphdr, size_t l4_len)
tcph
->
cksum
=
cksum
;
}
/**
 * Fill in the IPv4 total length, the IPv4 header checksum and the TCP
 * checksum of one TSO segment in place.
 *
 * @param iphdr   start of the IPv4 header inside the packet buffer
 * @param iplen   length of the IP header in bytes
 * @param l4len   length of the TCP header in bytes
 * @param paylen  number of payload bytes following the TCP header
 */
void xsum_tcpip_tso(void *iphdr, uint8_t iplen, uint8_t l4len,
                    uint16_t paylen) {
  struct ipv4_hdr *ip = static_cast<struct ipv4_hdr *>(iphdr);
  struct rte_tcp_hdr *tcp = reinterpret_cast<struct rte_tcp_hdr *>(
      static_cast<uint8_t *>(iphdr) + iplen);

  // fold the upper 16 bits into the lower ones once, then complement
  const auto fold_and_invert = [](uint32_t sum) -> uint32_t {
    const uint32_t folded = (sum >> 16) + (sum & 0xffff);
    return ~folded & 0xffff;
  };

  // IP header: set the per-segment length, then checksum with the field zeroed
  ip->total_length = htons(iplen + l4len + paylen);
  ip->hdr_checksum = 0;
  uint32_t ip_sum = rte_raw_cksum(iphdr, iplen);
  ip->hdr_checksum = fold_and_invert(ip_sum);

  // TCP: header + payload sum combined with the pseudo header sum
  tcp->cksum = 0;
  uint32_t tcp_sum = rte_raw_cksum(tcp, l4len + paylen);
  tcp_sum += rte_ipv4_phdr_cksum(ip);
  tcp->cksum = fold_and_invert(tcp_sum);
}
/**
 * Advance the header fields in place after one TSO segment has been sent, so
 * the same header can be reused for the next segment: the TCP sequence number
 * grows by the payload just sent and the IP packet ID is incremented by one.
 *
 * @param iphdr   start of the IPv4 header inside the packet buffer
 * @param iplen   length of the IP header in bytes (locates the TCP header)
 * @param l4len   length of the TCP header in bytes (not used here)
 * @param paylen  payload bytes carried by the segment that was just sent
 */
void tso_postupdate_header(void *iphdr, uint8_t iplen, uint8_t l4len,
                           uint16_t paylen) {
  struct ipv4_hdr *ip = static_cast<struct ipv4_hdr *>(iphdr);
  struct rte_tcp_hdr *tcp = reinterpret_cast<struct rte_tcp_hdr *>(
      static_cast<uint8_t *>(iphdr) + iplen);

  // both fields are kept in network byte order inside the packet
  const uint32_t next_seq = ntohl(tcp->sent_seq) + paylen;
  tcp->sent_seq = htonl(next_seq);

  ip->packet_id = htons(ntohs(ip->packet_id) + 1);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment