"vscode:/vscode.git/clone" did not exist on "7d53f9dac0c0971f5bac1e0e21beea0d08b69b11"
Commit c72d8fc2 authored by Antoine Kaufmann's avatar Antoine Kaufmann
Browse files

i40e: TSO support

parent e19aba65
...@@ -3,7 +3,7 @@ m5 checkpoint ...@@ -3,7 +3,7 @@ m5 checkpoint
modprobe i40e modprobe i40e
ip link set dev eth0 up ip link set dev eth0 up
ip addr add 192.168.64.2/24 dev eth0 ip addr add 192.168.64.2/24 dev eth0
ethtool -K eth0 tso off #ethtool -K eth0 tso off
sleep 2 sleep 2
iperf -l 1M -w 1M -c 192.168.64.1 -i 1 -P 4 iperf -l 1M -w 1M -c 192.168.64.1 -i 1 -P 4
m5 exit m5 exit
...@@ -3,6 +3,6 @@ m5 checkpoint ...@@ -3,6 +3,6 @@ m5 checkpoint
modprobe i40e modprobe i40e
ip link set dev eth0 up ip link set dev eth0 up
ip addr add 192.168.64.1/24 dev eth0 ip addr add 192.168.64.1/24 dev eth0
ethtool -K eth0 tso off #ethtool -K eth0 tso off
iperf -s -l 1M -w 1M -P 4 iperf -s -l 1M -w 1M -P 4
m5 exit m5 exit
...@@ -6,7 +6,7 @@ sysctl -w net.core.busy_poll=50 ...@@ -6,7 +6,7 @@ sysctl -w net.core.busy_poll=50
sysctl -w net.core.busy_read=50 sysctl -w net.core.busy_read=50
ip link set dev eth0 up ip link set dev eth0 up
ip addr add 192.168.64.2/24 dev eth0 ip addr add 192.168.64.2/24 dev eth0
ethtool -K eth0 tso off #ethtool -K eth0 tso off
sleep 2 sleep 2
iperf -l 1M -w 1M -c 192.168.64.1 -i 1 -P 4 iperf -l 1M -w 1M -c 192.168.64.1 -i 1 -P 4
poweroff -f poweroff -f
...@@ -6,6 +6,6 @@ sysctl -w net.core.busy_poll=50 ...@@ -6,6 +6,6 @@ sysctl -w net.core.busy_poll=50
sysctl -w net.core.busy_read=50 sysctl -w net.core.busy_read=50
ip link set dev eth0 up ip link set dev eth0 up
ip addr add 192.168.64.1/24 dev eth0 ip addr add 192.168.64.1/24 dev eth0
ethtool -K eth0 tso off #ethtool -K eth0 tso off
iperf -s -l 1M -w 1M -P 4 iperf -s -l 1M -w 1M -P 4
poweroff -f poweroff -f
...@@ -355,6 +355,8 @@ class lan_queue_tx : public lan_queue_base { ...@@ -355,6 +355,8 @@ class lan_queue_tx : public lan_queue_base {
}; };
uint8_t pktbuf[MTU]; uint8_t pktbuf[MTU];
uint32_t tso_off;
uint32_t tso_len;
std::deque<tx_desc_ctx *> ready_segments; std::deque<tx_desc_ctx *> ready_segments;
bool hwb; bool hwb;
...@@ -550,4 +552,12 @@ protected: ...@@ -550,4 +552,12 @@ protected:
// places the tcp checksum in the packet (assuming ipv4) // places the tcp checksum in the packet (assuming ipv4)
void xsum_tcp(void *tcphdr, size_t l4len); void xsum_tcp(void *tcphdr, size_t l4len);
// calculates the full ipv4 & tcp checksum without assuming any pseudo header
// xsums
//
// iphdr points at the start of the IPv4 header; the TCP header is located at
// iphdr + iplen. The function rewrites the IPv4 total length to
// iplen + l4len + paylen, then fills in the IPv4 header checksum and the TCP
// checksum (the latter covering l4len + paylen bytes plus the pseudo header).
void xsum_tcpip_tso(void *iphdr, uint8_t iplen, uint8_t l4len,
uint16_t paylen);
// prepares the headers for the next segment of a TSO unit: advances the TCP
// sequence number by paylen and increments the IPv4 packet id by one. The
// TCP header is located at iphdr + iplen.
void tso_postupdate_header(void *iphdr, uint8_t iplen, uint8_t l4len,
uint16_t paylen);
} // namespace corundum } // namespace corundum
...@@ -319,6 +319,8 @@ lan_queue_tx::lan_queue_tx(lan &lanmgr_, uint32_t &reg_tail_, size_t idx_, ...@@ -319,6 +319,8 @@ lan_queue_tx::lan_queue_tx(lan &lanmgr_, uint32_t &reg_tail_, size_t idx_,
void lan_queue_tx::reset() void lan_queue_tx::reset()
{ {
tso_off = 0;
tso_len = 0;
ready_segments.clear(); ready_segments.clear();
queue_base::reset(); queue_base::reset();
} }
...@@ -377,74 +379,201 @@ void lan_queue_tx::do_writeback(uint32_t first_idx, uint32_t first_pos, ...@@ -377,74 +379,201 @@ void lan_queue_tx::do_writeback(uint32_t first_idx, uint32_t first_pos,
bool lan_queue_tx::trigger_tx_packet() bool lan_queue_tx::trigger_tx_packet()
{ {
size_t n = ready_segments.size(); size_t n = ready_segments.size();
size_t d_skip = 0, dcnt;
bool eop = false;
uint64_t d1;
uint32_t iipt, l4t, pkt_len, total_len = 0, data_limit;
bool tso = false;
uint32_t tso_mss = 0, tso_paylen = 0;
uint16_t maclen = 0, iplen = 0, l4len = 0;
// abort if no queued up descriptors
if (n == 0) if (n == 0)
return false; return false;
size_t dcnt; #ifdef DEBUG_LAN
bool eop = false; log << "trigger_tx_packet(n=" << n << ", firstidx=" <<
uint64_t d1; ready_segments.at(0)->index << ")" << logger::endl;
uint16_t iipt, l4t, total_len = 0; log << " tso_off=" << tso_off << " tso_len=" << tso_len << logger::endl;
for (dcnt = 0; dcnt < n && !eop; dcnt++) { #endif
tx_desc_ctx *rd = ready_segments.at(dcnt);
// check if we have a context descriptor first
tx_desc_ctx *rd = ready_segments.at(0);
uint8_t dtype = (rd->d->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK) >>
I40E_TXD_QW1_DTYPE_SHIFT;
if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) {
struct i40e_tx_context_desc *ctxd =
reinterpret_cast<struct i40e_tx_context_desc *> (rd->d);
d1 = ctxd->type_cmd_tso_mss;
uint16_t cmd = ((d1 & I40E_TXD_CTX_QW1_CMD_MASK) >>
I40E_TXD_CTX_QW1_CMD_SHIFT);
tso = !!(cmd & I40E_TX_CTX_DESC_TSO);
tso_mss = (d1 & I40E_TXD_CTX_QW1_MSS_MASK) >>
I40E_TXD_CTX_QW1_MSS_SHIFT;
#ifdef DEBUG_LAN
log << " tso=" << tso << " mss=" << tso_mss << logger::endl;
#endif
d_skip = 1;
}
// find EOP descriptor
for (dcnt = d_skip; dcnt < n && !eop; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
d1 = rd->d->cmd_type_offset_bsz; d1 = rd->d->cmd_type_offset_bsz;
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " data fetched didx=" << rd->index << " d1=" << log << " data fetched didx=" << rd->index << " d1=" <<
d1 << logger::endl; d1 << logger::endl;
#endif #endif
uint16_t pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >> dtype = (d1 & I40E_TXD_QW1_DTYPE_MASK) >> I40E_TXD_QW1_DTYPE_SHIFT;
I40E_TXD_QW1_TX_BUF_SZ_SHIFT; if (dtype != I40E_TX_DESC_DTYPE_DATA) {
if (total_len + pkt_len > MTU) { log << "trigger tx desc is not a data descriptor idx=" << rd->index
log << "txq: trigger_tx_packet too large" << logger::endl; << " d1=" << d1 << logger::endl;
abort(); abort();
} }
memcpy(pktbuf + total_len, rd->data, pkt_len);
uint16_t cmd = (d1 & I40E_TXD_QW1_CMD_MASK) >> I40E_TXD_QW1_CMD_SHIFT; uint16_t cmd = (d1 & I40E_TXD_QW1_CMD_MASK) >> I40E_TXD_QW1_CMD_SHIFT;
eop = (cmd & I40E_TX_DESC_CMD_EOP); eop = (cmd & I40E_TX_DESC_CMD_EOP);
iipt = cmd & (I40E_TX_DESC_CMD_IIPT_MASK); iipt = cmd & (I40E_TX_DESC_CMD_IIPT_MASK);
l4t = (cmd & I40E_TX_DESC_CMD_L4T_EOFT_MASK); l4t = (cmd & I40E_TX_DESC_CMD_L4T_EOFT_MASK);
#ifdef DEBUG_LAN if (eop) {
log << " eop=" << eop << " len=" << pkt_len << uint32_t off = (d1 & I40E_TXD_QW1_OFFSET_MASK) >> I40E_TXD_QW1_OFFSET_SHIFT;
logger::endl; maclen = ((off & I40E_TXD_QW1_MACLEN_MASK) >>
#endif I40E_TX_DESC_LENGTH_MACLEN_SHIFT) * 2;
iplen = ((off & I40E_TXD_QW1_IPLEN_MASK) >>
I40E_TX_DESC_LENGTH_IPLEN_SHIFT) * 4;
l4len = ((off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT) * 4;
}
pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
total_len += pkt_len; total_len += pkt_len;
#ifdef DEBUG_LAN
log << " eop=" << eop << " len=" << pkt_len << logger::endl;
#endif
} }
// Unit not completely fetched yet
if (!eop) if (!eop)
return false; return false;
uint32_t off = (d1 & I40E_TXD_QW1_OFFSET_MASK) >> I40E_TXD_QW1_OFFSET_SHIFT; if (tso) {
uint16_t maclen = ((off & I40E_TXD_QW1_MACLEN_MASK) >> if (tso_off == 0)
I40E_TX_DESC_LENGTH_MACLEN_SHIFT) * 2; data_limit = maclen + iplen + l4len + tso_mss;
uint16_t iplen = ((off & I40E_TXD_QW1_IPLEN_MASK) >> else
I40E_TX_DESC_LENGTH_IPLEN_SHIFT) * 4; data_limit = tso_off + tso_mss;
/*uint16_t l4len = (off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;*/
if (l4t == I40E_TX_DESC_CMD_L4T_EOFT_TCP) { if (data_limit > total_len) {
uint16_t tcp_off = maclen + iplen; data_limit = total_len;
xsum_tcp(pktbuf + tcp_off, total_len - tcp_off);
} }
} else {
if (total_len > MTU) {
log << " packet is longer (" << total_len << ") than MTU (" <<
MTU << ")" << logger::endl;
abort();
}
data_limit = total_len;
}
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " iipt=" << iipt << " l4t=" << l4t << log << " iipt=" << iipt << " l4t=" << l4t <<
" maclen=" << maclen << " iplen=" << iplen<< logger::endl; " maclen=" << maclen << " iplen=" << iplen << " l4len=" << l4len <<
" total_len=" << total_len << " data_limit=" << data_limit <<
logger::endl;
#else #else
(void) iipt; (void) iipt;
#endif #endif
runner->eth_send(pktbuf, total_len);
// copy data for this segment
uint32_t off = 0;
for (dcnt = d_skip; dcnt < n && off < data_limit; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
d1 = rd->d->cmd_type_offset_bsz;
uint16_t pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
if (off <= tso_off && off + pkt_len > tso_off) {
uint32_t start = tso_off;
uint32_t end = off + pkt_len;
if (end > data_limit)
end = data_limit;
#ifdef DEBUG_LAN
log << " copying data from off=" << off << " idx=" << rd->index <<
" start=" << start << " end=" << end << " tso_len=" << tso_len <<
logger::endl;
#endif
memcpy(pktbuf + tso_len, (uint8_t *) rd->data + (start - off),
end - start);
tso_off = end;
tso_len += end - start;
}
off += pkt_len;
}
assert(tso_len <= MTU);
if (!tso) {
#ifdef DEBUG_LAN
log << " normal non-tso packet" << logger::endl;
#endif
if (l4t == I40E_TX_DESC_CMD_L4T_EOFT_TCP) {
uint16_t tcp_off = maclen + iplen;
xsum_tcp(pktbuf + tcp_off, tso_len - tcp_off);
}
runner->eth_send(pktbuf, tso_len);
} else {
#ifdef DEBUG_LAN
log << " tso packet off=" << tso_off << " len=" << tso_len <<
logger::endl;
#endif
// TSO gets hairier
uint16_t hdrlen = maclen + iplen + l4len;
// calculate payload size
tso_paylen = tso_len - hdrlen;
if (tso_paylen > tso_mss)
tso_paylen = tso_mss;
xsum_tcpip_tso(pktbuf + maclen, iplen, l4len, tso_paylen);
runner->eth_send(pktbuf, tso_len);
tso_postupdate_header(pktbuf + maclen, iplen, l4len, tso_paylen);
// not done yet with this TSO unit
if (tso && tso_off < total_len) {
tso_len = hdrlen;
return true;
}
}
#ifdef DEBUG_LAN
log << " unit done" << logger::endl;
#endif
while (dcnt-- > 0) { while (dcnt-- > 0) {
ready_segments.front()->processed(); ready_segments.front()->processed();
ready_segments.pop_front(); ready_segments.pop_front();
} }
tso_len = 0;
tso_off = 0;
return true; return true;
} }
...@@ -480,17 +609,14 @@ void lan_queue_tx::tx_desc_ctx::prepare() ...@@ -480,17 +609,14 @@ void lan_queue_tx::tx_desc_ctx::prepare()
data_fetch(d->buffer_addr, len); data_fetch(d->buffer_addr, len);
} else if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) { } else if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) {
#ifdef DEBUG_LAN
struct i40e_tx_context_desc *ctxd = struct i40e_tx_context_desc *ctxd =
reinterpret_cast<struct i40e_tx_context_desc *> (d); reinterpret_cast<struct i40e_tx_context_desc *> (d);
queue.log << " context descriptor: tp=" << ctxd->tunneling_params << queue.log << " context descriptor: tp=" << ctxd->tunneling_params <<
" l2t=" << ctxd->l2tag2 << " tctm=" << ctxd->type_cmd_tso_mss << logger::endl; " l2t=" << ctxd->l2tag2 << " tctm=" << ctxd->type_cmd_tso_mss << logger::endl;
abort(); #endif
/*desc->buffer_addr = 0;
desc->cmd_type_offset_bsz = I40E_TX_DESC_DTYPE_DESC_DONE <<
I40E_TXD_QW1_DTYPE_SHIFT;
desc_writeback(desc_buf, didx);*/ prepared();
} else { } else {
queue.log << "txq: only support context & data descriptors" << logger::endl; queue.log << "txq: only support context & data descriptors" << logger::endl;
abort(); abort();
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <arpa/inet.h>
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
...@@ -30,6 +31,19 @@ struct rte_tcp_hdr { ...@@ -30,6 +31,19 @@ struct rte_tcp_hdr {
/* from dpdk/lib/librte_net/rte_ip.h */ /* from dpdk/lib/librte_net/rte_ip.h */
/**
 * Fixed 20-byte IPv4 header (no options), byte-for-byte as it appears in the
 * packet. The struct is packed so it can be overlaid on a packet buffer at
 * any offset. The code in this file accesses total_length and packet_id
 * through htons()/ntohs().
 */
struct __attribute__((packed)) ipv4_hdr {
    uint8_t  version_ihl;      /* version (high nibble) and header length */
    uint8_t  type_of_service;  /* type of service */
    uint16_t total_length;     /* length of the whole packet */
    uint16_t packet_id;        /* packet identification */
    uint16_t fragment_offset;  /* fragmentation offset */
    uint8_t  time_to_live;     /* time to live */
    uint8_t  next_proto_id;    /* protocol ID of the next header */
    uint16_t hdr_checksum;     /* checksum over the header */
    uint32_t src_addr;         /* source address */
    uint32_t dst_addr;         /* destination address */
};
static inline uint32_t __rte_raw_cksum(const void *buf, size_t len, uint32_t sum) static inline uint32_t __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
{ {
/* workaround gcc strict-aliasing warning */ /* workaround gcc strict-aliasing warning */
...@@ -76,6 +90,27 @@ static inline uint16_t rte_raw_cksum(const void *buf, size_t len) ...@@ -76,6 +90,27 @@ static inline uint16_t rte_raw_cksum(const void *buf, size_t len)
return __rte_raw_cksum_reduce(sum); return __rte_raw_cksum_reduce(sum);
} }
/**
 * Checksum of the IPv4 pseudo header used for L4 (TCP/UDP) checksums.
 *
 * Builds the pseudo header (source address, destination address, zero byte,
 * protocol, L4 length) from the given IPv4 header and returns
 * rte_raw_cksum() over it; the caller adds this to the checksum of the L4
 * header and payload before folding and complementing.
 *
 * The L4 length is the IPv4 total length minus the actual IPv4 header
 * length taken from the IHL field, so headers carrying IP options produce
 * the correct value. An IHL below the minimum header size is treated as the
 * fixed 20-byte header.
 *
 * @param ipv4_hdr pointer to the IPv4 header; total_length must already be
 *                 set (network byte order).
 * @return pseudo-header checksum as produced by rte_raw_cksum().
 */
static inline uint16_t rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr)
{
    struct ipv4_psd_header {
        uint32_t src_addr; /* IP address of source host. */
        uint32_t dst_addr; /* IP address of destination host. */
        uint8_t zero;      /* zero. */
        uint8_t proto;     /* L4 protocol type. */
        uint16_t len;      /* L4 length. */
    } psd_hdr;

    /* IHL (low nibble) counts 32-bit words; do not assume 20 bytes, since
     * IP options make the header longer. */
    uint16_t hdr_len = static_cast<uint16_t>((ipv4_hdr->version_ihl & 0x0f) * 4);
    if (hdr_len < sizeof(struct ipv4_hdr))
        hdr_len = sizeof(struct ipv4_hdr);

    psd_hdr.src_addr = ipv4_hdr->src_addr;
    psd_hdr.dst_addr = ipv4_hdr->dst_addr;
    psd_hdr.zero = 0;
    psd_hdr.proto = ipv4_hdr->next_proto_id;
    psd_hdr.len = htons(
        static_cast<uint16_t>(ntohs(ipv4_hdr->total_length) - hdr_len));
    return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}
void xsum_tcp(void *tcphdr, size_t l4_len) void xsum_tcp(void *tcphdr, size_t l4_len)
{ {
struct rte_tcp_hdr *tcph = reinterpret_cast<struct rte_tcp_hdr *> (tcphdr); struct rte_tcp_hdr *tcph = reinterpret_cast<struct rte_tcp_hdr *> (tcphdr);
...@@ -85,4 +120,40 @@ void xsum_tcp(void *tcphdr, size_t l4_len) ...@@ -85,4 +120,40 @@ void xsum_tcp(void *tcphdr, size_t l4_len)
tcph->cksum = cksum; tcph->cksum = cksum;
} }
/**
 * Finalize the IPv4 and TCP headers of one TSO segment in place.
 *
 * Sets the IPv4 total length for this segment, then computes the IPv4 header
 * checksum and the full TCP checksum (pseudo header included) from scratch.
 *
 * @param iphdr  start of the IPv4 header; the TCP header follows at
 *               iphdr + iplen.
 * @param iplen  IPv4 header length in bytes.
 * @param l4len  TCP header length in bytes.
 * @param paylen number of payload bytes in this segment.
 */
void xsum_tcpip_tso(void *iphdr, uint8_t iplen, uint8_t l4len,
        uint16_t paylen)
{
    uint8_t *l3_start = static_cast<uint8_t *>(iphdr);
    struct ipv4_hdr *ip = reinterpret_cast<struct ipv4_hdr *>(l3_start);
    struct rte_tcp_hdr *tcp =
        reinterpret_cast<struct rte_tcp_hdr *>(l3_start + iplen);

    /* IPv4: patch the length, then checksum the header with a zeroed
     * checksum field. */
    ip->total_length = htons(iplen + l4len + paylen);
    ip->hdr_checksum = 0;
    uint32_t ip_sum = rte_raw_cksum(iphdr, iplen);
    ip_sum = (ip_sum >> 16) + (ip_sum & 0xffff);
    ip->hdr_checksum = (~ip_sum) & 0xffff;

    /* TCP: header + payload with a zeroed checksum field, plus the pseudo
     * header derived from the (already updated) IPv4 header. */
    tcp->cksum = 0;
    uint32_t tcp_sum = rte_raw_cksum(tcp, l4len + paylen);
    tcp_sum += rte_ipv4_phdr_cksum(ip);
    tcp_sum = (tcp_sum >> 16) + (tcp_sum & 0xffff);
    tcp->cksum = (~tcp_sum) & 0xffff;
}
/**
 * Advance the in-place headers after one TSO segment has been emitted, so
 * they are ready for the next segment of the same unit.
 *
 * The TCP sequence number is moved forward by the payload just sent and the
 * IPv4 packet id is incremented by one. Checksums and lengths are not
 * touched here.
 *
 * @param iphdr  start of the IPv4 header; the TCP header follows at
 *               iphdr + iplen.
 * @param iplen  IPv4 header length in bytes.
 * @param l4len  TCP header length in bytes (not used by this function).
 * @param paylen payload bytes carried by the segment that was just sent.
 */
void tso_postupdate_header(void *iphdr, uint8_t iplen, uint8_t l4len,
        uint16_t paylen)
{
    (void) l4len;

    uint8_t *l3_start = static_cast<uint8_t *>(iphdr);
    struct ipv4_hdr *ip = reinterpret_cast<struct ipv4_hdr *>(l3_start);
    struct rte_tcp_hdr *tcp =
        reinterpret_cast<struct rte_tcp_hdr *>(l3_start + iplen);

    const uint32_t next_seq = ntohl(tcp->sent_seq) + paylen;
    tcp->sent_seq = htonl(next_seq);

    const uint16_t next_id = static_cast<uint16_t>(ntohs(ip->packet_id) + 1);
    ip->packet_id = htons(next_id);
}
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment