Commit 5ee84460 authored by Jialin Li's avatar Jialin Li
Browse files

merge master

parents d8440794 dbbc89bd
...@@ -141,16 +141,23 @@ void lan_queue_base::disable() ...@@ -141,16 +141,23 @@ void lan_queue_base::disable()
void lan_queue_base::interrupt() void lan_queue_base::interrupt()
{ {
uint32_t qctl = reg_intqctl; uint32_t qctl = reg_intqctl;
uint32_t gctl = lanmgr.dev.regs.pfint_dyn_ctl0;
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " interrupt intctl=" << qctl << logger::endl; log << " interrupt qctl=" << qctl << " gctl=" << gctl << logger::endl;
#endif #endif
uint16_t msix_idx = (qctl & I40E_QINT_TQCTL_MSIX_INDX_MASK) >> uint16_t msix_idx = (qctl & I40E_QINT_TQCTL_MSIX_INDX_MASK) >>
I40E_QINT_TQCTL_ITR_INDX_SHIFT; I40E_QINT_TQCTL_ITR_INDX_SHIFT;
uint8_t msix0_idx = (qctl & I40E_QINT_TQCTL_MSIX0_INDX_MASK) >> uint8_t msix0_idx = (qctl & I40E_QINT_TQCTL_MSIX0_INDX_MASK) >>
I40E_QINT_TQCTL_MSIX0_INDX_SHIFT; I40E_QINT_TQCTL_MSIX0_INDX_SHIFT;
bool cause_ena = !!(qctl & I40E_QINT_TQCTL_CAUSE_ENA_MASK);
if (msix_idx != 0) {
log << "TODO: only int 0 is supported" << logger::endl;
abort();
}
bool cause_ena = !!(qctl & I40E_QINT_TQCTL_CAUSE_ENA_MASK) &&
!!(gctl & I40E_PFINT_DYN_CTL0_INTENA_MASK);
if (!cause_ena) { if (!cause_ena) {
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " interrupt cause disabled" << logger::endl; log << " interrupt cause disabled" << logger::endl;
...@@ -158,18 +165,17 @@ void lan_queue_base::interrupt() ...@@ -158,18 +165,17 @@ void lan_queue_base::interrupt()
return; return;
} }
if (msix_idx != 0) {
log << "TODO: only int 0 is supported" << logger::endl;
abort();
}
// TODO throttling? // TODO throttling?
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " setting int0.qidx=" << msix0_idx << logger::endl; log << " setting int0.qidx=" << msix0_idx << logger::endl;
#endif #endif
lanmgr.dev.regs.pfint_icr0 |= I40E_PFINT_ICR0_INTEVENT_MASK | lanmgr.dev.regs.pfint_icr0 |= I40E_PFINT_ICR0_INTEVENT_MASK |
(1 << (I40E_PFINT_ICR0_QUEUE_0_SHIFT + msix0_idx)); (1 << (I40E_PFINT_ICR0_QUEUE_0_SHIFT + msix0_idx));
runner->msi_issue(0);
uint8_t itr = (qctl & I40E_QINT_TQCTL_ITR_INDX_MASK) >>
I40E_QINT_TQCTL_ITR_INDX_SHIFT;
lanmgr.dev.signal_interrupt(0, itr);
} }
lan_queue_base::qctx_fetch::qctx_fetch(lan_queue_base &lq_) lan_queue_base::qctx_fetch::qctx_fetch(lan_queue_base &lq_)
...@@ -252,22 +258,36 @@ queue_base::desc_ctx &lan_queue_rx::desc_ctx_create() ...@@ -252,22 +258,36 @@ queue_base::desc_ctx &lan_queue_rx::desc_ctx_create()
void lan_queue_rx::packet_received(const void *data, size_t pktlen) void lan_queue_rx::packet_received(const void *data, size_t pktlen)
{ {
if (dcache.empty()) { size_t num_descs = (pktlen + dbuff_size - 1) / dbuff_size;
if (!enabled)
return;
if (dcache.size() < num_descs) {
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " empty, dropping packet" << logger::endl; log << " not enough rx descs (" << num_descs << ", dropping packet" <<
logger::endl;
#endif #endif
return; return;
} }
rx_desc_ctx &ctx = *dcache.front(); for (size_t i = 0; i < num_descs; i++) {
rx_desc_ctx &ctx = *dcache.front();
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " packet received didx=" << ctx.index << " cnt=" << log << " packet part=" << i << " received didx=" << ctx.index <<
dcache.size() << logger::endl; " cnt=" << dcache.size() << logger::endl;
#endif #endif
dcache.pop_front();
dcache.pop_front();
ctx.packet_received(data, pktlen); const uint8_t *buf = (const uint8_t *) data + (dbuff_size * i);
if (i == num_descs - 1) {
// last packet
ctx.packet_received(buf, pktlen - dbuff_size * i, true);
} else {
ctx.packet_received(buf, dbuff_size, false);
}
}
} }
lan_queue_rx::rx_desc_ctx::rx_desc_ctx(lan_queue_rx &queue_) lan_queue_rx::rx_desc_ctx::rx_desc_ctx(lan_queue_rx &queue_)
...@@ -285,7 +305,8 @@ void lan_queue_rx::rx_desc_ctx::process() ...@@ -285,7 +305,8 @@ void lan_queue_rx::rx_desc_ctx::process()
rq.dcache.push_back(this); rq.dcache.push_back(this);
} }
void lan_queue_rx::rx_desc_ctx::packet_received(const void *data, size_t pktlen) void lan_queue_rx::rx_desc_ctx::packet_received(const void *data,
size_t pktlen, bool last)
{ {
union i40e_32byte_rx_desc *rxd = reinterpret_cast< union i40e_32byte_rx_desc *rxd = reinterpret_cast<
union i40e_32byte_rx_desc *> (desc); union i40e_32byte_rx_desc *> (desc);
...@@ -294,11 +315,13 @@ void lan_queue_rx::rx_desc_ctx::packet_received(const void *data, size_t pktlen) ...@@ -294,11 +315,13 @@ void lan_queue_rx::rx_desc_ctx::packet_received(const void *data, size_t pktlen)
memset(rxd, 0, sizeof(*rxd)); memset(rxd, 0, sizeof(*rxd));
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_DD_SHIFT); rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_DD_SHIFT);
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_EOF_SHIFT);
// TODO: only if checksums are correct
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT);
rxd->wb.qword1.status_error_len |= (pktlen << I40E_RXD_QW1_LENGTH_PBUF_SHIFT); rxd->wb.qword1.status_error_len |= (pktlen << I40E_RXD_QW1_LENGTH_PBUF_SHIFT);
if (last) {
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_EOF_SHIFT);
// TODO: only if checksums are correct
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT);
}
data_write(addr, pktlen, data); data_write(addr, pktlen, data);
} }
...@@ -313,6 +336,8 @@ lan_queue_tx::lan_queue_tx(lan &lanmgr_, uint32_t &reg_tail_, size_t idx_, ...@@ -313,6 +336,8 @@ lan_queue_tx::lan_queue_tx(lan &lanmgr_, uint32_t &reg_tail_, size_t idx_,
void lan_queue_tx::reset() void lan_queue_tx::reset()
{ {
tso_off = 0;
tso_len = 0;
ready_segments.clear(); ready_segments.clear();
queue_base::reset(); queue_base::reset();
} }
...@@ -371,74 +396,201 @@ void lan_queue_tx::do_writeback(uint32_t first_idx, uint32_t first_pos, ...@@ -371,74 +396,201 @@ void lan_queue_tx::do_writeback(uint32_t first_idx, uint32_t first_pos,
bool lan_queue_tx::trigger_tx_packet() bool lan_queue_tx::trigger_tx_packet()
{ {
size_t n = ready_segments.size(); size_t n = ready_segments.size();
size_t d_skip = 0, dcnt;
bool eop = false;
uint64_t d1;
uint32_t iipt, l4t, pkt_len, total_len = 0, data_limit;
bool tso = false;
uint32_t tso_mss = 0, tso_paylen = 0;
uint16_t maclen = 0, iplen = 0, l4len = 0;
// abort if no queued up descriptors
if (n == 0) if (n == 0)
return false; return false;
size_t dcnt; #ifdef DEBUG_LAN
bool eop = false; log << "trigger_tx_packet(n=" << n << ", firstidx=" <<
uint64_t d1; ready_segments.at(0)->index << ")" << logger::endl;
uint16_t iipt, l4t, total_len = 0; log << " tso_off=" << tso_off << " tso_len=" << tso_len << logger::endl;
for (dcnt = 0; dcnt < n && !eop; dcnt++) { #endif
tx_desc_ctx *rd = ready_segments.at(dcnt);
// check if we have a context descriptor first
tx_desc_ctx *rd = ready_segments.at(0);
uint8_t dtype = (rd->d->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK) >>
I40E_TXD_QW1_DTYPE_SHIFT;
if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) {
struct i40e_tx_context_desc *ctxd =
reinterpret_cast<struct i40e_tx_context_desc *> (rd->d);
d1 = ctxd->type_cmd_tso_mss;
uint16_t cmd = ((d1 & I40E_TXD_CTX_QW1_CMD_MASK) >>
I40E_TXD_CTX_QW1_CMD_SHIFT);
tso = !!(cmd & I40E_TX_CTX_DESC_TSO);
tso_mss = (d1 & I40E_TXD_CTX_QW1_MSS_MASK) >>
I40E_TXD_CTX_QW1_MSS_SHIFT;
#ifdef DEBUG_LAN
log << " tso=" << tso << " mss=" << tso_mss << logger::endl;
#endif
d_skip = 1;
}
// find EOP descriptor
for (dcnt = d_skip; dcnt < n && !eop; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
d1 = rd->d->cmd_type_offset_bsz; d1 = rd->d->cmd_type_offset_bsz;
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " data fetched didx=" << rd->index << " d1=" << log << " data fetched didx=" << rd->index << " d1=" <<
d1 << logger::endl; d1 << logger::endl;
#endif #endif
uint16_t pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >> dtype = (d1 & I40E_TXD_QW1_DTYPE_MASK) >> I40E_TXD_QW1_DTYPE_SHIFT;
I40E_TXD_QW1_TX_BUF_SZ_SHIFT; if (dtype != I40E_TX_DESC_DTYPE_DATA) {
if (total_len + pkt_len > MTU) { log << "trigger tx desc is not a data descriptor idx=" << rd->index
log << "txq: trigger_tx_packet too large" << logger::endl; << " d1=" << d1 << logger::endl;
abort(); abort();
} }
memcpy(pktbuf + total_len, rd->data, pkt_len);
uint16_t cmd = (d1 & I40E_TXD_QW1_CMD_MASK) >> I40E_TXD_QW1_CMD_SHIFT; uint16_t cmd = (d1 & I40E_TXD_QW1_CMD_MASK) >> I40E_TXD_QW1_CMD_SHIFT;
eop = (cmd & I40E_TX_DESC_CMD_EOP); eop = (cmd & I40E_TX_DESC_CMD_EOP);
iipt = cmd & (I40E_TX_DESC_CMD_IIPT_MASK); iipt = cmd & (I40E_TX_DESC_CMD_IIPT_MASK);
l4t = (cmd & I40E_TX_DESC_CMD_L4T_EOFT_MASK); l4t = (cmd & I40E_TX_DESC_CMD_L4T_EOFT_MASK);
#ifdef DEBUG_LAN if (eop) {
log << " eop=" << eop << " len=" << pkt_len << uint32_t off = (d1 & I40E_TXD_QW1_OFFSET_MASK) >> I40E_TXD_QW1_OFFSET_SHIFT;
logger::endl; maclen = ((off & I40E_TXD_QW1_MACLEN_MASK) >>
#endif I40E_TX_DESC_LENGTH_MACLEN_SHIFT) * 2;
iplen = ((off & I40E_TXD_QW1_IPLEN_MASK) >>
I40E_TX_DESC_LENGTH_IPLEN_SHIFT) * 4;
l4len = ((off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT) * 4;
}
pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
total_len += pkt_len; total_len += pkt_len;
#ifdef DEBUG_LAN
log << " eop=" << eop << " len=" << pkt_len << logger::endl;
#endif
} }
// Unit not completely fetched yet
if (!eop) if (!eop)
return false; return false;
uint32_t off = (d1 & I40E_TXD_QW1_OFFSET_MASK) >> I40E_TXD_QW1_OFFSET_SHIFT; if (tso) {
uint16_t maclen = ((off & I40E_TXD_QW1_MACLEN_MASK) >> if (tso_off == 0)
I40E_TX_DESC_LENGTH_MACLEN_SHIFT) * 2; data_limit = maclen + iplen + l4len + tso_mss;
uint16_t iplen = ((off & I40E_TXD_QW1_IPLEN_MASK) >> else
I40E_TX_DESC_LENGTH_IPLEN_SHIFT) * 4; data_limit = tso_off + tso_mss;
/*uint16_t l4len = (off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;*/
if (data_limit > total_len) {
if (l4t == I40E_TX_DESC_CMD_L4T_EOFT_TCP) { data_limit = total_len;
uint16_t tcp_off = maclen + iplen; }
xsum_tcp(pktbuf + tcp_off, total_len - tcp_off); } else {
if (total_len > MTU) {
log << " packet is longer (" << total_len << ") than MTU (" <<
MTU << ")" << logger::endl;
abort();
}
data_limit = total_len;
} }
#ifdef DEBUG_LAN #ifdef DEBUG_LAN
log << " iipt=" << iipt << " l4t=" << l4t << log << " iipt=" << iipt << " l4t=" << l4t <<
" maclen=" << maclen << " iplen=" << iplen<< logger::endl; " maclen=" << maclen << " iplen=" << iplen << " l4len=" << l4len <<
" total_len=" << total_len << " data_limit=" << data_limit <<
logger::endl;
#else #else
(void) iipt; (void) iipt;
#endif #endif
runner->eth_send(pktbuf, total_len);
// copy data for this segment
uint32_t off = 0;
for (dcnt = d_skip; dcnt < n && off < data_limit; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
d1 = rd->d->cmd_type_offset_bsz;
uint16_t pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
if (off <= tso_off && off + pkt_len > tso_off) {
uint32_t start = tso_off;
uint32_t end = off + pkt_len;
if (end > data_limit)
end = data_limit;
#ifdef DEBUG_LAN
log << " copying data from off=" << off << " idx=" << rd->index <<
" start=" << start << " end=" << end << " tso_len=" << tso_len <<
logger::endl;
#endif
memcpy(pktbuf + tso_len, (uint8_t *) rd->data + (start - off),
end - start);
tso_off = end;
tso_len += end - start;
}
off += pkt_len;
}
assert(tso_len <= MTU);
if (!tso) {
#ifdef DEBUG_LAN
log << " normal non-tso packet" << logger::endl;
#endif
if (l4t == I40E_TX_DESC_CMD_L4T_EOFT_TCP) {
uint16_t tcp_off = maclen + iplen;
xsum_tcp(pktbuf + tcp_off, tso_len - tcp_off);
}
runner->eth_send(pktbuf, tso_len);
} else {
#ifdef DEBUG_LAN
log << " tso packet off=" << tso_off << " len=" << tso_len <<
logger::endl;
#endif
// TSO gets hairier
uint16_t hdrlen = maclen + iplen + l4len;
// calculate payload size
tso_paylen = tso_len - hdrlen;
if (tso_paylen > tso_mss)
tso_paylen = tso_mss;
xsum_tcpip_tso(pktbuf + maclen, iplen, l4len, tso_paylen);
runner->eth_send(pktbuf, tso_len);
tso_postupdate_header(pktbuf + maclen, iplen, l4len, tso_paylen);
// not done yet with this TSO unit
if (tso && tso_off < total_len) {
tso_len = hdrlen;
return true;
}
}
#ifdef DEBUG_LAN
log << " unit done" << logger::endl;
#endif
while (dcnt-- > 0) { while (dcnt-- > 0) {
ready_segments.front()->processed(); ready_segments.front()->processed();
ready_segments.pop_front(); ready_segments.pop_front();
} }
tso_len = 0;
tso_off = 0;
return true; return true;
} }
...@@ -474,17 +626,14 @@ void lan_queue_tx::tx_desc_ctx::prepare() ...@@ -474,17 +626,14 @@ void lan_queue_tx::tx_desc_ctx::prepare()
data_fetch(d->buffer_addr, len); data_fetch(d->buffer_addr, len);
} else if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) { } else if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) {
#ifdef DEBUG_LAN
struct i40e_tx_context_desc *ctxd = struct i40e_tx_context_desc *ctxd =
reinterpret_cast<struct i40e_tx_context_desc *> (d); reinterpret_cast<struct i40e_tx_context_desc *> (d);
queue.log << " context descriptor: tp=" << ctxd->tunneling_params << queue.log << " context descriptor: tp=" << ctxd->tunneling_params <<
" l2t=" << ctxd->l2tag2 << " tctm=" << ctxd->type_cmd_tso_mss << logger::endl; " l2t=" << ctxd->l2tag2 << " tctm=" << ctxd->type_cmd_tso_mss << logger::endl;
abort(); #endif
/*desc->buffer_addr = 0;
desc->cmd_type_offset_bsz = I40E_TX_DESC_DTYPE_DESC_DONE <<
I40E_TXD_QW1_DTYPE_SHIFT;
desc_writeback(desc_buf, didx);*/ prepared();
} else { } else {
queue.log << "txq: only support context & data descriptors" << logger::endl; queue.log << "txq: only support context & data descriptors" << logger::endl;
abort(); abort();
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <string.h> #include <string.h>
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
#include <algorithm>
#include "i40e_bm.h" #include "i40e_bm.h"
...@@ -289,6 +290,8 @@ void queue_base::desc_ctx::processed() ...@@ -289,6 +290,8 @@ void queue_base::desc_ctx::processed()
state = DESC_PROCESSED; state = DESC_PROCESSED;
} }
#define MAX_DMA_SIZE ((size_t) 9024)
void queue_base::desc_ctx::data_fetch(uint64_t addr, size_t data_len) void queue_base::desc_ctx::data_fetch(uint64_t addr, size_t data_len)
{ {
if (data_capacity < data_len) { if (data_capacity < data_len) {
...@@ -302,7 +305,10 @@ void queue_base::desc_ctx::data_fetch(uint64_t addr, size_t data_len) ...@@ -302,7 +305,10 @@ void queue_base::desc_ctx::data_fetch(uint64_t addr, size_t data_len)
data_capacity = data_len; data_capacity = data_len;
} }
dma_data_fetch *dma = new dma_data_fetch(*this, data_len, data); dma_data_fetch *dma = new dma_data_fetch(*this, std::min(data_len,
MAX_DMA_SIZE), data);
dma->part_offset = 0;
dma->total_len = data_len;
dma->write = false; dma->write = false;
dma->dma_addr = addr; dma->dma_addr = addr;
...@@ -387,7 +393,19 @@ queue_base::dma_data_fetch::~dma_data_fetch() ...@@ -387,7 +393,19 @@ queue_base::dma_data_fetch::~dma_data_fetch()
void queue_base::dma_data_fetch::done() void queue_base::dma_data_fetch::done()
{ {
ctx.data_fetched(dma_addr, len); part_offset += len;
dma_addr += len;
data = (uint8_t *) data + len;
if (part_offset < total_len) {
#ifdef DEBUG_QUEUES
ctx.queue.log << " dma_fetch: next part of multi part dma" << logger::endl;
#endif
len = std::min(total_len - part_offset, MAX_DMA_SIZE);
runner->issue_dma(*this);
return;
}
ctx.data_fetched(dma_addr - part_offset, total_len);
ctx.queue.trigger(); ctx.queue.trigger();
delete this; delete this;
} }
......
...@@ -9,6 +9,7 @@ extern nicbm::Runner *runner; ...@@ -9,6 +9,7 @@ extern nicbm::Runner *runner;
logger::logger(const std::string &label_) logger::logger(const std::string &label_)
: label(label_) : label(label_)
{ {
ss << std::hex;
} }
logger &logger::operator<<(char c) logger &logger::operator<<(char c)
...@@ -17,6 +18,7 @@ logger &logger::operator<<(char c) ...@@ -17,6 +18,7 @@ logger &logger::operator<<(char c)
std::cerr << runner->time_ps() << " " << label << ": " << ss.str() << std::cerr << runner->time_ps() << " " << label << ": " << ss.str() <<
std::endl; std::endl;
ss.str(std::string()); ss.str(std::string());
ss << std::hex;
} else { } else {
ss << c; ss << c;
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <arpa/inet.h>
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
...@@ -30,6 +31,19 @@ struct rte_tcp_hdr { ...@@ -30,6 +31,19 @@ struct rte_tcp_hdr {
/* from dpdk/lib/librte_net/rte_ip.h */ /* from dpdk/lib/librte_net/rte_ip.h */
/*
 * IPv4 header as it appears in the packet buffer (packed, so the struct can be
 * overlaid directly on packet bytes). The code in this file converts the
 * multi-byte fields it touches (total_length, packet_id) with ntohs()/htons(),
 * i.e. they are kept in network byte order inside the struct.
 */
struct ipv4_hdr {
uint8_t version_ihl; /**< version and header length */
uint8_t type_of_service; /**< type of service */
uint16_t total_length; /**< length of packet */
uint16_t packet_id; /**< packet ID */
uint16_t fragment_offset; /**< fragmentation offset */
uint8_t time_to_live; /**< time to live */
uint8_t next_proto_id; /**< protocol ID */
uint16_t hdr_checksum; /**< header checksum */
uint32_t src_addr; /**< source address */
uint32_t dst_addr; /**< destination address */
} __attribute__((packed));
static inline uint32_t __rte_raw_cksum(const void *buf, size_t len, uint32_t sum) static inline uint32_t __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
{ {
/* workaround gcc strict-aliasing warning */ /* workaround gcc strict-aliasing warning */
...@@ -76,6 +90,27 @@ static inline uint16_t rte_raw_cksum(const void *buf, size_t len) ...@@ -76,6 +90,27 @@ static inline uint16_t rte_raw_cksum(const void *buf, size_t len)
return __rte_raw_cksum_reduce(sum); return __rte_raw_cksum_reduce(sum);
} }
/**
 * Compute the raw (not complemented) 16-bit checksum over the IPv4 pseudo
 * header that is part of a TCP/UDP checksum.
 *
 * The L4 length placed in the pseudo header is the IP total length minus the
 * actual IP header length taken from the IHL field, so headers carrying IP
 * options are handled correctly (a fixed sizeof(struct ipv4_hdr) would
 * over-count the L4 length by the size of the options).
 *
 * @param ipv4_hdr IPv4 header with total_length already filled in
 *                 (network byte order).
 * @return raw pseudo-header checksum, or 0 if total_length is smaller than the
 *         header length announced by IHL (malformed header).
 */
static inline uint16_t rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr)
{
    struct ipv4_psd_header {
        uint32_t src_addr; /* IP address of source host. */
        uint32_t dst_addr; /* IP address of destination host. */
        uint8_t zero;      /* zero. */
        uint8_t proto;     /* L4 protocol type. */
        uint16_t len;      /* L4 length. */
    } psd_hdr;

    /* IHL is the low nibble and counts 32-bit words, options included */
    const uint16_t hdr_len = (uint16_t) ((ipv4_hdr->version_ihl & 0x0f) * 4);
    const uint16_t l3_len = ntohs(ipv4_hdr->total_length);

    /* avoid wrapping the unsigned subtraction below on a bogus header */
    if (l3_len < hdr_len)
        return 0;

    psd_hdr.src_addr = ipv4_hdr->src_addr;
    psd_hdr.dst_addr = ipv4_hdr->dst_addr;
    psd_hdr.zero = 0;
    psd_hdr.proto = ipv4_hdr->next_proto_id;
    psd_hdr.len = htons((uint16_t) (l3_len - hdr_len));
    return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}
void xsum_tcp(void *tcphdr, size_t l4_len) void xsum_tcp(void *tcphdr, size_t l4_len)
{ {
struct rte_tcp_hdr *tcph = reinterpret_cast<struct rte_tcp_hdr *> (tcphdr); struct rte_tcp_hdr *tcph = reinterpret_cast<struct rte_tcp_hdr *> (tcphdr);
...@@ -85,4 +120,40 @@ void xsum_tcp(void *tcphdr, size_t l4_len) ...@@ -85,4 +120,40 @@ void xsum_tcp(void *tcphdr, size_t l4_len)
tcph->cksum = cksum; tcph->cksum = cksum;
} }
/**
 * Finalize the IPv4 and TCP headers of one TSO segment in place.
 *
 * Writes the IP total length for this segment, then recomputes the IP header
 * checksum and the TCP checksum (TCP header + payload + IPv4 pseudo header).
 *
 * @param iphdr  start of the IPv4 header inside the packet buffer
 * @param iplen  IP header length in bytes
 * @param l4len  TCP header length in bytes
 * @param paylen number of payload bytes following the TCP header
 */
void xsum_tcpip_tso(void *iphdr, uint8_t iplen, uint8_t l4len,
        uint16_t paylen)
{
    // fold the upper 16 bits into the lower ones once, then complement
    const auto fold_and_invert = [](uint32_t partial) -> uint32_t {
        const uint32_t folded = (partial >> 16) + (partial & 0xffff);
        return ~folded & 0xffff;
    };

    uint8_t *base = static_cast<uint8_t *>(iphdr);
    struct ipv4_hdr *ip = reinterpret_cast<struct ipv4_hdr *>(base);
    struct rte_tcp_hdr *tcp =
        reinterpret_cast<struct rte_tcp_hdr *>(base + iplen);

    // IP header: new length first, checksum is taken over a zeroed field
    ip->total_length = htons(iplen + l4len + paylen);
    ip->hdr_checksum = 0;
    ip->hdr_checksum = fold_and_invert(rte_raw_cksum(iphdr, iplen));

    // TCP: header + payload, plus the pseudo header built from the IP header
    tcp->cksum = 0;
    uint32_t l4sum = rte_raw_cksum(tcp, l4len + paylen);
    l4sum += rte_ipv4_phdr_cksum(ip);
    tcp->cksum = fold_and_invert(l4sum);
}
/**
 * Advance the header template after a TSO segment has been sent, so the next
 * segment built from the same headers continues where this one ended.
 *
 * The TCP sequence number is moved forward by the payload just sent and the
 * IP identification is incremented by one.
 *
 * @param iphdr  start of the IPv4 header inside the packet buffer
 * @param iplen  IP header length in bytes (offset of the TCP header)
 * @param l4len  TCP header length in bytes (not used here)
 * @param paylen payload bytes carried by the segment that was just sent
 */
void tso_postupdate_header(void *iphdr, uint8_t iplen, uint8_t l4len,
        uint16_t paylen)
{
    uint8_t *base = static_cast<uint8_t *>(iphdr);
    struct ipv4_hdr *ip = reinterpret_cast<struct ipv4_hdr *>(base);
    struct rte_tcp_hdr *tcp =
        reinterpret_cast<struct rte_tcp_hdr *>(base + iplen);

    const uint32_t next_seq = ntohl(tcp->sent_seq) + paylen;
    tcp->sent_seq = htonl(next_seq);

    ip->packet_id = htons(ntohs(ip->packet_id) + 1);
}
} }
...@@ -5,7 +5,8 @@ UBUNTU_IMAGE := output-ubuntu1804/ubuntu1804 ...@@ -5,7 +5,8 @@ UBUNTU_IMAGE := output-ubuntu1804/ubuntu1804
BASE_IMAGE := output-base/base BASE_IMAGE := output-base/base
NOPAXOS_IMAGE := output-nopaxos/nopaxos NOPAXOS_IMAGE := output-nopaxos/nopaxos
MTCP_IMAGE := output-mtcp/mtcp MTCP_IMAGE := output-mtcp/mtcp
IMAGES := $(BASE_IMAGE) $(NOPAXOS_IMAGE) $(MTCP_IMAGE) TAS_IMAGE := output-tas/tas
IMAGES := $(BASE_IMAGE) $(NOPAXOS_IMAGE) $(MTCP_IMAGE) $(TAS_IMAGE)
RAW_IMAGES := $(addsuffix .raw,$(IMAGES)) RAW_IMAGES := $(addsuffix .raw,$(IMAGES))
QEMU_IMG := ../qemu/qemu-img QEMU_IMG := ../qemu/qemu-img
...@@ -17,6 +18,7 @@ clean: ...@@ -17,6 +18,7 @@ clean:
mqnic/mqnic.ko mqnic/*.o mqnic/.*.cmd mqnic/mqnic.mod.c \ mqnic/mqnic.ko mqnic/*.o mqnic/.*.cmd mqnic/mqnic.mod.c \
mqnic/Module.symvers mqnic/modules.order \ mqnic/Module.symvers mqnic/modules.order \
output-ubuntu1804 output-base output-mtcp output-nopaxos \ output-ubuntu1804 output-base output-mtcp output-nopaxos \
output-tas \
kernel/linux-$(KERNEL_VERSION)/ kheaders.tar.bz2 kernel/linux-$(KERNEL_VERSION)/ kheaders.tar.bz2
################################################ ################################################
...@@ -48,6 +50,11 @@ $(MTCP_IMAGE): mtcp.json packer $(BASE_IMAGE) scripts/install-mtcp.sh ...@@ -48,6 +50,11 @@ $(MTCP_IMAGE): mtcp.json packer $(BASE_IMAGE) scripts/install-mtcp.sh
./packer-wrap.sh build mtcp.json ./packer-wrap.sh build mtcp.json
touch $@ touch $@
# Build the TAS disk image from tas.json via the packer wrapper. Depends on
# the base image and on the provisioning script scripts/install-tas.sh.
# Any previous output-tas directory is removed before the build (presumably
# because packer will not reuse an existing output directory - TODO confirm).
# The final touch marks the target as up to date.
$(TAS_IMAGE): tas.json packer $(BASE_IMAGE) scripts/install-tas.sh
rm -rf output-tas
./packer-wrap.sh build tas.json
touch $@
packer: packer:
wget https://releases.hashicorp.com/packer/$(PACKER_VERSION)/packer_$(PACKER_VERSION)_linux_amd64.zip wget https://releases.hashicorp.com/packer/$(PACKER_VERSION)/packer_$(PACKER_VERSION)_linux_amd64.zip
unzip packer_$(PACKER_VERSION)_linux_amd64.zip unzip packer_$(PACKER_VERSION)_linux_amd64.zip
......
#!/bin/bash -eux
# Provisioning script for the TAS image: installs build dependencies, builds
# DPDK into /root/dpdk, builds TAS and the micro_rpc benchmarks against it, and
# blacklists the in-kernel i40e driver.

# Repeat the shebang flags: they are ignored when the script is started as
# `bash <path>` instead of being executed directly.
set -eux

DPDK_VERSION=18.11.8
DPDK_TARBALL="dpdk_${DPDK_VERSION}.orig.tar.xz"
DPDK_SRCDIR="dpdk-stable-${DPDK_VERSION}"

apt-get -y install libnuma-dev libgmp-dev bc python

cd /root/
# NOTE(review): plain-HTTP download without checksum verification; consider
# pinning a SHA-256 for the tarball.
wget "http://deb.debian.org/debian/pool/main/d/dpdk/${DPDK_TARBALL}"
tar xf "${DPDK_TARBALL}"
cd "${DPDK_SRCDIR}"
make -j4 install T=x86_64-native-linuxapp-gcc DESTDIR=/root/dpdk
cd ..
# Remove both the source tree and the tarball. The tarball name uses an
# underscore, so a `dpdk-*` glob would leave it behind in the image.
rm -rf "${DPDK_SRCDIR}" "${DPDK_TARBALL}"

git clone https://github.com/tcp-acceleration-service/tas.git /root/tas
cd /root/tas
make -j4 RTE_SDK=/root/dpdk

git clone https://github.com/FreakyPenguin/benchmarks.git /root/tasbench
cd /root/tasbench/micro_rpc
make echoserver_linux testclient_linux TAS_CODE="/root/tas"

# Keep the kernel's i40e driver from being loaded by modprobe.
echo "blacklist i40e" > /etc/modprobe.d/i40e_bl.conf
{
"_comment": "Build with `packer build tas.json`",
"builders": [
{
"iso_urls": [ "output-base/base" ],
"iso_checksum": "none",
"disk_image": "true",
"use_backing_file": "true",
"headless": "{{ user `headless` }}",
"disable_vnc": "false",
"output_directory": "output-{{ user `vm_name` }}",
"qemuargs": [
[
"-m",
"{{ user `memory` }}"
],
[
"-display",
"none"
],
[
"-machine",
"accel=kvm"
],
[
"-cpu",
"host"
],
[
"-smp",
"cpus={{ user `cpus`}}"
],
[
"-serial",
"mon:stdio"
]
],
"shutdown_command": "echo '{{ user `ssh_password` }}'|sudo -S shutdown -P now",
"ssh_password": "{{ user `ssh_password` }}",
"ssh_timeout": "10000s",
"ssh_username": "{{ user `ssh_username` }}",
"type": "qemu",
"vm_name": "{{ user `vm_name` }}"
}
],
"provisioners": [
{
"execute_command": "echo '{{ user `ssh_password` }}' | {{.Vars}} sudo -E -S bash '{{.Path}}'",
"scripts": [
"scripts/install-tas.sh"
],
"type": "shell"
}
],
"variables": {
"cleanup_pause": "",
"cpus": "4",
"custom_script": ".",
"desktop": "false",
"disk_size": "10000",
"headless": "", "hostname": "vagrant",
"memory": "4096",
"preseed": "preseed.cfg",
"ssh_fullname": "vagrant",
"ssh_password": "ubuntu",
"ssh_username": "ubuntu",
"vagrantfile_template": "",
"version": "0.1.0",
"vm_name": "tas"
}
}
#include <deque>
#include <functional>
#include <set>
namespace nicbm { namespace nicbm {
#include <cassert> #include <cassert>
...@@ -17,6 +20,13 @@ class DMAOp { ...@@ -17,6 +20,13 @@ class DMAOp {
void *data; void *data;
}; };
/**
 * Base class for events that a Runner delivers to its device once the
 * simulation time reaches `time`. The Runner stores pointers to these objects,
 * so the virtual destructor lets device code derive its own event types from
 * this class.
 */
class TimedEvent {
public:
virtual ~TimedEvent() { }
// Time at which the event is due; compared against the Runner's main_time
// (presumably picoseconds, like Runner::time_ps() - TODO confirm).
uint64_t time;
};
/** /**
* The Runner drives the main simulation loop. It's initialized with a reference * The Runner drives the main simulation loop. It's initialized with a reference
* to a device it should manage, and then once `runMain` is called, it will * to a device it should manage, and then once `runMain` is called, it will
...@@ -59,10 +69,25 @@ class Runner { ...@@ -59,10 +69,25 @@ class Runner {
*/ */
virtual void eth_rx(uint8_t port, const void *data, size_t len) virtual void eth_rx(uint8_t port, const void *data, size_t len)
= 0; = 0;
/**
* A timed event is due.
*/
virtual void timed_event(TimedEvent &ev);
}; };
protected: protected:
struct event_cmp {
bool operator() (TimedEvent *a, TimedEvent *b)
{
return a->time < b->time;
}
};
Device &dev; Device &dev;
std::set<TimedEvent *, event_cmp> events;
std::deque<DMAOp *> dma_queue;
size_t dma_pending;
uint64_t mac_addr; uint64_t mac_addr;
struct nicsim_params nsparams; struct nicsim_params nsparams;
struct cosim_pcie_proto_dev_intro dintro; struct cosim_pcie_proto_dev_intro dintro;
...@@ -79,6 +104,11 @@ class Runner { ...@@ -79,6 +104,11 @@ class Runner {
void eth_recv(volatile struct cosim_eth_proto_n2d_recv *recv); void eth_recv(volatile struct cosim_eth_proto_n2d_recv *recv);
void poll_n2d(); void poll_n2d();
bool event_next(uint64_t &retval);
void event_trigger();
void dma_do(DMAOp &op);
void dma_trigger();
public: public:
Runner(Device &dev_); Runner(Device &dev_);
...@@ -90,6 +120,9 @@ class Runner { ...@@ -90,6 +120,9 @@ class Runner {
void msi_issue(uint8_t vec); void msi_issue(uint8_t vec);
void eth_send(const void *data, size_t len); void eth_send(const void *data, size_t len);
void event_schedule(TimedEvent &evt);
void event_cancel(TimedEvent &evt);
uint64_t time_ps() const; uint64_t time_ps() const;
uint64_t get_mac_addr() const; uint64_t get_mac_addr() const;
}; };
......
...@@ -12,9 +12,10 @@ ...@@ -12,9 +12,10 @@
//#define DEBUG_NICBM 1 //#define DEBUG_NICBM 1
#define SYNC_PERIOD (500 * 1000ULL) // 500ns #define SYNC_PERIOD (100 * 1000ULL) // 100ns
#define PCI_LATENCY (500 * 1000ULL) // 500ns #define PCI_LATENCY (500 * 1000ULL) // 500ns
#define ETH_LATENCY (500 * 1000ULL) // 500ns #define ETH_LATENCY (500 * 1000ULL) // 500ns
#define DMA_MAX_PENDING 64
using namespace nicbm; using namespace nicbm;
...@@ -57,11 +58,41 @@ volatile union cosim_eth_proto_d2n *Runner::d2n_alloc(void) ...@@ -57,11 +58,41 @@ volatile union cosim_eth_proto_d2n *Runner::d2n_alloc(void)
} }
void Runner::issue_dma(DMAOp &op) void Runner::issue_dma(DMAOp &op)
{
if (dma_pending < DMA_MAX_PENDING) {
// can directly issue
#ifdef DEBUG_NICBM
printf("nicbm: issuing dma op %p addr %lx len %zu pending %zu\n", &op,
op.dma_addr, op.len, dma_pending);
#endif
dma_do(op);
} else {
#ifdef DEBUG_NICBM
printf("nicbm: enqueuing dma op %p addr %lx len %zu pending %zu\n", &op,
op.dma_addr, op.len, dma_pending);
#endif
dma_queue.push_back(&op);
}
}
void Runner::dma_trigger()
{
if (dma_queue.empty() || dma_pending == DMA_MAX_PENDING)
return;
DMAOp *op = dma_queue.front();
dma_queue.pop_front();
dma_do(*op);
}
void Runner::dma_do(DMAOp &op)
{ {
volatile union cosim_pcie_proto_d2h *msg = d2h_alloc(); volatile union cosim_pcie_proto_d2h *msg = d2h_alloc();
dma_pending++;
#ifdef DEBUG_NICBM #ifdef DEBUG_NICBM
printf("nicbm: issue dma op %p addr %lx len %zu\n", &op, op.dma_addr, printf("nicbm: executing dma op %p addr %lx len %zu pending %zu\n", &op,
op.len); op.dma_addr, op.len, dma_pending);
#endif #endif
if (op.write) { if (op.write) {
...@@ -113,6 +144,16 @@ void Runner::msi_issue(uint8_t vec) ...@@ -113,6 +144,16 @@ void Runner::msi_issue(uint8_t vec)
COSIM_PCIE_PROTO_D2H_OWN_HOST; COSIM_PCIE_PROTO_D2H_OWN_HOST;
} }
/**
 * Add evt to the set of pending timed events. Only the address is stored (no
 * copy is made), so evt has to stay alive until it has fired or been
 * cancelled. evt.time is expected to be set before this call since the set is
 * ordered by it.
 */
void Runner::event_schedule(TimedEvent &evt)
{
events.insert(&evt);
}
/**
 * Remove evt from the set of pending timed events. The lookup goes through the
 * set's comparator, so evt.time should still hold the value it had when the
 * event was scheduled.
 */
void Runner::event_cancel(TimedEvent &evt)
{
events.erase(&evt);
}
void Runner::h2d_read(volatile struct cosim_pcie_proto_h2d_read *read) void Runner::h2d_read(volatile struct cosim_pcie_proto_h2d_read *read)
{ {
volatile union cosim_pcie_proto_d2h *msg; volatile union cosim_pcie_proto_d2h *msg;
...@@ -169,6 +210,9 @@ void Runner::h2d_readcomp(volatile struct cosim_pcie_proto_h2d_readcomp *rc) ...@@ -169,6 +210,9 @@ void Runner::h2d_readcomp(volatile struct cosim_pcie_proto_h2d_readcomp *rc)
memcpy(op->data, (void *)rc->data, op->len); memcpy(op->data, (void *)rc->data, op->len);
dev.dma_complete(*op); dev.dma_complete(*op);
dma_pending--;
dma_trigger();
} }
void Runner::h2d_writecomp(volatile struct cosim_pcie_proto_h2d_writecomp *wc) void Runner::h2d_writecomp(volatile struct cosim_pcie_proto_h2d_writecomp *wc)
...@@ -181,6 +225,9 @@ void Runner::h2d_writecomp(volatile struct cosim_pcie_proto_h2d_writecomp *wc) ...@@ -181,6 +225,9 @@ void Runner::h2d_writecomp(volatile struct cosim_pcie_proto_h2d_writecomp *wc)
#endif #endif
dev.dma_complete(*op); dev.dma_complete(*op);
dma_pending--;
dma_trigger();
} }
void Runner::eth_recv(volatile struct cosim_eth_proto_n2d_recv *recv) void Runner::eth_recv(volatile struct cosim_eth_proto_n2d_recv *recv)
...@@ -281,10 +328,36 @@ uint64_t Runner::get_mac_addr() const ...@@ -281,10 +328,36 @@ uint64_t Runner::get_mac_addr() const
return mac_addr; return mac_addr;
} }
/**
 * Report the due time of the earliest pending timed event.
 *
 * @param retval receives the due time; left untouched when nothing is pending
 * @return true if an event is pending, false otherwise
 */
bool Runner::event_next(uint64_t &retval)
{
    const auto earliest = events.begin();
    if (earliest == events.end())
        return false;

    retval = (*earliest)->time;
    return true;
}
void Runner::event_trigger()
{
auto it = events.begin();
if (it == events.end())
return;
TimedEvent *ev = *it;
// event is in the future
if (ev->time > main_time)
return;
events.erase(it);
dev.timed_event(*ev);
}
Runner::Runner(Device &dev_) Runner::Runner(Device &dev_)
: dev(dev_) : dev(dev_), events(event_cmp())
{ {
//mac_addr = lrand48() & ~(3ULL << 46); //mac_addr = lrand48() & ~(3ULL << 46);
dma_pending = 0;
srand48(time(NULL) ^ getpid()); srand48(time(NULL) ^ getpid());
mac_addr = lrand48(); mac_addr = lrand48();
mac_addr <<= 16; mac_addr <<= 16;
...@@ -297,6 +370,7 @@ Runner::Runner(Device &dev_) ...@@ -297,6 +370,7 @@ Runner::Runner(Device &dev_)
int Runner::runMain(int argc, char *argv[]) int Runner::runMain(int argc, char *argv[])
{ {
uint64_t next_ts; uint64_t next_ts;
uint64_t max_step = 10000;
if (argc != 4 && argc != 5) { if (argc != 4 && argc != 5) {
fprintf(stderr, "Usage: corundum_bm PCI-SOCKET ETH-SOCKET " fprintf(stderr, "Usage: corundum_bm PCI-SOCKET ETH-SOCKET "
...@@ -327,6 +401,8 @@ int Runner::runMain(int argc, char *argv[]) ...@@ -327,6 +401,8 @@ int Runner::runMain(int argc, char *argv[])
fprintf(stderr, "sync_pci=%d sync_eth=%d\n", nsparams.sync_pci, fprintf(stderr, "sync_pci=%d sync_eth=%d\n", nsparams.sync_pci,
nsparams.sync_eth); nsparams.sync_eth);
bool is_sync = nsparams.sync_pci || nsparams.sync_eth;
while (!exiting) { while (!exiting) {
while (nicsim_sync(&nsparams, main_time)) { while (nicsim_sync(&nsparams, main_time)) {
fprintf(stderr, "warn: nicsim_sync failed (t=%lu)\n", main_time); fprintf(stderr, "warn: nicsim_sync failed (t=%lu)\n", main_time);
...@@ -335,9 +411,21 @@ int Runner::runMain(int argc, char *argv[]) ...@@ -335,9 +411,21 @@ int Runner::runMain(int argc, char *argv[])
do { do {
poll_h2d(); poll_h2d();
poll_n2d(); poll_n2d();
next_ts = netsim_next_timestamp(&nsparams); event_trigger();
} while ((nsparams.sync_pci || nsparams.sync_eth) &&
next_ts <= main_time && !exiting); if (is_sync) {
next_ts = netsim_next_timestamp(&nsparams);
if (next_ts > main_time + max_step)
next_ts = main_time + max_step;
} else {
next_ts = main_time + max_step;
}
uint64_t ev_ts;
if (event_next(ev_ts) && ev_ts < next_ts)
next_ts = ev_ts;
} while (next_ts <= main_time && !exiting);
main_time = next_ts; main_time = next_ts;
} }
...@@ -345,3 +433,7 @@ int Runner::runMain(int argc, char *argv[]) ...@@ -345,3 +433,7 @@ int Runner::runMain(int argc, char *argv[])
nicsim_cleanup(); nicsim_cleanup();
return 0; return 0;
} }
/**
 * Default handler for a due timed event: intentionally does nothing. Devices
 * that schedule timed events are expected to override this.
 */
void Runner::Device::timed_event(TimedEvent &te)
{
}
...@@ -64,7 +64,7 @@ int netsim_init(struct netsim_interface *nsif, ...@@ -64,7 +64,7 @@ int netsim_init(struct netsim_interface *nsif,
if ((di.flags & COSIM_ETH_PROTO_FLAGS_DI_SYNC) == 0) { if ((di.flags & COSIM_ETH_PROTO_FLAGS_DI_SYNC) == 0) {
*sync_eth = 0; *sync_eth = 0;
nsif->sync = 1; nsif->sync = 0;
} else { } else {
nsif->sync = *sync_eth; nsif->sync = *sync_eth;
} }
...@@ -142,6 +142,9 @@ int netsim_n2d_sync(struct netsim_interface *nsif, uint64_t timestamp, ...@@ -142,6 +142,9 @@ int netsim_n2d_sync(struct netsim_interface *nsif, uint64_t timestamp,
volatile union cosim_eth_proto_n2d *msg; volatile union cosim_eth_proto_n2d *msg;
volatile struct cosim_eth_proto_n2d_sync *sync; volatile struct cosim_eth_proto_n2d_sync *sync;
if (!nsif->sync)
return 0;
if (nsif->n2d_timestamp != 0 && if (nsif->n2d_timestamp != 0 &&
timestamp - nsif->n2d_timestamp < sync_delay) timestamp - nsif->n2d_timestamp < sync_delay)
return 0; return 0;
......
...@@ -32,16 +32,16 @@ ...@@ -32,16 +32,16 @@
#include "internal.h" #include "internal.h"
#define D2H_ELEN (4096 + 64) #define D2H_ELEN (9024 + 64)
#define D2H_ENUM 1024 #define D2H_ENUM 1024
#define H2D_ELEN (4096 + 64) #define H2D_ELEN (9024 + 64)
#define H2D_ENUM 1024 #define H2D_ENUM 1024
#define D2N_ELEN (2048 + 64) #define D2N_ELEN (9024 + 64)
#define D2N_ENUM 8192 #define D2N_ENUM 8192
#define N2D_ELEN (2048 + 64) #define N2D_ELEN (9024 + 64)
#define N2D_ENUM 8192 #define N2D_ENUM 8192
...@@ -182,9 +182,14 @@ int nicsim_init(struct nicsim_params *params, ...@@ -182,9 +182,14 @@ int nicsim_init(struct nicsim_params *params,
{ {
int pci_lfd = -1, eth_lfd = -1; int pci_lfd = -1, eth_lfd = -1;
void *shmptr; void *shmptr;
size_t shm_size;
/* ready in memory queues */ /* ready in memory queues */
if ((shm_fd = shm_create(params->shm_path, 64 * 1024 * 1024, &shmptr)) shm_size = (uint64_t) D2H_ELEN * D2H_ENUM +
(uint64_t) H2D_ELEN * H2D_ENUM +
(uint64_t) D2N_ELEN * D2N_ENUM +
(uint64_t) N2D_ELEN * N2D_ENUM;
if ((shm_fd = shm_create(params->shm_path, shm_size, &shmptr))
< 0) < 0)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment