Commit 5ee84460 authored by Jialin Li's avatar Jialin Li
Browse files

merge master

parents d8440794 dbbc89bd
......@@ -141,16 +141,23 @@ void lan_queue_base::disable()
void lan_queue_base::interrupt()
{
uint32_t qctl = reg_intqctl;
uint32_t gctl = lanmgr.dev.regs.pfint_dyn_ctl0;
#ifdef DEBUG_LAN
log << " interrupt intctl=" << qctl << logger::endl;
log << " interrupt qctl=" << qctl << " gctl=" << gctl << logger::endl;
#endif
uint16_t msix_idx = (qctl & I40E_QINT_TQCTL_MSIX_INDX_MASK) >>
I40E_QINT_TQCTL_ITR_INDX_SHIFT;
uint8_t msix0_idx = (qctl & I40E_QINT_TQCTL_MSIX0_INDX_MASK) >>
I40E_QINT_TQCTL_MSIX0_INDX_SHIFT;
bool cause_ena = !!(qctl & I40E_QINT_TQCTL_CAUSE_ENA_MASK);
if (msix_idx != 0) {
log << "TODO: only int 0 is supported" << logger::endl;
abort();
}
bool cause_ena = !!(qctl & I40E_QINT_TQCTL_CAUSE_ENA_MASK) &&
!!(gctl & I40E_PFINT_DYN_CTL0_INTENA_MASK);
if (!cause_ena) {
#ifdef DEBUG_LAN
log << " interrupt cause disabled" << logger::endl;
......@@ -158,18 +165,17 @@ void lan_queue_base::interrupt()
return;
}
if (msix_idx != 0) {
log << "TODO: only int 0 is supported" << logger::endl;
abort();
}
// TODO throttling?
#ifdef DEBUG_LAN
log << " setting int0.qidx=" << msix0_idx << logger::endl;
#endif
lanmgr.dev.regs.pfint_icr0 |= I40E_PFINT_ICR0_INTEVENT_MASK |
(1 << (I40E_PFINT_ICR0_QUEUE_0_SHIFT + msix0_idx));
runner->msi_issue(0);
uint8_t itr = (qctl & I40E_QINT_TQCTL_ITR_INDX_MASK) >>
I40E_QINT_TQCTL_ITR_INDX_SHIFT;
lanmgr.dev.signal_interrupt(0, itr);
}
lan_queue_base::qctx_fetch::qctx_fetch(lan_queue_base &lq_)
......@@ -252,22 +258,36 @@ queue_base::desc_ctx &lan_queue_rx::desc_ctx_create()
void lan_queue_rx::packet_received(const void *data, size_t pktlen)
{
if (dcache.empty()) {
size_t num_descs = (pktlen + dbuff_size - 1) / dbuff_size;
if (!enabled)
return;
if (dcache.size() < num_descs) {
#ifdef DEBUG_LAN
log << " empty, dropping packet" << logger::endl;
log << " not enough rx descs (" << num_descs << ", dropping packet" <<
logger::endl;
#endif
return;
}
rx_desc_ctx &ctx = *dcache.front();
for (size_t i = 0; i < num_descs; i++) {
rx_desc_ctx &ctx = *dcache.front();
#ifdef DEBUG_LAN
log << " packet received didx=" << ctx.index << " cnt=" <<
dcache.size() << logger::endl;
log << " packet part=" << i << " received didx=" << ctx.index <<
" cnt=" << dcache.size() << logger::endl;
#endif
dcache.pop_front();
ctx.packet_received(data, pktlen);
dcache.pop_front();
const uint8_t *buf = (const uint8_t *) data + (dbuff_size * i);
if (i == num_descs - 1) {
// last packet
ctx.packet_received(buf, pktlen - dbuff_size * i, true);
} else {
ctx.packet_received(buf, dbuff_size, false);
}
}
}
lan_queue_rx::rx_desc_ctx::rx_desc_ctx(lan_queue_rx &queue_)
......@@ -285,7 +305,8 @@ void lan_queue_rx::rx_desc_ctx::process()
rq.dcache.push_back(this);
}
void lan_queue_rx::rx_desc_ctx::packet_received(const void *data, size_t pktlen)
void lan_queue_rx::rx_desc_ctx::packet_received(const void *data,
size_t pktlen, bool last)
{
union i40e_32byte_rx_desc *rxd = reinterpret_cast<
union i40e_32byte_rx_desc *> (desc);
......@@ -294,11 +315,13 @@ void lan_queue_rx::rx_desc_ctx::packet_received(const void *data, size_t pktlen)
memset(rxd, 0, sizeof(*rxd));
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_DD_SHIFT);
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_EOF_SHIFT);
// TODO: only if checksums are correct
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT);
rxd->wb.qword1.status_error_len |= (pktlen << I40E_RXD_QW1_LENGTH_PBUF_SHIFT);
if (last) {
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_EOF_SHIFT);
// TODO: only if checksums are correct
rxd->wb.qword1.status_error_len |= (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT);
}
data_write(addr, pktlen, data);
}
......@@ -313,6 +336,8 @@ lan_queue_tx::lan_queue_tx(lan &lanmgr_, uint32_t &reg_tail_, size_t idx_,
/**
 * Reset the transmit queue: forget all queued-up segments, rewind the TSO
 * progress counters and then run queue_base::reset().
 */
void lan_queue_tx::reset()
{
    // drop every descriptor still waiting in the ready list
    ready_segments.clear();

    // no partially transmitted TSO unit remains
    tso_len = 0;
    tso_off = 0;

    queue_base::reset();
}
......@@ -371,74 +396,201 @@ void lan_queue_tx::do_writeback(uint32_t first_idx, uint32_t first_pos,
bool lan_queue_tx::trigger_tx_packet()
{
size_t n = ready_segments.size();
size_t d_skip = 0, dcnt;
bool eop = false;
uint64_t d1;
uint32_t iipt, l4t, pkt_len, total_len = 0, data_limit;
bool tso = false;
uint32_t tso_mss = 0, tso_paylen = 0;
uint16_t maclen = 0, iplen = 0, l4len = 0;
// abort if no queued up descriptors
if (n == 0)
return false;
size_t dcnt;
bool eop = false;
uint64_t d1;
uint16_t iipt, l4t, total_len = 0;
for (dcnt = 0; dcnt < n && !eop; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
#ifdef DEBUG_LAN
log << "trigger_tx_packet(n=" << n << ", firstidx=" <<
ready_segments.at(0)->index << ")" << logger::endl;
log << " tso_off=" << tso_off << " tso_len=" << tso_len << logger::endl;
#endif
// check if we have a context descriptor first
tx_desc_ctx *rd = ready_segments.at(0);
uint8_t dtype = (rd->d->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK) >>
I40E_TXD_QW1_DTYPE_SHIFT;
if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) {
struct i40e_tx_context_desc *ctxd =
reinterpret_cast<struct i40e_tx_context_desc *> (rd->d);
d1 = ctxd->type_cmd_tso_mss;
uint16_t cmd = ((d1 & I40E_TXD_CTX_QW1_CMD_MASK) >>
I40E_TXD_CTX_QW1_CMD_SHIFT);
tso = !!(cmd & I40E_TX_CTX_DESC_TSO);
tso_mss = (d1 & I40E_TXD_CTX_QW1_MSS_MASK) >>
I40E_TXD_CTX_QW1_MSS_SHIFT;
#ifdef DEBUG_LAN
log << " tso=" << tso << " mss=" << tso_mss << logger::endl;
#endif
d_skip = 1;
}
// find EOP descriptor
for (dcnt = d_skip; dcnt < n && !eop; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
d1 = rd->d->cmd_type_offset_bsz;
#ifdef DEBUG_LAN
log << " data fetched didx=" << rd->index << " d1=" <<
d1 << logger::endl;
#endif
uint16_t pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
if (total_len + pkt_len > MTU) {
log << "txq: trigger_tx_packet too large" << logger::endl;
dtype = (d1 & I40E_TXD_QW1_DTYPE_MASK) >> I40E_TXD_QW1_DTYPE_SHIFT;
if (dtype != I40E_TX_DESC_DTYPE_DATA) {
log << "trigger tx desc is not a data descriptor idx=" << rd->index
<< " d1=" << d1 << logger::endl;
abort();
}
memcpy(pktbuf + total_len, rd->data, pkt_len);
uint16_t cmd = (d1 & I40E_TXD_QW1_CMD_MASK) >> I40E_TXD_QW1_CMD_SHIFT;
eop = (cmd & I40E_TX_DESC_CMD_EOP);
iipt = cmd & (I40E_TX_DESC_CMD_IIPT_MASK);
l4t = (cmd & I40E_TX_DESC_CMD_L4T_EOFT_MASK);
#ifdef DEBUG_LAN
log << " eop=" << eop << " len=" << pkt_len <<
logger::endl;
#endif
if (eop) {
uint32_t off = (d1 & I40E_TXD_QW1_OFFSET_MASK) >> I40E_TXD_QW1_OFFSET_SHIFT;
maclen = ((off & I40E_TXD_QW1_MACLEN_MASK) >>
I40E_TX_DESC_LENGTH_MACLEN_SHIFT) * 2;
iplen = ((off & I40E_TXD_QW1_IPLEN_MASK) >>
I40E_TX_DESC_LENGTH_IPLEN_SHIFT) * 4;
l4len = ((off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT) * 4;
}
pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
total_len += pkt_len;
#ifdef DEBUG_LAN
log << " eop=" << eop << " len=" << pkt_len << logger::endl;
#endif
}
// Unit not completely fetched yet
if (!eop)
return false;
uint32_t off = (d1 & I40E_TXD_QW1_OFFSET_MASK) >> I40E_TXD_QW1_OFFSET_SHIFT;
uint16_t maclen = ((off & I40E_TXD_QW1_MACLEN_MASK) >>
I40E_TX_DESC_LENGTH_MACLEN_SHIFT) * 2;
uint16_t iplen = ((off & I40E_TXD_QW1_IPLEN_MASK) >>
I40E_TX_DESC_LENGTH_IPLEN_SHIFT) * 4;
/*uint16_t l4len = (off & I40E_TXD_QW1_L4LEN_MASK) >>
I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;*/
if (tso) {
if (tso_off == 0)
data_limit = maclen + iplen + l4len + tso_mss;
else
data_limit = tso_off + tso_mss;
if (l4t == I40E_TX_DESC_CMD_L4T_EOFT_TCP) {
uint16_t tcp_off = maclen + iplen;
xsum_tcp(pktbuf + tcp_off, total_len - tcp_off);
if (data_limit > total_len) {
data_limit = total_len;
}
} else {
if (total_len > MTU) {
log << " packet is longer (" << total_len << ") than MTU (" <<
MTU << ")" << logger::endl;
abort();
}
data_limit = total_len;
}
#ifdef DEBUG_LAN
log << " iipt=" << iipt << " l4t=" << l4t <<
" maclen=" << maclen << " iplen=" << iplen<< logger::endl;
" maclen=" << maclen << " iplen=" << iplen << " l4len=" << l4len <<
" total_len=" << total_len << " data_limit=" << data_limit <<
logger::endl;
#else
(void) iipt;
#endif
runner->eth_send(pktbuf, total_len);
// copy data for this segment
uint32_t off = 0;
for (dcnt = d_skip; dcnt < n && off < data_limit; dcnt++) {
tx_desc_ctx *rd = ready_segments.at(dcnt);
d1 = rd->d->cmd_type_offset_bsz;
uint16_t pkt_len = (d1 & I40E_TXD_QW1_TX_BUF_SZ_MASK) >>
I40E_TXD_QW1_TX_BUF_SZ_SHIFT;
if (off <= tso_off && off + pkt_len > tso_off) {
uint32_t start = tso_off;
uint32_t end = off + pkt_len;
if (end > data_limit)
end = data_limit;
#ifdef DEBUG_LAN
log << " copying data from off=" << off << " idx=" << rd->index <<
" start=" << start << " end=" << end << " tso_len=" << tso_len <<
logger::endl;
#endif
memcpy(pktbuf + tso_len, (uint8_t *) rd->data + (start - off),
end - start);
tso_off = end;
tso_len += end - start;
}
off += pkt_len;
}
assert(tso_len <= MTU);
if (!tso) {
#ifdef DEBUG_LAN
log << " normal non-tso packet" << logger::endl;
#endif
if (l4t == I40E_TX_DESC_CMD_L4T_EOFT_TCP) {
uint16_t tcp_off = maclen + iplen;
xsum_tcp(pktbuf + tcp_off, tso_len - tcp_off);
}
runner->eth_send(pktbuf, tso_len);
} else {
#ifdef DEBUG_LAN
log << " tso packet off=" << tso_off << " len=" << tso_len <<
logger::endl;
#endif
// TSO gets hairier
uint16_t hdrlen = maclen + iplen + l4len;
// calculate payload size
tso_paylen = tso_len - hdrlen;
if (tso_paylen > tso_mss)
tso_paylen = tso_mss;
xsum_tcpip_tso(pktbuf + maclen, iplen, l4len, tso_paylen);
runner->eth_send(pktbuf, tso_len);
tso_postupdate_header(pktbuf + maclen, iplen, l4len, tso_paylen);
// not done yet with this TSO unit
if (tso && tso_off < total_len) {
tso_len = hdrlen;
return true;
}
}
#ifdef DEBUG_LAN
log << " unit done" << logger::endl;
#endif
while (dcnt-- > 0) {
ready_segments.front()->processed();
ready_segments.pop_front();
}
tso_len = 0;
tso_off = 0;
return true;
}
......@@ -474,17 +626,14 @@ void lan_queue_tx::tx_desc_ctx::prepare()
data_fetch(d->buffer_addr, len);
} else if (dtype == I40E_TX_DESC_DTYPE_CONTEXT) {
#ifdef DEBUG_LAN
struct i40e_tx_context_desc *ctxd =
reinterpret_cast<struct i40e_tx_context_desc *> (d);
queue.log << " context descriptor: tp=" << ctxd->tunneling_params <<
" l2t=" << ctxd->l2tag2 << " tctm=" << ctxd->type_cmd_tso_mss << logger::endl;
abort();
/*desc->buffer_addr = 0;
desc->cmd_type_offset_bsz = I40E_TX_DESC_DTYPE_DESC_DONE <<
I40E_TXD_QW1_DTYPE_SHIFT;
#endif
desc_writeback(desc_buf, didx);*/
prepared();
} else {
queue.log << "txq: only support context & data descriptors" << logger::endl;
abort();
......
......@@ -2,6 +2,7 @@
#include <string.h>
#include <cassert>
#include <iostream>
#include <algorithm>
#include "i40e_bm.h"
......@@ -289,6 +290,8 @@ void queue_base::desc_ctx::processed()
state = DESC_PROCESSED;
}
#define MAX_DMA_SIZE ((size_t) 9024)
void queue_base::desc_ctx::data_fetch(uint64_t addr, size_t data_len)
{
if (data_capacity < data_len) {
......@@ -302,7 +305,10 @@ void queue_base::desc_ctx::data_fetch(uint64_t addr, size_t data_len)
data_capacity = data_len;
}
dma_data_fetch *dma = new dma_data_fetch(*this, data_len, data);
dma_data_fetch *dma = new dma_data_fetch(*this, std::min(data_len,
MAX_DMA_SIZE), data);
dma->part_offset = 0;
dma->total_len = data_len;
dma->write = false;
dma->dma_addr = addr;
......@@ -387,7 +393,19 @@ queue_base::dma_data_fetch::~dma_data_fetch()
/**
 * Completion callback for one part of a (possibly multi-part) data fetch.
 *
 * A fetch of total_len bytes is split into parts of at most MAX_DMA_SIZE
 * bytes. After each part the cursor (part_offset, dma_addr, data) is advanced
 * and, if bytes remain, this same operation is re-issued for the next part.
 * The descriptor context is notified exactly once, after the last part, with
 * the original base address and the full length; the operation then deletes
 * itself.
 */
void queue_base::dma_data_fetch::done()
{
    // advance past the part that just completed
    part_offset += len;
    dma_addr += len;
    data = (uint8_t *) data + len;

    if (part_offset < total_len) {
#ifdef DEBUG_QUEUES
        ctx.queue.log << " dma_fetch: next part of multi part dma" << logger::endl;
#endif
        // re-use this operation for the next chunk; the context must not be
        // notified yet because its buffer is only partially filled
        len = std::min(total_len - part_offset, MAX_DMA_SIZE);
        runner->issue_dma(*this);
        return;
    }

    // dma_addr was advanced by part_offset bytes in total; subtract it again
    // to report the address the fetch originally started at
    ctx.data_fetched(dma_addr - part_offset, total_len);
    ctx.queue.trigger();
    delete this;
}
......
......@@ -9,6 +9,7 @@ extern nicbm::Runner *runner;
/**
 * Create a logger with the given label. The internal stream buffer is
 * switched to hexadecimal integer formatting.
 */
logger::logger(const std::string &label_)
    : label(label_)
{
    // same effect as streaming std::hex into ss
    ss.setf(std::ios_base::hex, std::ios_base::basefield);
}
logger &logger::operator<<(char c)
......@@ -17,6 +18,7 @@ logger &logger::operator<<(char c)
std::cerr << runner->time_ps() << " " << label << ": " << ss.str() <<
std::endl;
ss.str(std::string());
ss << std::hex;
} else {
ss << c;
}
......
......@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <cassert>
#include <iostream>
......@@ -30,6 +31,19 @@ struct rte_tcp_hdr {
/* from dpdk/lib/librte_net/rte_ip.h */
/*
 * On-the-wire layout of the fixed part of an IPv4 header (20 bytes, packed so
 * it can be overlaid directly on packet memory). The checksum helpers in this
 * file convert total_length and packet_id with ntohs()/htons(), i.e. they
 * treat these fields as network byte order.
 */
struct ipv4_hdr {
uint8_t version_ihl; /**< version and header length */
uint8_t type_of_service; /**< type of service */
uint16_t total_length; /**< length of packet */
uint16_t packet_id; /**< packet ID */
uint16_t fragment_offset; /**< fragmentation offset */
uint8_t time_to_live; /**< time to live */
uint8_t next_proto_id; /**< protocol ID */
uint16_t hdr_checksum; /**< header checksum */
uint32_t src_addr; /**< source address */
uint32_t dst_addr; /**< destination address */
} __attribute__((packed));
static inline uint32_t __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
{
/* workaround gcc strict-aliasing warning */
......@@ -76,6 +90,27 @@ static inline uint16_t rte_raw_cksum(const void *buf, size_t len)
return __rte_raw_cksum_reduce(sum);
}
/**
 * Compute the raw (non-complemented) checksum of the IPv4 pseudo header used
 * by the L4 checksum.
 *
 * The L4 length placed in the pseudo header is the IP total length minus the
 * actual IP header length taken from the IHL field (low nibble of
 * version_ihl, counted in 32-bit words), so headers carrying IP options are
 * handled correctly. For option-less headers (IHL == 5) this equals
 * total_length - 20.
 *
 * @param ipv4_hdr pointer to the IPv4 header; total_length and version_ihl
 *                 must already be filled in.
 * @return the raw checksum over the pseudo header, as returned by
 *         rte_raw_cksum().
 */
static inline uint16_t rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr)
{
    struct ipv4_psd_header {
        uint32_t src_addr; /* IP address of source host. */
        uint32_t dst_addr; /* IP address of destination host. */
        uint8_t zero;      /* zero. */
        uint8_t proto;     /* L4 protocol type. */
        uint16_t len;      /* L4 length. */
    } psd_hdr;

    /* IHL is expressed in 4-byte units */
    const uint16_t ip_hdr_len = (uint16_t) ((ipv4_hdr->version_ihl & 0x0f) * 4);

    psd_hdr.src_addr = ipv4_hdr->src_addr;
    psd_hdr.dst_addr = ipv4_hdr->dst_addr;
    psd_hdr.zero = 0;
    psd_hdr.proto = ipv4_hdr->next_proto_id;
    psd_hdr.len = htons(
            (uint16_t) (ntohs(ipv4_hdr->total_length) - ip_hdr_len));
    return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}
void xsum_tcp(void *tcphdr, size_t l4_len)
{
struct rte_tcp_hdr *tcph = reinterpret_cast<struct rte_tcp_hdr *> (tcphdr);
......@@ -85,4 +120,40 @@ void xsum_tcp(void *tcphdr, size_t l4_len)
tcph->cksum = cksum;
}
/**
 * Finalize the IPv4 and TCP headers of one TSO segment in place.
 *
 * Writes the IP total length for this segment, then recomputes the IP header
 * checksum and the TCP checksum (TCP header + payload + pseudo header).
 *
 * @param iphdr  start of the IPv4 header inside the packet buffer
 * @param iplen  IP header length in bytes
 * @param l4len  TCP header length in bytes
 * @param paylen TCP payload length of this segment in bytes
 */
void xsum_tcpip_tso(void *iphdr, uint8_t iplen, uint8_t l4len,
        uint16_t paylen)
{
    uint8_t *const ip_bytes = (uint8_t *) iphdr;
    struct ipv4_hdr *ip = (struct ipv4_hdr *) ip_bytes;
    struct rte_tcp_hdr *tcp = (struct rte_tcp_hdr *) (ip_bytes + iplen);

    // IP header: the length must be set and the checksum field zeroed before
    // summing over the header
    ip->total_length = htons(iplen + l4len + paylen);
    ip->hdr_checksum = 0;
    uint32_t ip_sum = rte_raw_cksum(iphdr, iplen);
    ip_sum = (ip_sum >> 16) + (ip_sum & 0xffff);
    ip->hdr_checksum = (~ip_sum) & 0xffff;

    // TCP: sum over header + payload with a zeroed checksum field, then add
    // the pseudo header, fold once and complement
    tcp->cksum = 0;
    uint32_t tcp_sum = rte_raw_cksum(tcp, l4len + paylen);
    tcp_sum += rte_ipv4_phdr_cksum(ip);
    tcp_sum = (tcp_sum >> 16) + (tcp_sum & 0xffff);
    tcp->cksum = (~tcp_sum) & 0xffff;
}
/**
 * Advance the headers in place after one TSO segment has been sent: the TCP
 * sequence number grows by the payload just sent and the IP packet ID is
 * incremented by one.
 *
 * @param iphdr  start of the IPv4 header inside the packet buffer
 * @param iplen  IP header length in bytes (offset of the TCP header)
 * @param l4len  TCP header length in bytes (not used here)
 * @param paylen payload bytes carried by the segment that was just sent
 */
void tso_postupdate_header(void *iphdr, uint8_t iplen, uint8_t l4len,
        uint16_t paylen)
{
    uint8_t *const base = (uint8_t *) iphdr;
    struct ipv4_hdr *ip = (struct ipv4_hdr *) base;
    struct rte_tcp_hdr *tcp = (struct rte_tcp_hdr *) (base + iplen);

    const uint32_t next_seq = ntohl(tcp->sent_seq) + paylen;
    tcp->sent_seq = htonl(next_seq);

    const uint16_t next_id = ntohs(ip->packet_id) + 1;
    ip->packet_id = htons(next_id);
}
}
......@@ -5,7 +5,8 @@ UBUNTU_IMAGE := output-ubuntu1804/ubuntu1804
BASE_IMAGE := output-base/base
NOPAXOS_IMAGE := output-nopaxos/nopaxos
MTCP_IMAGE := output-mtcp/mtcp
IMAGES := $(BASE_IMAGE) $(NOPAXOS_IMAGE) $(MTCP_IMAGE)
TAS_IMAGE := output-tas/tas
IMAGES := $(BASE_IMAGE) $(NOPAXOS_IMAGE) $(MTCP_IMAGE) $(TAS_IMAGE)
RAW_IMAGES := $(addsuffix .raw,$(IMAGES))
QEMU_IMG := ../qemu/qemu-img
......@@ -17,6 +18,7 @@ clean:
mqnic/mqnic.ko mqnic/*.o mqnic/.*.cmd mqnic/mqnic.mod.c \
mqnic/Module.symvers mqnic/modules.order \
output-ubuntu1804 output-base output-mtcp output-nopaxos \
output-tas \
kernel/linux-$(KERNEL_VERSION)/ kheaders.tar.bz2
################################################
......@@ -48,6 +50,11 @@ $(MTCP_IMAGE): mtcp.json packer $(BASE_IMAGE) scripts/install-mtcp.sh
./packer-wrap.sh build mtcp.json
touch $@
$(TAS_IMAGE): tas.json packer $(BASE_IMAGE) scripts/install-tas.sh
rm -rf output-tas
./packer-wrap.sh build tas.json
touch $@
packer:
wget https://releases.hashicorp.com/packer/$(PACKER_VERSION)/packer_$(PACKER_VERSION)_linux_amd64.zip
unzip packer_$(PACKER_VERSION)_linux_amd64.zip
......
#!/bin/bash -eux
# Provisioning script: builds DPDK, TAS and the TAS micro-benchmarks under
# /root. NOTE(review): presumably this is scripts/install-tas.sh, run by the
# packer shell provisioner -- confirm.
# -e: stop at the first failing command, -u: treat unset variables as errors,
# -x: trace every command as it runs.
set -eux
# packages installed up front for the builds below
apt-get -y install libnuma-dev libgmp-dev bc python
cd /root/
# fetch and unpack the DPDK 18.11.8 source tarball from the Debian pool
wget http://deb.debian.org/debian/pool/main/d/dpdk/dpdk_18.11.8.orig.tar.xz
tar xf dpdk_18.11.8.orig.tar.xz
cd dpdk-stable-18.11.8
# build DPDK and install it into /root/dpdk
make -j4 install T=x86_64-native-linuxapp-gcc DESTDIR=/root/dpdk
cd ..
# remove the tarball and the source tree; only the installed copy is kept
rm -rf dpdk-*
# build TAS against the DPDK installation from above
git clone https://github.com/tcp-acceleration-service/tas.git /root/tas
cd /root/tas
make -j4 RTE_SDK=/root/dpdk
# build the micro_rpc echo server and test client, pointing them at the TAS tree
git clone https://github.com/FreakyPenguin/benchmarks.git /root/tasbench
cd /root/tasbench/micro_rpc
make echoserver_linux testclient_linux TAS_CODE="/root/tas"
# blacklist the i40e kernel module via modprobe.d
# (presumably so the NIC stays free for DPDK/TAS -- confirm)
echo "blacklist i40e" > /etc/modprobe.d/i40e_bl.conf
{
"_comment": "Build with `packer build tas.json`",
"builders": [
{
"iso_urls": [ "output-base/base" ],
"iso_checksum": "none",
"disk_image": "true",
"use_backing_file": "true",
"headless": "{{ user `headless` }}",
"disable_vnc": "false",
"output_directory": "output-{{ user `vm_name` }}",
"qemuargs": [
[
"-m",
"{{ user `memory` }}"
],
[
"-display",
"none"
],
[
"-machine",
"accel=kvm"
],
[
"-cpu",
"host"
],
[
"-smp",
"cpus={{ user `cpus`}}"
],
[
"-serial",
"mon:stdio"
]
],
"shutdown_command": "echo '{{ user `ssh_password` }}'|sudo -S shutdown -P now",
"ssh_password": "{{ user `ssh_password` }}",
"ssh_timeout": "10000s",
"ssh_username": "{{ user `ssh_username` }}",
"type": "qemu",
"vm_name": "{{ user `vm_name` }}"
}
],
"provisioners": [
{
"execute_command": "echo '{{ user `ssh_password` }}' | {{.Vars}} sudo -E -S bash '{{.Path}}'",
"scripts": [
"scripts/install-tas.sh"
],
"type": "shell"
}
],
"variables": {
"cleanup_pause": "",
"cpus": "4",
"custom_script": ".",
"desktop": "false",
"disk_size": "10000",
"headless": "", "hostname": "vagrant",
"memory": "4096",
"preseed": "preseed.cfg",
"ssh_fullname": "vagrant",
"ssh_password": "ubuntu",
"ssh_username": "ubuntu",
"vagrantfile_template": "",
"version": "0.1.0",
"vm_name": "tas"
}
}
#include <set>
#include <deque>
namespace nicbm {
#include <cassert>
......@@ -17,6 +20,13 @@ class DMAOp {
void *data;
};
/**
 * Base class for events that a device schedules with the Runner. Devices
 * derive from it to attach their own state; the Runner only looks at `time`.
 */
class TimedEvent {
  public:
    virtual ~TimedEvent() { }

    /**
     * Timestamp at which the event is due; the Runner compares it against its
     * main_time. Initialized to 0 so that an event whose time was never set
     * has a well-defined ordering key instead of an indeterminate one.
     */
    uint64_t time = 0;
};
/**
* The Runner drives the main simulation loop. It's initialized with a reference
* to a device it should manage, and then once `runMain` is called, it will
......@@ -59,10 +69,25 @@ class Runner {
*/
virtual void eth_rx(uint8_t port, const void *data, size_t len)
= 0;
/**
* A timed event is due.
*/
virtual void timed_event(TimedEvent &ev);
};
protected:
struct event_cmp {
bool operator() (TimedEvent *a, TimedEvent *b)
{
return a->time < b->time;
}
};
Device &dev;
std::set<TimedEvent *, event_cmp> events;
std::deque<DMAOp *> dma_queue;
size_t dma_pending;
uint64_t mac_addr;
struct nicsim_params nsparams;
struct cosim_pcie_proto_dev_intro dintro;
......@@ -79,6 +104,11 @@ class Runner {
void eth_recv(volatile struct cosim_eth_proto_n2d_recv *recv);
void poll_n2d();
bool event_next(uint64_t &retval);
void event_trigger();
void dma_do(DMAOp &op);
void dma_trigger();
public:
Runner(Device &dev_);
......@@ -90,6 +120,9 @@ class Runner {
void msi_issue(uint8_t vec);
void eth_send(const void *data, size_t len);
void event_schedule(TimedEvent &evt);
void event_cancel(TimedEvent &evt);
uint64_t time_ps() const;
uint64_t get_mac_addr() const;
};
......
......@@ -12,9 +12,10 @@
//#define DEBUG_NICBM 1
#define SYNC_PERIOD (500 * 1000ULL) // 500ns
#define SYNC_PERIOD (100 * 1000ULL) // 100ns
#define PCI_LATENCY (500 * 1000ULL) // 500ns
#define ETH_LATENCY (500 * 1000ULL) // 500ns
#define DMA_MAX_PENDING 64
using namespace nicbm;
......@@ -57,11 +58,41 @@ volatile union cosim_eth_proto_d2n *Runner::d2n_alloc(void)
}
/**
 * Submit a DMA operation. It is executed right away while fewer than
 * DMA_MAX_PENDING operations are outstanding; otherwise it is appended to
 * dma_queue for later.
 */
void Runner::issue_dma(DMAOp &op)
{
    const bool must_wait = !(dma_pending < DMA_MAX_PENDING);

    if (must_wait) {
#ifdef DEBUG_NICBM
        printf("nicbm: enqueuing dma op %p addr %lx len %zu pending %zu\n", &op,
                op.dma_addr, op.len, dma_pending);
#endif
        dma_queue.push_back(&op);
        return;
    }

    // a slot is free: can directly issue
#ifdef DEBUG_NICBM
    printf("nicbm: issuing dma op %p addr %lx len %zu pending %zu\n", &op,
            op.dma_addr, op.len, dma_pending);
#endif
    dma_do(op);
}
void Runner::dma_trigger()
{
if (dma_queue.empty() || dma_pending == DMA_MAX_PENDING)
return;
DMAOp *op = dma_queue.front();
dma_queue.pop_front();
dma_do(*op);
}
void Runner::dma_do(DMAOp &op)
{
volatile union cosim_pcie_proto_d2h *msg = d2h_alloc();
dma_pending++;
#ifdef DEBUG_NICBM
printf("nicbm: issue dma op %p addr %lx len %zu\n", &op, op.dma_addr,
op.len);
printf("nicbm: executing dma op %p addr %lx len %zu pending %zu\n", &op,
op.dma_addr, op.len, dma_pending);
#endif
if (op.write) {
......@@ -113,6 +144,16 @@ void Runner::msi_issue(uint8_t vec)
COSIM_PCIE_PROTO_D2H_OWN_HOST;
}
/**
 * Register a timed event with the runner. Only the address of evt is stored,
 * so the object must stay alive while it is scheduled.
 */
void Runner::event_schedule(TimedEvent &evt)
{
    TimedEvent *const entry = &evt;
    events.insert(entry);
}
/**
 * Remove a timed event from the set of pending events, looked up by the
 * address of evt.
 */
void Runner::event_cancel(TimedEvent &evt)
{
    TimedEvent *const entry = &evt;
    events.erase(entry);
}
void Runner::h2d_read(volatile struct cosim_pcie_proto_h2d_read *read)
{
volatile union cosim_pcie_proto_d2h *msg;
......@@ -169,6 +210,9 @@ void Runner::h2d_readcomp(volatile struct cosim_pcie_proto_h2d_readcomp *rc)
memcpy(op->data, (void *)rc->data, op->len);
dev.dma_complete(*op);
dma_pending--;
dma_trigger();
}
void Runner::h2d_writecomp(volatile struct cosim_pcie_proto_h2d_writecomp *wc)
......@@ -181,6 +225,9 @@ void Runner::h2d_writecomp(volatile struct cosim_pcie_proto_h2d_writecomp *wc)
#endif
dev.dma_complete(*op);
dma_pending--;
dma_trigger();
}
void Runner::eth_recv(volatile struct cosim_eth_proto_n2d_recv *recv)
......@@ -281,10 +328,36 @@ uint64_t Runner::get_mac_addr() const
return mac_addr;
}
/**
 * Report the time of the first pending event.
 *
 * @param retval receives the time of the first event in the set; left
 *               untouched when no event is pending.
 * @return true if an event is pending, false otherwise.
 */
bool Runner::event_next(uint64_t &retval)
{
    auto first = events.begin();
    if (first == events.end())
        return false;

    retval = (*first)->time;
    return true;
}
void Runner::event_trigger()
{
auto it = events.begin();
if (it == events.end())
return;
TimedEvent *ev = *it;
// event is in the future
if (ev->time > main_time)
return;
events.erase(it);
dev.timed_event(*ev);
}
Runner::Runner(Device &dev_)
: dev(dev_)
: dev(dev_), events(event_cmp())
{
//mac_addr = lrand48() & ~(3ULL << 46);
dma_pending = 0;
srand48(time(NULL) ^ getpid());
mac_addr = lrand48();
mac_addr <<= 16;
......@@ -297,6 +370,7 @@ Runner::Runner(Device &dev_)
int Runner::runMain(int argc, char *argv[])
{
uint64_t next_ts;
uint64_t max_step = 10000;
if (argc != 4 && argc != 5) {
fprintf(stderr, "Usage: corundum_bm PCI-SOCKET ETH-SOCKET "
......@@ -327,6 +401,8 @@ int Runner::runMain(int argc, char *argv[])
fprintf(stderr, "sync_pci=%d sync_eth=%d\n", nsparams.sync_pci,
nsparams.sync_eth);
bool is_sync = nsparams.sync_pci || nsparams.sync_eth;
while (!exiting) {
while (nicsim_sync(&nsparams, main_time)) {
fprintf(stderr, "warn: nicsim_sync failed (t=%lu)\n", main_time);
......@@ -335,9 +411,21 @@ int Runner::runMain(int argc, char *argv[])
do {
poll_h2d();
poll_n2d();
next_ts = netsim_next_timestamp(&nsparams);
} while ((nsparams.sync_pci || nsparams.sync_eth) &&
next_ts <= main_time && !exiting);
event_trigger();
if (is_sync) {
next_ts = netsim_next_timestamp(&nsparams);
if (next_ts > main_time + max_step)
next_ts = main_time + max_step;
} else {
next_ts = main_time + max_step;
}
uint64_t ev_ts;
if (event_next(ev_ts) && ev_ts < next_ts)
next_ts = ev_ts;
} while (next_ts <= main_time && !exiting);
main_time = next_ts;
}
......@@ -345,3 +433,7 @@ int Runner::runMain(int argc, char *argv[])
nicsim_cleanup();
return 0;
}
/*
 * Default handler for a due timed event: intentionally does nothing. The
 * method is declared virtual in Device, so devices that schedule events can
 * override it.
 */
void Runner::Device::timed_event(TimedEvent &te)
{
}
......@@ -64,7 +64,7 @@ int netsim_init(struct netsim_interface *nsif,
if ((di.flags & COSIM_ETH_PROTO_FLAGS_DI_SYNC) == 0) {
*sync_eth = 0;
nsif->sync = 1;
nsif->sync = 0;
} else {
nsif->sync = *sync_eth;
}
......@@ -142,6 +142,9 @@ int netsim_n2d_sync(struct netsim_interface *nsif, uint64_t timestamp,
volatile union cosim_eth_proto_n2d *msg;
volatile struct cosim_eth_proto_n2d_sync *sync;
if (!nsif->sync)
return 0;
if (nsif->n2d_timestamp != 0 &&
timestamp - nsif->n2d_timestamp < sync_delay)
return 0;
......
......@@ -32,16 +32,16 @@
#include "internal.h"
#define D2H_ELEN (4096 + 64)
#define D2H_ELEN (9024 + 64)
#define D2H_ENUM 1024
#define H2D_ELEN (4096 + 64)
#define H2D_ELEN (9024 + 64)
#define H2D_ENUM 1024
#define D2N_ELEN (2048 + 64)
#define D2N_ELEN (9024 + 64)
#define D2N_ENUM 8192
#define N2D_ELEN (2048 + 64)
#define N2D_ELEN (9024 + 64)
#define N2D_ENUM 8192
......@@ -182,9 +182,14 @@ int nicsim_init(struct nicsim_params *params,
{
int pci_lfd = -1, eth_lfd = -1;
void *shmptr;
size_t shm_size;
/* ready in memory queues */
if ((shm_fd = shm_create(params->shm_path, 64 * 1024 * 1024, &shmptr))
shm_size = (uint64_t) D2H_ELEN * D2H_ENUM +
(uint64_t) H2D_ELEN * H2D_ENUM +
(uint64_t) D2N_ELEN * D2N_ENUM +
(uint64_t) N2D_ELEN * N2D_ENUM;
if ((shm_fd = shm_create(params->shm_path, shm_size, &shmptr))
< 0)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment