Commit 18d52cdb authored by Jialin Li's avatar Jialin Li
Browse files

Merge branch 'tofino'

parents 34d1e582 626bbaab
...@@ -25,6 +25,7 @@ Code structure: ...@@ -25,6 +25,7 @@ Code structure:
- python (>= 3.7) - python (>= 3.7)
- libgoogle-perftools-dev - libgoogle-perftools-dev
- libboost-iostreams-dev - libboost-iostreams-dev
- libboost-coroutine-dev
- scons - scons
- ninja-build - ninja-build
- libpixman-1-dev - libpixman-1-dev
......
...@@ -25,7 +25,7 @@ import simbricks.simulators as sim ...@@ -25,7 +25,7 @@ import simbricks.simulators as sim
import simbricks.nodeconfig as node import simbricks.nodeconfig as node
host_configs = ['qemu', 'gt'] host_configs = ['qemu', 'gt']
seq_configs = ['swseq', 'ehseq'] seq_configs = ['swseq', 'ehseq', 'tofino']
nic_configs = ['ib', 'cb', 'cv'] nic_configs = ['ib', 'cb', 'cv']
proto_configs = ['vr', 'nopaxos'] proto_configs = ['vr', 'nopaxos']
num_client_configs = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12] num_client_configs = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12]
...@@ -41,7 +41,10 @@ for proto_config in proto_configs: ...@@ -41,7 +41,10 @@ for proto_config in proto_configs:
for seq_config in seq_configs: for seq_config in seq_configs:
for nic_config in nic_configs: for nic_config in nic_configs:
e = exp.Experiment(proto_config + '-' + host_config + '-' + nic_config + '-' + seq_config + f'-{num_c}') e = exp.Experiment(proto_config + '-' + host_config + '-' + nic_config + '-' + seq_config + f'-{num_c}')
net = sim.NS3SequencerNet() if seq_config == 'tofino':
net = sim.TofinoNet()
else:
net = sim.NS3SequencerNet()
net.sync_period = sync_period net.sync_period = sync_period
net.opt = link_rate_opt + link_latency_opt net.opt = link_rate_opt + link_latency_opt
e.add_network(net) e.add_network(net)
...@@ -49,6 +52,7 @@ for proto_config in proto_configs: ...@@ -49,6 +52,7 @@ for proto_config in proto_configs:
# host # host
if host_config == 'qemu': if host_config == 'qemu':
host_class = sim.QemuHost host_class = sim.QemuHost
net.sync = False
elif host_config == 'gt': elif host_config == 'gt':
host_class = sim.Gem5Host host_class = sim.Gem5Host
e.checkpoint = True e.checkpoint = True
...@@ -103,6 +107,9 @@ for proto_config in proto_configs: ...@@ -103,6 +107,9 @@ for proto_config in proto_configs:
for c in clients: for c in clients:
c.sleep = 5 c.sleep = 5
c.node_config.app.server_ips = ['10.0.0.1', '10.0.0.2', '10.0.0.3'] c.node_config.app.server_ips = ['10.0.0.1', '10.0.0.2', '10.0.0.3']
if seq_config == 'ehseq':
c.node_config.app.server_ips.append('10.0.0.100')
c.node_config.app.use_ehseq = True
c.node_config.disk_image = 'nopaxos' c.node_config.disk_image = 'nopaxos'
c.nics[0].sync_period = sync_period c.nics[0].sync_period = sync_period
c.sync_period = sync_period c.sync_period = sync_period
......
...@@ -34,7 +34,7 @@ import simbricks.nodeconfig as node ...@@ -34,7 +34,7 @@ import simbricks.nodeconfig as node
kinds_of_host = ['qemu'] kinds_of_host = ['qemu']
kinds_of_nic = ['cv','cb','ib'] kinds_of_nic = ['cv','cb','ib']
kinds_of_net = ['wire', 'switch', 'dumbbell', 'bridge'] kinds_of_net = ['wire', 'switch', 'dumbbell', 'bridge', 'tofino']
kinds_of_app = ['TCPs'] kinds_of_app = ['TCPs']
experiments = [] experiments = []
...@@ -49,6 +49,8 @@ for n in kinds_of_net: ...@@ -49,6 +49,8 @@ for n in kinds_of_net:
net_class = sim.NS3DumbbellNet net_class = sim.NS3DumbbellNet
if n == 'bridge': if n == 'bridge':
net_class = sim.NS3BridgeNet net_class = sim.NS3BridgeNet
if n == 'tofino':
net_class = sim.TofinoNet
# set nic sim # set nic sim
...@@ -56,28 +58,28 @@ for n in kinds_of_net: ...@@ -56,28 +58,28 @@ for n in kinds_of_net:
net = net_class() net = net_class()
e = exp.Experiment('qemu-' + c + '-' + n + '-' + 'TCPs') e = exp.Experiment('qemu-' + c + '-' + n + '-' + 'TCPs')
e.add_network(net) e.add_network(net)
if c == 'cv': if c == 'cv':
servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumVerilatorNIC, sim.QemuHost, servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumVerilatorNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfTCPServer) node.CorundumLinuxNode, node.IperfTCPServer)
clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumVerilatorNIC, sim.QemuHost, clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumVerilatorNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfTCPClient, ip_start = 2) node.CorundumLinuxNode, node.IperfTCPClient, ip_start = 2)
if c == 'cb': if c == 'cb':
servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumBMNIC, sim.QemuHost, servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumBMNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfTCPServer) node.CorundumLinuxNode, node.IperfTCPServer)
clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumBMNIC, sim.QemuHost, clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumBMNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfTCPClient, ip_start = 2) node.CorundumLinuxNode, node.IperfTCPClient, ip_start = 2)
if c == 'ib': if c == 'ib':
servers = sim.create_basic_hosts(e, 1, 'server', net, sim.I40eNIC, sim.QemuHost, servers = sim.create_basic_hosts(e, 1, 'server', net, sim.I40eNIC, sim.QemuHost,
node.I40eLinuxNode, node.IperfTCPServer) node.I40eLinuxNode, node.IperfTCPServer)
clients = sim.create_basic_hosts(e, 1, 'client', net, sim.I40eNIC, sim.QemuHost, clients = sim.create_basic_hosts(e, 1, 'client', net, sim.I40eNIC, sim.QemuHost,
node.I40eLinuxNode, node.IperfTCPClient, ip_start = 2) node.I40eLinuxNode, node.IperfTCPClient, ip_start = 2)
clients[0].wait = True clients[0].wait = True
clients[0].node_config.app.server_ip = servers[0].node_config.ip clients[0].node_config.app.server_ip = servers[0].node_config.ip
......
...@@ -34,7 +34,7 @@ import simbricks.nodeconfig as node ...@@ -34,7 +34,7 @@ import simbricks.nodeconfig as node
kinds_of_host = ['qemu'] kinds_of_host = ['qemu']
kinds_of_nic = ['cv','cb','ib'] kinds_of_nic = ['cv','cb','ib']
kinds_of_net = ['wire', 'switch', 'dumbbell', 'bridge'] kinds_of_net = ['wire', 'switch', 'dumbbell', 'bridge', 'tofino']
kinds_of_app = ['UDPs'] kinds_of_app = ['UDPs']
rate = '200m' rate = '200m'
...@@ -51,6 +51,8 @@ for n in kinds_of_net: ...@@ -51,6 +51,8 @@ for n in kinds_of_net:
net_class = sim.NS3DumbbellNet net_class = sim.NS3DumbbellNet
if n == 'bridge': if n == 'bridge':
net_class = sim.NS3BridgeNet net_class = sim.NS3BridgeNet
if n == 'tofino':
net_class = sim.TofinoNet
# set nic sim # set nic sim
...@@ -58,28 +60,28 @@ for n in kinds_of_net: ...@@ -58,28 +60,28 @@ for n in kinds_of_net:
net = net_class() net = net_class()
e = exp.Experiment('qemu-' + c + '-' + n + '-' + 'UDPs') e = exp.Experiment('qemu-' + c + '-' + n + '-' + 'UDPs')
e.add_network(net) e.add_network(net)
if c == 'cv': if c == 'cv':
servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumVerilatorNIC, sim.QemuHost, servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumVerilatorNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfUDPServer) node.CorundumLinuxNode, node.IperfUDPServer)
clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumVerilatorNIC, sim.QemuHost, clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumVerilatorNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfUDPClient, ip_start = 2) node.CorundumLinuxNode, node.IperfUDPClient, ip_start = 2)
if c == 'cb': if c == 'cb':
servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumBMNIC, sim.QemuHost, servers = sim.create_basic_hosts(e, 1, 'server', net, sim.CorundumBMNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfUDPServer) node.CorundumLinuxNode, node.IperfUDPServer)
clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumBMNIC, sim.QemuHost, clients = sim.create_basic_hosts(e, 1, 'client', net, sim.CorundumBMNIC, sim.QemuHost,
node.CorundumLinuxNode, node.IperfUDPClient, ip_start = 2) node.CorundumLinuxNode, node.IperfUDPClient, ip_start = 2)
if c == 'ib': if c == 'ib':
servers = sim.create_basic_hosts(e, 1, 'server', net, sim.I40eNIC, sim.QemuHost, servers = sim.create_basic_hosts(e, 1, 'server', net, sim.I40eNIC, sim.QemuHost,
node.I40eLinuxNode, node.IperfUDPServer) node.I40eLinuxNode, node.IperfUDPServer)
clients = sim.create_basic_hosts(e, 1, 'client', net, sim.I40eNIC, sim.QemuHost, clients = sim.create_basic_hosts(e, 1, 'client', net, sim.I40eNIC, sim.QemuHost,
node.I40eLinuxNode, node.IperfUDPClient, ip_start = 2) node.I40eLinuxNode, node.IperfUDPClient, ip_start = 2)
clients[0].wait = True clients[0].wait = True
clients[0].node_config.app.server_ip = servers[0].node_config.ip clients[0].node_config.app.server_ip = servers[0].node_config.ip
clients[0].node_config.app.rate = rate clients[0].node_config.app.rate = rate
......
...@@ -341,7 +341,7 @@ class IperfTCPClient(AppConfig): ...@@ -341,7 +341,7 @@ class IperfTCPClient(AppConfig):
is_last = False is_last = False
def run_cmds(self, node): def run_cmds(self, node):
cmds = ['sleep 1', cmds = ['sleep 1',
'iperf -l 32M -w 32M -c ' + self.server_ip + ' -i 1 -P ' + 'iperf -l 32M -w 32M -c ' + self.server_ip + ' -i 1 -P ' +
str(self.procs)] str(self.procs)]
...@@ -359,15 +359,15 @@ class IperfUDPClient(AppConfig): ...@@ -359,15 +359,15 @@ class IperfUDPClient(AppConfig):
def run_cmds(self, node): def run_cmds(self, node):
cmds = ['sleep 1', cmds = ['sleep 1',
'iperf -c ' + self.server_ip + ' -i 1 -u -b ' + self.rate] 'iperf -c ' + self.server_ip + ' -i 1 -u -b ' + self.rate]
if self.is_last: if self.is_last:
cmds.append('sleep 0.5') cmds.append('sleep 0.5')
else: else:
cmds.append('sleep 10') cmds.append('sleep 10')
return cmds return cmds
class IperfUDPClientSleep(AppConfig): class IperfUDPClientSleep(AppConfig):
server_ip = '10.0.0.1' server_ip = '10.0.0.1'
...@@ -388,9 +388,9 @@ class NoTraffic(AppConfig): ...@@ -388,9 +388,9 @@ class NoTraffic(AppConfig):
else: else:
if (self.is_sleep): if (self.is_sleep):
cmds.append('sleep 10') cmds.append('sleep 10')
else: else:
cmds.append('dd if=/dev/urandom of=/dev/null count=500000') cmds.append('dd if=/dev/urandom of=/dev/null count=500000')
return cmds return cmds
...@@ -417,9 +417,9 @@ class VRClient(AppConfig): ...@@ -417,9 +417,9 @@ class VRClient(AppConfig):
def run_cmds(self, node): def run_cmds(self, node):
cmds = [] cmds = []
for ip in self.server_ips: for ip in self.server_ips:
cmds.append('ping -c 1 ' + ip) cmds.append('ping -c 5 ' + ip)
cmds.append('/root/nopaxos/bench/client -c /root/nopaxos.config ' + cmds.append('/root/nopaxos/bench/client -c /root/nopaxos.config ' +
'-m vr -n 2000') '-m vr -u 2 -h ' + node.ip)
return cmds return cmds
class NOPaxosReplica(AppConfig): class NOPaxosReplica(AppConfig):
...@@ -431,13 +431,17 @@ class NOPaxosReplica(AppConfig): ...@@ -431,13 +431,17 @@ class NOPaxosReplica(AppConfig):
class NOPaxosClient(AppConfig): class NOPaxosClient(AppConfig):
server_ips = [] server_ips = []
is_last = False is_last = False
use_ehseq = False
def run_cmds(self, node): def run_cmds(self, node):
cmds = [] cmds = []
for ip in self.server_ips: for ip in self.server_ips:
cmds.append('ping -c 1 ' + ip) cmds.append('ping -c 5 ' + ip)
cmds.append('/root/nopaxos/bench/client -c /root/nopaxos.config ' + cmd = '/root/nopaxos/bench/client -c /root/nopaxos.config ' + \
'-m nopaxos -n 40000') '-m nopaxos -u 2 -h ' + node.ip
if self.use_ehseq:
cmd += ' -e'
cmds.append(cmd)
if self.is_last: if self.is_last:
cmds.append('sleep 1') cmds.append('sleep 1')
else: else:
...@@ -446,7 +450,7 @@ class NOPaxosClient(AppConfig): ...@@ -446,7 +450,7 @@ class NOPaxosClient(AppConfig):
class NOPaxosSequencer(AppConfig): class NOPaxosSequencer(AppConfig):
def run_cmds(self, node): def run_cmds(self, node):
return ['/root/nopaxos/sequencer/sequencer -c /root/sequencer.config'] return ['/root/nopaxos/sequencer/sequencer -c /root/nopaxos.config -m nopaxos']
class RPCServer(AppConfig): class RPCServer(AppConfig):
......
...@@ -459,12 +459,17 @@ class SwitchNet(NetSim): ...@@ -459,12 +459,17 @@ class SwitchNet(NetSim):
return cleanup return cleanup
class TofinoNet(NetSim): class TofinoNet(NetSim):
tofino_log_path = '/tmp/model.ldjson'
sync = True
def __init__(self): def __init__(self):
super().__init__() super().__init__()
def run_cmd(self, env): def run_cmd(self, env):
cmd = env.repodir + '/sims/tofino/tofino' cmd = env.repodir + '/sims/tofino/tofino'
cmd += f' -m {self.sync_mode} -S {self.sync_period} -E {self.eth_latency}' cmd += f' -m {self.sync_mode} -S {self.sync_period} -E {self.eth_latency} -t {self.tofino_log_path}'
if not self.sync:
cmd += ' -u'
for (_,n) in self.connect_sockets(env): for (_,n) in self.connect_sockets(env):
cmd += ' -s ' + n cmd += ' -s ' + n
return cmd return cmd
......
...@@ -2,4 +2,5 @@ f 1 ...@@ -2,4 +2,5 @@ f 1
replica 10.0.0.1:12345 replica 10.0.0.1:12345
replica 10.0.0.2:12345 replica 10.0.0.2:12345
replica 10.0.0.3:12345 replica 10.0.0.3:12345
multicast 10.0.0.255:12345 multicast 10.0.0.255:22222
sequencer 10.0.0.100:12345
...@@ -53,15 +53,9 @@ ...@@ -53,15 +53,9 @@
"destination": "/tmp/nopaxos.config", "destination": "/tmp/nopaxos.config",
"direction": "upload" "direction": "upload"
}, },
{
"type": "file",
"source": "sequencer.config",
"destination": "/tmp/sequencer.config",
"direction": "upload"
},
{ {
"type": "shell", "type": "shell",
"inline": ["sudo mv /tmp/nopaxos.config /root/nopaxos.config; sudo mv /tmp/sequencer.config /root/sequencer.config"] "inline": ["sudo mv /tmp/nopaxos.config /root/nopaxos.config"]
} }
], ],
"variables": { "variables": {
......
...@@ -9,6 +9,6 @@ make install ...@@ -9,6 +9,6 @@ make install
ldconfig ldconfig
mkdir -p /root mkdir -p /root
git clone https://github.com/UWSysLab/NOPaxos.git /root/nopaxos git clone https://github.com/nicklijl/simbricks-nopaxos.git /root/nopaxos
cd /root/nopaxos cd /root/nopaxos
make -j4 make -j4
...@@ -98,10 +98,10 @@ ...@@ -98,10 +98,10 @@
"https_proxy": "{{env `https_proxy`}}", "https_proxy": "{{env `https_proxy`}}",
"install_dev_packages": "false", "install_dev_packages": "false",
"install_vagrant_key": "true", "install_vagrant_key": "true",
"iso_checksum": "8c5fc24894394035402f66f3824beb7234b757dd2b5531379cb310cedfdf0996", "iso_checksum": "f5cbb8104348f0097a8e513b10173a07dbc6684595e331cb06f93f385d0aecf6",
"iso_checksum_type": "sha256", "iso_checksum_type": "sha256",
"iso_name": "ubuntu-18.04.5-server-amd64.iso", "iso_name": "ubuntu-18.04.6-server-amd64.iso",
"iso_url": "http://cdimage.ubuntu.com/ubuntu/releases/18.04.5/release/ubuntu-18.04.5-server-amd64.iso", "iso_url": "http://cdimage.ubuntu.com/ubuntu/releases/18.04.5/release/ubuntu-18.04.6-server-amd64.iso",
"locale": "en_US", "locale": "en_US",
"memory": "4096", "memory": "4096",
"no_proxy": "{{env `no_proxy`}}", "no_proxy": "{{env `no_proxy`}}",
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/* -*- P4_16 -*- */
#include <core.p4>
#include <tna.p4>
/*************************************************************************
************* C O N S T A N T S A N D T Y P E S *******************
**************************************************************************/
enum bit<16> ether_type_t {
IPV4 = 0x0800,
ARP = 0x0806,
TPID = 0x8100,
TPID2 = 0x9100,
IPV6 = 0x86DD
}
const ReplicationId_t L2_MCAST_RID = 0xFFFF;
const int MAC_TABLE_SIZE = 65536;
const int VLAN_PORT_TABLE_SIZE = 1 << (7 + 12);
/* We can use up to 8 different digest types */
const bit<3> L2_LEARN_DIGEST = 1;
/*************************************************************************
*********************** H E A D E R S *********************************
*************************************************************************/
/* Define all the headers the program will recognize */
/* The actual sets of headers processed by each gress can differ */
/* Standard ethernet header */
header ethernet_h {
bit<48> dst_addr;
bit<48> src_addr;
}
header vlan_tag_h {
bit<16> tpid;
bit<3> pcp;
bit<1> dei;
bit<12> vid;
}
header etherII_h {
ether_type_t ether_type;
}
/*** Internal Headers ***/
typedef bit<4> header_type_t; /* These fields can be coombined into one */
typedef bit<4> header_info_t; /* 8-bit field as well. Exercise for the brave */
const header_type_t HEADER_TYPE_BRIDGE = 0xB;
const header_type_t HEADER_TYPE_MIRROR_INGRESS = 0xC;
const header_type_t HEADER_TYPE_MIRROR_EGRESS = 0xD;
const header_type_t HEADER_TYPE_RESUBMIT = 0xA;
/*
* This is a common "preamble" header that must be present in all internal
* headers. The only time you do not need it is when you know that you are
* not going to have more than one internal header type ever
*/
#define INTERNAL_HEADER \
header_type_t header_type; \
header_info_t header_info
header inthdr_h {
INTERNAL_HEADER;
}
/* Bridged metadata */
header bridge_h {
INTERNAL_HEADER;
bit<7> pad0;
PortId_t ingress_port;
bit<3> pcp;
bit<1> dei;
bit<12> vid;
}
/*************************************************************************
************** I N G R E S S P R O C E S S I N G *******************
*************************************************************************/
/*********************** H E A D E R S ************************/
struct my_ingress_headers_t {
bridge_h bridge;
ethernet_h ethernet;
vlan_tag_h vlan_tag;
etherII_h etherII;
}
/****** G L O B A L I N G R E S S M E T A D A T A *********/
struct port_metadata_t {
bit<3> port_pcp;
bit<12> port_vid;
bit<9> l2_xid;
}
struct my_ingress_metadata_t {
port_metadata_t port_properties;
bit<3> pcp;
bit<1> dei;
bit<12> vid;
bit<9> mac_move; /* Should have the same size as PortId_t */
bit<1> is_static;
bit<1> smac_hit;
PortId_t ingress_port;
}
/*********************** P A R S E R **************************/
parser IngressParser(packet_in pkt,
/* User */
out my_ingress_headers_t hdr,
out my_ingress_metadata_t meta,
/* Intrinsic */
out ingress_intrinsic_metadata_t ig_intr_md)
{
/* This is a mandatory state, required by Tofino Architecture */
state start {
pkt.extract(ig_intr_md);
meta.port_properties = port_metadata_unpack<port_metadata_t>(pkt);
transition meta_init;
}
state meta_init {
meta.pcp = 0;
meta.dei = 0;
meta.vid = 0;
meta.mac_move = 0;
meta.is_static = 0;
meta.smac_hit = 0;
meta.ingress_port = ig_intr_md.ingress_port;
transition parse_ethernet;
}
state parse_ethernet {
pkt.extract(hdr.ethernet);
transition select(pkt.lookahead<bit<16>>()) {
(bit<16>)ether_type_t.TPID &&& 0xEFFF: parse_vlan_tag;
default: parse_etherII;
}
}
state parse_vlan_tag {
pkt.extract(hdr.vlan_tag);
meta.pcp = hdr.vlan_tag.pcp;
meta.dei = hdr.vlan_tag.dei;
meta.vid = hdr.vlan_tag.vid;
transition parse_etherII;
}
state parse_etherII {
pkt.extract(hdr.etherII);
transition accept;
}
}
/***************** M A T C H - A C T I O N *********************/
control Ingress(
/* User */
inout my_ingress_headers_t hdr,
inout my_ingress_metadata_t meta,
/* Intrinsic */
in ingress_intrinsic_metadata_t ig_intr_md,
in ingress_intrinsic_metadata_from_parser_t ig_prsr_md,
inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md,
inout ingress_intrinsic_metadata_for_tm_t ig_tm_md)
{
action drop() {
ig_dprsr_md.drop_ctl = 1;
}
action send(PortId_t port) {
ig_tm_md.ucast_egress_port = port;
}
action smac_hit(PortId_t port, bit<1> is_static) {
meta.mac_move = ig_intr_md.ingress_port ^ port;
meta.smac_hit = 1;
meta.is_static = is_static;
}
action smac_miss() { }
action smac_drop() {
drop(); exit;
}
@idletime_precision(3)
table smac {
key = {
meta.vid : exact;
hdr.ethernet.src_addr : exact;
}
actions = {
smac_hit; smac_miss; smac_drop;
}
size = MAC_TABLE_SIZE;
const default_action = smac_miss();
idle_timeout = true;
}
action mac_learn_notify() {
ig_dprsr_md.digest_type = L2_LEARN_DIGEST;
}
table smac_results {
key = {
meta.mac_move : ternary;
meta.is_static : ternary;
meta.smac_hit : ternary;
}
actions = {
mac_learn_notify; NoAction; smac_drop;
}
const entries = {
( _, _, 0) : mac_learn_notify();
( 0, _, 1) : NoAction();
( _, 0, 1) : mac_learn_notify();
( _, 1, 1) : smac_drop();
}
}
action dmac_unicast(PortId_t port) {
send(port);
}
action dmac_miss() {
ig_tm_md.mcast_grp_a = (MulticastGroupId_t)meta.vid;
ig_tm_md.rid = L2_MCAST_RID;
/* Set the exclusion id here, since parser can't acces ig_tm_md */
ig_tm_md.level2_exclusion_id = meta.port_properties.l2_xid;
}
action dmac_multicast(MulticastGroupId_t mcast_grp) {
ig_tm_md.mcast_grp_a = mcast_grp;
ig_tm_md.rid = L2_MCAST_RID;
/* Set the exclusion id here, since parser can't acces ig_tm_md */
ig_tm_md.level2_exclusion_id = meta.port_properties.l2_xid;
}
action dmac_drop() {
drop();
exit;
}
table dmac {
key = {
meta.vid : exact;
hdr.ethernet.dst_addr : exact;
}
actions = {
dmac_unicast; dmac_miss; dmac_multicast; dmac_drop;
}
size = MAC_TABLE_SIZE;
default_action = dmac_miss();
}
apply {
/* Assign Port-based VLAN to untagged/priority-tagged packets */
if (meta.vid == 0) {
meta.vid = meta.port_properties.port_vid;
}
if (!hdr.vlan_tag.isValid()) {
meta.pcp = meta.port_properties.port_pcp;
}
smac.apply();
smac_results.apply();
switch (dmac.apply().action_run) {
dmac_unicast: { /* Unicast source pruning */
if (ig_intr_md.ingress_port ==
ig_tm_md.ucast_egress_port) {
drop();
}
}
}
/* Bridge metadata to the egress pipeline */
hdr.bridge.setValid();
hdr.bridge.header_type = HEADER_TYPE_BRIDGE;
hdr.bridge.header_info = 0; /* Ignore */
hdr.bridge.ingress_port = ig_intr_md.ingress_port;
hdr.bridge.pcp = meta.pcp;
hdr.bridge.dei = meta.dei;
hdr.bridge.vid = meta.vid;
}
}
/********************* D E P A R S E R ************************/
/* This struct is needed for proper digest receive API generation */
struct l2_digest_t {
bit<12> vid;
bit<48> src_mac;
bit<9> ingress_port;
bit<9> mac_move;
bit<1> is_static;
bit<1> smac_hit;
}
control IngressDeparser(packet_out pkt,
/* User */
inout my_ingress_headers_t hdr,
in my_ingress_metadata_t meta,
/* Intrinsic */
in ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md)
{
Digest <l2_digest_t>() l2_digest;
apply {
if (ig_dprsr_md.digest_type == L2_LEARN_DIGEST) {
l2_digest.pack({
meta.vid,
hdr.ethernet.src_addr,
meta.ingress_port,
meta.mac_move,
meta.is_static,
meta.smac_hit });
}
pkt.emit(hdr);
}
}
/*************************************************************************
**************** E G R E S S P R O C E S S I N G *******************
*************************************************************************/
/*********************** H E A D E R S ************************/
struct my_egress_headers_t {
ethernet_h ethernet;
vlan_tag_h vlan_tag;
etherII_h etherII;
}
/******** G L O B A L E G R E S S M E T A D A T A *********/
struct my_egress_metadata_t {
bridge_h bridge;
PortId_t ingress_port;
bit<3> pcp;
bit<1> dei;
bit<12> vid;
}
/*********************** P A R S E R **************************/
parser EgressParser(packet_in pkt,
/* User */
out my_egress_headers_t hdr,
out my_egress_metadata_t meta,
/* Intrinsic */
out egress_intrinsic_metadata_t eg_intr_md)
{
inthdr_h inthdr;
/* This is a mandatory state, required by Tofino Architecture */
state start {
pkt.extract(eg_intr_md);
inthdr = pkt.lookahead<inthdr_h>();
transition select(inthdr.header_type, inthdr.header_info) {
( HEADER_TYPE_BRIDGE, _ ) : parse_bridge;
default : reject;
}
}
state parse_bridge {
pkt.extract(meta.bridge);
meta.ingress_port = meta.bridge.ingress_port;
meta.pcp = meta.bridge.pcp;
meta.dei = meta.bridge.dei;
meta.vid = meta.bridge.vid;
transition parse_ethernet;
}
state parse_ethernet {
pkt.extract(hdr.ethernet);
transition select(pkt.lookahead<bit<16>>()) {
(bit<16>)ether_type_t.TPID &&& 0xEFFF: parse_vlan_tag;
default: parse_etherII;
}
}
state parse_vlan_tag {
pkt.extract(hdr.vlan_tag);
transition parse_etherII;
}
state parse_etherII {
pkt.extract(hdr.etherII);
transition accept;
}
}
/***************** M A T C H - A C T I O N *********************/
control Egress(
/* User */
inout my_egress_headers_t hdr,
inout my_egress_metadata_t meta,
/* Intrinsic */
in egress_intrinsic_metadata_t eg_intr_md,
in egress_intrinsic_metadata_from_parser_t eg_prsr_md,
inout egress_intrinsic_metadata_for_deparser_t eg_dprsr_md,
inout egress_intrinsic_metadata_for_output_port_t eg_oport_md)
{
action drop() {
eg_dprsr_md.drop_ctl = eg_dprsr_md.drop_ctl | 1;
}
action send_tagged() {
hdr.vlan_tag.setValid();
hdr.vlan_tag.tpid = (bit<16>)ether_type_t.TPID;
#ifdef P4C_1719_FIXED
hdr.vlan_tag.pcp = meta.pcp;
hdr.vlan_tag.dei = meta.dei;
#else
hdr.vlan_tag.pcp = 0;
hdr.vlan_tag.dei = 0;
#endif
hdr.vlan_tag.vid = meta.vid;
}
action send_untagged() {
hdr.vlan_tag.setInvalid();
}
action not_a_member() {
drop();
}
table egr_vlan_port {
key = {
meta.vid : exact;
#ifdef USE_MASK
eg_intr_md.egress_port & 0x7F : exact @name("egress_port");
#else
eg_intr_md.egress_port[6:0] : exact @name("egress_port");
#endif
}
actions = {
send_tagged;
send_untagged;
not_a_member;
}
default_action = not_a_member();
size = VLAN_PORT_TABLE_SIZE;
}
apply {
#ifdef P4_SOURCE_PRUNING
if (meta.ingress_port == eg_intr_md.egress_port) {
drop();
} else {
#endif
egr_vlan_port.apply();
#ifdef P4_SOURCE_PRUNING
}
#endif
}
}
/********************* D E P A R S E R ************************/
control EgressDeparser(packet_out pkt,
/* User */
inout my_egress_headers_t hdr,
in my_egress_metadata_t meta,
/* Intrinsic */
in egress_intrinsic_metadata_for_deparser_t eg_dprsr_md)
{
apply {
pkt.emit(hdr);
}
}
/************ F I N A L P A C K A G E ******************************/
Pipeline(
IngressParser(),
Ingress(),
IngressDeparser(),
EgressParser(),
Egress(),
EgressDeparser()
) pipe;
Switch(pipe) main;
/* -*- P4_16 -*- */
#include <core.p4>
#include <tna.p4>
typedef bit<48> mac_addr_t;
typedef bit<32> ipv4_addr_t;
typedef bit<16> sess_num_t;
typedef bit<64> msg_num_t;
typedef bit<32> reg_val_t;
typedef bit<8> reg_key_t;
#define ETHERTYPE_TPID 0x8100
#define ETHERTYPE_IPV4 0x0800
#define ETHERTYPE_PKTGEN 16w0x7777
#define IP_PROTOCOL_UDP 17
#define IP_PROTOCOL_TCP 6
#define MCAST_DST_PORT 22222
const int MAC_TABLE_SIZE = 65536;
const bit<3> L2_LEARN_DIGEST = 1;
/*************************************************************************
*********************** H E A D E R S *********************************
*************************************************************************/
/* Define all the headers the program will recognize */
/* The actual sets of headers processed by each gress can differ */
/* Standard ethernet header */
header ethernet_h {
bit<48> dst_addr;
bit<48> src_addr;
bit<16> ether_type;
}
header vlan_tag_h {
bit<3> pcp;
bit<1> cfi;
bit<12> vid;
bit<16> ether_type;
}
header ipv4_h {
bit<4> version;
bit<4> ihl;
bit<8> diffserv;
bit<16> total_len;
bit<16> identification;
bit<3> flags;
bit<13> frag_offset;
bit<8> ttl;
bit<8> protocol;
bit<16> hdr_checksum;
bit<32> src_addr;
bit<32> dst_addr;
}
header udp_h {
bit<16> src_port;
bit<16> dst_port;
bit<16> len;
bit<16> checksum;
}
header nopaxos_h {
bit<32> is_frag;
bit<16> header_size;
// meta_data from here
sess_num_t sess_num;
msg_num_t msg_num;
}
/****** G L O B A L I N G R E S S M E T A D A T A *********/
struct my_ingress_headers_t {
ethernet_h ethernet;
ipv4_h ipv4;
udp_h udp;
nopaxos_h nopaxos;
}
struct my_ingress_metadata_t {
bit<9> mac_move;
bit<1> is_static;
bit<1> smac_hit;
PortId_t ingress_port;
}
/*********************** P A R S E R **************************/
parser IngressParser(
packet_in pkt,
out my_ingress_headers_t hdr,
out my_ingress_metadata_t meta,
out ingress_intrinsic_metadata_t ig_intr_md){
/* This is a mandatory state, required by Tofino Architecture */
state start {
pkt.extract(ig_intr_md);
pkt.advance(PORT_METADATA_SIZE);
transition meta_init;
}
state meta_init {
meta.mac_move = 0;
meta.is_static = 0;
meta.smac_hit = 0;
meta.ingress_port = ig_intr_md.ingress_port;
transition parse_ethernet;
}
state parse_ethernet {
pkt.extract(hdr.ethernet);
transition select(hdr.ethernet.ether_type){
ETHERTYPE_IPV4: parse_ipv4;
default: accept;
}
}
state parse_ipv4 {
pkt.extract(hdr.ipv4);
transition select(hdr.ipv4.protocol){
IP_PROTOCOL_UDP: parse_udp;
default: accept;
}
}
state parse_udp {
pkt.extract(hdr.udp);
transition select(hdr.udp.dst_port) {
MCAST_DST_PORT: parse_nopaxos;
default: accept;
}
}
state parse_nopaxos {
pkt.extract(hdr.nopaxos);
transition accept;
}
}
/***************** M A T C H - A C T I O N *********************/
control Ingress(
/* User */
inout my_ingress_headers_t hdr,
inout my_ingress_metadata_t meta,
/* Intrinsic */
in ingress_intrinsic_metadata_t ig_intr_md,
in ingress_intrinsic_metadata_from_parser_t ig_prsr_md,
inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md,
inout ingress_intrinsic_metadata_for_tm_t ig_tm_md)
{
Register<reg_val_t, reg_key_t>(1) reg_cnt; // value, key
RegisterAction<reg_val_t, reg_key_t, reg_val_t>(reg_cnt)
sequencing = {
void apply(inout reg_val_t register_data, out reg_val_t seq_num) {
register_data = register_data + 1;
seq_num = register_data;
}
};
action send(PortId_t port) {
ig_tm_md.ucast_egress_port = port;
}
action drop() {
ig_dprsr_md.drop_ctl = 1;
}
action smac_hit(PortId_t port, bit<1> is_static) {
meta.mac_move = ig_intr_md.ingress_port ^ port;
meta.smac_hit = 1;
meta.is_static = is_static;
}
action smac_miss() { }
action smac_drop() {
drop(); exit;
}
@idletime_precision(3)
table smac {
key = {
hdr.ethernet.src_addr : exact;
}
actions = {
smac_hit; smac_miss; smac_drop;
}
size = MAC_TABLE_SIZE;
const default_action = smac_miss();
idle_timeout = true;
}
action mac_learn_notify() {
ig_dprsr_md.digest_type = L2_LEARN_DIGEST;
}
table smac_results {
key = {
meta.mac_move : ternary;
meta.is_static : ternary;
meta.smac_hit : ternary;
}
actions = {
mac_learn_notify; NoAction; smac_drop;
}
const entries = {
( _, _, 0) : mac_learn_notify();
( 0, _, 1) : NoAction();
( _, 0, 1) : mac_learn_notify();
( _, 1, 1) : smac_drop();
}
}
action dmac_unicast(PortId_t port) {
send(port);
}
action dmac_miss() {
ig_tm_md.mcast_grp_a = 1;
}
action dmac_drop() {
drop();
exit;
}
table dmac {
key = {
hdr.ethernet.dst_addr : exact;
}
actions = {
dmac_unicast; dmac_miss; dmac_drop;
}
size = MAC_TABLE_SIZE;
default_action = dmac_miss();
}
apply {
ig_tm_md.bypass_egress = 1w1;
smac.apply();
smac_results.apply();
if(hdr.udp.isValid() && hdr.udp.dst_port == MCAST_DST_PORT) {
hdr.udp.checksum = 0;
hdr.nopaxos.msg_num = (msg_num_t)sequencing.execute(0);
hdr.nopaxos.sess_num = (sess_num_t)0;
ig_tm_md.mcast_grp_a = 2;
} else {
switch (dmac.apply().action_run) {
dmac_unicast: { /* Unicast source pruning */
if (ig_intr_md.ingress_port == ig_tm_md.ucast_egress_port) {
drop();
}
}
}
}
}
} // End of SwitchIngressControl
/********************* D E P A R S E R ************************/
/* This struct is needed for proper digest receive API generation */
struct l2_digest_t {
bit<48> src_mac;
bit<9> ingress_port;
bit<9> mac_move;
bit<1> is_static;
bit<1> smac_hit;
}
control IngressDeparser(packet_out pkt,
/* User */
inout my_ingress_headers_t hdr,
in my_ingress_metadata_t meta,
/* Intrinsic */
in ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md)
{
Digest <l2_digest_t>() l2_digest;
apply {
if (ig_dprsr_md.digest_type == L2_LEARN_DIGEST) {
l2_digest.pack({
hdr.ethernet.src_addr,
meta.ingress_port,
meta.mac_move,
meta.is_static,
meta.smac_hit });
}
pkt.emit(hdr);
}
}
/*************************************************************************
**************** E G R E S S P R O C E S S I N G *******************
*************************************************************************/
/*********************** H E A D E R S ************************/
struct my_egress_headers_t {
ethernet_h ethernet;
vlan_tag_h vlan_tag;
ipv4_h ipv4;
}
/******** G L O B A L E G R E S S M E T A D A T A *********/
struct my_egress_metadata_t {
}
/*********************** P A R S E R **************************/
parser EgressParser(packet_in pkt,
/* User */
out my_egress_headers_t hdr,
out my_egress_metadata_t meta,
/* Intrinsic */
out egress_intrinsic_metadata_t eg_intr_md)
{
/* This is a mandatory state, required by Tofino Architecture */
state start {
pkt.extract(eg_intr_md);
transition parse_ethernet;
}
state parse_ethernet {
pkt.extract(hdr.ethernet);
transition select(hdr.ethernet.ether_type) {
ETHERTYPE_TPID: parse_vlan_tag;
ETHERTYPE_IPV4: parse_ipv4;
default: accept;
}
}
state parse_vlan_tag {
pkt.extract(hdr.vlan_tag);
transition select(hdr.vlan_tag.ether_type) {
ETHERTYPE_IPV4: parse_ipv4;
default: accept;
}
}
state parse_ipv4 {
pkt.extract(hdr.ipv4);
transition accept;
}
}
/***************** M A T C H - A C T I O N *********************/
control Egress(
/* User */
inout my_egress_headers_t hdr,
inout my_egress_metadata_t meta,
/* Intrinsic */
in egress_intrinsic_metadata_t eg_intr_md,
in egress_intrinsic_metadata_from_parser_t eg_prsr_md,
inout egress_intrinsic_metadata_for_deparser_t eg_dprsr_md,
inout egress_intrinsic_metadata_for_output_port_t eg_oport_md)
{
apply {
}
}
/********************* D E P A R S E R ************************/
control EgressDeparser(packet_out pkt,
/* User */
inout my_egress_headers_t hdr,
in my_egress_metadata_t meta,
/* Intrinsic */
in egress_intrinsic_metadata_for_deparser_t eg_dprsr_md)
{
apply {
pkt.emit(hdr);
}
}
/************ F I N A L P A C K A G E ******************************/
Pipeline(IngressParser(),
Ingress(),
IngressDeparser(),
EgressParser(),
Egress(),
EgressDeparser()
) pipe;
Switch(pipe) main;
from bfrtcli import *
class l2_switch():
#
# Helper Functions to deal with ports
#
def devport(self, pipe, port):
return ((pipe & 3) << 7) | (port & 0x7F)
def pipeport(self,dp):
return ((dp & 0x180) >> 7, (dp & 0x7F))
def mcport(self, pipe, port):
return pipe * 72 + port
def devport_to_mcport(self, dp):
return self.mcport(*self.pipeport(dp))
# This is a useful bfrt_python function that should potentially allow one
# to quickly clear all the logical tables (including the fixed ones) in
# their data plane program.
#
# This function can clear all P4 tables and later other fixed objects
# (once proper BfRt support is added). As of SDE-9.2.0 the support is mixed.
# As a result the function contains some workarounds.
def clear_all(self, verbose=True, batching=True, clear_ports=False):
table_list = bfrt.info(return_info=True, print_info=False)
# Remove port tables from the list
port_types = ['PORT_CFG', 'PORT_FRONT_PANEL_IDX_INFO',
'PORT_HDL_INFO', 'PORT_STR_INFO']
if not clear_ports:
for table in list(table_list):
if table['type'] in port_types:
table_list.remove(table)
# The order is important. We do want to clear from the top,
# i.e. delete objects that use other objects. For example,
# table entries use selector groups and selector groups
# use action profile members.
#
# Same is true for the fixed tables. However, the list of
# table types grows, so we will first clean the tables we
# know and then clear the rest
for table_types in (['MATCH_DIRECT', 'MATCH_INDIRECT_SELECTOR'],
['SELECTOR'],
['ACTION_PROFILE'],
['PRE_MGID'],
['PRE_ECMP'],
['PRE_NODE'],
[]): # This is catch-all
for table in list(table_list):
if table['type'] in table_types or len(table_types) == 0:
try:
if verbose:
print("Clearing table {:<40} ... ".
format(table['full_name']),
end='', flush=True)
table['node'].clear(batch=batching)
table_list.remove(table)
if verbose:
print('Done')
use_entry_list = False
except:
use_entry_list = True
# Some tables do not support clear(). Thus we'll try
# to get a list of entries and clear them one-by-one
if use_entry_list:
try:
if batching:
bfrt.batch_begin()
# This line can result in an exception,
# since # not all tables support get()
entry_list = table['node'].get(regex=True,
return_ents=True,
print_ents=False)
# Not every table supports delete() method.
# For those tables we'll try to push in an
# entry with everything being zeroed out
has_delete = hasattr(table['node'], 'delete')
if entry_list != -1:
if has_delete:
for entry in entry_list:
entry.remove()
else:
clear_entry = table['node'].entry()
for entry in entry_list:
entry.data = clear_entry.data
# We can still have an exception
# here, since not all tables
# support add()/mod()
entry.push()
if verbose:
print('Done')
else:
print('Empty')
table_list.remove(table)
except BfRtTableError as e:
print('Empty')
table_list.remove(table)
except Exception as e:
# We can have in a number of ways: no get(),
# no add() etc. Another reason is that the
# table is read-only.
if verbose:
print("Failed")
finally:
if batching:
bfrt.batch_end()
bfrt.complete_operations()
def __init__(self, default_ttl=60000):
self.p4 = bfrt.nopaxos.pipe
self.all_ports = [port.key[b'$DEV_PORT']
for port in bfrt.port.port.get(regex=1,
return_ents=True,
print_ents=False)]
self.l2_age_ttl = default_ttl
def setup(self):
self.clear_all()
self.__init__()
# Enable learning on SMAC
print("Initializing learning on SMAC ... ", end='', flush=True)
try:
self.p4.IngressDeparser.l2_digest.callback_deregister()
except:
pass
self.p4.IngressDeparser.l2_digest.callback_register(self.learning_cb)
print("Done")
# Enable aging on SMAC
print("Inializing Aging on SMAC ... ", end='', flush=True)
self.p4.Ingress.smac.idle_table_set_notify(enable=False,
callback=None)
self.p4.Ingress.smac.idle_table_set_notify(enable=True,
callback=self.aging_cb,
interval = 10000,
min_ttl = 10000,
max_ttl = 60000)
print("Done")
@staticmethod
def aging_cb(dev_id, pipe_id, direction, parser_id, entry):
smac = bfrt.nopaxos.pipe.Ingress.smac
dmac = bfrt.nopaxos.pipe.Ingress.dmac
mac_addr = entry.key[b'hdr.ethernet.src_addr']
print("Aging out: MAC: {}".format(mac(mac_addr)))
entry.remove() # from smac
try:
dmac.delete(dst_addr=mac_addr)
except:
print("WARNING: Could not find the matching DMAC entry")
@staticmethod
def learning_cb(dev_id, pipe_id, direction, parser_id, session, msg):
smac = bfrt.nopaxos.pipe.Ingress.smac
dmac = bfrt.nopaxos.pipe.Ingress.dmac
for digest in msg:
port = digest["ingress_port"]
mac_move = digest["mac_move"]
mac_addr = digest["src_mac"]
old_port = port ^ mac_move # Because mac_move = ingress_port ^ port
print("MAC: {}, Port={}".format(
mac(mac_addr), port), end="")
if mac_move != 0:
print("(Move from port={})".format(old_port))
else:
print("(New)")
# Since we do not have access to self, we have to use
# the hardcoded value for the TTL :(
smac.entry_with_smac_hit(src_addr=mac_addr,
port=port,
is_static=False,
ENTRY_TTL=60000).push()
dmac.entry_with_dmac_unicast(dst_addr=mac_addr,
port=port).push()
return 0
def l2_add_smac_drop(self, vid, mac_addr):
mac_addr = mac(mac_addr)
self.p4.Ingress.smac.entry_with_smac_drop(
src_addr=mac_addr).push()
def set_mcast(num_groups=1, num_sequencers=1):
all_ports = [p for p in range(8)]
mcast_ports = [0, 1, 2]
# Broadcast
bfrt.pre.node.entry(MULTICAST_NODE_ID=0, MULTICAST_RID=0,
MULTICAST_LAG_ID=[], DEV_PORT=all_ports).push()
bfrt.pre.mgid.entry(MGID=1, MULTICAST_NODE_ID=[0],
MULTICAST_NODE_L1_XID_VALID=[False],
MULTICAST_NODE_L1_XID=[0]).push()
# Multicast
bfrt.pre.node.entry(MULTICAST_NODE_ID=1, MULTICAST_RID=1,
MULTICAST_LAG_ID=[], DEV_PORT=mcast_ports).push()
bfrt.pre.mgid.entry(MGID=2, MULTICAST_NODE_ID=[1],
MULTICAST_NODE_L1_XID_VALID=[False],
MULTICAST_NODE_L1_XID=[0]).push()
### Setup L2 learning
sl2 = l2_switch(default_ttl=10000)
sl2.setup()
sl2.l2_add_smac_drop(1, "00:00:00:00:00:00")
bfrt.complete_operations()
p4 = bfrt.nopaxos.pipe
num_groups = 1
num_sequencers = 1
set_mcast(num_groups, num_sequencers)
### Register initialization
p4.Ingress.reg_cnt.mod(0, 0)
bfrt.complete_operations()
### Register print out
print("""******************* SETUP RESULTS *****************""")
print ("\n reg_cnt:")
p4.Ingress.reg_cnt.get(REGISTER_INDEX=0, from_hw=True)
from bfrtcli import *
#from netaddr import *
class l2_switch():
#
# Helper Functions to deal with ports
#
def devport(self, pipe, port):
return ((pipe & 3) << 7) | (port & 0x7F)
def pipeport(self,dp):
return ((dp & 0x180) >> 7, (dp & 0x7F))
def mcport(self, pipe, port):
return pipe * 72 + port
def devport_to_mcport(self, dp):
return self.mcport(*self.pipeport(dp))
# This is a useful bfrt_python function that should potentially allow one
# to quickly clear all the logical tables (including the fixed ones) in
# their data plane program.
#
# This function can clear all P4 tables and later other fixed objects
# (once proper BfRt support is added). As of SDE-9.2.0 the support is mixed.
# As a result the function contains some workarounds.
def clear_all(self, verbose=True, batching=True, clear_ports=False):
table_list = bfrt.info(return_info=True, print_info=False)
# Remove port tables from the list
port_types = ['PORT_CFG', 'PORT_FRONT_PANEL_IDX_INFO',
'PORT_HDL_INFO', 'PORT_STR_INFO']
if not clear_ports:
for table in list(table_list):
if table['type'] in port_types:
table_list.remove(table)
# The order is important. We do want to clear from the top,
# i.e. delete objects that use other objects. For example,
# table entries use selector groups and selector groups
# use action profile members.
#
# Same is true for the fixed tables. However, the list of
# table types grows, so we will first clean the tables we
# know and then clear the rest
for table_types in (['MATCH_DIRECT', 'MATCH_INDIRECT_SELECTOR'],
['SELECTOR'],
['ACTION_PROFILE'],
['PRE_MGID'],
['PRE_ECMP'],
['PRE_NODE'],
[]): # This is catch-all
for table in list(table_list):
if table['type'] in table_types or len(table_types) == 0:
try:
if verbose:
print("Clearing table {:<40} ... ".
format(table['full_name']),
end='', flush=True)
table['node'].clear(batch=batching)
table_list.remove(table)
if verbose:
print('Done')
use_entry_list = False
except:
use_entry_list = True
# Some tables do not support clear(). Thus we'll try
# to get a list of entries and clear them one-by-one
if use_entry_list:
try:
if batching:
bfrt.batch_begin()
# This line can result in an exception,
# since # not all tables support get()
entry_list = table['node'].get(regex=True,
return_ents=True,
print_ents=False)
# Not every table supports delete() method.
# For those tables we'll try to push in an
# entry with everything being zeroed out
has_delete = hasattr(table['node'], 'delete')
if entry_list != -1:
if has_delete:
for entry in entry_list:
entry.remove()
else:
clear_entry = table['node'].entry()
for entry in entry_list:
entry.data = clear_entry.data
# We can still have an exception
# here, since not all tables
# support add()/mod()
entry.push()
if verbose:
print('Done')
else:
print('Empty')
table_list.remove(table)
except BfRtTableError as e:
print('Empty')
table_list.remove(table)
except Exception as e:
# We can have in a number of ways: no get(),
# no add() etc. Another reason is that the
# table is read-only.
if verbose:
print("Failed")
finally:
if batching:
bfrt.batch_end()
bfrt.complete_operations()
def __init__(self, default_ttl=60000):
self.p4 = bfrt.l2_switch.pipe
self.vlan = {}
self.all_ports = [port.key[b'$DEV_PORT']
for port in bfrt.port.port.get(regex=1,
return_ents=True,
print_ents=False)]
self.l2_age_ttl = default_ttl
if (hasattr(self.p4.Egress, 'port_vlan_member') and
hasattr(self.p4.Egress, 'port_vlan_tagged')):
self.egr_vlan_port = False
else:
self.egr_vlan_port = True
def setup(self):
self.clear_all()
self.__init__()
# Preset port properties and the multicast pruning
for dp in self.all_ports:
l2_xid = self.mcport(*self.pipeport(dp))
self.p4.IngressParser.PORT_METADATA.entry(ingress_port=dp,
port_vid=0, port_pcp=0,
l2_xid=l2_xid).push()
bfrt.pre.prune.entry(MULTICAST_L2_XID=l2_xid, DEV_PORT=[dp]).push()
# Ensure that egr_port_vlan (or its equivalent) is in asymmetric mode
if self.egr_vlan_port:
self.p4.Egress.egr_vlan_port.symmetric_mode_set(False)
else:
self.p4.Egress.port_vlan_member.symmetric_mode_set(False)
self.p4.Egress.port_vlan_tagged.symmetric_mode_set(False)
# Enable learning on SMAC
print("Initializing learning on SMAC ... ", end='', flush=True)
try:
self.p4.IngressDeparser.l2_digest.callback_deregister()
except:
pass
self.p4.IngressDeparser.l2_digest.callback_register(self.learning_cb)
print("Done")
# Enable aging on SMAC
print("Inializing Aging on SMAC ... ", end='', flush=True)
self.p4.Ingress.smac.idle_table_set_notify(enable=False,
callback=None)
self.p4.Ingress.smac.idle_table_set_notify(enable=True,
callback=self.aging_cb,
interval = 10000,
min_ttl = 10000,
max_ttl = 60000)
print("Done")
@staticmethod
def aging_cb(dev_id, pipe_id, direction, parser_id, entry):
smac = bfrt.l2_switch.pipe.Ingress.smac
dmac = bfrt.l2_switch.pipe.Ingress.dmac
vid = entry.key[b'meta.vid']
mac_addr = entry.key[b'hdr.ethernet.src_addr']
print("Aging out: VID: {}, MAC: {}".format(vid, mac(mac_addr)))
entry.remove() # from smac
try:
dmac.delete(vid=vid, dst_addr=mac_addr)
except:
print("WARNING: Could not find the matching DMAC entry")
@staticmethod
def learning_cb(dev_id, pipe_id, direction, parser_id, session, msg):
smac = bfrt.l2_switch.pipe.Ingress.smac
dmac = bfrt.l2_switch.pipe.Ingress.dmac
for digest in msg:
vid = digest["vid"]
port = digest["ingress_port"]
mac_move = digest["mac_move"]
mac_addr = digest["src_mac"]
old_port = port ^ mac_move # Because mac_move = ingress_port ^ port
print("VID: {}, MAC: {}, Port={}".format(
vid, mac(mac_addr), port), end="")
if mac_move != 0:
print("(Move from port={})".format(old_port))
else:
print("(New)")
# Since we do not have access to self, we have to use
# the hardcoded value for the TTL :(
smac.entry_with_smac_hit(vid=vid, src_addr=mac_addr,
port=port,
is_static=False,
ENTRY_TTL=60000).push()
dmac.entry_with_dmac_unicast(vid=vid, dst_addr=mac_addr,
port=port).push()
return 0
def vlan_create(self, vid):
if vid in self.vlan:
raise KeyError("Vlan {} already exists".format(vid))
if not 1 <= vid <= 4095:
raise ValueError("Vlan ID {} is incorrect".format(vid))
bfrt.pre.node.entry(MULTICAST_NODE_ID = vid,
MULTICAST_RID = 0xFFFF, # See P4 code
MULTICAST_LAG_ID = [ ],
DEV_PORT = [ ]).push()
bfrt.pre.mgid.entry(MGID = vid,
MULTICAST_NODE_ID = [vid],
MULTICAST_NODE_L1_XID_VALID = [0],
MULTICAST_NODE_L1_XID = [0]).push()
self.vlan[vid] = {
"ports": {}
}
def vlan_destroy(self, vid):
if vid not in self.vlan:
raise KeyError("Vlan {} doesn't exist".format(vid))
vlan_mgid = bfrt.pre.mgid.get(vid, print_ents=False)
# Remove the multicast group
bfrt.pre.mgid.delete(MGID=vid)
# Remove the corresponding (single) node
bfrt.pre.node.delete(MULTICAST_NODE_ID = vid)
# Remove all entries from egr_vlan_port
bfrt.batch_begin()
try:
if self.egr_vlan_port:
for pipe in range(0, 3):
# pipe=None
for e in self.p4.Egress.egr_vlan_port.get(
pipe=pipe, regex=1, print_ents=False,
vid='0x[0]*{X}'.format(vid)):
e.remove()
else:
for pipe in range(0, 3):
for p in range(0, 72):
self.p4.Egress.port_vlan_member.mod(vid << 7 | p,
0, pipe=pipe)
self.p4.Egress.port_vlan_tagged.mod(vid << 7 | p,
0, pipe=pipe)
except:
pass
finally:
bfrt.batch_end()
del(self.vlan[vid])
def vlan_show(self, vlans=None):
if vlans is None:
vlans = self.vlan.keys()
print(
"""
+------+------------------------------------------
| VLAN | Ports (Tagged or Untagged)
+------+------------------------------------------""")
for vid in sorted(vlans):
print ('| {:>4d} | '.format(vid), end='')
for p in sorted(self.vlan[vid]["ports"].keys()):
print(p, end='')
if self.vlan[vid]["ports"][p]:
print("(T)", end='')
else:
print("(U)", end='')
print()
print( "+------+------------------------------------------")
def vlan_port_add(self, vid, dp, tagged=False):
if vid not in self.vlan:
raise KeyError("Vlan %d doesn't exist" % vid)
# Update the multicast Node
vlan_node = bfrt.pre.node.get(MULTICAST_NODE_ID=vid,
return_ents=True, print_ents=False)
vlan_node.data[b'$DEV_PORT'].append(dp)
vlan_node.push()
# Update egr_vlan_port table with the proper action
# Unfortunately push(pipe=xxx) doesn't work (DRV-3667), so we'll
# explicitly delete the entry and then add it to avoid any
# issues
(pipe, port) = self.pipeport(dp)
#pipe=None
try:
if self.egr_vlan_port:
self.p4.Egress.egr_vlan_port.delete(
vid=vid, egress_port=port, pipe=pipe)
except:
pass
if self.egr_vlan_port:
# Using the egr_port_vlan table
if tagged:
self.p4.Egress.egr_vlan_port.add_with_send_tagged(
vid=vid, egress_port=port, pipe=pipe)
else:
self.p4.Egress.egr_vlan_port.add_with_send_untagged(
vid=vid, egress_port=port, pipe=pipe)
else:
# Using register equivalents
print('port_vlan_member.mod({}/{}, 1, {}) ... '.format(
vid, port, pipe),
end='', flush=True)
self.p4.Egress.port_vlan_member.mod(
vid << 7 | port, 1, pipe=pipe)
print('Done', flush=True)
if tagged:
self.p4.Egress.port_vlan_tagged.mod(
vid << 7 | port, 1, pipe=pipe)
else:
self.p4.Egress.port_vlan_tagged.mod(
vid << 7 | port, 0, pipe=pipe)
# Update internal state
vlan_ports = self.vlan[vid]["ports"]
vlan_ports[dp] = tagged
def vlan_port_delete(self, vid, dp):
if vid not in self.vlan:
raise KeyError("Vlan %d doesn't exist" % vid)
vlan_node = bfrt.pre.node_get(MULTICAST_NODE_ID=vid,
return_ents=True, print_ents=False)
try:
vlan_node.data[b'$DEV_PORT'].remove(dp)
except:
pass
vlan_node.push()
try:
(pipe, port) = self.pipeport(dp)
#pipe=None
if self.egr_vlan_port:
self.p4.Egress.egr_vlan_port.delete(
vid=vid, egress_port=port, pipe=pipe)
else:
self.p4.Egress.port_vlan_member.mod(
vid << 7 | port, 0, pipe=pipe)
self.p4.Egress.port_vlan_tagged.mod(
vid << 7 | port, 0, pipe=pipe)
except:
pass
try:
del(self.vlan[vid]['ports'][dp])
except:
pass
def port_vlan_default_set(self, dp, vid):
e = self.p4.IngressParser.PORT_METADATA.get(ingress_port=dp,
return_ents=True,
print_ents=False)
e.data[b'port_vid'] = vid
e.push()
def port_vlan_default_get(self, dp):
return self.p4.IngressParser.PORT_METADATA.get(
ingress_port=dp,
return_ents=True, print_ents=False).data[b'port_vid']
def port_vlan_default_show(self, ports=None, show_all=False):
if ports is None:
ports = self.all_ports
else:
if not isinstance(ports, list):
ports = [ports]
for dp in sorted(ports):
try:
default_vid = self.port_vlan_default_get(dp)
if default_vid != 0 or show_all:
print("Port %3d : Default VLAN is %4d" % (dp, default_vid))
except:
pass
def l2_lookup(self, vid, mac_addr):
dmac_entry = self.p4.Ingress.dmac.get(vid=vid, dst_addr=mac_addr,
return_ents=True, print_ents=False)
if dmac_entry == -1:
dmac_entry = None
smac_entry = self.p4.Ingress.smac.get(vid=vid, src_addr=mac_addr,
return_ents=True, print_ents=False)
if smac_entry == -1:
smac_entry = None
return (dmac_entry, smac_entry)
def l2_add_smac_drop(self, vid, mac_addr):
mac_addr = mac(mac_addr)
self.p4.Ingress.smac.entry_with_smac_drop(
vid=vid, src_addr=mac_addr).push()
def l2_del(self, vid, mac_addr):
mac_addr = mac(mac_addr)
p4.Ingress.dmac.delete(vid=vid, dst_add=mac_addr)
p4.Ingress.dmac.delete(vid=vid, src_add=mac_addr)
def l2_print(self, dmac_entry, smac_entry):
vid = None
mac_addr = None
port = " "
pending = " "
valid = " "
static = " "
dst_drop = " "
src_drop = " "
static = " "
ttl = " "
dmac_eh_s= " "
smac_eh_s= " "
if dmac_entry is not None:
valid = "Y"
vid = dmac_entry.key[b'meta.vid']
mac_addr = mac(dmac_entry.key[b'hdr.ethernet.dst_addr'])
if dmac_entry.action.endswith("dmac_drop"):
dst_drop = "Y"
else:
dst_drop = " "
if dmac_entry.action.endswith("dmac_unicast"):
port = dmac_entry.data[b'port']
if smac_entry is not None:
valid = "Y"
ttl = int(smac_entry.data[b'$ENTRY_TTL'])
if ttl > 1000 or ttl == 0:
ttl = "%6d" % (ttl/1000)
else:
ttl = " 0.%03d" % ttl
if dmac_entry is None:
vid = smac_entry.key[b'meta.vid']
mac_addr = mac(smac_entry.key[b'hdr.ethernet.src_addr'])
if smac_entry.action.endswith("smac_hit"):
if (dmac_entry is None or
dmac_entry.action.endswith("dmac_miss")):
pending = "Y"
port = smac_entry.data[b'port']
if smac_entry.data[b'is_static']:
static = "Y"
elif smac_entry.action.endswith("smac_drop"):
src_drop = "Y"
if dmac_entry or smac_entry:
print("| %4d | %s | %3d | %s %s %s | %s %s | %s |" % (
vid, mac_addr, port,
valid, pending, static,
src_drop, dst_drop,
ttl))
def l2_show(self, from_hw=True):
dmac_entries={}
try:
for e in self.p4.Ingress.dmac.get(regex=1, from_hw=from_hw,
print_ents=False,
return_ents=True):
dmac_entries[
(e.key[b'meta.vid'], e.key[b'hdr.ethernet.dst_addr'])] = e
except:
pass
smac_entries = {}
try:
for e in self.p4.Ingress.smac.get(regex=1, from_hw=from_hw,
print_ents=False,
return_ents=True):
smac_entries[
(e.key[b'meta.vid'], e.key[b'hdr.ethernet.src_addr'])] = e
except:
pass
print (
"""
+------+-------------------+-----+-------+------+--------+
| VLAN | MAC ADDR |PORT | Flags | DROP | TTL(s) |
| | | | V P S | S D | |
+------+-------------------+-----+-------+------+--------+""")
for (vid, mac_addr) in dmac_entries:
dmac_entry = dmac_entries[(vid, mac_addr)]
try:
smac_entry = smac_entries[(vid, mac_addr)]
except:
smac_entry = None
self.l2_print(dmac_entry, smac_entry)
if smac_entry is not None:
del(smac_entries[(vid, mac_addr)])
for smac_entry in smac_entries.values():
self.l2_print(None, smac_entry)
print (
"""+------+-------------------+-----+-------+------+--------+""")
# Sample setup
sl2 = l2_switch(default_ttl=10000)
sl2.setup()
sl2.vlan_create(1)
sl2.vlan_port_add(1, 0)
sl2.vlan_port_add(1, 1)
sl2.vlan_port_add(1, 2)
sl2.vlan_port_add(1, 3)
sl2.port_vlan_default_set(0, 1)
sl2.port_vlan_default_set(1, 1)
sl2.port_vlan_default_set(2, 1)
sl2.port_vlan_default_set(3, 1)
sl2.l2_add_smac_drop(1, "00:00:00:00:00:00")
bfrt.complete_operations()
sl2.vlan_show()
sl2.port_vlan_default_show()
sl2.l2_show()
...@@ -22,215 +22,400 @@ ...@@ -22,215 +22,400 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include <arpa/inet.h>
#include <fcntl.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <cassert>
#include <chrono>
#include <climits>
#include <csignal> #include <csignal>
#include <cstdio> #include <cstdio>
#include <cstring>
#include <climits>
#include <cassert>
#include <cstdlib> #include <cstdlib>
#include <unistd.h> #include <cstring>
#include <fcntl.h> #include <fstream>
#include <sys/socket.h> #include <iostream>
#include <sys/ioctl.h> #include <set>
#include <arpa/inet.h> #include <string>
#include <net/if.h>
#include <linux/if_packet.h>
#include <vector> #include <vector>
extern "C" { extern "C" {
#include <simbricks/netif/netif.h> #include <simbricks/netif/netif.h>
#include <simbricks/proto/base.h> #include <simbricks/proto/base.h>
}; };
#include <utils/json.hpp>
//#define DEBUG
using json = nlohmann::json;
typedef long long int ts_t;
static uint64_t sync_period = (500 * 1000ULL); static const int log_wait_limit_ms = 10; // 10ms
static uint64_t eth_latency = (500 * 1000ULL); // 500ns static ts_t sync_period = (500 * 1000ULL);
static const int ETHER_TYPE = 0x0800; static ts_t eth_latency = (500 * 1000ULL); // 500ns
static uint64_t cur_ts = 0; static int sync_mode = SIMBRICKS_PROTO_SYNC_SIMBRICKS;
static ts_t cur_ts = 0;
static int exiting = 0; static int exiting = 0;
static std::vector<struct SimbricksNetIf> nsifs; static std::vector<struct SimbricksNetIf> nsifs;
static std::vector<int> tofino_fds; static std::vector<int> tofino_fds;
static std::ifstream log_ifs;
static std::string log_line;
static const int flush_msg_sz = 14;
static char flush_msg[flush_msg_sz] = {0x0};
static void sigint_handler(int dummy) { static void sigint_handler(int dummy) {
exiting = 1; exiting = 1;
} }
static void dev_to_switch(struct SimbricksNetIf *nsif, size_t port) { struct packet_info {
volatile union SimbricksProtoNetD2N *msg_from = unsigned int port;
SimbricksNetIfD2NPoll(nsif, cur_ts); ts_t latency;
if (msg_from == nullptr) { };
return;
} struct event {
uint8_t type = msg_from->dummy.own_type & SIMBRICKS_PROTO_NET_D2N_MSG_MASK; ts_t time;
if (type == SIMBRICKS_PROTO_NET_D2N_MSG_SEND) { bool to_switch;
volatile struct SimbricksProtoNetD2NSend *tx; unsigned int port;
tx = &msg_from->send; std::string msg;
if (send(tofino_fds.at(port), (const void *)tx->data, tx->len, 0) < tx->len) { };
fprintf(stderr, "tofino: failed to forward packet to switch\n"); struct classcomp {
abort(); bool operator()(const struct event &lhs, const struct event &rhs) const {
} return lhs.time < rhs.time;
} else if (type == SIMBRICKS_PROTO_NET_D2N_MSG_SYNC) { }
} else { };
fprintf(stderr, "tofino: unsupported type=%u\n", type); std::set<struct event, classcomp> event_queue;
abort();
static bool get_tofino_log_line(int limit_ms) {
using std::chrono::system_clock;
system_clock::time_point start, end;
char buf[16384];
start = system_clock::now();
do {
log_ifs.clear();
log_ifs.getline(buf, 16384);
log_line.append(buf);
end = system_clock::now();
} while (!log_ifs.good() &&
(limit_ms < 0 ||
std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
.count() < limit_ms));
return log_ifs.good();
}
static void get_egress_pkts(ts_t ingress_ts,
std::vector<struct packet_info> &pkts) {
while (get_tofino_log_line(log_wait_limit_ms)) {
json j = json::parse(log_line);
log_line.clear();
if (j.contains("context")) {
auto context = j.at("context");
if (context.at("gress").get<std::string>().compare("egress") == 0 &&
context.at("component").get<std::string>().compare("port") == 0) {
unsigned int port = j.at("packet").at("port").get<unsigned int>();
ts_t latency = j.at("sim_time").get<ts_t>() / 100000000 - ingress_ts;
struct packet_info info = {port, latency};
pkts.push_back(info);
}
} else if (j.contains("message") &&
j.at("message").get<std::string>().compare(0, 7, "Ingress") ==
0) {
break;
}
}
}
static std::vector<struct packet_info> get_tofino_output() {
std::vector<struct packet_info> pkts;
// First, get packet ingress time
ts_t ingress_ts = -1;
while (ingress_ts < 0) {
get_tofino_log_line(-1);
json j = json::parse(log_line);
log_line.clear();
if (j.contains("message") &&
j.at("message").get<std::string>().compare(0, 7, "Ingress") == 0) {
ingress_ts = j.at("sim_time").get<ts_t>() / 100000000;
} }
SimbricksNetIfD2NDone(nsif, msg_from); }
// Next, get egress time for each port
get_egress_pkts(ingress_ts, pkts);
// Send a malformatted message to force log flushing
send(tofino_fds.at(0), flush_msg, flush_msg_sz, 0);
get_egress_pkts(ingress_ts, pkts);
return pkts;
}
static ts_t get_min_peer_time() {
std::set<uint64_t> peer_times;
for (auto &nsif : nsifs) {
peer_times.insert(SimbricksNetIfD2NTimestamp(&nsif));
}
return *peer_times.begin();
} }
static void switch_to_dev(size_t port) { static void switch_to_dev(int port) {
static const int BUFFER_SIZE = 2048; static const int BUFFER_SIZE = 2048;
char buf[BUFFER_SIZE]; char buf[BUFFER_SIZE];
volatile union SimbricksProtoNetN2D *msg_to; volatile union SimbricksProtoNetN2D *msg_to;
struct sockaddr_ll addr;
socklen_t addr_len;
ssize_t n;
#ifdef DEBUG
printf("forward packet to peer %u at time %llu\n", port, cur_ts);
#endif
while ((n = recvfrom(tofino_fds.at(port), buf, BUFFER_SIZE, 0,
(struct sockaddr *)&addr, &addr_len)) <= 0 ||
addr.sll_pkttype == PACKET_OUTGOING) {
;
}
ssize_t n = recv(tofino_fds.at(port), buf, BUFFER_SIZE, 0); msg_to = SimbricksNetIfN2DAlloc(&nsifs[port], cur_ts, eth_latency);
if (n <= 0) { if (msg_to != nullptr) {
return; volatile struct SimbricksProtoNetN2DRecv *rx;
rx = &msg_to->recv;
rx->len = n;
rx->port = 0;
memcpy((void *)rx->data, (void *)buf, n);
// WMB();
rx->own_type =
SIMBRICKS_PROTO_NET_N2D_MSG_RECV | SIMBRICKS_PROTO_NET_N2D_OWN_DEV;
} else {
fprintf(stderr, "switch_to_dev: dropping packet\n");
}
}
static void process_event(const struct event &e) {
if (e.to_switch) {
#ifdef DEBUG
printf("process to_switch event from peer %u at time %llu\n", e.port,
e.time);
#endif
if (send(tofino_fds.at(e.port), e.msg.data(), e.msg.length(), 0) <
(long int)e.msg.length()) {
fprintf(stderr, "tofino: failed to forward packet to switch\n");
abort();
}
for (const auto &pkt : get_tofino_output()) {
if (pkt.port < nsifs.size()) {
auto &nsif = nsifs.at(pkt.port);
if (nsif.sync) {
struct event de;
de.time = cur_ts + pkt.latency;
de.to_switch = false;
de.port = pkt.port;
event_queue.insert(de);
#ifdef DEBUG
printf("add to_dev event to peer %u at time %llu to queue\n", de.port,
de.time);
#endif
} else {
switch_to_dev(pkt.port);
}
}
} }
} else {
switch_to_dev(e.port);
}
}
msg_to = SimbricksNetIfN2DAlloc(&nsifs[port], cur_ts, eth_latency); static void recv_from_peer(int port) {
if (msg_to != NULL) { struct SimbricksNetIf *nsif = &nsifs.at(port);
volatile struct SimbricksProtoNetN2DRecv *rx; volatile union SimbricksProtoNetD2N *msg_from =
rx = &msg_to->recv; SimbricksNetIfD2NPoll(nsif, cur_ts);
rx->len = n; if (msg_from == nullptr) {
rx->port = 0; return;
memcpy((void *)rx->data, (void *)buf, n); }
uint8_t type = msg_from->dummy.own_type & SIMBRICKS_PROTO_NET_D2N_MSG_MASK;
if (type == SIMBRICKS_PROTO_NET_D2N_MSG_SEND) {
struct event e;
e.time = msg_from->send.timestamp;
e.to_switch = true;
e.port = port;
e.msg = std::string((const char *)msg_from->send.data, msg_from->send.len);
#ifdef DEBUG
printf("received packet from peer %u at time %llu\n", port, e.time);
#endif
if (nsif->sync) {
event_queue.insert(e);
#ifdef DEBUG
printf("add to_switch event from peer %u at time %llu to queue\n", port,
e.time);
#endif
} else {
process_event(e);
}
} else if (type == SIMBRICKS_PROTO_NET_D2N_MSG_SYNC) {
} else {
fprintf(stderr, "tofino: unsupported type=%u\n", type);
abort();
}
SimbricksNetIfD2NDone(nsif, msg_from);
}
// WMB(); static void process_event_queue() {
rx->own_type = while (!event_queue.empty()) {
SIMBRICKS_PROTO_NET_N2D_MSG_RECV | SIMBRICKS_PROTO_NET_N2D_OWN_DEV; const struct event &e = *event_queue.begin();
if (e.time <= cur_ts) {
process_event(e);
event_queue.erase(event_queue.begin());
} else { } else {
fprintf(stderr, "switch_to_dev: dropping packet\n"); break;
} }
}
} }
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
int c; int c;
int bad_option = 0; int bad_option = 0;
int sync_mode = SIMBRICKS_PROTO_SYNC_SIMBRICKS; int sync = 1;
int sync = 1; std::string tofino_log;
struct SimbricksNetIf nsif;
// Parse command line argument
// Parse command line argument while ((c = getopt(argc, argv, "s:S:E:m:t:u")) != -1 && !bad_option) {
while ((c = getopt(argc, argv, "s:S:E:m:")) != -1 && !bad_option) { switch (c) {
switch (c) { case 's':
case 's': struct SimbricksNetIf nsif;
if (SimbricksNetIfInit(&nsif, optarg, &sync) != 0) { if (SimbricksNetIfInit(&nsif, optarg, &sync) != 0) {
fprintf(stderr, "connecting to %s failed\n", optarg); fprintf(stderr, "connecting to %s failed\n", optarg);
return EXIT_FAILURE; return EXIT_FAILURE;
}
nsifs.push_back(nsif);
break;
case 'S':
sync_period = strtoull(optarg, NULL, 0) * 1000ULL;
break;
case 'E':
eth_latency = strtoull(optarg, NULL, 0) * 1000ULL;
break;
case 'm':
sync_mode = strtol(optarg, NULL, 0);
assert(sync_mode == SIMBRICKS_PROTO_SYNC_SIMBRICKS ||
sync_mode == SIMBRICKS_PROTO_SYNC_BARRIER);
break;
default:
fprintf(stderr, "unknown option %c\n", c);
bad_option = 1;
break;
} }
nsifs.push_back(nsif);
break;
case 'S':
sync_period = strtoull(optarg, NULL, 0) * 1000ULL;
break;
case 'E':
eth_latency = strtoull(optarg, NULL, 0) * 1000ULL;
break;
case 'm':
sync_mode = strtol(optarg, NULL, 0);
assert(sync_mode == SIMBRICKS_PROTO_SYNC_SIMBRICKS ||
sync_mode == SIMBRICKS_PROTO_SYNC_BARRIER);
break;
case 't':
tofino_log = std::string(optarg);
break;
case 'u':
sync = 0;
break;
default:
fprintf(stderr, "unknown option %c\n", c);
bad_option = 1;
break;
} }
}
if (nsifs.empty() || bad_option) { if (!sync) {
fprintf(stderr, for (auto &nsif : nsifs) {
"Usage: tofino [-S SYNC-PERIOD] [-E ETH-LATENCY] " nsif.sync = 0;
"-s SOCKET-A [-s SOCKET-B ...]\n");
return EXIT_FAILURE;
} }
}
signal(SIGINT, sigint_handler); if (nsifs.empty() || tofino_log.empty() || bad_option) {
signal(SIGTERM, sigint_handler); fprintf(stderr,
"Usage: tofino [-S SYNC-PERIOD] [-E ETH-LATENCY] "
"-t TOFINO-LOG-PATH -s SOCKET-A [-s SOCKET-B ...]\n");
return EXIT_FAILURE;
}
// Create sockets for Tofino model interfaces signal(SIGINT, sigint_handler);
for (size_t port = 0; port < nsifs.size(); port++) { signal(SIGTERM, sigint_handler);
int fd = socket(PF_PACKET, SOCK_RAW, htons(ETHER_TYPE));
if (fd == -1) {
fprintf(stderr, "Failed to create raw socket\n");
abort();
}
char ifname[16]; // Open Tofino log file
sprintf(ifname, "veth%ld", port*2); log_ifs.open(tofino_log.c_str(), std::ifstream::in);
struct ifreq ifopts; if (!log_ifs.good()) {
memset(&ifopts, 0, sizeof(ifopts)); fprintf(stderr, "Failed to open tofino log file %s\n", tofino_log.c_str());
strcpy(ifopts.ifr_name, ifname); abort();
if (ioctl(fd, SIOCGIFINDEX, &ifopts) < 0) { }
fprintf(stderr, "Failed to set ioctl option SIOCGIFINDEX\n");
abort();
}
int sockopt = 1; // Create sockets for Tofino model interfaces
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) { for (size_t port = 0; port < nsifs.size(); port++) {
fprintf(stderr, "Failed to set socket option SO_REUSEADDR"); int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
abort(); if (fd == -1) {
} fprintf(stderr, "Failed to create raw socket\n");
abort();
}
if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { char ifname[16];
fprintf(stderr, "Failed to set socket to non-blocking\n"); sprintf(ifname, "veth%ld", port * 2 + 1);
abort(); struct ifreq ifopts;
} memset(&ifopts, 0, sizeof(ifopts));
strcpy(ifopts.ifr_name, ifname);
if (ioctl(fd, SIOCGIFINDEX, &ifopts) < 0) {
fprintf(stderr, "Failed to set ioctl option SIOCGIFINDEX\n");
abort();
}
int sockopt = 1;
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) ==
-1) {
fprintf(stderr, "Failed to set socket option SO_REUSEADDR");
abort();
}
struct sockaddr_ll sll; if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
bzero(&sll, sizeof(sll)); fprintf(stderr, "Failed to set socket to non-blocking\n");
sll.sll_family = AF_PACKET; abort();
sll.sll_ifindex = ifopts.ifr_ifindex; }
if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) == -1) { struct sockaddr_ll sll;
fprintf(stderr, "Failed to bind socket\n"); bzero(&sll, sizeof(sll));
abort(); sll.sll_family = AF_PACKET;
} sll.sll_ifindex = ifopts.ifr_ifindex;
tofino_fds.push_back(fd); if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) == -1) {
fprintf(stderr, "Failed to bind socket\n");
abort();
} }
printf("start polling\n"); tofino_fds.push_back(fd);
while (!exiting) { }
// Sync all interfaces
for (auto &nsif : nsifs) { fprintf(stderr, "start polling\n");
if (SimbricksNetIfN2DSync(&nsif, cur_ts, eth_latency, sync_period, while (!exiting) {
sync_mode) != 0) { // Sync all interfaces
fprintf(stderr, "SimbricksNetIfN2DSync failed\n"); for (auto &nsif : nsifs) {
abort(); if (SimbricksNetIfN2DSync(&nsif, cur_ts, eth_latency, sync_period,
} sync_mode) != 0) {
} fprintf(stderr, "SimbricksNetIfN2DSync failed\n");
SimbricksNetIfAdvanceEpoch(cur_ts, sync_period, sync_mode); abort();
}
// Switch packets
uint64_t min_ts;
do {
min_ts = ULLONG_MAX;
for (size_t port = 0; port < nsifs.size(); port++) {
auto &nsif = nsifs.at(port);
dev_to_switch(&nsif, port);
if (nsif.sync) {
uint64_t ts = SimbricksNetIfD2NTimestamp(&nsif);
min_ts = ts < min_ts ? ts : min_ts;
}
}
for (size_t port = 0; port < nsifs.size(); port++) {
switch_to_dev(port);
}
} while (!exiting && (min_ts <= cur_ts));
// Update cur_ts
if (min_ts < ULLONG_MAX) {
cur_ts = SimbricksNetIfAdvanceTime(min_ts, sync_period, sync_mode);
}
} }
SimbricksNetIfAdvanceEpoch(cur_ts, sync_period, sync_mode);
for (int fd : tofino_fds) { // Switch packets
close(fd); ts_t min_ts = 0;
while (!exiting && min_ts <= cur_ts) {
for (int port = 0; port < (int)nsifs.size(); port++) {
recv_from_peer(port);
}
min_ts = get_min_peer_time();
process_event_queue();
} }
if (min_ts > cur_ts) {
cur_ts = SimbricksNetIfAdvanceTime(min_ts, sync_period, sync_mode);
}
}
for (int fd : tofino_fds) {
close(fd);
}
return 0; return 0;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment