Commit cd432895 authored by Jialin Li's avatar Jialin Li
Browse files

tofino: add nopaxos tofino P4 code and controller

parent 66590662
......@@ -25,7 +25,7 @@ import simbricks.simulators as sim
import simbricks.nodeconfig as node
host_configs = ['qemu', 'gt']
seq_configs = ['swseq', 'ehseq']
seq_configs = ['swseq', 'ehseq', 'tofino']
nic_configs = ['ib', 'cb', 'cv']
proto_configs = ['vr', 'nopaxos']
num_client_configs = [1, 2, 3, 4, 5, 6, 7, 8, 10, 12]
......@@ -41,7 +41,10 @@ for proto_config in proto_configs:
for seq_config in seq_configs:
for nic_config in nic_configs:
e = exp.Experiment(proto_config + '-' + host_config + '-' + nic_config + '-' + seq_config + f'-{num_c}')
net = sim.NS3SequencerNet()
if seq_config == 'tofino':
net = sim.TofinoNet()
else:
net = sim.NS3SequencerNet()
net.sync_period = sync_period
net.opt = link_rate_opt + link_latency_opt
e.add_network(net)
......
/* -*- P4_16 -*- */
#include <core.p4>
#include <tna.p4>
/* Width aliases for fields used by the headers and the register below. */
typedef bit<48> mac_addr_t;
typedef bit<32> ipv4_addr_t;
/* Types of nopaxos_h.sess_num and nopaxos_h.msg_num. */
typedef bit<16> sess_num_t;
typedef bit<64> msg_num_t;
/* Element type and index type of the sequencing register in Ingress. */
typedef bit<32> reg_val_t;
typedef bit<8> reg_key_t;
/* EtherType that selects the VLAN tag state in EgressParser. */
#define ETHERTYPE_TPID 0x8100
/* EtherType that selects the IPv4 state in both parsers. */
#define ETHERTYPE_IPV4 0x0800
/* Not referenced anywhere in this program. */
#define ETHERTYPE_PKTGEN 16w0x7777
/* IPv4 protocol number that selects the UDP state in IngressParser. */
#define IP_PROTOCOL_UDP 17
/* Not referenced anywhere in this program. */
#define IP_PROTOCOL_TCP 6
/* UDP destination port that marks a packet as carrying a nopaxos header. */
#define MCAST_DST_PORT 22222
/* Capacity of both the smac and the dmac tables. */
const int MAC_TABLE_SIZE = 65536;
/* digest_type value that makes IngressDeparser emit an l2_digest_t. */
const bit<3> L2_LEARN_DIGEST = 1;
/*************************************************************************
*********************** H E A D E R S *********************************
*************************************************************************/
/* Define all the headers the program will recognize */
/* The actual sets of headers processed by each gress can differ */
/* Ethernet header: 48-bit destination, 48-bit source, 16-bit EtherType
 * (14 bytes in total). The addresses use the file-wide mac_addr_t alias. */
header ethernet_h {
    mac_addr_t dst_addr;
    mac_addr_t src_addr;
    bit<16>    ether_type;
}
/* 32-bit VLAN tag. Only EgressParser extracts it, after an Ethernet header
 * whose ether_type equals ETHERTYPE_TPID. */
header vlan_tag_h {
    bit<3> pcp;
    bit<1> cfi;
    bit<12> vid;
    /* EtherType of the payload that follows the tag. */
    bit<16> ether_type;
}
/* Fixed 20-byte IPv4 header. This layout has no options field, and the
 * parsers do not inspect ihl. Addresses use the ipv4_addr_t alias. */
header ipv4_h {
    bit<4>      version;
    bit<4>      ihl;
    bit<8>      diffserv;
    bit<16>     total_len;
    bit<16>     identification;
    bit<3>      flags;
    bit<13>     frag_offset;
    bit<8>      ttl;
    bit<8>      protocol;
    bit<16>     hdr_checksum;
    ipv4_addr_t src_addr;
    ipv4_addr_t dst_addr;
}
/* UDP header. IngressParser matches dst_port against MCAST_DST_PORT, and
 * Ingress zeroes checksum on packets it sequences. */
header udp_h {
    bit<16> src_port;
    bit<16> dst_port;
    bit<16> len;
    bit<16> checksum;
}
/* Header extracted directly after UDP when dst_port == MCAST_DST_PORT.
 * Ingress overwrites sess_num and msg_num; is_frag and header_size are
 * passed through unchanged.
 * NOTE(review): the meaning of is_frag/header_size is defined by the
 * end-host library, not by this program -- confirm against the sender. */
header nopaxos_h {
    bit<32> is_frag;
    bit<16> header_size;
    // meta_data from here
    sess_num_t sess_num;
    msg_num_t msg_num;
}
/****** G L O B A L I N G R E S S M E T A D A T A *********/
/* Headers the ingress parser may extract. IngressDeparser emits the whole
 * struct, so the member order here is the order on the wire. */
struct my_ingress_headers_t {
    ethernet_h ethernet;
    ipv4_h ipv4;
    udp_h udp;
    nopaxos_h nopaxos;
}
/* Per-packet state for MAC learning; all fields are zeroed/initialised in
 * IngressParser and later packed into the l2 digest. */
struct my_ingress_metadata_t {
    /* ingress_port XOR the port stored in the matching smac entry;
     * 0 means the source MAC was seen on its known port. */
    bit<9> mac_move;
    /* Copied from the matching smac entry's is_static action parameter. */
    bit<1> is_static;
    /* 1 when the smac table matched the source MAC. */
    bit<1> smac_hit;
    /* Copy of ig_intr_md.ingress_port taken in the parser. */
    PortId_t ingress_port;
}
/*********************** P A R S E R **************************/
/*
 * Ingress parse graph:
 *   start -> meta_init -> ethernet -> [ipv4 -> [udp -> [nopaxos]]]
 * Each optional step is taken only when the previous header selects it;
 * anything else is accepted with the remaining headers left invalid.
 */
parser IngressParser(
    packet_in pkt,
    out my_ingress_headers_t hdr,
    out my_ingress_metadata_t meta,
    out ingress_intrinsic_metadata_t ig_intr_md){
    /* This is a mandatory state, required by Tofino Architecture */
    state start {
        pkt.extract(ig_intr_md);
        /* Skip the port metadata that follows the intrinsic metadata. */
        pkt.advance(PORT_METADATA_SIZE);
        transition meta_init;
    }
    /* Give every user metadata field a defined value before Ingress runs. */
    state meta_init {
        meta.mac_move = 0;
        meta.is_static = 0;
        meta.smac_hit = 0;
        meta.ingress_port = ig_intr_md.ingress_port;
        transition parse_ethernet;
    }
    state parse_ethernet {
        pkt.extract(hdr.ethernet);
        transition select(hdr.ethernet.ether_type){
            ETHERTYPE_IPV4: parse_ipv4;
            default: accept;
        }
    }
    state parse_ipv4 {
        pkt.extract(hdr.ipv4);
        transition select(hdr.ipv4.protocol){
            IP_PROTOCOL_UDP: parse_udp;
            default: accept;
        }
    }
    state parse_udp {
        pkt.extract(hdr.udp);
        /* Only traffic to the sequencer port carries a nopaxos header. */
        transition select(hdr.udp.dst_port) {
            MCAST_DST_PORT: parse_nopaxos;
            default: accept;
        }
    }
    state parse_nopaxos {
        pkt.extract(hdr.nopaxos);
        transition accept;
    }
}
/***************** M A T C H - A C T I O N *********************/
/*
 * Ingress match-action pipeline.
 *
 * Every packet goes through source-MAC learning (smac + smac_results).
 * Packets carrying a nopaxos header are then stamped with the next sequence
 * number and replicated to multicast group 2; all other packets are
 * L2-forwarded via the dmac table (unknown destinations flood to group 1).
 */
control Ingress(
    /* User */
    inout my_ingress_headers_t hdr,
    inout my_ingress_metadata_t meta,
    /* Intrinsic */
    in ingress_intrinsic_metadata_t ig_intr_md,
    in ingress_intrinsic_metadata_from_parser_t ig_prsr_md,
    inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md,
    inout ingress_intrinsic_metadata_for_tm_t ig_tm_md)
{
    /* Single-cell counter that holds the last sequence number handed out.
     * The cell is reg_val_t (32 bits) wide, so the value written into the
     * 64-bit msg_num wraps after 2^32 sequenced packets. */
    Register<reg_val_t, reg_key_t>(1) reg_cnt; // value, key

    /* Increment the counter and return the incremented value. */
    RegisterAction<reg_val_t, reg_key_t, reg_val_t>(reg_cnt)
    sequencing = {
        void apply(inout reg_val_t register_data, out reg_val_t seq_num) {
            register_data = register_data + 1;
            seq_num = register_data;
        }
    };

    action send(PortId_t port) {
        ig_tm_md.ucast_egress_port = port;
    }

    action drop() {
        ig_dprsr_md.drop_ctl = 1;
    }

    /* Source MAC is known: record whether it arrived on a different port
     * (mac_move != 0) and whether the entry is static. */
    action smac_hit(PortId_t port, bit<1> is_static) {
        meta.mac_move = ig_intr_md.ingress_port ^ port;
        meta.smac_hit = 1;
        meta.is_static = is_static;
    }

    action smac_miss() { }

    action smac_drop() {
        drop(); exit;
    }

    @idletime_precision(3)
    table smac {
        key = {
            hdr.ethernet.src_addr : exact;
        }
        actions = {
            smac_hit; smac_miss; smac_drop;
        }
        size = MAC_TABLE_SIZE;
        const default_action = smac_miss();
        idle_timeout = true;
    }

    /* Ask the deparser to send an l2_digest_t to the control plane. */
    action mac_learn_notify() {
        ig_dprsr_md.digest_type = L2_LEARN_DIGEST;
    }

    /* Decide what to do with the outcome of the smac lookup:
     *   miss                      -> learn
     *   hit, same port            -> nothing
     *   hit, moved, dynamic entry -> re-learn
     *   hit, moved, static entry  -> drop
     * The first matching entry wins, so "same port" must precede the two
     * "moved" rows. */
    table smac_results {
        key = {
            meta.mac_move : ternary;
            meta.is_static : ternary;
            meta.smac_hit : ternary;
        }
        actions = {
            mac_learn_notify; NoAction; smac_drop;
        }
        const entries = {
            ( _, _, 0) : mac_learn_notify();
            ( 0, _, 1) : NoAction();
            ( _, 0, 1) : mac_learn_notify();
            ( _, 1, 1) : smac_drop();
        }
    }

    action dmac_unicast(PortId_t port) {
        send(port);
    }

    /* Unknown destination: flood via multicast group 1. */
    action dmac_miss() {
        ig_tm_md.mcast_grp_a = 1;
    }

    action dmac_drop() {
        drop();
        exit;
    }

    table dmac {
        key = {
            hdr.ethernet.dst_addr : exact;
        }
        actions = {
            dmac_unicast; dmac_miss; dmac_drop;
        }
        size = MAC_TABLE_SIZE;
        default_action = dmac_miss();
    }

    apply {
        ig_tm_md.bypass_egress = 1w1;
        smac.apply();
        smac_results.apply();
        /* Guard on the header that is actually rewritten. The parser only
         * extracts hdr.nopaxos for UDP packets addressed to MCAST_DST_PORT,
         * so this implies the previous UDP/port test, and it additionally
         * guarantees that no sequence number is consumed for a packet whose
         * nopaxos header was not extracted. */
        if (hdr.nopaxos.isValid()) {
            /* The payload changes below, so the old UDP checksum no longer
             * matches; zero it instead of recomputing. */
            hdr.udp.checksum = 0;
            hdr.nopaxos.msg_num = (msg_num_t)sequencing.execute(0);
            hdr.nopaxos.sess_num = (sess_num_t)0;
            ig_tm_md.mcast_grp_a = 2;
        } else {
            switch (dmac.apply().action_run) {
                dmac_unicast: { /* Unicast source pruning */
                    if (ig_intr_md.ingress_port == ig_tm_md.ucast_egress_port) {
                        drop();
                    }
                }
            }
        }
    }
} // End of Ingress
/********************* D E P A R S E R ************************/
/* This struct is needed for proper digest receive API generation */
/* Layout of the learning digest. The member order must match the tuple
 * passed to l2_digest.pack() in IngressDeparser; the controller reads the
 * members by these names. */
struct l2_digest_t {
    bit<48> src_mac;
    bit<9> ingress_port;
    bit<9> mac_move;
    bit<1> is_static;
    bit<1> smac_hit;
}
/* Ingress deparser: optionally emits a learning digest, then writes all
 * valid ingress headers back in front of the payload. */
control IngressDeparser(packet_out pkt,
    /* User */
    inout my_ingress_headers_t hdr,
    in my_ingress_metadata_t meta,
    /* Intrinsic */
    in ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md)
{
    Digest <l2_digest_t>() l2_digest;
    apply {
        /* digest_type is set by the mac_learn_notify action in Ingress. */
        if (ig_dprsr_md.digest_type == L2_LEARN_DIGEST) {
            /* Field order follows l2_digest_t. */
            l2_digest.pack({
                hdr.ethernet.src_addr,
                meta.ingress_port,
                meta.mac_move,
                meta.is_static,
                meta.smac_hit });
        }
        pkt.emit(hdr);
    }
}
/*************************************************************************
**************** E G R E S S P R O C E S S I N G *******************
*************************************************************************/
/*********************** H E A D E R S ************************/
/* Headers the egress parser may extract; emitted in this order by
 * EgressDeparser. */
struct my_egress_headers_t {
    ethernet_h ethernet;
    vlan_tag_h vlan_tag;
    ipv4_h ipv4;
}
/******** G L O B A L E G R E S S M E T A D A T A *********/
/* Intentionally empty: the egress control keeps no per-packet state. */
struct my_egress_metadata_t {
}
/*********************** P A R S E R **************************/
/*
 * Egress parse graph: ethernet -> [vlan_tag] -> [ipv4].
 * NOTE(review): Ingress sets ig_tm_md.bypass_egress for every packet, so
 * this parser is presumably never exercised -- confirm before relying on it.
 */
parser EgressParser(packet_in pkt,
    /* User */
    out my_egress_headers_t hdr,
    out my_egress_metadata_t meta,
    /* Intrinsic */
    out egress_intrinsic_metadata_t eg_intr_md)
{
    /* This is a mandatory state, required by Tofino Architecture */
    state start {
        pkt.extract(eg_intr_md);
        transition parse_ethernet;
    }
    state parse_ethernet {
        pkt.extract(hdr.ethernet);
        transition select(hdr.ethernet.ether_type) {
            ETHERTYPE_TPID: parse_vlan_tag;
            ETHERTYPE_IPV4: parse_ipv4;
            default: accept;
        }
    }
    state parse_vlan_tag {
        pkt.extract(hdr.vlan_tag);
        /* The tag carries the EtherType of the encapsulated payload. */
        transition select(hdr.vlan_tag.ether_type) {
            ETHERTYPE_IPV4: parse_ipv4;
            default: accept;
        }
    }
    state parse_ipv4 {
        pkt.extract(hdr.ipv4);
        transition accept;
    }
}
/***************** M A T C H - A C T I O N *********************/
/* Egress match-action stage: deliberately empty, packets pass through
 * unmodified. */
control Egress(
    /* User */
    inout my_egress_headers_t hdr,
    inout my_egress_metadata_t meta,
    /* Intrinsic */
    in egress_intrinsic_metadata_t eg_intr_md,
    in egress_intrinsic_metadata_from_parser_t eg_prsr_md,
    inout egress_intrinsic_metadata_for_deparser_t eg_dprsr_md,
    inout egress_intrinsic_metadata_for_output_port_t eg_oport_md)
{
    apply {
    }
}
/********************* D E P A R S E R ************************/
/* Egress deparser: re-emits whichever of ethernet / vlan_tag / ipv4 the
 * egress parser extracted. */
control EgressDeparser(packet_out pkt,
    /* User */
    inout my_egress_headers_t hdr,
    in my_egress_metadata_t meta,
    /* Intrinsic */
    in egress_intrinsic_metadata_for_deparser_t eg_dprsr_md)
{
    apply {
        pkt.emit(hdr);
    }
}
/************ F I N A L P A C K A G E ******************************/
/* Assemble the six blocks defined above into one pipeline and instantiate
 * the switch with it. The instance name "pipe" is the name the controller
 * uses to reach the tables (bfrt.nopaxos.pipe). */
Pipeline(IngressParser(),
    Ingress(),
    IngressDeparser(),
    EgressParser(),
    Egress(),
    EgressDeparser()
) pipe;
Switch(pipe) main;
from bfrtcli import *
class l2_switch():
    """Control-plane helper for the L2-learning part of the nopaxos program.

    It clears the switch tables, registers the digest (learning) callback and
    the idle-timeout (aging) callback, and keeps the ``smac``/``dmac`` tables
    in sync from those callbacks.  It relies on the ``bfrt`` object and the
    helpers that ``from bfrtcli import *`` puts into scope.
    """

    #
    # Helper Functions to deal with ports
    #
    def devport(self, pipe, port):
        """Combine a pipe number (2 bits) and a port (7 bits) into a dev port."""
        return ((pipe & 3) << 7) | (port & 0x7F)

    def pipeport(self, dp):
        """Split a dev port into its ``(pipe, port)`` components."""
        return ((dp & 0x180) >> 7, (dp & 0x7F))

    def mcport(self, pipe, port):
        """Return the multicast port index, using 72 indices per pipe."""
        return pipe * 72 + port

    def devport_to_mcport(self, dp):
        """Convert a dev port number into its multicast port index."""
        return self.mcport(*self.pipeport(dp))

    # This is a useful bfrt_python function that should potentially allow one
    # to quickly clear all the logical tables (including the fixed ones) in
    # their data plane program.
    #
    # This function can clear all P4 tables and later other fixed objects
    # (once proper BfRt support is added). As of SDE-9.2.0 the support is mixed.
    # As a result the function contains some workarounds.
    def clear_all(self, verbose=True, batching=True, clear_ports=False):
        """Best-effort clear of every table reported by ``bfrt.info()``.

        Args:
            verbose: print one progress line per table.  When False nothing
                is printed at all.
            batching: wrap the operations in a BfRt batch.
            clear_ports: also clear the port tables (skipped by default).
        """
        table_list = bfrt.info(return_info=True, print_info=False)

        # Remove port tables from the list
        port_types = ['PORT_CFG', 'PORT_FRONT_PANEL_IDX_INFO',
                      'PORT_HDL_INFO', 'PORT_STR_INFO']
        if not clear_ports:
            for table in list(table_list):
                if table['type'] in port_types:
                    table_list.remove(table)

        # The order is important. We do want to clear from the top,
        # i.e. delete objects that use other objects. For example,
        # table entries use selector groups and selector groups
        # use action profile members.
        #
        # Same is true for the fixed tables. However, the list of
        # table types grows, so we will first clean the tables we
        # know and then clear the rest
        for table_types in (['MATCH_DIRECT', 'MATCH_INDIRECT_SELECTOR'],
                            ['SELECTOR'],
                            ['ACTION_PROFILE'],
                            ['PRE_MGID'],
                            ['PRE_ECMP'],
                            ['PRE_NODE'],
                            []):  # This is catch-all
            # Iterate over a copy: tables are removed from table_list as
            # soon as they have been dealt with.
            for table in list(table_list):
                if table_types and table['type'] not in table_types:
                    continue

                if verbose:
                    print("Clearing table {:<40} ... ".
                          format(table['full_name']),
                          end='', flush=True)
                try:
                    table['node'].clear(batch=batching)
                except Exception:
                    # Some tables do not support clear(). Fall through and
                    # try to get a list of entries to clear one-by-one.
                    pass
                else:
                    table_list.remove(table)
                    if verbose:
                        print('Done')
                    continue

                try:
                    if batching:
                        bfrt.batch_begin()

                    # This line can result in an exception,
                    # since not all tables support get()
                    entry_list = table['node'].get(regex=True,
                                                   return_ents=True,
                                                   print_ents=False)

                    # Not every table supports delete() method.
                    # For those tables we'll try to push in an
                    # entry with everything being zeroed out
                    has_delete = hasattr(table['node'], 'delete')

                    if entry_list != -1:
                        if has_delete:
                            for entry in entry_list:
                                entry.remove()
                        else:
                            clear_entry = table['node'].entry()
                            for entry in entry_list:
                                entry.data = clear_entry.data
                                # We can still have an exception
                                # here, since not all tables
                                # support add()/mod()
                                entry.push()
                        if verbose:
                            print('Done')
                    elif verbose:
                        print('Empty')
                    table_list.remove(table)
                except BfRtTableError:
                    if verbose:
                        print('Empty')
                    table_list.remove(table)
                except Exception:
                    # We can end up here in a number of ways: no get(),
                    # no add() etc. Another reason is that the
                    # table is read-only. The table stays in table_list
                    # so the catch-all pass can retry it.
                    if verbose:
                        print("Failed")
                finally:
                    if batching:
                        bfrt.batch_end()
        bfrt.complete_operations()

    def __init__(self, default_ttl=60000):
        """Cache the pipeline handle, the list of dev ports and the aging TTL.

        Args:
            default_ttl: value stored in ``self.l2_age_ttl``.
        """
        self.p4 = bfrt.nopaxos.pipe
        self.all_ports = [port.key[b'$DEV_PORT']
                          for port in bfrt.port.port.get(regex=1,
                                                         return_ents=True,
                                                         print_ents=False)]
        self.l2_age_ttl = default_ttl

    def setup(self):
        """Clear the switch and (re-)register the learning and aging callbacks."""
        self.clear_all()
        # Refresh the cached handles, but keep the TTL this object was
        # constructed with instead of falling back to the __init__ default.
        self.__init__(default_ttl=self.l2_age_ttl)

        # Enable learning on SMAC
        print("Initializing learning on SMAC ... ", end='', flush=True)
        try:
            self.p4.IngressDeparser.l2_digest.callback_deregister()
        except Exception:
            # Best effort: there may be no callback registered yet.
            pass
        self.p4.IngressDeparser.l2_digest.callback_register(self.learning_cb)
        print("Done")

        # Enable aging on SMAC
        print("Inializing Aging on SMAC ... ", end='', flush=True)
        # Disable first so that a previous registration is dropped.
        self.p4.Ingress.smac.idle_table_set_notify(enable=False,
                                                   callback=None)
        self.p4.Ingress.smac.idle_table_set_notify(enable=True,
                                                   callback=self.aging_cb,
                                                   interval=10000,
                                                   min_ttl=10000,
                                                   max_ttl=60000)
        print("Done")

    @staticmethod
    def aging_cb(dev_id, pipe_id, direction, parser_id, entry):
        """Idle-timeout callback: drop the aged smac entry and its dmac twin."""
        dmac = bfrt.nopaxos.pipe.Ingress.dmac

        mac_addr = entry.key[b'hdr.ethernet.src_addr']
        print("Aging out: MAC: {}".format(mac(mac_addr)))

        entry.remove()  # from smac
        try:
            dmac.delete(dst_addr=mac_addr)
        except Exception:
            print("WARNING: Could not find the matching DMAC entry")

    @staticmethod
    def learning_cb(dev_id, pipe_id, direction, parser_id, session, msg):
        """Digest callback: install smac/dmac entries for every learned MAC.

        Returns:
            0, after all digests in ``msg`` have been processed.
        """
        smac = bfrt.nopaxos.pipe.Ingress.smac
        dmac = bfrt.nopaxos.pipe.Ingress.dmac

        for digest in msg:
            port = digest["ingress_port"]
            mac_move = digest["mac_move"]
            mac_addr = digest["src_mac"]
            old_port = port ^ mac_move  # Because mac_move = ingress_port ^ port

            print("MAC: {}, Port={}".format(
                mac(mac_addr), port), end="")
            if mac_move != 0:
                print("(Move from port={})".format(old_port))
            else:
                print("(New)")

            # Since we do not have access to self, we have to use
            # the hardcoded value for the TTL :(
            smac.entry_with_smac_hit(src_addr=mac_addr,
                                     port=port,
                                     is_static=False,
                                     ENTRY_TTL=60000).push()
            dmac.entry_with_dmac_unicast(dst_addr=mac_addr,
                                         port=port).push()
        return 0
def set_mcast(num_groups=1, num_sequencers=1):
    """Program the two multicast groups the data plane refers to.

    MGID 1 replicates to ports 0-7 and MGID 2 to ports 0, 1 and 2; these are
    the group ids the P4 ingress control writes into ``mcast_grp_a``.

    Args:
        num_groups: accepted for the caller's benefit; currently unused.
        num_sequencers: accepted for the caller's benefit; currently unused.
    """
    flood_ports = list(range(8))
    sequenced_ports = [0, 1, 2]

    # (node id, which doubles as the RID) / group id / member ports.
    # Row 0 is the broadcast group, row 1 the multicast group.
    layout = (
        (0, 1, flood_ports),
        (1, 2, sequenced_ports),
    )
    for node_id, mgid, members in layout:
        # The node must exist before the group that references it.
        bfrt.pre.node.entry(MULTICAST_NODE_ID=node_id,
                            MULTICAST_RID=node_id,
                            MULTICAST_LAG_ID=[],
                            DEV_PORT=members).push()
        bfrt.pre.mgid.entry(MGID=mgid,
                            MULTICAST_NODE_ID=[node_id],
                            MULTICAST_NODE_L1_XID_VALID=[False],
                            MULTICAST_NODE_L1_XID=[0]).push()
### Setup L2 learning
# Build the helper, clear the switch and register the learning/aging
# callbacks; then wait for the pending operations to finish.
sl2 = l2_switch(default_ttl=10000)
sl2.setup()
bfrt.complete_operations()
# Handle to the P4 pipeline, used below for the register access.
p4 = bfrt.nopaxos.pipe
# Passed to set_mcast(), which currently does not use either value.
num_groups = 1
num_sequencers = 1
# Must run after setup(): setup() clears the tables, so groups programmed
# earlier would be lost.
set_mcast(num_groups, num_sequencers)
### Register initialization
# Reset cell 0 of the sequencing counter to 0.
p4.Ingress.reg_cnt.mod(0, 0)
bfrt.complete_operations()
### Register print out
print("""******************* SETUP RESULTS *****************""")
print ("\n reg_cnt:")
# Read the counter back from hardware to show the initial value.
p4.Ingress.reg_cnt.get(REGISTER_INDEX=0, from_hw=True)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment