Skip to content

Commit

Permalink
p4tc: Add packet digest extern
Browse files Browse the repository at this point in the history
Add packet digest extern as described in the P4 spec:
https://p4.org/p4-spec/docs/PSA.html#sec-packet-digest

Packet digest allows users to send packet or metadata information
collected during data path processing to user space. To do so, for P4TC
we created a new rtnetlink group (RTNLGRP_P4TC_DIGEST) focused entirely
on extern digest events. To make this more concrete, let's show an
example:

struct mac_learn_digest_t {
    EthernetAddress srcAddr;
    PortId_t        ingress_port;
}

control ingress(inout headers hdr,
                inout metadata meta,
                in    psa_ingress_input_metadata_t  istd,
                inout psa_ingress_output_metadata_t ostd)
{
    action unknown_source () {
        meta.mac_learn_msg.srcAddr = hdr.ethernet.srcAddr;
        meta.mac_learn_msg.ingress_port = istd.ingress_port;
    }

    ...
}

...

control IngressDeparserImpl(packet_out packet,
                            out empty_metadata_t clone_i2e_meta,
                            out empty_metadata_t resubmit_meta,
                            out empty_metadata_t normal_meta,
                            inout headers hdr,
                            in metadata meta,
                            in psa_ingress_output_metadata_t istd)
{
    Digest<mac_learn_digest_t>() mac_learn_digest;
    apply {
        mac_learn_digest.pack(meta.mac_learn_msg);
        ...
    }
}

In the above example, the P4 program is collecting the source mac
address and ingress_port. Then, at the deparser stage, it creates an
instance of a Digest extern (mac_learn_digest) with type
mac_learn_digest_t and invokes the pack method on that instance passing
the collected information gathered previously. The pack method will send
this information to user space.

To do the equivalent using P4TC, the user will issue the following
template commands:

$TC p4template create extern/root/digest ext_id 102 numinstances 1 \
        has_exec_method

$TC p4template create extern_inst/digest/digest/mac_learn_digest \
        tc_numel 0 instid 1 \
        control_path tc_key index type bit32 param srcAddr type macaddr \
        param ingress_port type dev

The first one will create the extern template for digest.
The second will create the instance (mac_learn_digest) which has
the same parameters as the instance shown in the P4 program (mac srcAddr
and ingress_port). Note that there's also a "tc_key" parameter. It is
not really used, but is required by the extern API.

After creating the extern instance, we can load the bpf program which will
call the pack kfunc (bpf_p4tc_extern_digest_pack):

struct p4tc_ext_bpf_params params = {
    .pipe_id = p4tc_filter_fields.pipeid,
    .ext_id = EXT_DIGEST_ID,
    .inst_id = EXT_DIGEST_INST_ID,
};

struct mac_learn_digest_t *mac_learn_digest =
        (struct mac_learn_digest_t *)&params.in_params;
mac_learn_digest->srcAddr = hdr->ethernet.srcAddr;
mac_learn_digest->ingress_port = skb->ifindex;
bpf_p4tc_extern_digest_pack(skb, &params);

This will cause the kernel to send a netlink event with the required
information. The event will look something like this:

Added extern
        extern order 1:
          Extern kind digest
          Extern instance mac_learn_digest
          Extern key 0
          Params:

          srcAddr  id 2 type macaddr value: 10:00:00:02:aa:bb
          ingress_port  id 3 type dev value: port0

Co-developed-by: Victor Nogueira <[email protected]>
Signed-off-by: Victor Nogueira <[email protected]>
Co-developed-by: Pedro Tammela <[email protected]>
Signed-off-by: Pedro Tammela <[email protected]>
Signed-off-by: Jamal Hadi Salim <[email protected]>
  • Loading branch information
jhsmt authored and vbnogueira committed May 24, 2024
1 parent 2b5b327 commit f661e03
Show file tree
Hide file tree
Showing 3 changed files with 379 additions and 0 deletions.
2 changes: 2 additions & 0 deletions include/uapi/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,8 @@ enum rtnetlink_groups {
#define RTNLGRP_STATS RTNLGRP_STATS
RTNLGRP_P4TC,
#define RTNLGRP_P4TC RTNLGRP_P4TC
RTNLGRP_P4TC_DIGEST,
#define RTNLGRP_P4TC_DIGEST RTNLGRP_P4TC_DIGEST
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
Expand Down
1 change: 1 addition & 0 deletions net/sched/p4tc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ obj-y := p4tc_types.o p4tc_tmpl_api.o p4tc_pipeline.o \
p4tc_tmpl_ext.o
obj-m += externs/ext_Counter.o
obj-m += externs/ext_csum.o
obj-m += externs/ext_digest.o
obj-$(CONFIG_DEBUG_INFO_BTF) += p4tc_bpf.o
376 changes: 376 additions & 0 deletions net/sched/p4tc/externs/ext_digest.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,376 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/p4tc/externs/ext_digest.c Example digest extern implementation
*
* Copyright (c) 2024, Mojatatu Networks
* Copyright (c) 2024, Intel Corporation.
* Authors: Jamal Hadi Salim <[email protected]>
* Victor Nogueira <[email protected]>
* Pedro Tammela <[email protected]>
*/

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/if_arp.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>
#include <net/p4tc.h>
#include <net/p4tc_ext_api.h>
#include <net/sock.h>
#include <net/sch_generic.h>
#include <linux/filter.h>
#include <linux/list.h>
#include <linux/idr.h>
#include <linux/bitops.h>

/* Number of skbs alloc_skb_pull() tries to preallocate into skb_pool. */
#define SKB_POOL_SIZE 0x80
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define SKB_MAX_SIZE 0x80

/* Extern id advertised through ext_digest_ops.id. */
#define EXTERN_DIGEST_ID 102
/*
 * Upper bound, in bytes, on the summed container sizes of an instance's
 * control parameters; enforced by the constructor.
 */
#define EXTERN_DIGEST_MAX_SIZE 0x80

/*
 * Digest-specific extern instance. The generic instance is the first member,
 * which is what lets to_digest_inst() cast a struct p4tc_extern_inst pointer
 * to this type.
 */
struct p4tc_extern_digest_inst {
/* Generic extern instance state; must stay the first member. */
struct p4tc_extern_inst common;
/* NOTE(review): never assigned or read in the visible part of this file. */
u32 payload_size;
};

/*
 * Cast a generic extern instance pointer to the digest instance that embeds
 * it. The argument is parenthesised so that the cast applies to the whole
 * expression passed in (e.g. "base + off"), not just its first operand.
 */
#define to_digest_inst(inst) ((struct p4tc_extern_digest_inst *)(inst))

/* Forward declaration: the constructor refers to the ops defined further down. */
static struct p4tc_extern_ops ext_digest_ops;

/*
 * Module-wide pool of preallocated skbs used to build digest netlink events.
 * Accesses in this file take skb_pool.lock with bottom halves disabled.
 */
static struct sk_buff_head skb_pool;

/*
 * Initialise skb_pool and fill it with up to SKB_POOL_SIZE skbs of
 * NLMSG_GOODSIZE bytes each. Filling stops early, without reporting an
 * error, as soon as one allocation fails.
 */
static void alloc_skb_pull(void)
{
	struct sk_buff *fresh;

	skb_queue_head_init(&skb_pool);

	spin_lock_bh(&skb_pool.lock);
	for (;;) {
		if (skb_pool.qlen >= SKB_POOL_SIZE)
			break;

		/* Atomic allocation: we are holding a spinlock here. */
		fresh = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
		if (!fresh)
			break;

		__skb_queue_tail(&skb_pool, fresh);
	}
	spin_unlock_bh(&skb_pool.lock);
}

static void free_skb_pull(void)
{
spin_lock_bh(&skb_pool.lock);
while (skb_pool.qlen) {
struct sk_buff *skb = __skb_dequeue(&skb_pool);

consume_skb(skb);
}
spin_unlock_bh(&skb_pool.lock);
}

/*
 * Step over the four-character "ext_" prefix of an extern kind name,
 * yielding a pointer to the remainder (e.g. "ext_digest" -> "digest").
 */
#define skip_prepended_ext(ext_kind) ((ext_kind) + 4)

/*
 * Constructor callback for digest extern instances.
 *
 * Rejects instances that ask for extern elements or that carry constructor
 * arguments, checks that the control parameters together fit within
 * EXTERN_DIGEST_MAX_SIZE bytes, then allocates the instance, attaches a copy
 * of the control parameters and initialises it with zero elements.
 *
 * Returns 0 on success. On failure returns -EINVAL, -E2BIG, or the error
 * reported by the allocation/copy/init helper that failed; anything
 * allocated here is released before returning.
 */
static int
p4tc_extern_digest_constr(struct p4tc_extern_inst **common,
			  struct p4tc_extern_params *control_params,
			  struct p4tc_extern_params *constr_params,
			  u32 max_num_elems, bool tbl_bindable,
			  struct netlink_ext_ack *extack)
{
	struct p4tc_extern_digest_inst *inst;
	struct p4tc_extern_params *params_copy;
	struct p4tc_extern_param *cur;
	unsigned long id, iter;
	u32 total_bytes = 0;
	int err;

	if (max_num_elems) {
		NL_SET_ERR_MSG(extack,
			       "Digest must not have any extern elems");
		return -EINVAL;
	}

	if (!idr_is_empty(&constr_params->params_idr)) {
		NL_SET_ERR_MSG(extack,
			       "Must not have any constructor arguments");
		return -EINVAL;
	}

	/* Sum the container size, in bytes, of every control parameter. */
	idr_for_each_entry_ul(&control_params->params_idr, cur, iter, id)
		total_bytes += BITS_TO_BYTES(cur->type->container_bitsz);

	if (total_bytes > EXTERN_DIGEST_MAX_SIZE) {
		NL_SET_ERR_MSG_FMT(extack,
				   "Control parameter byte size exceeds %u",
				   EXTERN_DIGEST_MAX_SIZE);
		return -E2BIG;
	}

	*common = p4tc_ext_inst_alloc(&ext_digest_ops,
				      max_num_elems, tbl_bindable,
				      skip_prepended_ext(ext_digest_ops.kind));
	if (IS_ERR(*common))
		return PTR_ERR(*common);

	params_copy = p4tc_ext_params_copy(control_params);
	if (IS_ERR(params_copy)) {
		err = PTR_ERR(params_copy);
		goto err_free_inst;
	}

	inst = to_digest_inst(*common);
	inst->common.params = params_copy;

	err = p4tc_extern_inst_init_elems(&inst->common, 0);
	if (err < 0)
		goto err_free_params;

	return 0;

err_free_params:
	p4tc_ext_params_free(params_copy);
err_free_inst:
	kfree(*common);
	return err;
}

/*
 * Destructor callback: purge the instance, release its parameter set if it
 * has one, then free the instance memory itself.
 */
static void
p4tc_extern_digest_deconstr(struct p4tc_extern_inst *common)
{
	struct p4tc_extern_params *params;

	p4tc_ext_inst_purge(common);

	/* Read the params pointer only after the purge, as before. */
	params = common->params;
	if (params)
		p4tc_ext_params_free(params);

	kfree(common);
}

/*
 * Copy one typed value from srcv to dstv: the source type's host_read op
 * loads it into a zeroed scratch buffer, and the destination type's
 * host_write op stores it from there.
 */
static void digest_params_cpy(struct p4tc_type_mask_shift *dst_mask_shift,
			      struct p4tc_type *dst_t, void *dstv,
			      struct p4tc_type_mask_shift *src_mask_shift,
			      struct p4tc_type *src_t, void *srcv)
{
	/* Scratch space sized for P4TC_MAX_KEYSZ bits. */
	u64 scratch[BITS_TO_U64(P4TC_MAX_KEYSZ)] = {0};

	src_t->ops->host_read(src_t, src_mask_shift, srcv, &scratch);
	dst_t->ops->host_write(dst_t, dst_mask_shift, scratch, dstv);
}

/*
 * Build the digest netlink event for extern element @e into @skb.
 *
 * The message is an RTM_P4TC_CREATE with a p4tcmsg header (pipeline id
 * @pipeid, object P4TC_OBJ_RUNTIME_EXTERN) followed by a P4TC_ROOT nest
 * that contains a single nested attribute of type 1 holding the element
 * dump.
 *
 * Returns 0 on success or -ENOMEM if any part of the message does not fit;
 * on failure @skb is trimmed back to where it was on entry.
 */
static int digest_nlmsg_prepare(struct sk_buff *skb,
				struct p4tc_extern_common *e,
				const u32 pipeid)
{
	unsigned char *b = nlmsg_get_pos(skb);
	struct nlmsghdr *nlh;
	struct nlattr *count;
	struct nlattr *nest;
	struct p4tcmsg *t;
	int ret;

	nlh = nlmsg_put(skb, 0, 0, RTM_P4TC_CREATE, sizeof(*t), NLM_F_CREATE);
	if (!nlh)
		return -ENOMEM;
	t = nlmsg_data(nlh);
	t->pipeid = pipeid;
	t->obj = P4TC_OBJ_RUNTIME_EXTERN;

	nest = nla_nest_start(skb, P4TC_ROOT);
	if (!nest) {
		ret = -ENOMEM;
		goto out_nlmsg_trim;
	}

	/*
	 * nla_nest_start() returns NULL when the attribute header does not
	 * fit; nla_nest_end() would then dereference that NULL pointer, so
	 * bail out here instead.
	 */
	count = nla_nest_start(skb, 1);
	if (!count) {
		ret = -ENOMEM;
		goto out_nlmsg_trim;
	}

	if (p4tc_ext_elem_dump_1(skb, e, false) < 0) {
		ret = -ENOMEM;
		goto out_nlmsg_trim;
	}
	nla_nest_end(skb, count);

	nla_nest_end(skb, nest);

	/* Final message length: everything appended since entry. */
	nlh->nlmsg_len = (unsigned char *)nlmsg_get_pos(skb) - b;

	return 0;

out_nlmsg_trim:
	nlmsg_trim(skb, b);
	return ret;
}

/*
 * Copy the values supplied by the BPF program into the extern's parameters.
 *
 * bpf_params->in_params is consumed as a packed sequence: parameters are
 * visited in idr order and each one takes BITS_TO_BYTES(container_bitsz)
 * bytes from the input, which is written into that parameter's value.
 */
static void digest_params_write(struct p4tc_extern_common *common,
				struct p4tc_ext_bpf_params *bpf_params)
{
	char *cursor = bpf_params->in_params;
	struct p4tc_extern_param *cur;
	unsigned long id, iter;

	idr_for_each_entry_ul(&common->params->params_idr, cur, iter, id) {
		struct p4tc_type *ptype = cur->type;
		const u32 width = BITS_TO_BYTES(ptype->container_bitsz);

		/* Same type and mask/shift on both sides: a typed copy. */
		digest_params_cpy(cur->mask_shift, ptype, cur->value,
				  cur->mask_shift, ptype, cursor);
		cursor += width;
	}
}

/*
 * Common implementation of the digest pack kfuncs.
 *
 * Takes an skb from the pool, stores the caller-supplied values into the
 * extern's parameters, builds the netlink event from them and multicasts
 * it to RTNLGRP_P4TC_DIGEST. The skb is put back on the pool afterwards.
 *
 * Returns 0 on success, including when nobody is listening (-ESRCH from
 * the multicast is mapped to 0); -EINVAL if params->index is not
 * P4TC_EXT_ELEM_PRIV_IDX; -ENOENT if the pool is empty; otherwise the
 * error from building or sending the message.
 */
static int
__p4tc_extern_digest_pack(struct p4tc_pipeline *pipeline,
			  struct p4tc_extern_common *common,
			  struct p4tc_ext_bpf_params *params)
{
	struct sk_buff *skb;
	int err;

	if (params->index != P4TC_EXT_ELEM_PRIV_IDX)
		return -EINVAL;

	spin_lock_bh(&skb_pool.lock);
	skb = __skb_dequeue(&skb_pool);
	spin_unlock_bh(&skb_pool.lock);
	if (!skb)
		return -ENOENT;

	/* Write and dump the params under one lock so the event is coherent. */
	spin_lock_bh(&common->params->params_lock);
	digest_params_write(common, params);
	err = digest_nlmsg_prepare(skb, common, params->pipe_id);
	spin_unlock_bh(&common->params->params_lock);
	/*
	 * Do not multicast a message that failed to build; the skb has been
	 * trimmed back by digest_nlmsg_prepare(), so just return it to the
	 * pool and report the error.
	 */
	if (err < 0)
		goto queue_skb;

	/* Extra reference so the skb is still ours after the multicast. */
	refcount_inc(&skb->users);

	/* Put the filter callback and pointers once we have it for externs */
	err = nlmsg_multicast_filtered(pipeline->net->rtnl, skb, 0,
				       RTNLGRP_P4TC_DIGEST, GFP_ATOMIC, NULL,
				       NULL);
	if (err == -ESRCH) {
		/* No listeners is not an error for the data path. */
		err = 0;
		refcount_set(&skb->users, 1);
	}

	/*
	 * NOTE(review): nothing visible here resets the skb's data/length
	 * before it is reused for the next digest - confirm whether the pool
	 * skbs need trimming before being queued again.
	 */
queue_skb:
	spin_lock_bh(&skb_pool.lock);
	__skb_queue_tail(&skb_pool, skb);
	spin_unlock_bh(&skb_pool.lock);
	return err;
}

__bpf_kfunc_start_defs();

/*
 * kfunc taking a struct sk_buff context: look up the extern element
 * described by @params, emit a digest event for it, then release the
 * element.
 *
 * Returns the lookup error if the element cannot be found, otherwise the
 * result of __p4tc_extern_digest_pack().
 */
__bpf_kfunc int
bpf_p4tc_extern_digest_pack(struct sk_buff *skb,
			    struct p4tc_ext_bpf_params *params)
{
	struct p4tc_pipeline *pipeline;
	struct p4tc_extern_common *elem;
	int ret;

	elem = p4tc_ext_common_elem_priv_get(skb, &pipeline, params);
	if (IS_ERR(elem))
		return PTR_ERR(elem);

	ret = __p4tc_extern_digest_pack(pipeline, elem, params);

	/* Drop the element obtained by the lookup above. */
	p4tc_ext_common_elem_put(pipeline, elem);

	return ret;
}

/*
 * kfunc taking a struct xdp_buff context: look up the extern element
 * described by @params, emit a digest event for it, then release the
 * element.
 *
 * NOTE(review): this uses p4tc_xdp_ext_common_elem_get() while the sk_buff
 * variant uses the *_priv_get() lookup - confirm the asymmetry is intended.
 *
 * Returns the lookup error if the element cannot be found, otherwise the
 * result of __p4tc_extern_digest_pack().
 */
__bpf_kfunc int
xdp_p4tc_extern_digest_pack(struct xdp_buff *ctx,
			    struct p4tc_ext_bpf_params *params)
{
	struct p4tc_pipeline *pipeline;
	struct p4tc_extern_common *elem;
	int ret;

	elem = p4tc_xdp_ext_common_elem_get(ctx, &pipeline, params);
	if (IS_ERR(elem))
		return PTR_ERR(elem);

	ret = __p4tc_extern_digest_pack(pipeline, elem, params);

	/* Drop the element obtained by the lookup above. */
	p4tc_ext_common_elem_put(pipeline, elem);

	return ret;
}

__bpf_kfunc_end_defs();

/* BTF ids of the two digest pack kfuncs defined in this file. */
BTF_KFUNCS_START(p4tc_kfunc_ext_digest_set_defs)
BTF_ID_FLAGS(func, bpf_p4tc_extern_digest_pack);
BTF_ID_FLAGS(func, xdp_p4tc_extern_digest_pack);
BTF_KFUNCS_END(p4tc_kfunc_ext_digest_set_defs)

/*
 * kfunc id set owned by this module; digest_init_module() registers it for
 * both the SCHED_ACT and XDP program types.
 */
static const struct btf_kfunc_id_set p4tc_kfunc_ext_digest_set = {
.owner = THIS_MODULE,
.set = &p4tc_kfunc_ext_digest_set_defs,
};

/*
 * Extern ops registered with P4TC for the digest extern. The kind string
 * carries an "ext_" prefix, which the constructor strips with
 * skip_prepended_ext() when allocating an instance.
 */
static struct p4tc_extern_ops ext_digest_ops = {
.kind = "ext_digest",
/* Per-instance allocation size: the digest-specific instance struct. */
.size = sizeof(struct p4tc_extern_digest_inst),
.id = EXTERN_DIGEST_ID,
.construct = p4tc_extern_digest_constr,
.deconstruct = p4tc_extern_digest_deconstr,
.owner = THIS_MODULE,
};

/* Module metadata. */
MODULE_AUTHOR("Mojatatu Networks, Inc");
MODULE_DESCRIPTION("Digest extern");
MODULE_LICENSE("GPL");

/*
 * Module init: set up the skb pool, register the digest extern with P4TC
 * and expose the pack kfuncs to SCHED_ACT and XDP BPF programs.
 *
 * Returns the result of the last registration on success, or the negative
 * error from the registration that failed; on failure everything set up so
 * far is undone.
 */
static int __init digest_init_module(void)
{
	int ret;

	/*
	 * Initialise the pool (and its lock) before registering anything:
	 * the pack path takes skb_pool.lock, so the pool must be valid by
	 * the time the extern and its kfuncs become visible.
	 */
	alloc_skb_pull();

	ret = p4tc_register_extern(&ext_digest_ops);
	if (ret < 0) {
		pr_info("Failed to register Digest TC extern");
		goto free_pool;
	}

	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT,
					&p4tc_kfunc_ext_digest_set);
	if (ret < 0) {
		pr_info("Failed to register Digest TC kfuncs");
		goto unregister_extern;
	}

	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
					&p4tc_kfunc_ext_digest_set);
	if (ret < 0) {
		pr_info("Failed to register Digest XDP kfuncs");
		goto unregister_extern;
	}

	return ret;

unregister_extern:
	p4tc_unregister_extern(&ext_digest_ops);
free_pool:
	free_skb_pull();
	return ret;
}

/*
 * Module exit: unregister the extern first, then release the pooled skbs.
 */
static void __exit digest_cleanup_module(void)
{
p4tc_unregister_extern(&ext_digest_ops);
free_skb_pull();
}

/* Hook the init/exit handlers into module load and unload. */
module_init(digest_init_module);
module_exit(digest_cleanup_module);

0 comments on commit f661e03

Please sign in to comment.