From 40bbc1d81ab714bac4b0b55fb113f9801822cfbe Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Fri, 8 Sep 2023 22:45:58 +0800 Subject: [PATCH] Add sample usage for BPF_PROG_TYPE_NETFILTER Signed-off-by: David Wang <00107082@163.com> --- headers/vmlinux/vmlinux_net.h | 238 +++++++++++++++----- headers/vmlinux/vmlinux_types.h | 3 + netfilter-bpf/Makefile | 9 + netfilter-bpf/README.md | 20 ++ netfilter-bpf/netfilter_ip4_blocklist.bpf.c | 63 ++++++ netfilter-bpf/netfilter_ip4_blocklist.c | 98 ++++++++ 6 files changed, 369 insertions(+), 62 deletions(-) create mode 100644 netfilter-bpf/Makefile create mode 100644 netfilter-bpf/README.md create mode 100644 netfilter-bpf/netfilter_ip4_blocklist.bpf.c create mode 100644 netfilter-bpf/netfilter_ip4_blocklist.c diff --git a/headers/vmlinux/vmlinux_net.h b/headers/vmlinux/vmlinux_net.h index 1afcc5d7..db998501 100644 --- a/headers/vmlinux/vmlinux_net.h +++ b/headers/vmlinux/vmlinux_net.h @@ -16,6 +16,9 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif */ +struct llist_node { + struct llist_node *next; +}; struct sk_buff { union { @@ -29,6 +32,7 @@ struct sk_buff { }; struct rb_node rbnode; struct list_head list; + struct llist_node ll_node; }; union { struct sock *sk; @@ -45,6 +49,7 @@ struct sk_buff { void (*destructor)(struct sk_buff *); }; struct list_head tcp_tsorted_anchor; + long unsigned int _sk_redir; }; long unsigned int _nfct; unsigned int len; @@ -59,73 +64,152 @@ struct sk_buff { __u8 peeked: 1; __u8 head_frag: 1; __u8 pfmemalloc: 1; + __u8 pp_recycle: 1; __u8 active_extensions; - __u32 headers_start[0]; - __u8 __pkt_type_offset[0]; - __u8 pkt_type: 3; - __u8 ignore_df: 1; - __u8 nf_trace: 1; - __u8 ip_summed: 2; - __u8 ooo_okay: 1; - __u8 l4_hash: 1; - __u8 sw_hash: 1; - __u8 wifi_acked_valid: 1; - __u8 wifi_acked: 1; - __u8 no_fcs: 1; - __u8 encapsulation: 1; - __u8 encap_hdr_csum: 1; - __u8 csum_valid: 1; - __u8 __pkt_vlan_present_offset[0]; - __u8 
vlan_present: 1; - __u8 csum_complete_sw: 1; - __u8 csum_level: 2; - __u8 csum_not_inet: 1; - __u8 dst_pending_confirm: 1; - __u8 ndisc_nodetype: 2; - __u8 ipvs_property: 1; - __u8 inner_protocol_type: 1; - __u8 remcsum_offload: 1; - __u8 offload_fwd_mark: 1; - __u8 offload_l3_fwd_mark: 1; - __u8 tc_skip_classify: 1; - __u8 tc_at_ingress: 1; - __u8 redirected: 1; - __u8 from_ingress: 1; - __u8 decrypted: 1; - __u16 tc_index; union { - __wsum csum; struct { - __u16 csum_start; - __u16 csum_offset; + __u8 __pkt_type_offset[0]; + __u8 pkt_type: 3; + __u8 ignore_df: 1; + __u8 dst_pending_confirm: 1; + __u8 ip_summed: 2; + __u8 ooo_okay: 1; + __u8 __mono_tc_offset[0]; + __u8 mono_delivery_time: 1; + __u8 tc_at_ingress: 1; + __u8 tc_skip_classify: 1; + __u8 remcsum_offload: 1; + __u8 csum_complete_sw: 1; + __u8 csum_level: 2; + __u8 inner_protocol_type: 1; + __u8 l4_hash: 1; + __u8 sw_hash: 1; + __u8 wifi_acked_valid: 1; + __u8 wifi_acked: 1; + __u8 no_fcs: 1; + __u8 encapsulation: 1; + __u8 encap_hdr_csum: 1; + __u8 csum_valid: 1; + __u8 ndisc_nodetype: 2; + __u8 ipvs_property: 1; + __u8 nf_trace: 1; + __u8 redirected: 1; + __u8 from_ingress: 1; + __u8 nf_skip_egress: 1; + __u8 slow_gro: 1; + __u8 csum_not_inet: 1; + __u16 tc_index; + u16 alloc_cpu; + union { + __wsum csum; + struct { + __u16 csum_start; + __u16 csum_offset; + }; + }; + __u32 priority; + int skb_iif; + __u32 hash; + union { + u32 vlan_all; + struct { + __be16 vlan_proto; + __u16 vlan_tci; + }; + }; + union { + unsigned int napi_id; + unsigned int sender_cpu; + }; + __u32 secmark; + union { + __u32 mark; + __u32 reserved_tailroom; + }; + union { + __be16 inner_protocol; + __u8 inner_ipproto; + }; + __u16 inner_transport_header; + __u16 inner_network_header; + __u16 inner_mac_header; + __be16 protocol; + __u16 transport_header; + __u16 network_header; + __u16 mac_header; }; + struct { + __u8 __pkt_type_offset[0]; + __u8 pkt_type: 3; + __u8 ignore_df: 1; + __u8 dst_pending_confirm: 1; + __u8 ip_summed: 2; 
+ __u8 ooo_okay: 1; + __u8 __mono_tc_offset[0]; + __u8 mono_delivery_time: 1; + __u8 tc_at_ingress: 1; + __u8 tc_skip_classify: 1; + __u8 remcsum_offload: 1; + __u8 csum_complete_sw: 1; + __u8 csum_level: 2; + __u8 inner_protocol_type: 1; + __u8 l4_hash: 1; + __u8 sw_hash: 1; + __u8 wifi_acked_valid: 1; + __u8 wifi_acked: 1; + __u8 no_fcs: 1; + __u8 encapsulation: 1; + __u8 encap_hdr_csum: 1; + __u8 csum_valid: 1; + __u8 ndisc_nodetype: 2; + __u8 ipvs_property: 1; + __u8 nf_trace: 1; + __u8 redirected: 1; + __u8 from_ingress: 1; + __u8 nf_skip_egress: 1; + __u8 slow_gro: 1; + __u8 csum_not_inet: 1; + __u16 tc_index; + u16 alloc_cpu; + union { + __wsum csum; + struct { + __u16 csum_start; + __u16 csum_offset; + }; + }; + __u32 priority; + int skb_iif; + __u32 hash; + union { + u32 vlan_all; + struct { + __be16 vlan_proto; + __u16 vlan_tci; + }; + }; + union { + unsigned int napi_id; + unsigned int sender_cpu; + }; + __u32 secmark; + union { + __u32 mark; + __u32 reserved_tailroom; + }; + union { + __be16 inner_protocol; + __u8 inner_ipproto; + }; + __u16 inner_transport_header; + __u16 inner_network_header; + __u16 inner_mac_header; + __be16 protocol; + __u16 transport_header; + __u16 network_header; + __u16 mac_header; + } headers; }; - __u32 priority; - int skb_iif; - __u32 hash; - __be16 vlan_proto; - __u16 vlan_tci; - union { - unsigned int napi_id; - unsigned int sender_cpu; - }; - __u32 secmark; - union { - __u32 mark; - __u32 reserved_tailroom; - }; - union { - __be16 inner_protocol; - __u8 inner_ipproto; - }; - __u16 inner_transport_header; - __u16 inner_network_header; - __u16 inner_mac_header; - __be16 protocol; - __u16 transport_header; - __u16 network_header; - __u16 mac_header; - __u32 headers_end[0]; sk_buff_data_t tail; sk_buff_data_t end; unsigned char *head; @@ -135,4 +219,34 @@ struct sk_buff { struct skb_ext *extensions; }; + + +struct iphdr { + __u8 ihl: 4; + __u8 version: 4; + __u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + __u8 
ttl; + __u8 protocol; + __sum16 check; + union { + struct { + __be32 saddr; + __be32 daddr; + }; + struct { + __be32 saddr; + __be32 daddr; + } addrs; + }; +}; + +struct bpf_nf_ctx { + const struct nf_hook_state *state; + struct sk_buff *skb; +}; + + #endif /* __VMLINUX_NET_H__ */ diff --git a/headers/vmlinux/vmlinux_types.h b/headers/vmlinux/vmlinux_types.h index d7b3bed0..f2010415 100644 --- a/headers/vmlinux/vmlinux_types.h +++ b/headers/vmlinux/vmlinux_types.h @@ -11,4 +11,7 @@ typedef __u64 u64; typedef s64 ktime_t; +typedef u32 uint32_t; + + #endif /* __VMLINUX_TYPES_H__ */ diff --git a/netfilter-bpf/Makefile b/netfilter-bpf/Makefile new file mode 100644 index 00000000..48a05475 --- /dev/null +++ b/netfilter-bpf/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +USER_TARGETS := netfilter_ip4_blocklist +BPF_TARGETS := netfilter_ip4_blocklist.bpf + + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk diff --git a/netfilter-bpf/README.md b/netfilter-bpf/README.md new file mode 100644 index 00000000..b5f6b775 --- /dev/null +++ b/netfilter-bpf/README.md @@ -0,0 +1,20 @@ +# Introduction + +BPF_PROG_TYPE_NETFILTER was introduced in kernel 6.4. With a new enough kernel, a bpf program can attach to netfilter hooks and handle packets in a similar way to iptables/nftables. As of 6.5.0, no bpf kfunc is implemented yet for DNAT/SNAT, so the only thing a bpf program can do is decide whether to DROP the packet or not. + +* netfilter_ip4_blocklist.c/netfilter_ip4_blocklist.bpf.c + +This sample code implements a simple ipv4 blocklist. +The bpf program drops a packet if its destination ip address hits a match in the map of type BPF_MAP_TYPE_LPM_TRIE. +The userspace code loads the bpf program, attaches it to netfilter's FORWARD/OUTPUT hooks, and then writes ip patterns into the bpf map. + + +# TODO + +This sample hard-codes the ip addresses to be blocked, just for demonstration. 
+It would be better to break the userspace program into two parts:
+* init program
+Loads bpf program and pin bpf program and map into somewhere under /sys/fs/bpf
+* interactive program
+add/delete/query ip blocklist via bpf map under /sys/fs/bpf
+
diff --git a/netfilter-bpf/netfilter_ip4_blocklist.bpf.c b/netfilter-bpf/netfilter_ip4_blocklist.bpf.c
new file mode 100644
index 00000000..fa3655a5
--- /dev/null
+++ b/netfilter-bpf/netfilter_ip4_blocklist.bpf.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux_local.h"
+#include "linux/bpf.h"
+#include <bpf/bpf_helpers.h>
+
+/* netfilter verdicts returned by the hook program */
+#define NF_DROP 0
+#define NF_ACCEPT 1
+
+int bpf_dynptr_from_skb(struct sk_buff *skb,
+		__u64 flags, struct bpf_dynptr *ptr__uninit) __ksym;
+void *bpf_dynptr_slice(const struct bpf_dynptr *ptr,
+		uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym;
+
+/* LPM trie key: prefix length in bits, then the address exactly as stored in the packet */
+struct ipv4_lpm_key {
+	__u32 prefixlen;
+	__u32 data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
+	__type(key, struct ipv4_lpm_key);
+	__type(value, __u32);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__uint(max_entries, 200);
+} ipv4_lpm_map SEC(".maps");
+
+/* Drop IPv4 packets whose destination address hits ipv4_lpm_map; accept everything else. */
+SEC("netfilter")
+int netfilter_ip4block(struct bpf_nf_ctx *ctx)
+{
+	struct sk_buff *skb = ctx->skb;
+	struct bpf_dynptr ptr;
+	struct iphdr *p, iph = {};
+	struct ipv4_lpm_key key;
+	__u32 *pvalue;
+
+	if (skb->len < sizeof(iph) || bpf_dynptr_from_skb(skb, 0, &ptr))
+		return NF_ACCEPT;
+	p = bpf_dynptr_slice(&ptr, 0, &iph, sizeof(iph));
+	if (!p)
+		return NF_ACCEPT;
+
+	/* ip4 only */
+	if (p->version != 4)
+		return NF_ACCEPT;
+
+	/* search p->daddr in trie, using the full 32-bit address as the lookup key */
+	key.prefixlen = 32;
+	key.data = p->daddr;
+	pvalue = bpf_map_lookup_elem(&ipv4_lpm_map, &key);
+	if (pvalue) {
+		/* cat /sys/kernel/debug/tracing/trace_pipe */
+		bpf_printk("rule matched with %u...\n", *pvalue);
+		return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+
+char _license[] SEC("license") = "GPL";
+
diff --git a/netfilter-bpf/netfilter_ip4_blocklist.c b/netfilter-bpf/netfilter_ip4_blocklist.c
new file mode
100644
index 00000000..6ef4a0f6
--- /dev/null
+++ b/netfilter-bpf/netfilter_ip4_blocklist.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/netfilter.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+/* Thin wrapper around the raw bpf(2) syscall; used below for BPF_LINK_CREATE. */
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+struct ipv4_lpm_key {
+	__u32 prefixlen;
+	__u32 data;
+};
+
+/* Load the bpf object, attach it to the IPv4 FORWARD and OUTPUT hooks, then fill the blocklist. */
+int main(int argc, char **argv)
+{
+	int prog_fd, map_fd;
+	int err;
+	struct bpf_object *obj;
+	struct bpf_program *prog;
+	union bpf_attr attr = { };
+
+	obj = bpf_object__open_file("./netfilter_ip4_blocklist.bpf.o", NULL);
+	if (libbpf_get_error(obj)) {
+		printf("fail to open bpf file\n");
+		return 1;
+	}
+	prog = bpf_object__find_program_by_name(obj, "netfilter_ip4block");
+	if (!prog) {
+		printf("fail to find bpf program\n");
+		return 1;
+	}
+	bpf_program__set_type(prog, BPF_PROG_TYPE_NETFILTER);
+	if (bpf_object__load(obj)) {
+		printf("loading BPF object file failed\n");
+		return 1;
+	}
+	map_fd = bpf_object__find_map_fd_by_name(obj, "ipv4_lpm_map");
+	if (map_fd < 0) {
+		printf("Fail to locate trie ipv4_lpm_map\n");
+		return 1;
+	}
+	/* attach to netfilter forward handler */
+	prog_fd = bpf_program__fd(prog);
+	attr.link_create.prog_fd = prog_fd;
+	attr.link_create.attach_type = BPF_NETFILTER;
+	attr.link_create.netfilter.pf = NFPROTO_IPV4;
+	attr.link_create.netfilter.hooknum = NF_INET_FORWARD;
+	attr.link_create.netfilter.priority = -128;
+	err = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+	if (err < 0) {
+		perror("Fail to link bpf program to netfilter forward hook");
+		return 1;
+	}
+	/* attach to netfilter output handler */
+	attr.link_create.netfilter.hooknum = NF_INET_LOCAL_OUT;
+	err = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+	if (err < 0) {
+		perror("Fail to link bpf program to netfilter output hook");
+		return 1;
+	}
+	printf("bpf program/map loaded....\n");
+	/* add rules */
+	{
+		/* rule id (the map value) i blocks 192.168.11.107/prefixlens[i] */
+		static const __u32 prefixlens[] = { 32, 24, 27 };
+		struct ipv4_lpm_key key;
+		__u32 value;
+		__u8 *p = (__u8 *) &key.data;
+
+		/* same as key.data = 0x6B0BA8C0; on a little-endian machine */
+		p[0] = 192;
+		p[1] = 168;
+		p[2] = 11;
+		p[3] = 107;
+		for (value = 0; value < sizeof(prefixlens) / sizeof(prefixlens[0]); value++) {
+			key.prefixlen = prefixlens[value];
+			if (bpf_map_update_elem(map_fd, &key, &value, BPF_ANY)) {
+				perror("Fail to insert blocklist rule");
+				return 1;
+			}
+		}
+		/* remove rule */
+		/* bpf_map_delete_elem(map_fd, &key); */
+		printf("rules inserted, ready to work\n");
+	}
+	/* stay alive: the link fds are neither pinned nor closed, so they live only as long as this process */
+	while (1)
+		sleep(600);
+	return 0;
+}