Suricata中的eBPF解析

网友投稿 394 2022-10-11


Suricata中的eBPF解析

Suricata中的eBPF解析

Suricata中的eBPF代码位于ebpf目录中,包括bypass_filter.c、filter.c、lb.c、vlan_filter.c、xdp_filter.c、xdp_lb.c文件。

Filter

#define LINUX_VERSION_CODE 263682 struct bpf_map_def SEC("maps") ipv4_drop = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = 32768, }; // vlan头结构,共四个字节 struct vlan_hdr { // tci:3bit优先级,1bit CFI,12bit vlan id __u16 h_vlan_TCI; // 上层协议 __u16 h_vlan_encapsulated_proto; }; static __always_inline int ipv4_filter(struct __sk_buff *skb) { __u32 nhoff; __u32 *value; __u32 ip = 0; // 获取偏移量 nhoff = skb->cb[0]; // 源IP ip = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); // 查找源IP value = bpf_map_lookup_elem(&ipv4_drop, &ip); if (value) { // 找到了,丢弃并更新计数器 #if DEBUG char fmt[] = "Found value for saddr: %u\n"; bpf_trace_printk(fmt, sizeof(fmt), value); #endif *value = *value + 1; return 0; } // 目的IP ip = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); // 查找目的IP value = bpf_map_lookup_elem(&ipv4_drop, &ip); if (value) { // 找到了,丢弃并更新计数器 #if DEBUG char fmt[] = "Found value for daddr: %u\n"; bpf_trace_printk(fmt, sizeof(fmt), value); #endif *value = *value + 1; return 0; } #if DEBUG char fmt[] = "Nothing so ok\n"; bpf_trace_printk(fmt, sizeof(fmt)); #endif return -1; } // 暂未实现 static __always_inline int ipv6_filter(struct __sk_buff *skb) { return -1; } int SEC("filter") hashfilter(struct __sk_buff *skb) { // 头偏移量为以太网头长度(14字节) __u32 nhoff = ETH_HLEN; // 获取ip协议 __u16 proto = load_half(skb, offsetof(struct ethhdr, h_proto)); // 对于VLAN或者QinQ帧 if (proto == ETH_P_8021AD || proto == ETH_P_8021Q) { // 解1层vlan,获取上层协议 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); // 更新头偏移量 nhoff += sizeof(struct vlan_hdr); } // 保存当前偏移量 skb->cb[0] = nhoff; switch (proto) { // 执行ipv4过滤 case ETH_P_IP: return ipv4_filter(skb); // 执行ipv6过滤 case ETH_P_IPV6: return ipv6_filter(skb); default: break; } return -1; } // license char __license[] SEC("license") = "GPL"; // 内核版本4.6.2 __u32 __version SEC("version") = LINUX_VERSION_CODE;

Bypass Filter

#define LINUX_VERSION_CODE 263682 // ipv4流定义 struct flowv4_keys { __u32 src; __u32 dst; union { __u32 ports; __u16 port16[2]; }; __u8 ip_proto:1; __u16 vlan0:15; __u16 vlan1; }; // ipv6流定义 struct flowv6_keys { __u32 src[4]; __u32 dst[4]; union { __u32 ports; __u16 port16[2]; }; __u8 ip_proto:1; __u16 vlan0:15; __u16 vlan1; }; // 流量和包数统计量 struct pair { __u64 packets; __u64 bytes; }; struct bpf_map_def SEC("maps") flow_table_v4 = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(struct flowv4_keys), .value_size = sizeof(struct pair), .max_entries = 32768, }; struct bpf_map_def SEC("maps") flow_table_v6 = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(struct flowv6_keys), .value_size = sizeof(struct pair), .max_entries = 32768, }; struct vlan_hdr { __u16 h_vlan_TCI; __u16 h_vlan_encapsulated_proto; }; /** * IPv4 filter * * \return 0 to drop packet out and -1 to accept it */ static __always_inline int ipv4_filter(struct __sk_buff *skb, __u16 vlan0, __u16 vlan1) { __u32 nhoff, verlen; struct flowv4_keys tuple; struct pair *value; __u16 port; __u8 ip_proto; nhoff = skb->cb[0]; // 获取上层协议 ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); /* only support TCP and UDP for now */ switch (ip_proto) { // tcp case IPPROTO_TCP: tuple.ip_proto = 1; break; // udp case IPPROTO_UDP: tuple.ip_proto = 0; break; default: return -1; } // 元组(ip、端口、协议、vlan) tuple.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); tuple.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); nhoff += (verlen & 0xF) << 2; tuple.ports = load_word(skb, nhoff); port = tuple.port16[1]; tuple.port16[1] = tuple.port16[0]; tuple.port16[0] = port; tuple.vlan0 = vlan0; tuple.vlan1 = vlan1; #if 0 if ((tuple.port16[0] == 22) || (tuple.port16[1] == 22)) { __u16 sp = tuple.port16[0]; //__u16 dp = tuple.port16[1]; char fmt[] = "Parsed SSH flow: %u %d -> %u\n"; bpf_trace_printk(fmt, sizeof(fmt), tuple.src, sp, tuple.dst); } #endif /* Test if src is in hash */ // 查找流 value = bpf_map_lookup_elem(&flow_table_v4, &tuple); if (value) { #if 0 { __u16 sp = tuple.port16[0]; //__u16 dp = tuple.port16[1]; char bfmt[] = "Found flow: %u %d -> %u\n"; bpf_trace_printk(bfmt, sizeof(bfmt), tuple.src, sp, tuple.dst); } #endif // 找到了,更新流量和包数 value->packets++; value->bytes += skb->len; return 0; } // 未找到 return -1; } /** * IPv6 filter * * \return 0 to drop packet out and -1 to accept it */ static __always_inline int ipv6_filter(struct __sk_buff *skb, __u16 vlan0, __u16 vlan1) { __u32 nhoff; __u8 nhdr; struct flowv6_keys tuple; struct pair *value; __u16 port; nhoff = skb->cb[0]; /* get next header */ nhdr = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); /* only support direct TCP and UDP for now */ switch (nhdr) { case IPPROTO_TCP: tuple.ip_proto = 1; break; case IPPROTO_UDP: tuple.ip_proto = 0; break; default: return -1; } tuple.src[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr)); tuple.src[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 4); tuple.src[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 8); tuple.src[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + 12); tuple.dst[0] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr)); tuple.dst[1] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 4); tuple.dst[2] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 8); tuple.dst[3] = load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + 12); /* Parse TCP */ tuple.ports = load_word(skb, nhoff + 40 /* IPV6_HEADER_LEN */); port = tuple.port16[1]; tuple.port16[1] = tuple.port16[0]; tuple.port16[0] = port; tuple.vlan0 = vlan0; tuple.vlan1 = vlan1; //char fmt[] = "Now Got IPv6 port %u and %u\n"; //bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]); /* Test if src is in hash */ value = bpf_map_lookup_elem(&flow_table_v6, &tuple); if (value) { //char fmt[] = "Got a match IPv6: %u and %u\n"; //bpf_trace_printk(fmt, sizeof(fmt), tuple.port16[0], tuple.port16[1]); value->packets++; value->bytes += skb->len; return 0; } return -1; } /** * filter function * * It is loaded in kernel by Suricata that uses the section name specified * by the SEC call to find it in the Elf binary object and load it. * * \return 0 to drop packet out and -1 to accept it */ int SEC("filter") hashfilter(struct __sk_buff *skb) { __u32 nhoff = ETH_HLEN; __u16 proto = load_half(skb, offsetof(struct ethhdr, h_proto)); // 第一层vlan id __u16 vlan0 = skb->vlan_tci & 0x0fff; __u16 vlan1 = 0; // vlan或者qinq if (proto == ETH_P_8021AD || proto == ETH_P_8021Q) { // 上层协议 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); #if VLAN_TRACKING /* one vlan layer is stripped by OS so get vlan 1 at first pass */ // 第二层vlan id vlan1 = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_TCI)) & 0x0fff; #endif nhoff += sizeof(struct vlan_hdr); } skb->cb[0] = nhoff; switch (proto) { case ETH_P_IP: // ipv4过滤 return ipv4_filter(skb, vlan0, vlan1); case ETH_P_IPV6: // ipv6过滤 return ipv6_filter(skb, vlan0, vlan1); default: #if 0 { char fmt[] = "Got proto %u\n"; bpf_trace_printk(fmt, sizeof(fmt), h_proto); break; } #else break; #endif } return -1; } char __license[] SEC("license") = "GPL"; __u32 __version SEC("version") = LINUX_VERSION_CODE;

Load Balancer

#define LINUX_VERSION_CODE 263682 #ifndef __section # define __section(x) __attribute__((section(x), used)) #endif struct vlan_hdr { __u16 h_vlan_TCI; __u16 h_vlan_encapsulated_proto; }; static __always_inline int ipv4_hash(struct __sk_buff *skb) { __u32 nhoff; __u32 src, dst; nhoff = skb->cb[0]; // 以源ip和目的ip计算哈希 src = load_word(skb, nhoff + offsetof(struct iphdr, saddr)); dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr)); #if 0 char fmt[] = "Got addr: %x -> %x at %d\n"; bpf_trace_printk(fmt, sizeof(fmt), src, dst, nhoff); //char fmt2[] = "Got hash %u\n"; //bpf_trace_printk(fmt2, sizeof(fmt2), src + dst); #endif return src + dst; } static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) { __u64 w0 = load_word(ctx, off); __u64 w1 = load_word(ctx, off + 4); __u64 w2 = load_word(ctx, off + 8); __u64 w3 = load_word(ctx, off + 12); return (__u32)(w0 ^ w1 ^ w2 ^ w3); } static __always_inline int ipv6_hash(struct __sk_buff *skb) { __u32 nhoff; __u32 src_hash, dst_hash; // 以源ip和目的ip计算哈希 nhoff = skb->cb[0]; src_hash = ipv6_addr_hash(skb, nhoff + offsetof(struct ipv6hdr, saddr)); dst_hash = ipv6_addr_hash(skb, nhoff + offsetof(struct ipv6hdr, daddr)); return src_hash + dst_hash; } int __section("loadbalancer") lb(struct __sk_buff *skb) { __u64 nhoff = ETH_HLEN; // 上层协议 __u16 proto = load_half(skb, ETH_HLEN - ETH_TLEN); __u16 ret = proto; switch (proto) { // 处理vlan和qinq case ETH_P_8021Q: case ETH_P_8021AD: { // 解第二层vlan __u16 vproto = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); switch(vproto) { case ETH_P_8021AD: case ETH_P_8021Q: nhoff += sizeof(struct vlan_hdr); proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, h_vlan_encapsulated_proto)); break; default: proto = vproto; } nhoff += sizeof(struct vlan_hdr); skb->cb[0] = nhoff; switch (proto) { // 计算ipv4哈希 case ETH_P_IP: #if 0 { char fmt[] = "ipv4\n"; bpf_trace_printk(fmt, sizeof(fmt));} #endif ret = ipv4_hash(skb); break; // 计算ipv6哈希 case ETH_P_IPV6: ret = ipv6_hash(skb); break; default: #if 0 { char fmt[] = "Dflt VLAN proto %u\n"; bpf_trace_printk(fmt, sizeof(fmt), proto); break; } #else break; #endif } } break; case ETH_P_IP: // 计算ipv4哈希 ret = ipv4_hash(skb); break; case ETH_P_IPV6: // 计算ipv6哈希 ret = ipv6_hash(skb); break; default: #if 0 { char fmt[] = "Got proto %x\n"; bpf_trace_printk(fmt, sizeof(fmt), proto); break; } #else break; #endif } return ret; } char __license[] __section("license") = "GPL"; /* libbpf needs version section to check sync of eBPF code and kernel * but socket filter don't need it */ __u32 __version __section("version") = LINUX_VERSION_CODE;

Vlan Filter

#define LINUX_VERSION_CODE 263682 int SEC("filter") hashfilter(struct __sk_buff *skb) { __u16 vlan_id = skb->vlan_tci & 0x0fff; /* accept VLAN 2 and 4 and drop the rest */ switch (vlan_id) { case 2: case 4: return -1; default: return 0; } return 0; } char __license[] SEC("license") = "GPL"; __u32 __version SEC("version") = LINUX_VERSION_CODE;

XDP Filter

#define LINUX_VERSION_CODE 263682 /* Hashing initval */ #define INITVAL 15485863 /* Set BUILD_CPUMAP to 0 if you want to run XDP bypass on kernel * older than 4.15 */ #define BUILD_CPUMAP 1 /* Increase CPUMAP_MAX_CPUS if ever you have more than 64 CPUs */ #define CPUMAP_MAX_CPUS 64 /* Set to 1 to bypass encrypted packets of TLS sessions. Suricata will * be blind to these packets or forged packets looking alike. */ #define ENCRYPTED_TLS_BYPASS 0 /* Set it to 0 if for example you plan to use the XDP filter in a * network card that don't support per CPU value (like netronome) */ #define USE_PERCPU_HASH 1 /* Set it to 0 if your XDP subsystem don't handle XDP_REDIRECT (like netronome) */ #define GOT_TX_PEER 1 /* set to non 0 to load balance in hardware mode on RSS_QUEUE_NUMBERS queues * and unset BUILD_CPUMAP (number must be a power of 2 for netronome) */ #define RSS_QUEUE_NUMBERS 32 /* no vlan tracking: set it to 0 if you don't use VLAN for tracking. Can * also be used as workaround of some hardware offload issue */ #define VLAN_TRACKING 1 struct vlan_hdr { __u16 h_vlan_TCI; __u16 h_vlan_encapsulated_proto; }; struct flowv4_keys { __u32 src; __u32 dst; union { __u32 ports; __u16 port16[2]; }; __u8 ip_proto:1; __u16 vlan0:15; __u16 vlan1; }; struct flowv6_keys { __u32 src[4]; __u32 dst[4]; union { __u32 ports; __u16 port16[2]; }; __u8 ip_proto:1; __u16 vlan0:15; __u16 vlan1; }; struct pair { __u64 packets; __u64 bytes; }; struct bpf_map_def SEC("maps") flow_table_v4 = { #if USE_PERCPU_HASH .type = BPF_MAP_TYPE_PERCPU_HASH, #else .type = BPF_MAP_TYPE_HASH, #endif .key_size = sizeof(struct flowv4_keys), .value_size = sizeof(struct pair), .max_entries = 32768, }; struct bpf_map_def SEC("maps") flow_table_v6 = { #if USE_PERCPU_HASH .type = BPF_MAP_TYPE_PERCPU_HASH, #else .type = BPF_MAP_TYPE_HASH, #endif .key_size = sizeof(struct flowv6_keys), .value_size = sizeof(struct pair), .max_entries = 32768, }; #if ENCRYPTED_TLS_BYPASS struct bpf_map_def SEC("maps") tls_bypass_count = { #if USE_PERCPU_HASH .type = BPF_MAP_TYPE_PERCPU_ARRAY, #else .type = BPF_MAP_TYPE_ARRAY, #endif .key_size = sizeof(__u32), .value_size = sizeof(__u64), .max_entries = 1, }; #endif #if BUILD_CPUMAP /* Special map type that can XDP_REDIRECT frames to another CPU */ struct bpf_map_def SEC("maps") cpu_map = { .type = BPF_MAP_TYPE_CPUMAP, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = CPUMAP_MAX_CPUS, }; struct bpf_map_def SEC("maps") cpus_available = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = CPUMAP_MAX_CPUS, }; struct bpf_map_def SEC("maps") cpus_count = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = 1, }; #endif #if GOT_TX_PEER /* Map has only one element as we don't handle any sort of * routing for now. Key value set by user space is 0 and * value is the peer interface. */ struct bpf_map_def SEC("maps") tx_peer = { .type = BPF_MAP_TYPE_DEVMAP, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; /* single entry to indicate if we have peer, key value * set in user space is 0. It is only used to see if * a interface has a peer we need to send the information to */ struct bpf_map_def SEC("maps") tx_peer_int = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; #endif #define USE_GLOBAL_BYPASS 0 #if USE_GLOBAL_BYPASS /* single entry to indicate if global bypass switch is on */ struct bpf_map_def SEC("maps") global_bypass = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(char), .value_size = sizeof(char), .max_entries = 1, }; #endif // 获取tcp,udp源端口 static __always_inline int get_sport(void *trans_data, void *data_end, __u8 protocol) { struct tcphdr *th; struct udphdr *uh; switch (protocol) { case IPPROTO_TCP: th = (struct tcphdr *)trans_data; if ((void *)(th + 1) > data_end) return -1; return th->source; case IPPROTO_UDP: uh = (struct udphdr *)trans_data; if ((void *)(uh + 1) > data_end) return -1; return uh->source; default: return 0; } } // 获取tcp,udp目的端口 static __always_inline int get_dport(void *trans_data, void *data_end, __u8 protocol) { struct tcphdr *th; struct udphdr *uh; switch (protocol) { case IPPROTO_TCP: th = (struct tcphdr *)trans_data; if ((void *)(th + 1) > data_end) return -1; return th->dest; case IPPROTO_UDP: uh = (struct udphdr *)trans_data; if ((void *)(uh + 1) > data_end) return -1; return uh->dest; default: return 0; } } static int __always_inline filter_ipv4(struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, __u16 vlan0, __u16 vlan1) { struct iphdr *iph = data + nh_off; int dport; int sport; struct flowv4_keys tuple; struct pair *value; #if BUILD_CPUMAP || GOT_TX_PEER __u32 key0 = 0; #endif #if ENCRYPTED_TLS_BYPASS __u32 key1 = 0; __u32 *tls_count = NULL; #endif #if BUILD_CPUMAP __u32 cpu_dest; __u32 *cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); __u32 *cpu_selected; __u32 cpu_hash; #endif #if GOT_TX_PEER int *iface_peer; int tx_port = 0; #endif if ((void *)(iph + 1) > data_end) return XDP_PASS; // 元组 if (iph->protocol == IPPROTO_TCP) { tuple.ip_proto = 1; } else { tuple.ip_proto = 0; } tuple.src = iph->saddr; tuple.dst = iph->daddr; dport = get_dport(iph + 1, data_end, iph->protocol); if (dport == -1) return XDP_PASS; sport = get_sport(iph + 1, data_end, iph->protocol); if (sport == -1) return XDP_PASS; tuple.port16[0] = (__u16)sport; tuple.port16[1] = (__u16)dport; tuple.vlan0 = vlan0; tuple.vlan1 = vlan1; // 流表查询 value = bpf_map_lookup_elem(&flow_table_v4, &tuple); #if 0 { char fmt[] = "Current flow src: %u:%d\n"; char fmt1[] = "Current flow dst: %u:%d\n"; bpf_trace_printk(fmt, sizeof(fmt), tuple.src, tuple.port16[0]); bpf_trace_printk(fmt1, sizeof(fmt1), tuple.dst, tuple.port16[1]); } #endif if (value) { #if 0 char fmt[] = "Found flow v4: %u %d -> %d\n"; bpf_trace_printk(fmt, sizeof(fmt), tuple.src, sport, dport); char fmt[] = "Data: t:%lu p:%lu n:%lu\n"; bpf_trace_printk(fmt, sizeof(fmt), value->time, value->packets, value->bytes); #endif #if USE_PERCPU_HASH // 存在,更新流量和包数 value->packets++; value->bytes += data_end - data; #else // 存在,更新流量和包数(原子操作) __sync_fetch_and_add(&value->packets, 1); __sync_fetch_and_add(&value->bytes, data_end - data); #endif #if GOT_TX_PEER iface_peer = bpf_map_lookup_elem(&tx_peer_int, &key0); if (!iface_peer) { return XDP_DROP; } else { return bpf_redirect_map(&tx_peer, tx_port, 0); } #else // 丢弃 return XDP_DROP; #endif } #if ENCRYPTED_TLS_BYPASS if ((dport == __constant_ntohs(443)) || (sport == __constant_ntohs(443))) { __u8 *app_data; /* drop application data for tls 1.2 */ /* FIXME better parsing */ nh_off += sizeof(struct iphdr) + sizeof(struct tcphdr); if (data_end > data + nh_off + 4) { app_data = data + nh_off; // tls加密数据 if (app_data[0] == 0x17 && app_data[1] == 0x3 && app_data[2] == 0x3) { // 根据key查找,更新tls加密数据计数器 tls_count = bpf_map_lookup_elem(&tls_bypass_count, &key1); if (tls_count) { #if USE_PERCPU_HASH tls_count++; #else __sync_fetch_and_add(tls_count, 1); #endif } #if GOT_TX_PEER iface_peer = bpf_map_lookup_elem(&tx_peer_int, &key0); if (!iface_peer) { return XDP_DROP; } else { return bpf_redirect_map(&tx_peer, tx_port, 0); } #else // 丢弃 return XDP_DROP; #endif } } } #endif #if BUILD_CPUMAP /* IP-pairs + protocol (UDP/TCP/ICMP) hit same CPU */ cpu_hash = tuple.src + tuple.dst; cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); if (cpu_max && *cpu_max) { cpu_dest = cpu_hash % *cpu_max; cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_dest); if (!cpu_selected) return XDP_ABORTED; cpu_dest = *cpu_selected; return bpf_redirect_map(&cpu_map, cpu_dest, 0); } else { return XDP_PASS; } #else #if RSS_QUEUE_NUMBERS /* IP-pairs + protocol (UDP/TCP/ICMP) hit same CPU */ __u32 xdp_hash = tuple.src + tuple.dst; xdp_hash = SuperFastHash((char *)&xdp_hash, 4, INITVAL + iph->protocol); ctx->rx_queue_index = xdp_hash % RSS_QUEUE_NUMBERS; #endif return XDP_PASS; #endif } static int __always_inline filter_ipv6(struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end, __u16 vlan0, __u16 vlan1) { struct ipv6hdr *ip6h = data + nh_off; int dport; int sport; struct flowv6_keys tuple; struct pair *value; #if BUILD_CPUMAP || GOT_TX_PEER __u32 key0 = 0; #endif #if BUILD_CPUMAP __u32 cpu_dest; int *cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); __u32 *cpu_selected; __u32 cpu_hash; #endif #if GOT_TX_PEER int tx_port = 0; int *iface_peer; #endif if ((void *)(ip6h + 1) > data_end) return 0; if (!((ip6h->nexthdr == IPPROTO_UDP) || (ip6h->nexthdr == IPPROTO_TCP))) return XDP_PASS; dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); if (dport == -1) return XDP_PASS; sport = get_sport(ip6h + 1, data_end, ip6h->nexthdr); if (sport == -1) return XDP_PASS; if (ip6h->nexthdr == IPPROTO_TCP) { tuple.ip_proto = 1; } else { tuple.ip_proto = 0; } __builtin_memcpy(tuple.src, ip6h->saddr.s6_addr32, sizeof(tuple.src)); __builtin_memcpy(tuple.dst, ip6h->daddr.s6_addr32, sizeof(tuple.dst)); tuple.port16[0] = sport; tuple.port16[1] = dport; tuple.vlan0 = vlan0; tuple.vlan1 = vlan1; value = bpf_map_lookup_elem(&flow_table_v6, &tuple); if (value) { #if 0 char fmt6[] = "Found IPv6 flow: %d -> %d\n"; bpf_trace_printk(fmt6, sizeof(fmt6), sport, dport); #endif #if USE_PERCPU_HASH value->packets++; value->bytes += data_end - data; #else __sync_fetch_and_add(&value->packets, 1); __sync_fetch_and_add(&value->bytes, data_end - data); #endif #if GOT_TX_PEER iface_peer = bpf_map_lookup_elem(&tx_peer_int, &key0); if (!iface_peer) { return XDP_DROP; } else { return bpf_redirect_map(&tx_peer, tx_port, 0); } #else return XDP_DROP; #endif } #if BUILD_CPUMAP /* IP-pairs + protocol (UDP/TCP/ICMP) hit same CPU */ cpu_hash = tuple.src[0] + tuple.dst[0]; cpu_hash += tuple.src[1] + tuple.dst[1]; cpu_hash += tuple.src[2] + tuple.dst[2]; cpu_hash += tuple.src[3] + tuple.dst[3]; cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL); if (cpu_max && *cpu_max) { cpu_dest = cpu_hash % *cpu_max; cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_dest); if (!cpu_selected) return XDP_ABORTED; cpu_dest = *cpu_selected; return bpf_redirect_map(&cpu_map, cpu_dest, 0); } else { return XDP_PASS; } #else #if RSS_QUEUE_NUMBERS /* IP-pairs + protocol (UDP/TCP/ICMP) hit same CPU */ __u32 xdp_hash = tuple.src[0] + tuple.dst[0]; xdp_hash += tuple.src[1] + tuple.dst[1]; xdp_hash += tuple.src[2] + tuple.dst[2]; xdp_hash += tuple.src[3] + tuple.dst[3]; xdp_hash = SuperFastHash((char *)&xdp_hash, 4, INITVAL); ctx->rx_queue_index = xdp_hash % RSS_QUEUE_NUMBERS; #endif return XDP_PASS; #endif } int SEC("xdp") xdp_hashfilter(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth = data; __u16 h_proto; __u64 nh_off; __u16 vlan0 = 0; __u16 vlan1 = 0; #if USE_GLOBAL_BYPASS int *iface_peer; char *g_switch = 0; char key0; int tx_port = 0; g_switch = bpf_map_lookup_elem(&global_bypass, &key0); if (g_switch && *g_switch) { iface_peer = bpf_map_lookup_elem(&tx_peer_int, &key0); if (!iface_peer) { return XDP_DROP; } else { return bpf_redirect_map(&tx_peer, tx_port, 0); } } #endif nh_off = sizeof(*eth); if (data + nh_off > data_end) return XDP_PASS; h_proto = eth->h_proto; // vlan和qinq处理 if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; vhdr = data + nh_off; nh_off += sizeof(struct vlan_hdr); if (data + nh_off > data_end) return XDP_PASS; h_proto = vhdr->h_vlan_encapsulated_proto; #if VLAN_TRACKING vlan0 = vhdr->h_vlan_TCI & 0x0fff; #else vlan0 = 0; #endif } if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; vhdr = data + nh_off; nh_off += sizeof(struct vlan_hdr); if (data + nh_off > data_end) return XDP_PASS; h_proto = vhdr->h_vlan_encapsulated_proto; #if VLAN_TRACKING vlan1 = vhdr->h_vlan_TCI & 0x0fff; #else vlan1 = 0; #endif } // 执行ipv4或ipv6过滤 if (h_proto == __constant_htons(ETH_P_IP)) return filter_ipv4(ctx, data, nh_off, data_end, vlan0, vlan1); else if (h_proto == __constant_htons(ETH_P_IPV6)) return filter_ipv6(ctx, data, nh_off, data_end, vlan0, vlan1); return XDP_PASS; } char __license[] SEC("license") = "GPL"; __u32 __version SEC("version") = LINUX_VERSION_CODE;

XDP Load Balancer

#define LINUX_VERSION_CODE 263682 /* Hashing initval */ #define INITVAL 15485863 /* Increase CPUMAP_MAX_CPUS if ever you have more than 128 CPUs */ #define CPUMAP_MAX_CPUS 128 struct vlan_hdr { __u16 h_vlan_TCI; __u16 h_vlan_encapsulated_proto; }; /* Special map type that can XDP_REDIRECT frames to another CPU */ struct bpf_map_def SEC("maps") cpu_map = { .type = BPF_MAP_TYPE_CPUMAP, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = CPUMAP_MAX_CPUS, }; struct bpf_map_def SEC("maps") cpus_available = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = CPUMAP_MAX_CPUS, }; struct bpf_map_def SEC("maps") cpus_count = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), .max_entries = 1, }; static int __always_inline hash_ipv4(void *data, void *data_end) { struct iphdr *iph = data; if ((void *)(iph + 1) > data_end) return XDP_PASS; __u32 key0 = 0; __u32 cpu_dest; __u32 *cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); __u32 *cpu_selected; __u32 cpu_hash; /* IP-pairs hit same CPU */ cpu_hash = iph->saddr + iph->daddr; cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL); if (cpu_max && *cpu_max) { cpu_dest = cpu_hash % *cpu_max; cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_dest); if (!cpu_selected) return XDP_ABORTED; cpu_dest = *cpu_selected; return bpf_redirect_map(&cpu_map, cpu_dest, 0); } else { return XDP_PASS; } } static int __always_inline hash_ipv6(void *data, void *data_end) { struct ipv6hdr *ip6h = data; if ((void *)(ip6h + 1) > data_end) return XDP_PASS; __u32 key0 = 0; __u32 cpu_dest; __u32 *cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); __u32 *cpu_selected; __u32 cpu_hash; /* IP-pairs hit same CPU */ cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0]; cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1]; cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2]; cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3]; cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL); if (cpu_max && *cpu_max) { cpu_dest = cpu_hash % *cpu_max; cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_dest); if (!cpu_selected) return XDP_ABORTED; cpu_dest = *cpu_selected; return bpf_redirect_map(&cpu_map, cpu_dest, 0); } else { return XDP_PASS; } return XDP_PASS; } static int __always_inline filter_gre(struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end) { struct iphdr *iph = data + nh_off; __u16 proto; struct gre_hdr { __be16 flags; __be16 proto; }; nh_off += sizeof(struct iphdr); struct gre_hdr *grhdr = (struct gre_hdr *)(iph + 1); if ((void *)(grhdr + 1) > data_end) return XDP_PASS; if (grhdr->flags & (GRE_VERSION|GRE_ROUTING)) return XDP_PASS; nh_off += 4; proto = grhdr->proto; if (grhdr->flags & GRE_CSUM) nh_off += 4; if (grhdr->flags & GRE_KEY) nh_off += 4; if (grhdr->flags & GRE_SEQ) nh_off += 4; /* Update offset to skip ERPSAN header if we have one */ if (proto == __constant_htons(ETH_P_ERSPAN)) { nh_off += 8; } if (data + nh_off > data_end) return XDP_PASS; if (bpf_xdp_adjust_head(ctx, 0 + nh_off)) return XDP_PASS; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; /* we have now data starting at Ethernet header */ struct ethhdr *eth = data; proto = eth->h_proto; /* we want to hash on IP so we need to get to ip hdr */ nh_off = sizeof(*eth); if (data + nh_off > data_end) return XDP_PASS; /* we need to increase offset and update protocol * in the case we have VLANs */ if (proto == __constant_htons(ETH_P_8021Q)) { struct vlan_hdr *vhdr = (struct vlan_hdr *)(data + nh_off); if ((void *)(vhdr + 1) > data_end) return XDP_PASS; proto = vhdr->h_vlan_encapsulated_proto; nh_off += sizeof(struct vlan_hdr); } if (data + nh_off > data_end) return XDP_PASS; /* proto should now be IP style */ if (proto == __constant_htons(ETH_P_IP)) { return hash_ipv4(data + nh_off, data_end); } else if (proto == __constant_htons(ETH_P_IPV6)) { return hash_ipv6(data + nh_off, data_end); } else return XDP_PASS; } static int __always_inline filter_ipv4(struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end) { struct iphdr *iph = data + nh_off; if ((void *)(iph + 1) > data_end) return XDP_PASS; // 处理gre隧道协议 if (iph->protocol == IPPROTO_GRE) { return filter_gre(ctx, data, nh_off, data_end); } // 计算ipv4哈希 return hash_ipv4(data + nh_off, data_end); } static int __always_inline filter_ipv6(struct xdp_md *ctx, void *data, __u64 nh_off, void *data_end) { struct ipv6hdr *ip6h = data + nh_off; return hash_ipv6((void *)ip6h, data_end); } int SEC("xdp") xdp_loadfilter(struct xdp_md *ctx) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct ethhdr *eth = data; __u16 h_proto; __u64 nh_off; nh_off = sizeof(*eth); if (data + nh_off > data_end) return XDP_PASS; h_proto = eth->h_proto; #if 0 if (h_proto != __constant_htons(ETH_P_IP)) { char fmt[] = "Current proto: %u\n"; bpf_trace_printk(fmt, sizeof(fmt), h_proto); } #endif if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; vhdr = data + nh_off; nh_off += sizeof(struct vlan_hdr); if (data + nh_off > data_end) return XDP_PASS; h_proto = vhdr->h_vlan_encapsulated_proto; } if (h_proto == __constant_htons(ETH_P_8021Q) || h_proto == __constant_htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; vhdr = data + nh_off; nh_off += sizeof(struct vlan_hdr); if (data + nh_off > data_end) return XDP_PASS; h_proto = vhdr->h_vlan_encapsulated_proto; } if (h_proto == __constant_htons(ETH_P_IP)) return filter_ipv4(ctx, data, nh_off, data_end); else if (h_proto == __constant_htons(ETH_P_IPV6)) return filter_ipv6(ctx, data, nh_off, data_end); return XDP_PASS; } char __license[] SEC("license") = "GPL"; __u32 __version SEC("version") = LINUX_VERSION_CODE;


版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:室外光纤资源管理——可视化管理平台
下一篇:Spring Cloud Gateway 内存溢出的解决方案
相关文章

 发表评论

暂时没有评论,来抢沙发吧~