forked from dorkamotorka/transparent-proxy-ebpf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bpf-builtin.h
4900 lines (4689 loc) · 173 KB
/
bpf-builtin.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
 * Opaque struct declarations, listed alphabetically. Only the tags are
 * introduced here, so the helper prototypes that follow can take pointers
 * to these types without requiring their full definitions.
 */
struct __sk_buff;
struct bpf_dynptr;
struct bpf_fib_lookup;
struct bpf_perf_event_data;
struct bpf_perf_event_value;
struct bpf_pidns_info;
struct bpf_redir_neigh;
struct bpf_sk_lookup;
struct bpf_sock;
struct bpf_sock_addr;
struct bpf_sock_ops;
struct bpf_sock_tuple;
struct bpf_spin_lock;
struct bpf_sysctl;
struct bpf_tcp_sock;
struct bpf_timer;
struct bpf_tunnel_key;
struct bpf_xfrm_state;
struct btf_ptr;
struct cgroup;
struct file;
struct inode;
struct iphdr;
struct ipv6hdr;
struct linux_binprm;
struct mptcp_sock;
struct path;
struct pt_regs;
struct seq_file;
struct sk_msg_md;
struct sk_reuseport_md;
struct sockaddr;
struct socket;
struct task_struct;
struct tcp6_sock;
struct tcp_request_sock;
struct tcp_sock;
struct tcp_timewait_sock;
struct tcphdr;
struct udp6_sock;
struct unix_sock;
struct xdp_md;
/*
* bpf_map_lookup_elem
*
* Perform a lookup in *map* for an entry associated to *key*.
*
* Returns
* Map value associated to *key*, or **NULL** if no entry was
* found.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static void *(* const bpf_map_lookup_elem)(void *map, const void *key) = (void *)1;
/*
* bpf_map_update_elem
*
* Add or update the value of the entry associated to *key* in
* *map* with *value*. *flags* is one of:
*
* **BPF_NOEXIST**
* The entry for *key* must not exist in the map.
* **BPF_EXIST**
* The entry for *key* must already exist in the map.
* **BPF_ANY**
* No condition on the existence of the entry for *key*.
*
* Flag value **BPF_NOEXIST** cannot be used for maps of types
* **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
* elements always exist), the helper would return an error.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_map_update_elem)(void *map, const void *key,
	const void *value, __u64 flags) = (void *)2;
/*
* bpf_map_delete_elem
*
* Delete entry with *key* from *map*.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_map_delete_elem)(void *map, const void *key) = (void *)3;
/*
* bpf_probe_read
*
* For tracing programs, safely attempt to read *size* bytes from
* kernel space address *unsafe_ptr* and store the data in *dst*.
*
* Generally, use **bpf_probe_read_user**\ () or
* **bpf_probe_read_kernel**\ () instead.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_probe_read)(void *dst, __u32 size,
	const void *unsafe_ptr) = (void *)4;
/*
* bpf_ktime_get_ns
*
* Return the time elapsed since system boot, in nanoseconds.
* Does not include time the system was suspended.
* See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
*
* Returns
* Current *ktime*.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u64 (* const bpf_ktime_get_ns)(void) = (void *)5;
/*
* bpf_trace_printk
*
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
* to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
* available. It can take up to three additional **u64**
* arguments (as an eBPF helper, the total number of arguments is
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
* Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
* open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
* *\/sys/kernel/debug/tracing/trace_options* (see also the
* *README* file under the same directory). However, it usually
* defaults to something like:
*
* ::
*
* telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted
* msg>
*
* In the above:
*
* * ``telnet`` is the name of the current task.
* * ``470`` is the PID of the current task.
* * ``001`` is the CPU number on which the task is
* running.
* * In ``.N..``, each character refers to a set of
* options (whether irqs are enabled, scheduling
* options, whether hard/softirqs are running, level of
* preempt_disabled respectively). **N** means that
* **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
* are set.
* * ``419421.045894`` is a timestamp.
* * ``0x00000001`` is a fake value used by BPF for the
* instruction pointer register.
* * ``<formatted msg>`` is the message formatted with
* *fmt*.
*
* The conversion specifiers supported by *fmt* are similar, but
* more limited than for printk(). They are **%d**, **%i**,
* **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
* **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
* of field, padding with zeroes, etc.) is available, and the
* helper will return **-EINVAL** (but print nothing) if it
* encounters an unknown specifier.
*
* Also, note that **bpf_trace_printk**\ () is slow, and should
* only be used for debugging purposes. For this reason, a notice
* block (spanning several lines) is printed to kernel logs and
* states that the helper should not be used "for production use"
* the first time this helper is used (or more precisely, when
* **trace_printk**\ () buffers are allocated). For passing values
* to user space, perf events should be preferred.
*
* Returns
* The number of bytes written to the buffer, or a negative error
* in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_trace_printk)(const char *fmt, __u32 fmt_size,
	...) = (void *)6;
/*
* bpf_get_prandom_u32
*
* Get a pseudo-random number.
*
* From a security point of view, this helper uses its own
* pseudo-random internal state, and cannot be used to infer the
* seed of other random functions in the kernel. However, it is
* essential to note that the generator used by the helper is not
* cryptographically secure.
*
* Returns
* A random 32-bit unsigned value.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u32 (* const bpf_get_prandom_u32)(void) = (void *)7;
/*
* bpf_get_smp_processor_id
*
* Get the SMP (symmetric multiprocessing) processor id. Note that
* all programs run with migration disabled, which means that the
* SMP processor id is stable during all the execution of the
* program.
*
* Returns
* The SMP id of the processor running the program.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u32 (* const bpf_get_smp_processor_id)(void) = (void *)8;
/*
* bpf_skb_store_bytes
*
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. *flags* are a combination of
* **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
* checksum for the packet after storing the bytes) and
* **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
* **->swhash** and *skb*\ **->l4hash** to 0).
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset,
	const void *from, __u32 len,
	__u64 flags) = (void *)9;
/*
* bpf_l3_csum_replace
*
* Recompute the layer 3 (e.g. IP) checksum for the packet
* associated to *skb*. Computation is incremental, so the helper
* must know the former value of the header field that was
* modified (*from*), the new value of this field (*to*), and the
* number of bytes (2 or 4) for this field, stored in *size*.
* Alternatively, it is possible to store the difference between
* the previous and the new values of the header field in *to*, by
* setting *from* and *size* to 0. For both methods, *offset*
* indicates the location of the IP checksum within the packet.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
* flexibility and can handle sizes larger than 2 or 4 for the
* checksum to update.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset,
	__u64 from, __u64 to,
	__u64 size) = (void *)10;
/*
* bpf_l4_csum_replace
*
* Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
* packet associated to *skb*. Computation is incremental, so the
* helper must know the former value of the header field that was
* modified (*from*), the new value of this field (*to*), and the
* number of bytes (2 or 4) for this field, stored on the lowest
* four bits of *flags*. Alternatively, it is possible to store
* the difference between the previous and the new values of the
* header field in *to*, by setting *from* and the four lowest
* bits of *flags* to 0. For both methods, *offset* indicates the
* location of the layer 4 checksum within the packet. In addition to
* the size of the field, actual flags can be added to *flags* (bitwise
* OR). With **BPF_F_MARK_MANGLED_0**, a null checksum is left
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
* the checksum is to be computed against a pseudo-header.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
* flexibility and can handle sizes larger than 2 or 4 for the
* checksum to update.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset,
	__u64 from, __u64 to,
	__u64 flags) = (void *)11;
/*
* bpf_tail_call
*
* This special helper is used to trigger a "tail call", or in
* other words, to jump into another eBPF program. The same stack
* frame is used (but values on stack and in registers for the
* caller are not accessible to the callee). This mechanism allows
* for program chaining, either for raising the maximum number of
* available eBPF instructions, or to execute given programs in
* conditional blocks. For security reasons, there is an upper
* limit to the number of successive tail calls that can be
* performed.
*
* Upon call of this helper, the program attempts to jump into a
* program referenced at index *index* in *prog_array_map*, a
* special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
* *ctx*, a pointer to the context.
*
* If the call succeeds, the kernel immediately runs the first
* instruction of the new program. This is not a function call,
* and it never returns to the previous program. If the call
* fails, then the helper has no effect, and the caller continues
* to run its subsequent instructions. A call can fail if the
* destination program for the jump does not exist (i.e. *index*
* is superior to the number of entries in *prog_array_map*), or
* if the maximum number of tail calls has been reached for this
* chain of programs. This limit is defined in the kernel by the
* macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
* which is currently set to 33.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_tail_call)(void *ctx, void *prog_array_map,
	__u32 index) = (void *)12;
/*
* bpf_clone_redirect
*
* Clone and redirect the packet associated to *skb* to another
* net device of index *ifindex*. Both ingress and egress
* interfaces can be used for redirection. The **BPF_F_INGRESS**
* value in *flags* is used to make the distinction (ingress path
* is selected if the flag is present, egress path otherwise).
* This is the only flag supported for now.
*
* In comparison with **bpf_redirect**\ () helper,
* **bpf_clone_redirect**\ () has the associated cost of
* duplicating the packet buffer, but this can be executed out of
* the eBPF program. Conversely, **bpf_redirect**\ () is more
* efficient, but it is handled through an action code where the
* redirection happens only after the eBPF program has returned.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex,
	__u64 flags) = (void *)13;
/*
* bpf_get_current_pid_tgid
*
* Get the current pid and tgid.
*
* Returns
* A 64-bit integer containing the current tgid and pid, and
* created as such:
* *current_task*\ **->tgid << 32 \|**
* *current_task*\ **->pid**.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u64 (* const bpf_get_current_pid_tgid)(void) = (void *)14;
/*
* bpf_get_current_uid_gid
*
* Get the current uid and gid.
*
* Returns
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u64 (* const bpf_get_current_uid_gid)(void) = (void *)15;
/*
* bpf_get_current_comm
*
* Copy the **comm** attribute of the current task into *buf* of
* *size_of_buf*. The **comm** attribute contains the name of
* the executable (excluding the path) for the current task. The
* *size_of_buf* must be strictly positive. On success, the
* helper makes sure that the *buf* is NUL-terminated. On failure,
* it is filled with zeroes.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *)16;
/*
* bpf_get_cgroup_classid
*
* Retrieve the classid for the current task, i.e. for the net_cls
* cgroup to which *skb* belongs.
*
* This helper can be used on TC egress path, but not on ingress.
*
* The net_cls cgroup provides an interface to tag network packets
* based on a user-provided identifier for all traffic coming from
* the tasks belonging to the related cgroup. See also the related
* kernel documentation, available from the Linux sources in file
* *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
*
* The Linux kernel has two versions for cgroups: there are
* cgroups v1 and cgroups v2. Both are available to users, who can
* use a mixture of them, but note that the net_cls cgroup is for
* cgroup v1 only. This makes it incompatible with BPF programs
* run on cgroups, which is a cgroup-v2-only feature (a socket can
* only hold data for one version of cgroups at a time).
*
* This helper is only available if the kernel was compiled with
* the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
* "**y**" or to "**m**".
*
* Returns
* The classid, or 0 for the default unconfigured classid.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u32 (* const bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *)17;
/*
* bpf_skb_vlan_push
*
* Push a *vlan_tci* (VLAN tag control information) of protocol
* *vlan_proto* to the packet associated to *skb*, then update
* the checksum. Note that if *vlan_proto* is different from
* **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
* be **ETH_P_8021Q**.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto,
	__u16 vlan_tci) = (void *)18;
/*
* bpf_skb_vlan_pop
*
* Pop a VLAN header from the packet associated to *skb*.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *)19;
/*
* bpf_skb_get_tunnel_key
*
* Get tunnel metadata. This helper takes a pointer *key* to an
* empty **struct bpf_tunnel_key** of *size*, that will be
* filled with tunnel metadata for the packet associated to *skb*.
* The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
* indicates that the tunnel is based on IPv6 protocol instead of
* IPv4.
*
* The **struct bpf_tunnel_key** is an object that generalizes the
* principal parameters used by various tunneling protocols into a
* single struct. This way, it can be used to easily make a
* decision based on the contents of the encapsulation header,
* "summarized" in this struct. In particular, it holds the IP
* address of the remote end (IPv4 or IPv6, depending on the case)
* in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
* this struct exposes the *key*\ **->tunnel_id**, which is
* generally mapped to a VNI (Virtual Network Identifier), making
* it programmable together with the **bpf_skb_set_tunnel_key**\
* () helper.
*
* Let's imagine that the following code is part of a program
* attached to the TC ingress interface, on one end of a GRE
* tunnel, and is supposed to filter out all messages coming from
* remote ends with IPv4 address other than 10.0.0.1:
*
* ::
*
* int ret;
* struct bpf_tunnel_key key = {};
*
* ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
* if (ret < 0)
* return TC_ACT_SHOT; // drop packet
*
* if (key.remote_ipv4 != 0x0a000001)
* return TC_ACT_SHOT; // drop packet
*
* return TC_ACT_OK; // accept packet
*
* This interface can also be used with all encapsulation devices
* that can operate in "collect metadata" mode: instead of having
* one network device per specific configuration, the "collect
* metadata" mode only requires a single device where the
* configuration can be extracted from this helper.
*
* This can be used together with various tunnels such as VXLan,
* Geneve, GRE or IP in IP (IPIP).
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_skb_get_tunnel_key)(struct __sk_buff *skb,
	struct bpf_tunnel_key *key, __u32 size,
	__u64 flags) = (void *)20;
/*
* bpf_skb_set_tunnel_key
*
* Populate tunnel metadata for packet associated to *skb*. The
* tunnel metadata is set to the contents of *key*, of *size*. The
* *flags* can be set to a combination of the following values:
*
* **BPF_F_TUNINFO_IPV6**
* Indicate that the tunnel is based on IPv6 protocol
* instead of IPv4.
* **BPF_F_ZERO_CSUM_TX**
* For IPv4 packets, add a flag to tunnel metadata
* indicating that checksum computation should be skipped
* and checksum set to zeroes.
* **BPF_F_DONT_FRAGMENT**
* Add a flag to tunnel metadata indicating that the
* packet should not be fragmented.
* **BPF_F_SEQ_NUMBER**
* Add a flag to tunnel metadata indicating that a
* sequence number should be added to tunnel header before
* sending the packet. This flag was added for GRE
* encapsulation, but might be used with other protocols
* as well in the future.
* **BPF_F_NO_TUNNEL_KEY**
* Add a flag to tunnel metadata indicating that no tunnel
* key should be set in the resulting tunnel header.
*
* Here is a typical usage on the transmit path:
*
* ::
*
* struct bpf_tunnel_key key;
* populate key ...
* bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
* bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
*
* See also the description of the **bpf_skb_get_tunnel_key**\ ()
* helper for additional information.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_skb_set_tunnel_key)(struct __sk_buff *skb,
	struct bpf_tunnel_key *key, __u32 size,
	__u64 flags) = (void *)21;
/*
* bpf_perf_event_read
*
* Read the value of a perf event counter. This helper relies on a
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
* the perf event counter is selected when *map* is updated with
* perf event file descriptors. The *map* is an array whose size
* is the number of available CPUs, and each cell contains a value
* relative to one CPU. The value to retrieve is indicated by
* *flags*, that contains the index of the CPU to look up, masked
* with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
* **BPF_F_CURRENT_CPU** to indicate that the value for the
* current CPU should be retrieved.
*
* Note that before Linux 4.13, only hardware perf events can be
* retrieved.
*
* Also, be aware that the newer helper
* **bpf_perf_event_read_value**\ () is recommended over
* **bpf_perf_event_read**\ () in general. The latter has some ABI
* quirks where error and counter value are used as a return code
* (which is wrong to do since ranges may overlap). This issue is
* fixed with **bpf_perf_event_read_value**\ (), which at the same
* time provides more features over the **bpf_perf_event_read**\
* () interface. Please refer to the description of
* **bpf_perf_event_read_value**\ () for details.
*
* Returns
* The value of the perf event counter read from the map, or a
* negative error code in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u64 (* const bpf_perf_event_read)(void *map, __u64 flags) = (void *)22;
/*
* bpf_redirect
*
* Redirect the packet to another net device of index *ifindex*.
* This helper is somewhat similar to **bpf_clone_redirect**\
* (), except that the packet is not cloned, which provides
* increased performance.
*
* Except for XDP, both ingress and egress interfaces can be used
* for redirection. The **BPF_F_INGRESS** value in *flags* is used
* to make the distinction (ingress path is selected if the flag
* is present, egress path otherwise). Currently, XDP only
* supports redirection to the egress interface, and accepts no
* flag at all.
*
* The same effect can also be attained with the more generic
* **bpf_redirect_map**\ (), which uses a BPF map to store the
* redirect target instead of providing it directly to the helper.
*
* Returns
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
* are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
* error.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_redirect)(__u32 ifindex, __u64 flags) = (void *)23;
/*
* bpf_get_route_realm
*
* Retrieve the realm of the route, that is to say the
* **tclassid** field of the destination for the *skb*. The
* identifier retrieved is a user-provided tag, similar to the
* one used with the net_cls cgroup (see description for
* **bpf_get_cgroup_classid**\ () helper), but here this tag is
* held by a route (a destination entry), not by a task.
*
* Retrieving this identifier works with the clsact TC egress hook
* (see also **tc-bpf(8)**), or alternatively on conventional
* classful egress qdiscs, but not on TC ingress path. In case of
* clsact TC egress hook, this has the advantage that, internally,
* the destination entry has not been dropped yet in the transmit
* path. Therefore, the destination entry does not need to be
* artificially held via **netif_keep_dst**\ () for a classful
* qdisc until the *skb* is freed.
*
* This helper is available only if the kernel was compiled with
* **CONFIG_IP_ROUTE_CLASSID** configuration option.
*
* Returns
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static __u32 (* const bpf_get_route_realm)(struct __sk_buff *skb) = (void *)24;
/*
* bpf_perf_event_output
*
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
* event must have the following attributes: **PERF_SAMPLE_RAW**
* as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
* **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
*
* The *flags* are used to indicate the index in *map* for which
* the value must be put, masked with **BPF_F_INDEX_MASK**.
* Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
* to indicate that the index of the current CPU core should be
* used.
*
* The value to write, of *size*, is passed through eBPF stack and
* pointed by *data*.
*
* The context of the program, *ctx*, also needs to be passed to the
* helper.
*
* In user space, a program willing to read the values needs to
* call **perf_event_open**\ () on the perf event (either for
* one or for all CPUs) and to store the file descriptor into the
* *map*. This must be done before the eBPF program can send data
* into it. An example is available in file
* *samples/bpf/trace_output_user.c* in the Linux kernel source
* tree (the eBPF program counterpart is in
* *samples/bpf/trace_output_kern.c*).
*
* **bpf_perf_event_output**\ () achieves better performance
* than **bpf_trace_printk**\ () for sharing data with user
* space, and is much more suitable for streaming data from eBPF
* programs.
*
* Note that this helper is not restricted to tracing use cases
* and can be used with programs attached to TC or XDP as well,
* where it allows for passing data to user space listeners. Data
* can be:
*
* * Only custom structs,
* * Only the packet payload, or
* * A combination of both.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_perf_event_output)(void *ctx, void *map, __u64 flags,
	void *data, __u64 size) = (void *)25;
/*
* bpf_skb_load_bytes
*
* This helper was provided as an easy way to load data from a
* packet. It can be used to load *len* bytes from *offset* from
* the packet associated to *skb*, into the buffer pointed by
* *to*.
*
* Since Linux 4.7, usage of this helper has mostly been replaced
* by "direct packet access", enabling packet data to be
* manipulated with *skb*\ **->data** and *skb*\ **->data_end**
* pointing respectively to the first byte of packet data and to
* the byte after the last byte of packet data. However, it
* remains useful if one wishes to read large quantities of data
* at once from a packet into the eBPF stack.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
/* const: the pointer is a fixed constant and must never be reassigned. */
static long (* const bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to,
	__u32 len) = (void *)26;
/*
* bpf_get_stackid
*
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
* on which the tracing program is executed, and a pointer to a
* *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
*
* The last argument, *flags*, holds the number of stack frames to
* skip (from 0 to 255), masked with
* **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
* a combination of the following flags:
*
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
* **BPF_F_FAST_STACK_CMP**
* Compare stacks by hash only.
* **BPF_F_REUSE_STACKID**
* If two different stacks hash into the same *stackid*,
* discard the old one.
*
* The stack id retrieved is a 32-bit integer handle which
* can be further combined with other data (including other stack
* ids) and used as a key into maps. This can be useful for
* generating a variety of graphs (such as flame graphs or off-cpu
* graphs).
*
* For walking a stack, this helper is an improvement over
* **bpf_probe_read**\ (), which can be used with unrolled loops
* but is not efficient and consumes a lot of eBPF instructions.
* Instead, **bpf_get_stackid**\ () can collect up to
* **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
* this limit can be controlled with the **sysctl** program, and
* that it should be manually increased in order to profile long
* user stacks (such as stacks for Java programs). To do so, use:
*
* ::
*
* # sysctl kernel.perf_event_max_stack=<new value>
*
* Returns
* The stack id (a non-negative value; zero is a valid id) on
* success, or a negative error in case of failure.
*/
static long (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *)27;
/*
* bpf_csum_diff
*
* Compute a checksum difference, from the raw buffer pointed to by
* *from*, of length *from_size* (that must be a multiple of 4),
* towards the raw buffer pointed to by *to*, of size *to_size*
* (that must also be a multiple of 4). An optional *seed* can be
* added to the value (this can be cascaded, the seed may come
* from a previous call to the helper).
*
* This is flexible enough to be used in several ways:
*
* * With *from_size* == 0, *to_size* > 0 and *seed* set to
* checksum, it can be used when pushing new data.
* * With *from_size* > 0, *to_size* == 0 and *seed* set to
* checksum, it can be used when removing data from a packet.
* * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
* can be used to compute a diff. Note that *from_size* and
* *to_size* do not need to be equal.
*
* This helper can be used in combination with
* **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
* which one can feed in the difference computed with
* **bpf_csum_diff**\ ().
*
* Returns
* The checksum result, or a negative error code in case of
* failure.
*/
static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to,
__u32 to_size, __wsum seed) = (void *)28;
/*
* bpf_skb_get_tunnel_opt
*
* Retrieve tunnel options metadata for the packet associated to
* *skb*, and store the raw tunnel option data to the buffer *opt*
* of size *size*.
*
* This helper can be used with encapsulation devices that can
* operate in "collect metadata" mode (please refer to the related
* note in the description of **bpf_skb_get_tunnel_key**\ () for
* more details). A particular example where this can be used is
* in combination with the Geneve encapsulation protocol, where it
* allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper)
* and retrieving (with this helper) arbitrary TLVs
* (Type-Length-Value headers) from the eBPF program. This allows
* for full customization of these headers.
*
* Returns
* The size of the option data retrieved.
*/
static long (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt,
__u32 size) = (void *)29;
/*
* bpf_skb_set_tunnel_opt
*
* Set tunnel options metadata for the packet associated to *skb*
* to the option data contained in the raw buffer *opt* of size
* *size*.
*
* See also the description of the **bpf_skb_get_tunnel_opt**\ ()
* helper for additional information.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
static long (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt,
__u32 size) = (void *)30;
/*
* bpf_skb_change_proto
*
* Change the protocol of the *skb* to *proto*. Currently, the
* supported transitions are from IPv4 to IPv6, and from IPv6 to
* IPv4. The helper takes care of the groundwork for the
* transition, including resizing the socket buffer. The eBPF
* program is expected to fill the new headers, if any, via
* **bpf_skb_store_bytes**\ () and to recompute the checksums with
* **bpf_l3_csum_replace**\ () and
* **bpf_l4_csum_replace**\ (). The main case for this helper is
* to perform NAT64 operations out of an eBPF program.
*
* Internally, the GSO type is marked as dodgy so that headers are
* checked and segments are recalculated by the GSO/GRO engine.
* The size for GSO target is adapted as well.
*
* All values for *flags* are reserved for future usage, and must
* be left at zero.
*
* A call to this helper may change the underlying packet buffer.
* Therefore, at load time, all checks on pointers previously
* done by the verifier are invalidated and must be performed
* again, if the helper is used in combination with direct packet
* access.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
static long (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto,
__u64 flags) = (void *)31;
/*
* bpf_skb_change_type
*
* Change the packet type for the packet associated to *skb*. This
* comes down to setting *skb*\ **->pkt_type** to *type*, except
* the eBPF program does not have write access to *skb*\
* **->pkt_type** other than through this helper. Using a helper
* here allows for graceful handling of errors.
*
* The major use case is to change incoming *skb*s to
* **PACKET_HOST** in a programmatic way instead of having to
* recirculate via **bpf_redirect**\ (..., **BPF_F_INGRESS**), for
* example.
*
* Note that *type* only allows certain values. At this time, they
* are:
*
* **PACKET_HOST**
* Packet is for us.
* **PACKET_BROADCAST**
* Send packet to all.
* **PACKET_MULTICAST**
* Send packet to group.
* **PACKET_OTHERHOST**
* Send packet to someone else.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
static long (*bpf_skb_change_type)(struct __sk_buff *skb,
__u32 type) = (void *)32;
/*
* bpf_skb_under_cgroup
*
* Check whether *skb* is a descendant of the cgroup2 held by
* *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
*
* Returns
* The return value depends on the result of the test, and can be:
*
* * 0, if the *skb* failed the cgroup2 descendant test.
* * 1, if the *skb* passed the cgroup2 descendant test.
* * A negative error code, if an error occurred.
*/
static long (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map,
__u32 index) = (void *)33;
/*
* bpf_get_hash_recalc
*
* Retrieve the hash of the packet, *skb*\ **->hash**. If it is
* not set, in particular if the hash was cleared due to mangling,
* recompute this hash. Later accesses to the hash can be done
* directly with *skb*\ **->hash**.
*
* Calling **bpf_set_hash_invalid**\ (), changing a packet
* protocol with **bpf_skb_change_proto**\ (), or calling
* **bpf_skb_store_bytes**\ () with the
* **BPF_F_INVALIDATE_HASH** flag are actions that may clear
* the hash and trigger a new computation on the next call to
* **bpf_get_hash_recalc**\ ().
*
* Returns
* The 32-bit hash.
*/
static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *)34;
/*
* bpf_get_current_task
*
* Get the current task.
*
* Returns
* A pointer to the current task struct. Note that the pointer is
* returned as a **__u64** value, not as a typed pointer.
*/
static __u64 (*bpf_get_current_task)(void) = (void *)35;
/*
* bpf_probe_write_user
*
* Attempt in a safe way to write *len* bytes from the buffer
* *src* to *dst* in memory. It only works for threads that are in
* user context, and *dst* must be a valid user space address.
*
* This helper should not be used to implement any kind of
* security mechanism because of TOCTOU (time-of-check to
* time-of-use) attacks, but rather to debug, divert, and
* manipulate execution of semi-cooperative processes.
*
* Keep in mind that this feature is meant for experiments, and it
* has a risk of crashing the system and the programs running on
* it. Therefore, when an eBPF program using this helper is
* attached, a warning including PID and process name is printed
* to kernel logs.
*
* Returns
* 0 on success, or a negative error in case of failure.
*/
static long (*bpf_probe_write_user)(void *dst, const void *src,
__u32 len) = (void *)36;
/*
* bpf_current_task_under_cgroup
*
* Check whether the probe is being run in the context of a given
* subset of the cgroup2 hierarchy. The cgroup2 to test is held by
* *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
*
* Returns
* The return value depends on the result of the test, and can be:
*
* * 1, if current task belongs to the cgroup2.
* * 0, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*/
static long (*bpf_current_task_under_cgroup)(void *map,