From a5d5d1c310a1fc2dc2c3aba69675e8386ecef7b5 Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Wed, 18 Sep 2024 12:27:50 -0700 Subject: [PATCH 01/17] WIP new ringbuffer proposal --- docs/RingBuffer.md | 259 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 docs/RingBuffer.md diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md new file mode 100644 index 0000000000..3cbae0384a --- /dev/null +++ b/docs/RingBuffer.md @@ -0,0 +1,259 @@ +# New ebpf Ring Buffer Map (proposal) + +## Overview + +The current ringbuffer uses a pure callback-based approach to reading the ringbuffer. +Linux also supports memory-mapped polling consumers, which can't be directly supported in the current model. + +The new ring buffer will use [I/O Completion Ports](https://learn.microsoft.com/en-us/windows/win32/fileio/i-o-completion-ports), +which is the windows analog to the linux epoll mechanism. + +bpf program API +```c +/** + * @brief Output record to ringbuf + * + * Note newly added flags (to specify wakeup options). + * + * Wakeup Options: + * - auto (default): notify if consumer has caught up + * - BPF_RB_FORCE_WAKEUP - always notify consumer + * - BPF_RB_NO_WAKEUP - never notify consumer + * + */ +ebpf_result_t +ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, size_t flags) +``` + +New libbpf ringbuf API +```c + +//struct ring_buffer; +// +typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); + +struct ring_buffer_opts { + size_t sz; /* size of this struct, for forward/backward compatiblity */ +}; + +#define ring_buffer_opts__last_field sz + +/** + * @brief Creates a new ring buffer manager. + * + * @param[in] map_fd File descriptor to ring buffer map. + * @param[in] sample_cb Pointer to ring buffer notification callback function. + * @param[in] ctx Pointer to sample_cb callback function. + * @param[in] opts Ring buffer options. + * + * @returns Pointer to ring buffer manager. + */ +struct ring_buffer * +ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, + const struct ring_buffer_opts *opts); + +/** + * @brief Add extra RINGBUF maps to this ring buffer manager + */ +int ring_buffer__add(struct ring_buffer *rb, int map_fd, + ring_buffer_sample_fn sample_cb, void *ctx) + +/** + * @brief Frees a ring buffer manager. + * + * @param[in] rb Pointer to ring buffer manager to be freed. + * + */ +void ring_buffer__free(struct ring_buffer *rb); + +/** + * @brief poll ringbuf for new data + * Poll for available data and consume records, if any are available. + * Returns number of records consumed (or INT_MAX, whichever is less), or + * negative number, if any of the registered callbacks returned error. + */ +int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); + +/** + * @brief catch consumer up to producer by invoking the callback for every available record + * Consume available ring buffer(s) data without event polling. + * Returns number of records consumed across all registered ring buffers (or + * INT_MAX, whichever is less), or negative number if any of the callbacks + * return error. + */ +int ring_buffer__consume(struct ring_buffer *rb); + +/** + * @brief Get a handle that can be used to sleep until data is available in the ring(s). + * + * The fd will correspond to an I/O Completion port handle + * + * NOTE: should we exactly match libbpf epoll_fd() signature and add epoll_wait() wrapper, or just match the pattern? 
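 * (Illustrative use, assuming the returned handle is directly waitable:
 *  WaitForSingleObject(ring_buffer__epoll_fd(rb), timeout_ms);)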
+ */ +HANDLE ring_buffer__epoll_fd(const struct ring_buffer *rb); +``` + +New ebpf APIs +```c +/** + * get pointers to mapped producer and consumer pages + */ +int ebpf_ring_buffer_get_buffer(void **producer, void **consumer); +``` + + + +libbpf low-level consumer example +```c++ +// Ring buffer record is 32bit header + data. + +typedef struct _rb_header +{ + uint8_t locked : 1; + uint8_t discarded : 1; + uint32_t length : 30; +} rb_header_t; + +typedef struct _rb_record +{ + rb_header_t header; + uint8_t data[]; +} rb_record_t; + +// +// == Reading mapped ringbuffer memory == +// + +void *rb_cons; // Pointer to read/write mapped consumer page. +void *rb_prod; // Pointer to start of read-only producer pages. + +// Open ringbuffer. +fd_t map_fd = bpf_obj_get(rb_map_name.c_str()); +if (map_fd == ebpf_fd_invalid) return 1; + +auto rb = ring_buffer__new( + map_fd, + NULL, //callback function for callback-based consumer + nullptr, nullptr); +if (!rb) return 1; + +// get pointers to the prod/cons pages +int err = ebpf_ring_buffer_get_buffer(&rb_prod, &rb_cons, /* &epoll_fd */); + +if (err) { + goto Cleanup; +} + +const uint64_t *prod_offset = (const uint64_t*)rb_prod; // Producer offset ptr (r only). +uint64_t *cons_offset = (uint64_t*)rb_cons; // Consumer offset ptr (r/w mapped). +const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped rb data ptr (r only). + +// have_data used to track whether we should epoll or just keep reading. +bool have_data = *prod_offset > *cons_offset; + + +if (epoll_fd == ebpf_fd_invalid) { + // … log error … + // Close ringbuffer. + goto Cleanup; +} + +HANDLE iocp_handle = _get_handle_from_file_descriptor(epoll_fd) + +void *lp_ctx = NULL; +OVERLAPPED *overlapped = NULL; +DWORD bytesTransferred = 0; + +// Now loop until terminal error. +For(;;) { + if (!have_data) { // Only wait if we have already caught up. + // Wait for rb to notify (or we could spin/poll on have_data condition). + BOOL iocp_status = GetQueuedCompletionStatus(iocp_handle, &bytesTransfered, (LPWORD)&lp_ctx, &overlapped, INFINITE); + if (NULL == lp_ctx) { + //we are shutting down + goto Cleanup; + } + //TODO: convert this to handle IOCP status (instead of integer result) + if (err == E_TIMEOUT || E_RETRY || ...) { // Non-terminal error. + have_data = *prod_offset > *cons_offset; // It's possible we still have data. + if (!have_data) continue; + } else if (iocp_status && bytesTransferred > 0) { // We got notified of new data. + have_data = true; + } else { // Error polling ringbuffer. + LOG_ERROR(...); + break; + } + } + uint64_t prod = *prod_offset; + uint64_t cons = *cons_offset; + uint64_t remaining = prod - cons; + + if (remaining == 0) { + have_data = false; // Caught up to producer. + continue; + } else if (remaining < sizeof(rb_header_t)) { + LOG_ERROR(...); // Bad record or consumer offset out of alignment. + break; + } + + // Check header flags first, then read/skip data and update offset. + rb_header_t header = *(rb_header_t*)(&rb_data[cons % rb_size]); + if (header.locked) { // Next record not ready yet, epoll again. + have_data = false; + continue; + // Or we could spin/poll on ((rb_header_t*)(&rb_data[cons % rb_size]))->locked. + } + if (!header.discarded) { + const rb_record_t *record = *(const rb_record_t*)(&rb_data[cons % rb_size]); + // Read data from record->data[0 ... record->length-1]. + // ... business logic ... + } // Else it was discarded, skip and continue. + + // Update consumer offset. 
+ cons += sizeof(rb_header_t) + record->length + /*alignment padding*/; + *cons_offset = cons; + + if (/* We want to flush the ringbuffer */) { + *cons_offset = *prod_offset; + } +} + +Cleanup: + +// Close ringbuffer. +ring_buffer__free(rb); +``` + + + +Simplified blocking ringbuf consumer +```c +//Note: the below theoretical helpers would only need access to producers/consumer pages (offsets and data pages) +//rb__empty(prod,cons) - check whether consumer offset == consumer offset (!empty doesn't mean data is ready) +//rb__flush(prod,cons) - just set consumer offset = producer offset (skips all completed/in-progress records) +//rb__next_record(prod,cons) - advance consumer offset to next record +//rb__get_record(prod,cons,&record) - get pointer to current record (if any), skipping discarded records +//Returns E_SUCCESS (0) if record ready, E_LOCKED if record still locked, E_EMPTY if consumer has caught up. + +for(;;) { + for(; !(err=rb__get_record(prod,cons,&record)); rb__next_record(prod,cons)) { + // Data is now in record->data[0 ... record->length-1]. + // … Do record handling here … + } + // 3 cases for err: + // 1) Ringbuf empty - Wait with epoll, or poll for !rb__empty(prod,cons). + // 2) Record locked - Wait with epoll, or spin/poll on header lock bit. + // 3) Corrupt record or consumer offset - Break (could flush to continue reading from next good record). + if (err!=E_EMPTY && err!=E_LOCKED) { + // … log error … + break; + } + if (err == /* Handled errors, e.g. timeout */) { + // … log error and continue (we might still have record(s) to read) … + } else if (err != E_SUCCESS) { + // … log error … + break; + } +} +return err; +``` From 80abd599786f725912613a0f35f50ec5777b8542 Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Wed, 18 Sep 2024 14:49:21 -0700 Subject: [PATCH 02/17] add consumer descriptions and update examples. --- docs/RingBuffer.md | 89 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 3cbae0384a..874814ef2f 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -6,9 +6,28 @@ The current ringbuffer uses a pure callback-based approach to reading the ringbu Linux also supports memory-mapped polling consumers, which can't be directly supported in the current model. The new ring buffer will use [I/O Completion Ports](https://learn.microsoft.com/en-us/windows/win32/fileio/i-o-completion-ports), -which is the windows analog to the linux epoll mechanism. +which is the windows analog to the linux epoll mechanism, and memory map producer+consumer pages into the user process in the same way that linux does. + +The new API will support 2 access methods: callbacks and mmap/epoll style access. + +callback consumer: + +1. call `ring_buffer__new` to set up callback +2. call `ring_buffer__consume` as needed to invoke the callback on any outsanding data that is ready + +mmap/epoll consumer: + +1. call `ring_buffer__new` to get a ringbuffer manager +2. call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages +3. directly read records from the producer pages (and update consumer offset as we read) +4. call `ring_buffer__epoll_fd` to get the iocp handle as a fd_t so we can wait for new data +5. 
use `GetQueuedCompletionStatus`/`GetQueuedCompletionStatusEx` to wait for new data to be available + - We could provide a wrapper to make the common ebpf use cases simpler + +## API Changes + +### Changes to ebpf helper functions -bpf program API ```c /** * @brief Output record to ringbuf @@ -25,7 +44,8 @@ ebpf_result_t ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, size_t flags) ``` -New libbpf ringbuf API +### Updated libbpf ringbuf API + ```c //struct ring_buffer; @@ -43,7 +63,7 @@ struct ring_buffer_opts { * * @param[in] map_fd File descriptor to ring buffer map. * @param[in] sample_cb Pointer to ring buffer notification callback function. - * @param[in] ctx Pointer to sample_cb callback function. + * @param[in] ctx Pointer to callback function context. * @param[in] opts Ring buffer options. * * @returns Pointer to ring buffer manager. @@ -54,6 +74,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, /** * @brief Add extra RINGBUF maps to this ring buffer manager + * + * @param[in] rb Pointer to ring buffer manager. + * @param[in] map_fd File descriptor to ring buffer map. + * @param[in] sample_cb Pointer to ring buffer notification callback function. + * @param[in] ctx Pointer to callback function context. */ int ring_buffer__add(struct ring_buffer *rb, int map_fd, ring_buffer_sample_fn sample_cb, void *ctx) @@ -71,6 +96,10 @@ void ring_buffer__free(struct ring_buffer *rb); * Poll for available data and consume records, if any are available. * Returns number of records consumed (or INT_MAX, whichever is less), or * negative number, if any of the registered callbacks returned error. + * + * @param[in] rb Pointer to ring buffer manager. + * @param[in] timeout_ms maximum time to wait for (in milliseconds). + * */ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); @@ -80,35 +109,49 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); * Returns number of records consumed across all registered ring buffers (or * INT_MAX, whichever is less), or negative number if any of the callbacks * return error. + * + * @param[in] rb Pointer to ring buffer manager. */ int ring_buffer__consume(struct ring_buffer *rb); /** * @brief Get a handle that can be used to sleep until data is available in the ring(s). * - * The fd will correspond to an I/O Completion port handle + * @param[in] rb Pointer to ring buffer manager. * - * NOTE: should we exactly match libbpf epoll_fd() signature and add epoll_wait() wrapper, or just match the pattern? + * @return ebpf_invalid_fd on error + * @return fd_t corresponding to I/O Completion port hand on success */ -HANDLE ring_buffer__epoll_fd(const struct ring_buffer *rb); +fd_t ring_buffer__epoll_fd(const struct ring_buffer *rb); ``` -New ebpf APIs +### New ebpf APIs + ```c /** * get pointers to mapped producer and consumer pages + * + * @param[out] producer pointer to start of read-only mapped producer pages + * @param[out] consumer pointer to start of read-write mapped consumer page */ int ebpf_ring_buffer_get_buffer(void **producer, void **consumer); ``` +## Ringbuffer consumer +### libbpf mapped memory consumer example + + +This consumer directly accesses the records from the producer memory and directly updates the consumer offset to show the logic. -libbpf low-level consumer example ```c++ // Ring buffer record is 32bit header + data. 
+#define BPF_RB_LOCKED + typedef struct _rb_header { + //NOTE: bit fields are not portable, so this is just for explanation -- the new ringbuffer will use bit masking to perform equivalent operations on the bits of the header. uint8_t locked : 1; uint8_t discarded : 1; uint32_t length : 30; @@ -151,13 +194,13 @@ const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped // have_data used to track whether we should epoll or just keep reading. bool have_data = *prod_offset > *cons_offset; - +// initialize iocp wait handle +epoll_fd = ring_buffer__epoll_fd(rb); if (epoll_fd == ebpf_fd_invalid) { // … log error … // Close ringbuffer. goto Cleanup; } - HANDLE iocp_handle = _get_handle_from_file_descriptor(epoll_fd) void *lp_ctx = NULL; @@ -169,19 +212,22 @@ For(;;) { if (!have_data) { // Only wait if we have already caught up. // Wait for rb to notify (or we could spin/poll on have_data condition). BOOL iocp_status = GetQueuedCompletionStatus(iocp_handle, &bytesTransfered, (LPWORD)&lp_ctx, &overlapped, INFINITE); + if (NULL == lp_ctx) { - //we are shutting down - goto Cleanup; - } - //TODO: convert this to handle IOCP status (instead of integer result) - if (err == E_TIMEOUT || E_RETRY || ...) { // Non-terminal error. + //we are shutting down + break; + } else if (!iocp_status || bytesTransferred <= 0) { // No notification + if (!iocp_status) { // error + uint32_t iocp_err = GetLastError(); + if (err == /* terminal error */) { + LOG_ERROR(...); + break; + } + } have_data = *prod_offset > *cons_offset; // It's possible we still have data. if (!have_data) continue; - } else if (iocp_status && bytesTransferred > 0) { // We got notified of new data. + } else { // We got notified of new data. have_data = true; - } else { // Error polling ringbuffer. - LOG_ERROR(...); - break; } } uint64_t prod = *prod_offset; @@ -224,9 +270,10 @@ Cleanup: ring_buffer__free(rb); ``` +### Simplified blocking ringbuf consumer +This consumer uses some theoretical helpers to simplify the above logic. -Simplified blocking ringbuf consumer ```c //Note: the below theoretical helpers would only need access to producers/consumer pages (offsets and data pages) //rb__empty(prod,cons) - check whether consumer offset == consumer offset (!empty doesn't mean data is ready) From 3f400bca395fb8f981b0996f0997d2054f1c97e9 Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Wed, 18 Sep 2024 17:06:29 -0700 Subject: [PATCH 03/17] comment fixes --- docs/RingBuffer.md | 72 +++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 874814ef2f..b7ea624951 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -6,7 +6,7 @@ The current ringbuffer uses a pure callback-based approach to reading the ringbu Linux also supports memory-mapped polling consumers, which can't be directly supported in the current model. The new ring buffer will use [I/O Completion Ports](https://learn.microsoft.com/en-us/windows/win32/fileio/i-o-completion-ports), -which is the windows analog to the linux epoll mechanism, and memory map producer+consumer pages into the user process in the same way that linux does. +which is the windows API closest to the linux epoll mechanism, and memory map producer+consumer pages into the user process in the same way that linux does. The new API will support 2 access methods: callbacks and mmap/epoll style access. @@ -34,10 +34,10 @@ mmap/epoll consumer: * * Note newly added flags (to specify wakeup options). 
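 * Example use of the flags (illustrative): pass BPF_RB_NO_WAKEUP on every
 * record of a batch except the last, then BPF_RB_FORCE_WAKEUP on the last one.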
* - * Wakeup Options: - * - auto (default): notify if consumer has caught up - * - BPF_RB_FORCE_WAKEUP - always notify consumer - * - BPF_RB_NO_WAKEUP - never notify consumer + * Wakeup options: + * - auto (default): Notify if consumer has caught up. + * - BPF_RB_FORCE_WAKEUP - Always notify consumer. + * - BPF_RB_NO_WAKEUP - Never notify consumer. * */ ebpf_result_t @@ -48,8 +48,8 @@ ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, ```c -//struct ring_buffer; -// +struct ring_buffer; + typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); struct ring_buffer_opts { @@ -63,7 +63,7 @@ struct ring_buffer_opts { * * @param[in] map_fd File descriptor to ring buffer map. * @param[in] sample_cb Pointer to ring buffer notification callback function. - * @param[in] ctx Pointer to callback function context. + * @param[in] ctx Pointer to sample_cb callback function context. * @param[in] opts Ring buffer options. * * @returns Pointer to ring buffer manager. @@ -78,7 +78,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, * @param[in] rb Pointer to ring buffer manager. * @param[in] map_fd File descriptor to ring buffer map. * @param[in] sample_cb Pointer to ring buffer notification callback function. - * @param[in] ctx Pointer to callback function context. + * @param[in] ctx Pointer to sample_cb callback function context. */ int ring_buffer__add(struct ring_buffer *rb, int map_fd, ring_buffer_sample_fn sample_cb, void *ctx) @@ -131,8 +131,8 @@ fd_t ring_buffer__epoll_fd(const struct ring_buffer *rb); /** * get pointers to mapped producer and consumer pages * - * @param[out] producer pointer to start of read-only mapped producer pages - * @param[out] consumer pointer to start of read-write mapped consumer page + * @param[out] producer pointer* to start of read-only mapped producer pages + * @param[out] consumer pointer* to start of read-write mapped consumer page */ int ebpf_ring_buffer_get_buffer(void **producer, void **consumer); ``` @@ -145,13 +145,15 @@ int ebpf_ring_buffer_get_buffer(void **producer, void **consumer); This consumer directly accesses the records from the producer memory and directly updates the consumer offset to show the logic. ```c++ -// Ring buffer record is 32bit header + data. -#define BPF_RB_LOCKED +// +// == Ringbuf helpers == +// +// Ring buffer record is 32bit header + data. typedef struct _rb_header { - //NOTE: bit fields are not portable, so this is just for explanation -- the new ringbuffer will use bit masking to perform equivalent operations on the bits of the header. + //NOTE: bit fields are not portable, so this is just for simpler example code -- the actual code should use bit masking to perform equivalent operations on the header bits. uint8_t locked : 1; uint8_t discarded : 1; uint32_t length : 30; @@ -163,11 +165,19 @@ typedef struct _rb_record uint8_t data[]; } rb_record_t; +/** + * @brief clear the ringbuffer. + */ +void rb_flush(uint64_t *cons_offset, const uint64_t *prod_offset) { + *cons_offset = *prod_offset; +} + + // -// == Reading mapped ringbuffer memory == +// == mmap/epoll consumer == // -void *rb_cons; // Pointer to read/write mapped consumer page. +void *rb_cons; // Pointer to read/write mapped consumer page with consumer offset. void *rb_prod; // Pointer to start of read-only producer pages. // Open ringbuffer. 
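// (Assumes the map was pinned earlier, e.g. with bpf_obj_pin, so it can be
// looked up here by the pin path in rb_map_name.)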
@@ -180,7 +190,7 @@ auto rb = ring_buffer__new( nullptr, nullptr); if (!rb) return 1; -// get pointers to the prod/cons pages +// get pointers to the producer/consumer pages int err = ebpf_ring_buffer_get_buffer(&rb_prod, &rb_cons, /* &epoll_fd */); if (err) { @@ -191,14 +201,13 @@ const uint64_t *prod_offset = (const uint64_t*)rb_prod; // Producer offset ptr ( uint64_t *cons_offset = (uint64_t*)rb_cons; // Consumer offset ptr (r/w mapped). const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped rb data ptr (r only). -// have_data used to track whether we should epoll or just keep reading. +// have_data used to track whether we should wait for notification or just keep reading. bool have_data = *prod_offset > *cons_offset; -// initialize iocp wait handle +// Initialize iocp wait handle. epoll_fd = ring_buffer__epoll_fd(rb); if (epoll_fd == ebpf_fd_invalid) { // … log error … - // Close ringbuffer. goto Cleanup; } HANDLE iocp_handle = _get_handle_from_file_descriptor(epoll_fd) @@ -207,10 +216,10 @@ void *lp_ctx = NULL; OVERLAPPED *overlapped = NULL; DWORD bytesTransferred = 0; -// Now loop until terminal error. +// Now loop until error. For(;;) { if (!have_data) { // Only wait if we have already caught up. - // Wait for rb to notify (or we could spin/poll on have_data condition). + // Wait for rb to notify -- or we could spin/poll on *prod_offset > *cons_offset. BOOL iocp_status = GetQueuedCompletionStatus(iocp_handle, &bytesTransfered, (LPWORD)&lp_ctx, &overlapped, INFINITE); if (NULL == lp_ctx) { @@ -220,7 +229,7 @@ For(;;) { if (!iocp_status) { // error uint32_t iocp_err = GetLastError(); if (err == /* terminal error */) { - LOG_ERROR(...); + // … log error … break; } } @@ -238,13 +247,14 @@ For(;;) { have_data = false; // Caught up to producer. continue; } else if (remaining < sizeof(rb_header_t)) { - LOG_ERROR(...); // Bad record or consumer offset out of alignment. + // Bad record or consumer offset out of alignment. + // … log error … break; } // Check header flags first, then read/skip data and update offset. rb_header_t header = *(rb_header_t*)(&rb_data[cons % rb_size]); - if (header.locked) { // Next record not ready yet, epoll again. + if (header.locked) { // Next record not ready yet, wait on iocp. have_data = false; continue; // Or we could spin/poll on ((rb_header_t*)(&rb_data[cons % rb_size]))->locked. @@ -252,16 +262,12 @@ For(;;) { if (!header.discarded) { const rb_record_t *record = *(const rb_record_t*)(&rb_data[cons % rb_size]); // Read data from record->data[0 ... record->length-1]. - // ... business logic ... + // … business logic … } // Else it was discarded, skip and continue. - // Update consumer offset. - cons += sizeof(rb_header_t) + record->length + /*alignment padding*/; + // Update consumer offset (and pad record length to multiple of 8). + cons += sizeof(rb_header_t) + (record->length + 7 & ~7); *cons_offset = cons; - - if (/* We want to flush the ringbuffer */) { - *cons_offset = *prod_offset; - } } Cleanup: @@ -272,7 +278,7 @@ ring_buffer__free(rb); ### Simplified blocking ringbuf consumer -This consumer uses some theoretical helpers to simplify the above logic. +This consumer uses some possible helpers to simplify the above logic (might also want timeout). 
 ```c
 //Note: the below theoretical helpers would only need access to producers/consumer pages (offsets and data pages)

From f1d15ebc4f21804a32b6d08c941f64fabd194e73 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Wed, 25 Sep 2024 14:16:11 -0700
Subject: [PATCH 04/17] update flags notes and add epoll_fd comment

---
 docs/RingBuffer.md | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index b7ea624951..c04392582d 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -32,10 +32,10 @@
 /**
  * @brief Output record to ringbuf
  *
- * Note newly added flags (to specify wakeup options).
+ * Note newly added flag values (to specify wakeup options).
  *
- * Wakeup options:
- * - auto (default): Notify if consumer has caught up.
+ * Wakeup options (flags):
+ * - 0 (auto/default): Notify if consumer has caught up.
  * - BPF_RB_FORCE_WAKEUP - Always notify consumer.
  * - BPF_RB_NO_WAKEUP - Never notify consumer.
  *
@@ -44,7 +44,14 @@ ebpf_result_t
 ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, size_t flags)
 ```
 
-### Updated libbpf ringbuf API
+### Updated supported libbpf functions
+
+One part to be settled is what epoll_fd should return:
+- On linux epoll_fd returns an int file descriptor.
+- IOCP uses a HANDLE to reference IOCP objects.
+
+Returning an int epoll fd could give complete linux ebpf code compatibility, but would require an epoll wrapper around IOCP calls. If we change the libbpf signature to return a HANDLE, it could be directly waited on with
+[GetQueuedCompletionStatus](https://learn.microsoft.com/en-us/windows/win32/api/ioapiset/nf-ioapiset-getqueuedcompletionstatus) or GetQueuedCompletionStatusEx.
 
 ```c

From c33939034a16ca38ef0d9f453aa4cb16e61c52ab Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Wed, 25 Sep 2024 14:20:19 -0700
Subject: [PATCH 05/17] remove epoll fd note (get_handle is already used below).

---
 docs/RingBuffer.md | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index c04392582d..b9594bf34f 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -46,13 +46,6 @@ ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length,
 
 ### Updated supported libbpf functions
 
-One part to be settled is what epoll_fd should return:
-- On linux epoll_fd returns an int file descriptor.
-- IOCP uses a HANDLE to reference IOCP objects.
-
-Returning an int epoll fd could give complete linux ebpf code compatibility, but would require an epoll wrapper around IOCP calls. If we change the libbpf signature to return a HANDLE, it could be directly waited on with
-[GetQueuedCompletionStatus](https://learn.microsoft.com/en-us/windows/win32/api/ioapiset/nf-ioapiset-getqueuedcompletionstatus) or GetQueuedCompletionStatusEx.
-
 ```c

From eac29ff7357e530e518e2cbf6c99ce7f02824625 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Thu, 3 Oct 2024 12:01:39 -0700
Subject: [PATCH 06/17] Update wait handle design.

---
 docs/RingBuffer.md | 83 +++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 46 deletions(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index b9594bf34f..4e86c44786 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -5,24 +5,21 @@
 The current ringbuffer uses a pure callback-based approach to reading the ringbuffer.
 Linux also supports memory-mapped polling consumers, which can't be directly supported in the current model.
-The new ring buffer will use [I/O Completion Ports](https://learn.microsoft.com/en-us/windows/win32/fileio/i-o-completion-ports), -which is the windows API closest to the linux epoll mechanism, and memory map producer+consumer pages into the user process in the same way that linux does. +The new API will support 2 consumer types: callbacks and direct access to the mapped producer memory (with poll to wait for data). -The new API will support 2 access methods: callbacks and mmap/epoll style access. +Callback consumer: -callback consumer: +1. Call `ring_buffer__new` to set up callback +2. Call `ring_buffer__consume` as needed to invoke the callback on any outsanding data that is ready -1. call `ring_buffer__new` to set up callback -2. call `ring_buffer__consume` as needed to invoke the callback on any outsanding data that is ready +Mapped memory consumer: -mmap/epoll consumer: - -1. call `ring_buffer__new` to get a ringbuffer manager -2. call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages -3. directly read records from the producer pages (and update consumer offset as we read) -4. call `ring_buffer__epoll_fd` to get the iocp handle as a fd_t so we can wait for new data -5. use `GetQueuedCompletionStatus`/`GetQueuedCompletionStatusEx` to wait for new data to be available - - We could provide a wrapper to make the common ebpf use cases simpler +1. Call `ring_buffer__new` to get a ringbuffer manager. +2. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages. +3. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer and consumer memory. +4. Call `ebpf_ring_buffer_get_wait_handle` to get the wait handle. +5. Directly read records from the producer pages (and update consumer offset as we read). +6. Call `WaitForSingleObject`/`WaitForMultipleObject` as needed to wait for new data to be available. ## API Changes @@ -113,16 +110,6 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); * @param[in] rb Pointer to ring buffer manager. */ int ring_buffer__consume(struct ring_buffer *rb); - -/** - * @brief Get a handle that can be used to sleep until data is available in the ring(s). - * - * @param[in] rb Pointer to ring buffer manager. - * - * @return ebpf_invalid_fd on error - * @return fd_t corresponding to I/O Completion port hand on success - */ -fd_t ring_buffer__epoll_fd(const struct ring_buffer *rb); ``` ### New ebpf APIs @@ -134,7 +121,17 @@ fd_t ring_buffer__epoll_fd(const struct ring_buffer *rb); * @param[out] producer pointer* to start of read-only mapped producer pages * @param[out] consumer pointer* to start of read-write mapped consumer page */ -int ebpf_ring_buffer_get_buffer(void **producer, void **consumer); +int ebpf_ring_buffer_get_buffer(const ebpf_map_t* map, void **producer, void **consumer); + +/** + * get the wait handle to use with WaitForSingleObject/WaitForMultipleObject + * + * @param[out] producer pointer* to start of read-only mapped producer pages + * @param[out] consumer pointer* to start of read-write mapped consumer page + * + * @returns Wait handle + */ +HANDLE ebpf_ring_buffer_get_wait_handle(const ebpf_map_t* map); ``` ## Ringbuffer consumer @@ -190,8 +187,15 @@ auto rb = ring_buffer__new( nullptr, nullptr); if (!rb) return 1; +// Initialize iocp wait handle. 
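+// (The wait handle is expected to act like an auto-reset event that the
+// producer signals when it commits new data.)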
+HANDLE wait_handle = ebpf_ring_buffer_get_wait_handle(rb); +if (!wait_handle) { + // … log error … + goto Cleanup; +} + // get pointers to the producer/consumer pages -int err = ebpf_ring_buffer_get_buffer(&rb_prod, &rb_cons, /* &epoll_fd */); +int err = ebpf_ring_buffer_get_buffer(&rb_prod, &rb_cons); if (err) { goto Cleanup; @@ -204,14 +208,6 @@ const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped // have_data used to track whether we should wait for notification or just keep reading. bool have_data = *prod_offset > *cons_offset; -// Initialize iocp wait handle. -epoll_fd = ring_buffer__epoll_fd(rb); -if (epoll_fd == ebpf_fd_invalid) { - // … log error … - goto Cleanup; -} -HANDLE iocp_handle = _get_handle_from_file_descriptor(epoll_fd) - void *lp_ctx = NULL; OVERLAPPED *overlapped = NULL; DWORD bytesTransferred = 0; @@ -220,18 +216,13 @@ DWORD bytesTransferred = 0; For(;;) { if (!have_data) { // Only wait if we have already caught up. // Wait for rb to notify -- or we could spin/poll on *prod_offset > *cons_offset. - BOOL iocp_status = GetQueuedCompletionStatus(iocp_handle, &bytesTransfered, (LPWORD)&lp_ctx, &overlapped, INFINITE); - - if (NULL == lp_ctx) { - //we are shutting down - break; - } else if (!iocp_status || bytesTransferred <= 0) { // No notification - if (!iocp_status) { // error - uint32_t iocp_err = GetLastError(); - if (err == /* terminal error */) { - // … log error … - break; - } + DWORD wait_status = WaitForSingleObject(wait_handle, INFINITE); + + if (wait_status != WAIT_OBJECT_0) { // No notification + uint32_t wait_err = GetLastError(); + if (wait_err == /* terminal error */) { + // … log error … + break; } have_data = *prod_offset > *cons_offset; // It's possible we still have data. if (!have_data) continue; From 5f473d4d7c40e468c494e457c075e22f86e108e7 Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Thu, 3 Oct 2024 12:11:17 -0700 Subject: [PATCH 07/17] add offset to record header --- docs/RingBuffer.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 4e86c44786..15b16d2863 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -147,13 +147,14 @@ This consumer directly accesses the records from the producer memory and directl // == Ringbuf helpers == // -// Ring buffer record is 32bit header + data. +// Ring buffer record is 64 bit header + data. typedef struct _rb_header { //NOTE: bit fields are not portable, so this is just for simpler example code -- the actual code should use bit masking to perform equivalent operations on the header bits. uint8_t locked : 1; uint8_t discarded : 1; uint32_t length : 30; + uint32_t offset; // for kernel use (offset of record in pages from start of buffer data area) } rb_header_t; typedef struct _rb_record From a8487153c46147577b97fe5163916e1b2641f08d Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Mon, 7 Oct 2024 15:37:57 -0700 Subject: [PATCH 08/17] fix consumer steps and get_buffer return type --- docs/RingBuffer.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 15b16d2863..5b7b55be6e 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -16,10 +16,9 @@ Mapped memory consumer: 1. Call `ring_buffer__new` to get a ringbuffer manager. 2. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages. -3. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer and consumer memory. -4. 
Call `ebpf_ring_buffer_get_wait_handle` to get the wait handle. -5. Directly read records from the producer pages (and update consumer offset as we read). -6. Call `WaitForSingleObject`/`WaitForMultipleObject` as needed to wait for new data to be available. +3. Call `ebpf_ring_buffer_get_wait_handle` to get the wait handle. +4. Directly read records from the producer pages (and update consumer offset as we read). +5. Call `WaitForSingleObject`/`WaitForMultipleObject` as needed to wait for new data to be available. ## API Changes @@ -121,7 +120,7 @@ int ring_buffer__consume(struct ring_buffer *rb); * @param[out] producer pointer* to start of read-only mapped producer pages * @param[out] consumer pointer* to start of read-write mapped consumer page */ -int ebpf_ring_buffer_get_buffer(const ebpf_map_t* map, void **producer, void **consumer); +ebpf_result_t ebpf_ring_buffer_get_buffer(const ebpf_map_t* map, void **producer, void **consumer); /** * get the wait handle to use with WaitForSingleObject/WaitForMultipleObject From ea47230aec71c557c519218886d3a14e91490abf Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Fri, 18 Oct 2024 13:08:17 -0700 Subject: [PATCH 09/17] Ringbuffer design updates. Splits the design so callback consumers use the existing libbpf APIs, and mapped memory consumers use the new windows-specific functions. --- docs/RingBuffer.md | 124 +++++++++++++++------------------------------ 1 file changed, 41 insertions(+), 83 deletions(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 5b7b55be6e..24e54686c0 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -9,16 +9,15 @@ The new API will support 2 consumer types: callbacks and direct access to the ma Callback consumer: -1. Call `ring_buffer__new` to set up callback -2. Call `ring_buffer__consume` as needed to invoke the callback on any outsanding data that is ready +1. Call `ring_buffer__new` to set up callback. +2. The callback will be invoked for each record written to the ring buffer. Mapped memory consumer: -1. Call `ring_buffer__new` to get a ringbuffer manager. -2. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages. -3. Call `ebpf_ring_buffer_get_wait_handle` to get the wait handle. -4. Directly read records from the producer pages (and update consumer offset as we read). -5. Call `WaitForSingleObject`/`WaitForMultipleObject` as needed to wait for new data to be available. +1. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages. +2. Call `ebpf_ring_buffer_get_wait_handle` to get the wait handle. +3. Directly read records from the producer pages (and update consumer offset as we read). +4. Call `WaitForSingleObject`/`WaitForMultipleObject` as needed to wait for new data to be available. ## API Changes @@ -40,10 +39,15 @@ ebpf_result_t ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, size_t flags) ``` -### Updated supported libbpf functions +### Existing libbpf functions for callback consumer -```c +The behaviour of these functions will be unchanged. + +Use the existing `ring_buffer__new()` to set up automatic callbacks for each record. +Call `ebpf_ring_buffer_get_buffer()` ([New eBPF APIs](#new-ebpf-apis-for-mapped-memory-consumer)) +to get direct access to the mapped ringbuffer memory. 
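+
+A minimal callback consumer would then look like this (sketch; the pin name and
+`handle_record` are illustrative, not part of the proposed API):
+
+```c
+static int handle_record(void *ctx, void *data, size_t size) {
+    // data/size describe one record; ctx is the context passed to ring_buffer__new.
+    return 0; // success
+}
+
+fd_t map_fd = bpf_obj_get("rb_map"); // illustrative pin name
+struct ring_buffer *rb = ring_buffer__new(map_fd, handle_record, NULL, NULL);
+if (rb != NULL) {
+    // handle_record is now invoked for each record written to the ring buffer.
+    ring_buffer__free(rb);
+}
+```
+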
+```c struct ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -68,17 +72,6 @@ struct ring_buffer * ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, const struct ring_buffer_opts *opts); -/** - * @brief Add extra RINGBUF maps to this ring buffer manager - * - * @param[in] rb Pointer to ring buffer manager. - * @param[in] map_fd File descriptor to ring buffer map. - * @param[in] sample_cb Pointer to ring buffer notification callback function. - * @param[in] ctx Pointer to sample_cb callback function context. - */ -int ring_buffer__add(struct ring_buffer *rb, int map_fd, - ring_buffer_sample_fn sample_cb, void *ctx) - /** * @brief Frees a ring buffer manager. * @@ -86,32 +79,9 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, * */ void ring_buffer__free(struct ring_buffer *rb); - -/** - * @brief poll ringbuf for new data - * Poll for available data and consume records, if any are available. - * Returns number of records consumed (or INT_MAX, whichever is less), or - * negative number, if any of the registered callbacks returned error. - * - * @param[in] rb Pointer to ring buffer manager. - * @param[in] timeout_ms maximum time to wait for (in milliseconds). - * - */ -int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); - -/** - * @brief catch consumer up to producer by invoking the callback for every available record - * Consume available ring buffer(s) data without event polling. - * Returns number of records consumed across all registered ring buffers (or - * INT_MAX, whichever is less), or negative number if any of the callbacks - * return error. - * - * @param[in] rb Pointer to ring buffer manager. - */ -int ring_buffer__consume(struct ring_buffer *rb); ``` -### New ebpf APIs +### New ebpf APIs for mapped memory consumer ```c /** @@ -120,23 +90,19 @@ int ring_buffer__consume(struct ring_buffer *rb); * @param[out] producer pointer* to start of read-only mapped producer pages * @param[out] consumer pointer* to start of read-write mapped consumer page */ -ebpf_result_t ebpf_ring_buffer_get_buffer(const ebpf_map_t* map, void **producer, void **consumer); +ebpf_result_t ebpf_ring_buffer_get_buffer(fd_t map_fd, void **producer, void **consumer); /** * get the wait handle to use with WaitForSingleObject/WaitForMultipleObject * - * @param[out] producer pointer* to start of read-only mapped producer pages - * @param[out] consumer pointer* to start of read-write mapped consumer page - * * @returns Wait handle */ -HANDLE ebpf_ring_buffer_get_wait_handle(const ebpf_map_t* map); +HANDLE ebpf_ring_buffer_get_wait_handle(fd_t map_fd); ``` ## Ringbuffer consumer -### libbpf mapped memory consumer example - +### mapped memory consumer example This consumer directly accesses the records from the producer memory and directly updates the consumer offset to show the logic. @@ -149,7 +115,7 @@ This consumer directly accesses the records from the producer memory and directl // Ring buffer record is 64 bit header + data. typedef struct _rb_header { - //NOTE: bit fields are not portable, so this is just for simpler example code -- the actual code should use bit masking to perform equivalent operations on the header bits. + //NOTE: bit fields are not portable, so this is just for simpler example code -- the actual code should use bit masking to perform equivalent operations on the header bits, and ReadAcquire to read the header. 
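+    // (Equivalent masking on a uint32_t header word, given this declaration
+    //  order: locked = hdr & 0x1, discarded = hdr & 0x2, length = hdr >> 2.)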
uint8_t locked : 1; uint8_t discarded : 1; uint32_t length : 30; @@ -166,7 +132,7 @@ typedef struct _rb_record * @brief clear the ringbuffer. */ void rb_flush(uint64_t *cons_offset, const uint64_t *prod_offset) { - *cons_offset = *prod_offset; + WriteRelease64(cons_offset,ReadAcquire64(prod_offset)); } @@ -181,32 +147,28 @@ void *rb_prod; // Pointer to start of read-only producer pages. fd_t map_fd = bpf_obj_get(rb_map_name.c_str()); if (map_fd == ebpf_fd_invalid) return 1; -auto rb = ring_buffer__new( - map_fd, - NULL, //callback function for callback-based consumer - nullptr, nullptr); -if (!rb) return 1; - -// Initialize iocp wait handle. -HANDLE wait_handle = ebpf_ring_buffer_get_wait_handle(rb); +// Initialize wait handle for map. +HANDLE wait_handle = ebpf_ring_buffer_get_wait_handle(map_fd); if (!wait_handle) { // … log error … - goto Cleanup; + goto Exit; } // get pointers to the producer/consumer pages -int err = ebpf_ring_buffer_get_buffer(&rb_prod, &rb_cons); +int err = ebpf_ring_buffer_get_buffer(map_fd, &rb_prod, &rb_cons); if (err) { - goto Cleanup; + goto Exit; } const uint64_t *prod_offset = (const uint64_t*)rb_prod; // Producer offset ptr (r only). uint64_t *cons_offset = (uint64_t*)rb_cons; // Consumer offset ptr (r/w mapped). const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped rb data ptr (r only). +uint64_t producer_offset = ReadAcquire64(prod_offset); +uint64_t consumer_offset = *cons_offset; // only one consumer so don't need ReadAcquire. // have_data used to track whether we should wait for notification or just keep reading. -bool have_data = *prod_offset > *cons_offset; +bool have_data = producer_offset > consumer_offset; void *lp_ctx = NULL; OVERLAPPED *overlapped = NULL; @@ -215,7 +177,7 @@ DWORD bytesTransferred = 0; // Now loop until error. For(;;) { if (!have_data) { // Only wait if we have already caught up. - // Wait for rb to notify -- or we could spin/poll on *prod_offset > *cons_offset. + // Wait for rb to notify -- or we could spin/poll until *prod_offset > *cons_offset. DWORD wait_status = WaitForSingleObject(wait_handle, INFINITE); if (wait_status != WAIT_OBJECT_0) { // No notification @@ -224,15 +186,14 @@ For(;;) { // … log error … break; } - have_data = *prod_offset > *cons_offset; // It's possible we still have data. + producer_offset = ReadAcquire64(prod_offset); + have_data = producer_offset > consumer_offset; // It's possible we still have data. if (!have_data) continue; } else { // We got notified of new data. have_data = true; } } - uint64_t prod = *prod_offset; - uint64_t cons = *cons_offset; - uint64_t remaining = prod - cons; + uint64_t remaining = producer_offset - consumer_offset; if (remaining == 0) { have_data = false; // Caught up to producer. @@ -244,30 +205,27 @@ For(;;) { } // Check header flags first, then read/skip data and update offset. - rb_header_t header = *(rb_header_t*)(&rb_data[cons % rb_size]); - if (header.locked) { // Next record not ready yet, wait on iocp. + rb_header_t header = (rb_header_t)(&rb_data[consumer_offset % rb_size]); + if (header.locked) { // Next record not ready yet, wait. have_data = false; continue; - // Or we could spin/poll on ((rb_header_t*)(&rb_data[cons % rb_size]))->locked. + // Or we could spin/poll on ((rb_header_t*)(&rb_data[consumer_offset % rb_size]))->locked. 
} if (!header.discarded) { - const rb_record_t *record = *(const rb_record_t*)(&rb_data[cons % rb_size]); + const rb_record_t *record = *(const rb_record_t*)(&rb_data[consumer_offset % rb_size]); // Read data from record->data[0 ... record->length-1]. // … business logic … } // Else it was discarded, skip and continue. // Update consumer offset (and pad record length to multiple of 8). - cons += sizeof(rb_header_t) + (record->length + 7 & ~7); - *cons_offset = cons; + consumer_offset += sizeof(rb_header_t) + (record->length + 7 & ~7); + WriteRelease64(cons_offset, consumer_offset); } -Cleanup: - -// Close ringbuffer. -ring_buffer__free(rb); +Exit: ``` -### Simplified blocking ringbuf consumer +### Simplified polling ringbuf consumer This consumer uses some possible helpers to simplify the above logic (might also want timeout). @@ -285,8 +243,8 @@ for(;;) { // … Do record handling here … } // 3 cases for err: - // 1) Ringbuf empty - Wait with epoll, or poll for !rb__empty(prod,cons). - // 2) Record locked - Wait with epoll, or spin/poll on header lock bit. + // 1) Ringbuf empty - Wait on handle, or poll for !rb__empty(prod,cons). + // 2) Record locked - Wait on handle, or spin/poll on header lock bit. // 3) Corrupt record or consumer offset - Break (could flush to continue reading from next good record). if (err!=E_EMPTY && err!=E_LOCKED) { // … log error … From 7ba4ec4955f9a16ac619b826632f7f028e3eea62 Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Fri, 18 Oct 2024 13:14:14 -0700 Subject: [PATCH 10/17] add readacquire --- docs/RingBuffer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 24e54686c0..f5c801cb70 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -166,7 +166,7 @@ uint64_t *cons_offset = (uint64_t*)rb_cons; // Consumer offset ptr (r/w mapped). const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped rb data ptr (r only). uint64_t producer_offset = ReadAcquire64(prod_offset); -uint64_t consumer_offset = *cons_offset; // only one consumer so don't need ReadAcquire. +uint64_t consumer_offset = ReadAcquire64(cons_offset); // have_data used to track whether we should wait for notification or just keep reading. bool have_data = producer_offset > consumer_offset; From 3e5ab72614329df2a93a30447fe03a685723fddf Mon Sep 17 00:00:00 2001 From: Michael Agun Date: Mon, 21 Oct 2024 07:57:51 -0700 Subject: [PATCH 11/17] remove synchronization from consumer offset --- docs/RingBuffer.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index f5c801cb70..69d99e6a1f 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -166,7 +166,7 @@ uint64_t *cons_offset = (uint64_t*)rb_cons; // Consumer offset ptr (r/w mapped). const uint8_t *rb_data = ((const uint8_t*)rb_prod) + PAGESIZE; // Double-mapped rb data ptr (r only). uint64_t producer_offset = ReadAcquire64(prod_offset); -uint64_t consumer_offset = ReadAcquire64(cons_offset); +uint64_t consumer_offset = *cons_offset; // have_data used to track whether we should wait for notification or just keep reading. bool have_data = producer_offset > consumer_offset; @@ -219,7 +219,7 @@ For(;;) { // Update consumer offset (and pad record length to multiple of 8). 
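  // ((record->length + 7 & ~7) parses as ((record->length + 7) & ~7), since +
  //  binds tighter than &, rounding the length up to the next multiple of 8.)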
  consumer_offset += sizeof(rb_header_t) + (record->length + 7 & ~7);
-  *cons_offset = consumer_offset;
+  WriteNoFence64(cons_offset,consumer_offset);
 }

From 938d64209a789fa83c2c51fa12d1ffa3ed3f3912 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Mon, 21 Oct 2024 08:19:06 -0700
Subject: [PATCH 13/17] Add poll declaration and flag to control callback

---
 docs/RingBuffer.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index 5443129fb3..5cfe73e03f 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -39,7 +39,7 @@ ebpf_result_t
 ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, size_t flags)
 ```
 
-### Existing libbpf functions for callback consumer
+### Updated libbpf API for callback consumer
 
 The behaviour of these functions will be unchanged.
 
@@ -54,6 +54,11 @@ typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
 
 struct ring_buffer_opts {
   size_t sz; /* size of this struct, for forward/backward compatiblity */
+  uint64_t flags; /* ring buffer option flags */
+};
+
+enum ring_buffer_flags {
+    RINGBUF_FLAG_NO_AUTO_CALLBACK = (uint64_t)1 << 0 /* Don't automatically invoke callback for each record */
 };
 
-#define ring_buffer_opts__last_field sz
+#define ring_buffer_opts__last_field flags
 
@@ -72,6 +77,18 @@ struct ring_buffer *
 ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
     const struct ring_buffer_opts *opts);
 
+/**
+ * @brief poll ringbuf for new data (NOT CURRENTLY SUPPORTED)
+ * Poll for available data and consume records, if any are available.
+ * Returns number of records consumed (or INT_MAX, whichever is less), or
+ * negative number, if any of the registered callbacks returned error.
+ *
+ * @param[in] rb Pointer to ring buffer manager.
+ * @param[in] timeout_ms maximum time to wait for (in milliseconds).
+ *
+ */
+int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+
 /**
  * @brief Frees a ring buffer manager.
  *

From 407f3c1167d4b68b961f8a49db19a91db79ef8e1 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Wed, 20 Nov 2024 14:41:43 -0800
Subject: [PATCH 14/17] Update ringbuffer API.
--- docs/RingBuffer.md | 170 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 153 insertions(+), 17 deletions(-) diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md index 5cfe73e03f..5d3eb9c921 100644 --- a/docs/RingBuffer.md +++ b/docs/RingBuffer.md @@ -19,7 +19,30 @@ Mapped memory consumer: 3. Directly read records from the producer pages (and update consumer offset as we read). 4. Call `WaitForSingleObject`/`WaitForMultipleObject` as needed to wait for new data to be available. -## API Changes +### Differences from linux API + +Linux has only polling ring buffer consumers, even when callbacks are used. +On linux the user code can call `ring_buffer__consume()` to invoke the callback on all available data, +and `ring_buffer__poll()` to wait for data if needed and then consume available data. +Linux consumers can also directly read from the mapped memory by using `mmap()` to map the data +into user space and `ring_buffer__epoll_fd()` to get an epoll wait handle. + +On Windows asynchronous events are supported by default, +so nothing extra needs to be done for callbacks to be invoked. + +If the `RINGBUF_FLAG_NO_AUTO_CALLBACK` flag is set, callbacks will not automatically be called and `ring_buffer__poll()` +should be called to poll for available data and invoke the callback. On Windows a timeout of zero can be passed to +`ring_buffer__poll()` to get the same behaviour as `ring_buffer__consume()`. + +For direct memory mapped consumers on Windows, use `ebpf_ring_buffer_get_buffer` to get pointers to the producer and consumer +pages mapped into user space, and `ebpf_ring_buffer_get_wait_handle()` to get the SynchronizationEvent (auto-reset) KEVENT +to use with `WaitForSingleObject`/`WaitForMultipleObject`. + +Similar to the linux memory layout, the first page of the producer and consumer memory is the "producer page" and "consumer page", +which contain the 64 bit producer and consumer offsets as the first 8 bytes. +Only the producer may update the producer offset, and only the consumer may update the consumer offset. + +## ebpf-for-windows API Changes ### Changes to ebpf helper functions @@ -78,14 +101,20 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, const struct ring_buffer_opts *opts); /** - * @brief poll ringbuf for new data (NOT CURRENTLY SUPPORTED) + * @brief poll ringbuf for new data * Poll for available data and consume records, if any are available. * Returns number of records consumed (or INT_MAX, whichever is less), or * negative number, if any of the registered callbacks returned error. * + * If timeout_ms is zero, poll will not wait but only invoke the callback on records that are ready. + * If timeout_ms is -1, poll will wait until data is ready (no timeout). + * + * This function is only supported when the RINGBUF_FLAG_NO_AUTO_CALLBACK flag is set. + * * @param[in] rb Pointer to ring buffer manager. * @param[in] timeout_ms maximum time to wait for (in milliseconds). * + * @returns number of records consumed, INT_MAX, or a negative number on error */ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); @@ -93,7 +122,6 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); * @brief Frees a ring buffer manager. * * @param[in] rb Pointer to ring buffer manager to be freed. - * */ void ring_buffer__free(struct ring_buffer *rb); ``` @@ -104,24 +132,99 @@ void ring_buffer__free(struct ring_buffer *rb); /** * get pointers to mapped producer and consumer pages * + * @param[in] map_fd File descriptor to ring buffer map. 
* @param[out] producer pointer* to start of read-only mapped producer pages * @param[out] consumer pointer* to start of read-write mapped consumer page + * + * @returns EBPF_SUCCESS on success, or error */ ebpf_result_t ebpf_ring_buffer_get_buffer(fd_t map_fd, void **producer, void **consumer); /** * get the wait handle to use with WaitForSingleObject/WaitForMultipleObject * + * @param[in] map_fd File descriptor to ring buffer map. + * * @returns Wait handle */ HANDLE ebpf_ring_buffer_get_wait_handle(fd_t map_fd); ``` +### New user-space helpers for memory mapped consumer + +```c +/** + * The below helpers simplify memory-mapped consumer logic + * by abstracting operations on the producer and consumer offsets. + */ + +/** + * Get pointer to consumer offset from consumer page. + * + * @param[in] cons pointer* to start of read-write mapped consumer page + * + * @returns Pointer to consumer offset + */ +uint64_t* rb__consumer_offset(void *cons); + +/** + * Get pointer to producer offset from producer page. + * + * @param[in] prod pointer* to start of read-only mapped producer pages + * + * @returns Pointer to producer offset + */ +volatile const uint64_t* rb__producer_offset(volatile const void *prod); + +/** + * Check whether consumer offset == producer offset. + * + * Note that not empty doesn't mean data is ready, just that there are records that have been allocated. + * You still need to check the locked and discarded bits of the record header to determine if a record is ready. + * + * @param[in] cons pointer* to start of read-write mapped consumer page + * @param[in] prod pointer* to start of read-only mapped producer pages + * + * @returns 0 if ring buffer is empty, 1 otherwise + */ +int rb__empty(volatile const void *prod, const void *cons); + +/** + * Clear the ring buffer by flushing all completed and in-progress records. + * + * This helper just sets the consumer offset to the producer offset + * + * @param[in] prod pointer* to start of read-only mapped producer pages + * @param[in,out] cons pointer* to start of read-write mapped consumer page + */ +void rb__flush(volatile const void *prod, void *cons); + +/** + * Advance consumer offset to next record (if any) + * + * @param[in] prod pointer* to start of read-only mapped producer pages + * @param[in,out] cons pointer* to start of read-write mapped consumer page + */ +void rb__next_record(volatile const void *prod, void *cons); + +/** + * Get record at current ringbuffer offset. + + * @param[in] prod pointer* to start of read-only mapped producer pages + * @param[in] cons pointer* to start of read-write mapped consumer page + * + * @returns E_SUCCESS (0) if record ready, E_LOCKED if record still locked, E_EMPTY if consumer has caught up. + */ +int rb__get_record(volatile const void *prod, const void *cons, volatile const void** record); + +``` + ## Ringbuffer consumer ### mapped memory consumer example -This consumer directly accesses the records from the producer memory and directly updates the consumer offset to show the logic. +This consumer directly accesses the records from the producer memory and directly updates the consumer offset to show the logic. Normally user code should use the ring buffer helpers +(see second example below) to simplify the logic. ```c++ @@ -244,16 +347,19 @@ Exit: ### Simplified polling ringbuf consumer -This consumer uses some possible helpers to simplify the above logic (might also want timeout). +This consumer uses the newly added helpers to consume the ring buffer. 

 ## Ringbuffer consumer

 ### mapped memory consumer example

-This consumer directly accesses the records from the producer memory and directly updates the consumer offset to show the logic.
+This consumer directly accesses the records from the producer memory and directly updates the consumer offset to illustrate the logic.
+Normally user code should use the ring buffer helpers (see the second example below) instead.

 ```c++
@@ -244,16 +347,19 @@ Exit:

 ### Simplified polling ringbuf consumer

-This consumer uses some possible helpers to simplify the above logic (might also want timeout).
+This consumer uses the newly added helpers to consume the ring buffer.

 ```c
-//Note: the below theoretical helpers would only need access to producers/consumer pages (offsets and data pages)
-//rb__empty(prod,cons) - check whether consumer offset == consumer offset (!empty doesn't mean data is ready)
-//rb__flush(prod,cons) - just set consumer offset = producer offset (skips all completed/in-progress records)
-//rb__next_record(prod,cons) - advance consumer offset to next record
-//rb__get_record(prod,cons,&record) - get pointer to current record (if any), skipping discarded records
-//Returns E_SUCCESS (0) if record ready, E_LOCKED if record still locked, E_EMPTY if consumer has caught up.
+// Initialize wait handle for map.
+HANDLE wait_handle = ebpf_ring_buffer_get_wait_handle(map_fd);
+if (!wait_handle) {
+    // … log error …
+    goto Exit;
+}
+
+uint32_t wait_err = 0;
+// Consumer loop.
 for(;;) {
     for(; !(err=rb__get_record(prod,cons,&record)); rb__next_record(prod,cons)) {
         // Data is now in record->data[0 ... record->length-1].
         // … Do record handling here …
     }
@@ -267,12 +373,42 @@ for(;;) {
         // … log error …
         break;
     }
-    if (err == /* Handled errors, e.g. timeout */) {
-        // … log error and continue (we might still have record(s) to read) …
-    } else if (err != E_SUCCESS) {
-        // … log error …
-        break;
+    DWORD wait_status = WaitForSingleObject(wait_handle, INFINITE);
+
+    if (wait_status != WAIT_OBJECT_0) { // No notification
+        wait_err = GetLastError();
+        if (wait_err == /* terminal error */) {
+            // … log error …
+            break;
+        }
     }
 }
-return err;
+
 ```

+### Polling ring buffer consumer (linux-style)
+
+```c
+// Sample callback.
+int sample_cb(void *ctx, void *data, size_t size) {
+    // … business logic to handle record …
+    return 0;
+}
+
+// Consumer code.
+struct ring_buffer_opts opts;
+opts.sz = sizeof(opts);
+opts.flags = RINGBUF_FLAG_NO_AUTO_CALLBACK; // No automatic callbacks.
+
+fd_t map_fd = bpf_obj_get(rb_map_name.c_str());
+if (map_fd == ebpf_fd_invalid) return 1;
+
+struct ring_buffer *rb = ring_buffer__new(map_fd, sample_cb, NULL, &opts);
+if (rb == NULL) return 1;
+
+// Now loop as long as there isn't an error.
+while(ring_buffer__poll(rb, -1) >= 0) {
+    // Data processed by event callback.
+}
+
+ring_buffer__free(rb);
+```

From d2eb1b93c485d3d143eafd5aea760a580cc03123 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Wed, 8 Jan 2025 11:18:09 -0800
Subject: [PATCH 15/17] feedback

---
 docs/RingBuffer.md | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index 5d3eb9c921..40211bd31f 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -54,8 +54,8 @@ Only the producer may update the producer offset, and only the consumer may upda
  *
  * Wakeup options (flags):
  *   - 0 (auto/default): Notify if consumer has caught up.
- *   - BPF_RB_FORCE_WAKEUP - Always notify consumer.
- *   - BPF_RB_NO_WAKEUP - Never notify consumer.
+ *   - BPF_RB_FORCE_WAKEUP: Always notify consumer.
+ *   - BPF_RB_NO_WAKEUP: Never notify consumer.
  *
  */
 ebpf_result_t
@@ -76,7 +76,7 @@ struct ring_buffer;

 typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);

 struct ring_buffer_opts {
-    size_t sz; /* size of this struct, for forward/backward compatiblity */
+    size_t sz; /* size of this struct, for forward/backward compatibility */
     uint64_t flags; /* ring buffer option flags */
 };

@@ -103,8 +103,6 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
 /**
  * @brief poll ringbuf for new data
  * Poll for available data and consume records, if any are available.
- * Returns number of records consumed (or INT_MAX, whichever is less), or
- * negative number, if any of the registered callbacks returned error.
  *
  * If timeout_ms is zero, poll will not wait but only invoke the callback on records that are ready.
  * If timeout_ms is -1, poll will wait until data is ready (no timeout).
@@ -130,18 +128,19 @@ void ring_buffer__free(struct ring_buffer *rb);

 ```c
 /**
- * get pointers to mapped producer and consumer pages
+ * Get pointers to mapped producer and consumer pages.
  *
  * @param[in] map_fd File descriptor to ring buffer map.
  * @param[out] producer pointer to start of read-only mapped producer pages
  * @param[out] consumer pointer to start of read-write mapped consumer page
  *
- * @returns EBPF_SUCCESS on success, or error
+ * @retval EBPF_SUCCESS The operation was successful.
+ * @retval other An error occurred.
  */
 ebpf_result_t ebpf_ring_buffer_get_buffer(fd_t map_fd, void **producer, void **consumer);

 /**
- * get the wait handle to use with WaitForSingleObject/WaitForMultipleObjects
+ * Get the wait handle to use with WaitForSingleObject/WaitForMultipleObjects.
  *
  * @param[in] map_fd File descriptor to ring buffer map.
  *

From 067c90b85273d14ca76163314ef47c7a60aeb6e0 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Fri, 10 Jan 2025 14:00:07 -0800
Subject: [PATCH 16/17] Add SAL

---
 docs/RingBuffer.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index 40211bd31f..32fbb8edc8 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -59,7 +59,7 @@ Only the producer may update the producer offset, and only the consumer may upda
  *
  */
 ebpf_result_t
-ebpf_ring_buffer_output(ebpf_ring_buffer_t* ring, uint8_t* data, size_t length, size_t flags)
+ebpf_ring_buffer_output(_Inout_ ebpf_ring_buffer_t* ring, _In_reads_bytes_(length) uint8_t* data, size_t length, size_t flags)
 ```

 ### Updated libbpf API for callback consumer
@@ -73,7 +73,7 @@ to get direct access to the mapped ringbuffer memory.
 ```c
 struct ring_buffer;

-typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
+typedef int (*ring_buffer_sample_fn)(_Inout_ void *ctx, _In_reads_bytes_(size) void *data, size_t size);

 struct ring_buffer_opts {
     size_t sz; /* size of this struct, for forward/backward compatibility */
     uint64_t flags; /* ring buffer option flags */
 };
@@ -97,8 +97,8 @@ enum ring_buffer_flags {
  * @returns Pointer to ring buffer manager.
  */
 struct ring_buffer *
-ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
-    const struct ring_buffer_opts *opts);
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, _Inout_ void *ctx,
+    _In_ const struct ring_buffer_opts *opts);

 /**
  * @brief poll ringbuf for new data
@@ -114,14 +114,14 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, _Inout_ void *ctx,
  *
  * @returns number of records consumed, INT_MAX, or a negative number on error
  */
-int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+int ring_buffer__poll(_In_ struct ring_buffer *rb, int timeout_ms);

 /**
  * @brief Frees a ring buffer manager.
  *
  * @param[in] rb Pointer to ring buffer manager to be freed.
  */
-void ring_buffer__free(struct ring_buffer *rb);
+void ring_buffer__free(_Frees_ptr_opt_ struct ring_buffer *rb);
 ```
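+
+For illustration, a consumer callback matching the annotated typedef might be written like this (a sketch only; `my_sample_cb` is a hypothetical name):
+
+```c
+int my_sample_cb(_Inout_ void *ctx, _In_reads_bytes_(size) void *data, size_t size)
+{
+    // … handle one record of `size` bytes at `data` …
+    return 0;
+}
+```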

 ### New ebpf APIs for mapped memory consumer

 ```c
 /**
  * Get pointers to mapped producer and consumer pages.
  *
  * @param[in] map_fd File descriptor to ring buffer map.
  * @param[out] producer pointer to start of read-only mapped producer pages
  * @param[out] consumer pointer to start of read-write mapped consumer page
  *
  * @retval EBPF_SUCCESS The operation was successful.
  * @retval other An error occurred.
  */
-ebpf_result_t ebpf_ring_buffer_get_buffer(fd_t map_fd, void **producer, void **consumer);
+ebpf_result_t ebpf_ring_buffer_get_buffer(fd_t map_fd, _Out_ void **producer, _Out_ void **consumer);

 /**
  * Get the wait handle to use with WaitForSingleObject/WaitForMultipleObjects.

From 59523c98177e7a22dda3c85cbacab21833c4b591 Mon Sep 17 00:00:00 2001
From: Michael Agun
Date: Wed, 15 Jan 2025 16:42:58 -0800
Subject: [PATCH 17/17] PR feedback and notes on sync/async consumers.

---
 docs/RingBuffer.md | 77 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 23 deletions(-)

diff --git a/docs/RingBuffer.md b/docs/RingBuffer.md
index 32fbb8edc8..dd428f0879 100644
--- a/docs/RingBuffer.md
+++ b/docs/RingBuffer.md
@@ -7,11 +7,18 @@ Linux also supports memory-mapped polling consumers, which can't be directly sup

 The new API will support 2 consumer types: callbacks and direct access to the mapped producer memory (with poll to wait for data).

-Callback consumer:
+Asynchronous callback consumer:

-1. Call `ring_buffer__new` to set up callback.
+1. Call `ring_buffer__new` to set up callback with RINGBUF_FLAG_AUTO_CALLBACK specified.
+   - Note: automatic callbacks are the current default behavior.
+     This may change in the future (see #4142) to match the Linux behavior, so the flag should always be specified explicitly.
 2. The callback will be invoked for each record written to the ring buffer.

+Synchronous callback consumer:
+
+1. Call `ring_buffer__new` to set up callback with RINGBUF_FLAG_NO_AUTO_CALLBACK specified.
+2. Call `ring_buffer__poll()` to wait for data if needed and invoke the callback on all available records.
+
 Mapped memory consumer:

 1. Call `ebpf_ring_buffer_get_buffer` to get pointers to the mapped producer/consumer pages.
@@ -21,18 +28,38 @@
 ### Differences from linux API

+#### Poll and Consume
+
+On Linux `ring_buffer__poll()` and `ring_buffer__consume()` are used to invoke the callback.
+`poll()` waits for available data (or until timeout), then consumes all available records.
+`consume()` consumes all available records (without waiting).
+
+Windows will initially only support `ring_buffer__poll()`, which can be called with a timeout of zero
+to get the same behaviour as `ring_buffer__consume()`.
+
+#### Asynchronous callbacks
+
+On Linux ring buffers currently support only synchronous callbacks (using poll/consume).
+In contrast, Windows eBPF currently supports only asynchronous ring buffer callbacks,
+where the callback is automatically invoked when data is available.
+
+This proposal adds support for synchronous consumers via the `RINGBUF_FLAG_NO_AUTO_CALLBACK` flag.
+With the flag set, callbacks are not invoked automatically; instead, `ring_buffer__poll()`
+must be called to poll for available data and invoke the callback on each available record.
+On Windows a timeout of zero can be passed to `ring_buffer__poll()` to get the same behaviour as `ring_buffer__consume()` (consume available records without waiting).
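+
+For example (an illustrative sketch; assumes `rb` was created with `RINGBUF_FLAG_NO_AUTO_CALLBACK`):
+
+```c
+// Equivalent of linux ring_buffer__consume(): handle ready records without waiting.
+int consumed = ring_buffer__poll(rb, 0);
+
+// Wait indefinitely for data, then invoke the callback on available records.
+int polled = ring_buffer__poll(rb, -1);
+```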
+
+When #4142 is resolved the default behaviour will be changed from asynchronous (automatic) to synchronous callbacks,
+so consumers that want asynchronous callbacks should always specify `RINGBUF_FLAG_AUTO_CALLBACK` for forward-compatibility.
+
+#### Memory mapped consumers
+
+As an alternative to callbacks, Linux ring buffer consumers can directly access the
+ring buffer data by calling `mmap()` on a ring_buffer map fd to map the data into user space.
+`ring_buffer__epoll_fd()` is used (on Linux) to get an fd to use with epoll to wait for data.
+
+Windows doesn't have identical or directly compatible APIs to Linux mmap and epoll, so instead we will perform the mapping
+in the eBPF core and use KEVENTs to signal for new data.

 For direct memory mapped consumers on Windows, use `ebpf_ring_buffer_get_buffer` to get pointers to the producer and consumer
 pages mapped into user space, and `ebpf_ring_buffer_get_wait_handle()` to get the SynchronizationEvent (auto-reset) KEVENT
 to use with `WaitForSingleObject`/`WaitForMultipleObjects`.

@@ -80,8 +107,12 @@ struct ring_buffer_opts {
     uint64_t flags; /* ring buffer option flags */
 };

+// Ring buffer manager options.
+// - The default behaviour is currently automatic callbacks, but may change in the future per #4142.
+// - Only specify one of RINGBUF_FLAG_AUTO_CALLBACK or RINGBUF_FLAG_NO_AUTO_CALLBACK - specifying both is not allowed.
 enum ring_buffer_flags {
-    RINGBUF_FLAG_NO_AUTO_CALLBACK = (uint64_t)1 << 0 /* Don't automatically invoke callback for each record */
+    RINGBUF_FLAG_AUTO_CALLBACK = (uint64_t)1 << 0, /* Automatically invoke callback for each record */
+    RINGBUF_FLAG_NO_AUTO_CALLBACK = (uint64_t)1 << 1 /* Don't automatically invoke callback for each record */
 };
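+
+// Illustrative usage sketch (not part of the API surface): request asynchronous
+// callbacks explicitly for forward-compatibility, since the default may change per #4142.
+//
+//   struct ring_buffer_opts opts = { .sz = sizeof(opts), .flags = RINGBUF_FLAG_AUTO_CALLBACK };
+//   struct ring_buffer *rb = ring_buffer__new(map_fd, sample_cb, NULL, &opts);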

 #define ring_buffer_opts__last_field flags

@@ -107,7 +138,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, _Inout_ void *ctx,
  * If timeout_ms is zero, poll will not wait but only invoke the callback on records that are ready.
  * If timeout_ms is -1, poll will wait until data is ready (no timeout).
  *
- * This function is only supported when the RINGBUF_FLAG_NO_AUTO_CALLBACK flag is set.
+ * This function is only supported when automatic callbacks are disabled (see RINGBUF_FLAG_NO_AUTO_CALLBACK).
  *
  * @param[in] rb Pointer to ring buffer manager.
  * @param[in] timeout_ms maximum time to wait for (in milliseconds).
@@ -164,7 +195,7 @@ HANDLE ebpf_ring_buffer_get_wait_handle(fd_t map_fd);
  *
  * @returns Pointer to consumer offset
  */
-uint64_t* rb__consumer_offset(void *cons);
+uint64_t* ebpf_ring_buffer_consumer_offset(void *cons);

 /**
  * Get pointer to producer offset from producer page.
@@ -173,7 +204,7 @@ uint64_t* rb__consumer_offset(void *cons);
  *
  * @returns Pointer to producer offset
  */
-volatile const uint64_t* rb__producer_offset(volatile const void *prod);
+volatile const uint64_t* ebpf_ring_buffer_producer_offset(volatile const void *prod);

 /**
  * Check whether consumer offset == producer offset.
@@ -186,7 +217,7 @@ volatile const uint64_t* rb__producer_offset(volatile const void *prod);
  *
  * @returns 1 if the ring buffer is empty, 0 otherwise
  */
-int rb__empty(volatile const void *prod, const void *cons);
+int ebpf_ring_buffer_empty(volatile const void *prod, const void *cons);

 /**
  * Clear the ring buffer by flushing all completed and in-progress records.
@@ -196,7 +227,7 @@ int rb__empty(volatile const void *prod, const void *cons);
  * @param[in] prod pointer to start of read-only mapped producer pages
  * @param[in,out] cons pointer to start of read-write mapped consumer page
  */
-void rb__flush(volatile const void *prod, void *cons);
+void ebpf_ring_buffer_flush(volatile const void *prod, void *cons);

 /**
  * Advance consumer offset to next record (if any).
@@ -204,7 +235,7 @@ void rb__flush(volatile const void *prod, void *cons);
  * @param[in] prod pointer to start of read-only mapped producer pages
  * @param[in,out] cons pointer to start of read-write mapped consumer page
  */
-void rb__next_record(volatile const void *prod, void *cons);
+void ebpf_ring_buffer_next_record(volatile const void *prod, void *cons);

 /**
  * Get record at current ringbuffer offset.
@@ -214,7 +245,7 @@ void rb__next_record(volatile const void *prod, void *cons);
  *
  * @returns E_SUCCESS (0) if record ready, E_LOCKED if record still locked, E_EMPTY if consumer has caught up.
  */
-int rb__get_record(volatile const void *prod, const void *cons, volatile const void** record);
+int ebpf_ring_buffer_get_record(volatile const void *prod, const void *cons, volatile const void** record);

 ```

@@ -346,7 +377,7 @@ Exit:

 ### Simplified polling ringbuf consumer

-This consumer uses the newly added helpers to consume the ring buffer.
+This consumer uses the helpers to consume the ring buffer.

 ```c
 // Initialize wait handle for map.
@@ -360,12 +391,12 @@ uint32_t wait_err = 0;

 // Consumer loop.
 for(;;) {
-    for(; !(err=rb__get_record(prod,cons,&record)); rb__next_record(prod,cons)) {
+    for(; !(err=ebpf_ring_buffer_get_record(prod,cons,&record)); ebpf_ring_buffer_next_record(prod,cons)) {
         // Data is now in record->data[0 ... record->length-1].
         // … Do record handling here …
     }
     // 3 cases for err:
-    // 1) Ringbuf empty - Wait on handle, or poll for !rb__empty(prod,cons).
+    // 1) Ringbuf empty - Wait on handle, or poll for !ebpf_ring_buffer_empty(prod,cons).
     // 2) Record locked - Wait on handle, or spin/poll on header lock bit.
     // 3) Corrupt record or consumer offset - Break (could flush to continue reading from next good record).
     if (err!=E_EMPTY && err!=E_LOCKED) {