diff --git a/docs/eBpfExtensions.md b/docs/eBpfExtensions.md
index 175b856aa0..8944d4f128 100644
--- a/docs/eBpfExtensions.md
+++ b/docs/eBpfExtensions.md
@@ -101,17 +101,17 @@ structure from provided data and context buffers.
 context structure and populates the returned data and context buffers.
 * `required_irql`: IRQL at which the eBPF program is invoked by bpf_prog_test_run_opts.
 * `capabilities`: 32-bit integer describing the optional capabilities / features supported by the extension.
-  * `supports_context_header`: Flag indicating extension supports adding a context header at the start of each context passed to the eBPF program.
+  * `supports_context_header`: Required flag indicating that the extension adds a context header at the start of each context passed to the eBPF program. This flag must be set.

 **Capabilities**

 `supports_context_header`: Flag indicating that extension supports adding a context header at the start of each context passed to the eBPF program.

-An extension can choose to opt in to support context header at the start of each program context structure that is
-passed to the eBPF program. To support this feature, the extension can use the macro `EBPF_CONTEXT_HEADER` to include
-the context header at the start of the program context structure. Even when the context header is added, the pointer
-passed to the eBPF program is after the context header.
+All extensions are required to support this so that the core can store runtime state needed by helpers.
+To support this feature, the extension uses the macro `EBPF_CONTEXT_HEADER` to include
+the context header at the start of the program context structure. The context pointer passed to the
+eBPF program points immediately after the context header.

 *Example*

@@ -135,8 +135,9 @@ typedef struct _sample_program_context_header
     sample_program_context_t context;
 } sample_program_context_header_t;
 ```
-The extension passes a pointer to `context` inside `sample_program_context_header_t`, and not a pointer to
-`sample_program_context_header_t`, when invoking the eBPF program.
+The extension passes a pointer to `context` inside `sample_program_context_header_t`, not a pointer to
+`sample_program_context_header_t`, when invoking the eBPF program. The header is not accessible
+to the program.

 #### `ebpf_program_info_t` Struct
 The various fields of this structure should be set as follows:
diff --git a/include/bpf_helper_defs.h b/include/bpf_helper_defs.h
index 63764df330..6a8a7f6170 100644
--- a/include/bpf_helper_defs.h
+++ b/include/bpf_helper_defs.h
@@ -495,3 +495,22 @@ EBPF_HELPER(uint64_t, bpf_ktime_get_ms, ());
 #ifndef __doxygen
 #define bpf_ktime_get_ms ((bpf_ktime_get_ms_t)BPF_FUNC_ktime_get_ms)
 #endif
+
+/**
+ * @brief Copy data into the perf event array map.
+ *
+ * @param[in] ctx Program context.
+ * @param[in, out] perf_event_array Pointer to perf event array map.
+ * @param[in] flags Flags selecting the target CPU ring and indicating if notification for new data availability should be sent.
+ * @param[in] data Data to copy into the perf event array map.
+ * @param[in] size Length of data.
+ * @retval 0 The operation was successful.
+ * @retval -EBPF_INVALID_ARGUMENT One or more parameters are invalid.
+ * @retval -EBPF_OPERATION_NOT_SUPPORTED Operation not supported on this program or map.
+ * @retval -EBPF_NO_MEMORY Unable to allocate resources for this entry.
+ * @retval -EBPF_OUT_OF_SPACE Map is full.
+ */
+EBPF_HELPER(int, bpf_perf_event_output, (void* ctx, void* perf_event_array, uint64_t flags, void* data, uint64_t size));
+#ifndef __doxygen
+#define bpf_perf_event_output ((bpf_perf_event_output_t)BPF_FUNC_perf_event_output)
+#endif
diff --git a/include/ebpf_api.h b/include/ebpf_api.h
index 0040498d38..2e6f573b14 100644
--- a/include/ebpf_api.h
+++ b/include/ebpf_api.h
@@ -96,11 +96,11 @@ extern "C"
      * @deprecated Use ebpf_enumerate_programs() instead.
      */
     __declspec(deprecated("Use ebpf_enumerate_programs() instead.")) _Must_inspect_result_ ebpf_result_t
-    ebpf_enumerate_sections(
-        _In_z_ const char* file,
-        bool verbose,
-        _Outptr_result_maybenull_ ebpf_section_info_t** infos,
-        _Outptr_result_maybenull_z_ const char** error_message) EBPF_NO_EXCEPT;
+    ebpf_enumerate_sections(
+        _In_z_ const char* file,
+        bool verbose,
+        _Outptr_result_maybenull_ ebpf_section_info_t** infos,
+        _Outptr_result_maybenull_z_ const char** error_message) EBPF_NO_EXCEPT;

     /**
      * @brief Free memory returned from \ref ebpf_enumerate_programs.
@@ -114,8 +114,8 @@ extern "C"
      * @param[in] data Memory to free.
      * @deprecated Use ebpf_free_programs() instead.
      */
-    __declspec(deprecated("Use ebpf_free_programs() instead.")) void ebpf_free_sections(
-        _In_opt_ _Post_invalid_ ebpf_section_info_t* infos) EBPF_NO_EXCEPT;
+    __declspec(deprecated("Use ebpf_free_programs() instead.")) void
+    ebpf_free_sections(_In_opt_ _Post_invalid_ ebpf_section_info_t* infos) EBPF_NO_EXCEPT;

     /**
      * @brief Convert an eBPF program to human readable byte code.
@@ -144,7 +144,8 @@ extern "C"
      * @param[out] error_message On failure points to a text description of
      * the error.
      */
-    __declspec(deprecated("Use ebpf_api_elf_disassemble_program() instead.")) uint32_t ebpf_api_elf_disassemble_section(
+    __declspec(deprecated("Use ebpf_api_elf_disassemble_program() instead.")) uint32_t
+    ebpf_api_elf_disassemble_section(
         _In_z_ const char* file,
         _In_z_ const char* section,
         _Outptr_result_maybenull_z_ const char** disassembly,
@@ -578,6 +579,22 @@ extern "C"
     ebpf_ring_buffer_map_write(
         fd_t ring_buffer_map_fd, _In_reads_bytes_(data_length) const void* data, size_t data_length) EBPF_NO_EXCEPT;

+    /**
+     * @brief Write data into the perf event array map.
+     *
+     * @param[in] perf_event_array_map_fd Perf event array map file descriptor.
+     * @param[in] data Pointer to data to be written.
+     * @param[in] data_length Length of data to be written.
+     * @retval EBPF_SUCCESS Successfully wrote record into perf event array.
+     * @retval EBPF_OUT_OF_SPACE Unable to output to perf event array due to inadequate space.
+     * @retval EBPF_NO_MEMORY Out of memory.
+ */ + _Must_inspect_result_ ebpf_result_t + ebpf_perf_event_array_map_write( + fd_t perf_event_array_map_fd, + _In_reads_bytes_(data_length) const void* data, + size_t data_length) EBPF_NO_EXCEPT; + #ifdef __cplusplus } #endif diff --git a/include/ebpf_core_structs.h b/include/ebpf_core_structs.h index c59acb285c..7fcaa9585a 100644 --- a/include/ebpf_core_structs.h +++ b/include/ebpf_core_structs.h @@ -32,6 +32,13 @@ typedef struct _ebpf_ring_buffer_map_async_query_result size_t consumer; } ebpf_ring_buffer_map_async_query_result_t; +typedef struct _ebpf_perf_event_array_map_async_query_result +{ + size_t producer; + size_t consumer; + size_t lost_count; +} ebpf_perf_event_array_map_async_query_result_t; + typedef enum _ebpf_object_type { EBPF_OBJECT_UNKNOWN, diff --git a/include/ebpf_structs.h b/include/ebpf_structs.h index 56b0238a11..fecc3de997 100644 --- a/include/ebpf_structs.h +++ b/include/ebpf_structs.h @@ -21,20 +21,22 @@ typedef enum bpf_map_type BPF_MAP_TYPE_ARRAY = 2, ///< Array, where the map key is the array index. BPF_MAP_TYPE_PROG_ARRAY = 3, ///< Array of program fds usable with bpf_tail_call, where the map key is the array index. - BPF_MAP_TYPE_PERCPU_HASH = 4, ///< Per-CPU hash table. - BPF_MAP_TYPE_PERCPU_ARRAY = 5, ///< Per-CPU array. - BPF_MAP_TYPE_HASH_OF_MAPS = 6, ///< Hash table, where the map value is another map. - BPF_MAP_TYPE_ARRAY_OF_MAPS = 7, ///< Array, where the map value is another map. - BPF_MAP_TYPE_LRU_HASH = 8, ///< Least-recently-used hash table. - BPF_MAP_TYPE_LPM_TRIE = 9, ///< Longest prefix match trie. - BPF_MAP_TYPE_QUEUE = 10, ///< Queue. - BPF_MAP_TYPE_LRU_PERCPU_HASH = 11, ///< Per-CPU least-recently-used hash table. - BPF_MAP_TYPE_STACK = 12, ///< Stack. - BPF_MAP_TYPE_RINGBUF = 13 ///< Ring buffer. + BPF_MAP_TYPE_PERCPU_HASH = 4, ///< Per-CPU hash table. + BPF_MAP_TYPE_PERCPU_ARRAY = 5, ///< Per-CPU array. + BPF_MAP_TYPE_HASH_OF_MAPS = 6, ///< Hash table, where the map value is another map. + BPF_MAP_TYPE_ARRAY_OF_MAPS = 7, ///< Array, where the map value is another map. + BPF_MAP_TYPE_LRU_HASH = 8, ///< Least-recently-used hash table. + BPF_MAP_TYPE_LPM_TRIE = 9, ///< Longest prefix match trie. + BPF_MAP_TYPE_QUEUE = 10, ///< Queue. + BPF_MAP_TYPE_LRU_PERCPU_HASH = 11, ///< Per-CPU least-recently-used hash table. + BPF_MAP_TYPE_STACK = 12, ///< Stack. + BPF_MAP_TYPE_RINGBUF = 13, ///< Ring buffer. + BPF_MAP_TYPE_PERF_EVENT_ARRAY = 14, ///< Perf event array. 
} ebpf_map_type_t;

-#define BPF_MAP_TYPE_PER_CPU(X) \
-    ((X) == BPF_MAP_TYPE_PERCPU_HASH || (X) == BPF_MAP_TYPE_PERCPU_ARRAY || (X) == BPF_MAP_TYPE_LRU_PERCPU_HASH)
+#define BPF_MAP_TYPE_PER_CPU(X)                                                                                     \
+    ((X) == BPF_MAP_TYPE_PERCPU_HASH || (X) == BPF_MAP_TYPE_PERCPU_ARRAY || (X) == BPF_MAP_TYPE_LRU_PERCPU_HASH ||  \
+     (X) == BPF_MAP_TYPE_PERF_EVENT_ARRAY)

 static const char* const _ebpf_map_type_names[] = {
     BPF_ENUM_TO_STRING(BPF_MAP_TYPE_UNSPEC),
@@ -51,6 +53,7 @@ static const char* const _ebpf_map_type_names[] = {
     BPF_ENUM_TO_STRING(BPF_MAP_TYPE_LRU_PERCPU_HASH),
     BPF_ENUM_TO_STRING(BPF_MAP_TYPE_STACK),
     BPF_ENUM_TO_STRING(BPF_MAP_TYPE_RINGBUF),
+    BPF_ENUM_TO_STRING(BPF_MAP_TYPE_PERF_EVENT_ARRAY),
 };

 static const char* const _ebpf_map_display_names[] = {
@@ -68,6 +71,7 @@ static const char* const _ebpf_map_display_names[] = {
     "lru_percpu_hash",
     "stack",
     "ringbuf",
+    "perf_event_array",
 };

 typedef enum ebpf_map_option
@@ -166,6 +170,7 @@ typedef enum
     BPF_FUNC_strnlen_s = 29,         ///< \ref bpf_strnlen_s
     BPF_FUNC_ktime_get_boot_ms = 30, ///< \ref bpf_ktime_get_boot_ms
     BPF_FUNC_ktime_get_ms = 31,      ///< \ref bpf_ktime_get_ms
+    BPF_FUNC_perf_event_output = 32, ///< \ref bpf_perf_event_output
 } ebpf_helper_id_t;

 // Cross-platform BPF program types.
@@ -408,3 +413,11 @@ struct bpf_prog_info
     uint32_t pinned_path_count; ///< Number of pinned paths.
     uint32_t link_count;        ///< Number of attached links.
 };
+
+/* BPF_FUNC_perf_event_output flags. */
+#define EBPF_MAP_FLAG_INDEX_MASK 0xffffffffULL
+#define EBPF_MAP_FLAG_INDEX_SHIFT 0
+#define EBPF_MAP_FLAG_CURRENT_CPU EBPF_MAP_FLAG_INDEX_MASK
+/* BPF_FUNC_perf_event_output for program types with data pointer in context. */
+#define EBPF_MAP_FLAG_CTXLEN_SHIFT 32
+#define EBPF_MAP_FLAG_CTXLEN_MASK (0xfffffULL << EBPF_MAP_FLAG_CTXLEN_SHIFT)
\ No newline at end of file
diff --git a/libs/api/api_internal.h b/libs/api/api_internal.h
index 0096acf943..16c17786f4 100644
--- a/libs/api/api_internal.h
+++ b/libs/api/api_internal.h
@@ -14,6 +14,7 @@
 struct bpf_object;

 typedef struct _ebpf_ring_buffer_subscription ring_buffer_subscription_t;
+typedef struct _ebpf_perf_event_array_subscription perf_event_array_subscription_t;

 typedef struct bpf_program
 {
@@ -659,6 +660,37 @@ ebpf_ring_buffer_map_subscribe(
 bool
 ebpf_ring_buffer_map_unsubscribe(_In_ _Post_invalid_ ring_buffer_subscription_t* subscription) noexcept;

+typedef void (*perf_buffer_sample_fn)(void* ctx, int cpu, void* data, uint32_t size);
+typedef void (*perf_buffer_lost_fn)(void* ctx, int cpu, uint64_t cnt);
+
+/**
+ * @brief Subscribe for notifications from the input perf event array map.
+ *
+ * @param[in] perf_event_array_map_fd File descriptor to the perf event array map.
+ * @param[in, out] callback_context Pointer to supplied context to be passed in notification callback.
+ * @param[in] sample_callback Function pointer to notification handler.
+ * @param[in] lost_callback Function pointer to lost record notification handler.
+ * @param[out] subscription Opaque pointer to perf event array subscription object.
+ *
+ * @retval EBPF_SUCCESS The operation was successful.
+ * @retval EBPF_NO_MEMORY Out of memory.
+ */
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_subscribe(
+    fd_t perf_event_array_map_fd,
+    _Inout_opt_ void* callback_context,
+    perf_buffer_sample_fn sample_callback,
+    perf_buffer_lost_fn lost_callback,
+    _Outptr_ perf_event_array_subscription_t** subscription) noexcept;
+
+/**
+ * @brief Unsubscribe from the perf event array map event notifications.
+ *
+ * @param[in] subscription Pointer to perf event array subscription to be canceled.
+ *
+ * @return True if the subscription was canceled, false otherwise.
+ */
+bool
+ebpf_perf_event_array_map_unsubscribe(_In_ _Post_invalid_ perf_event_array_subscription_t* subscription) noexcept;
+
 /**
  * @brief Get list of programs and stats in an ELF eBPF file.
  * @param[in] file Name of ELF file containing eBPF program.
diff --git a/libs/api/ebpf_api.cpp b/libs/api/ebpf_api.cpp
index 1113c01338..fe3b6d97a0 100644
--- a/libs/api/ebpf_api.cpp
+++ b/libs/api/ebpf_api.cpp
@@ -10,6 +10,7 @@
 #include "bpf2c.h"
 #include "device_helper.hpp"
 #include "ebpf_api.h"
+#include "ebpf_perf_event_array_record.h"
 #include "ebpf_protocol.h"
 #include "ebpf_ring_buffer_record.h"
 #include "ebpf_serialize.h"
@@ -4545,6 +4546,199 @@ ebpf_ring_buffer_map_unsubscribe(_In_ _Post_invalid_ ring_buffer_subscription_t*
 }
 CATCH_NO_MEMORY_BOOL

+typedef struct _ebpf_perf_event_array_subscription
+{
+    _ebpf_perf_event_array_subscription()
+        : unsubscribed(false), perf_event_array_map_handle(ebpf_handle_invalid), callback_context(nullptr),
+          sample_callback(nullptr), lost_callback(nullptr), buffer(nullptr), cpu_id(0), reply({}),
+          async_ioctl_completion(nullptr), async_ioctl_failed(false)
+    {
+    }
+    ~_ebpf_perf_event_array_subscription() { EBPF_LOG_ENTRY(); }
+    std::mutex lock;
+    _Write_guarded_by_(lock) boolean unsubscribed;
+    ebpf_handle_t perf_event_array_map_handle;
+    void* callback_context;
+    perf_buffer_sample_fn sample_callback;
+    perf_buffer_lost_fn lost_callback;
+    uint8_t* buffer;
+    uint32_t cpu_id;
+    ebpf_operation_perf_event_array_map_async_query_reply_t reply;
+    _Write_guarded_by_(lock) async_ioctl_completion_t* async_ioctl_completion;
+    _Write_guarded_by_(lock) bool async_ioctl_failed;
+} ebpf_perf_event_array_subscription_t;
+
+typedef std::unique_ptr<ebpf_perf_event_array_subscription_t> ebpf_perf_event_array_subscription_ptr;
+
+static ebpf_result_t
+_ebpf_perf_event_array_map_async_query_completion(_Inout_ void* completion_context) NO_EXCEPT_TRY
+{
+    EBPF_LOG_ENTRY();
+    ebpf_assert(completion_context);
+
+    ebpf_perf_event_array_subscription_t* subscription =
+        reinterpret_cast<ebpf_perf_event_array_subscription_t*>(completion_context);
+
+    const uint32_t cpu_id = subscription->cpu_id;
+
+    size_t consumer = 0;
+    size_t producer = 0;
+    size_t lost_count = 0;
+
+    ebpf_result_t result = EBPF_SUCCESS;
+    // Check the result of the completed async IOCTL call.
+    result = get_async_ioctl_result(subscription->async_ioctl_completion);
+
+    if (result != EBPF_SUCCESS) {
+        if (result != EBPF_CANCELED) {
+            // The async IOCTL was not canceled, but completed with a failure status. Mark the subscription object as
+            // such, so that it gets freed when the user eventually unsubscribes.
+            std::scoped_lock lock{subscription->lock};
+            subscription->async_ioctl_failed = true;
+            EBPF_RETURN_RESULT(result);
+        } else {
+            // User has canceled subscription. Invoke user specified callback for the final time with NULL record. This
+            // will let the user app clean up its state.
+            TraceLoggingWrite(
+                ebpf_tracelog_provider,
+                EBPF_TRACELOG_EVENT_GENERIC_MESSAGE,
+                TraceLoggingLevel(WINEVENT_LEVEL_INFO),
+                TraceLoggingKeyword(EBPF_TRACELOG_KEYWORD_API),
+                TraceLoggingString(
+                    __FUNCTION__, "perf_event_array map async query completion invoked with EBPF_CANCELED."));

+            subscription->sample_callback(subscription->callback_context, cpu_id, nullptr, 0);
+        }
+    } else {
+        // Async IOCTL operation returned with success status. Read the perf event array records and indicate them to
+        // the subscriber.
+
+        size_t perf_event_array_size = 0;
+        uint32_t dummy;
+
+        result = _get_map_descriptor_properties(
+            subscription->perf_event_array_map_handle,
+            &dummy,
+            &dummy,
+            &dummy,
+            reinterpret_cast<uint32_t*>(&perf_event_array_size));
+        if (result != EBPF_SUCCESS) {
+            EBPF_RETURN_RESULT(result);
+        }
+
+        ebpf_operation_perf_event_array_map_async_query_reply_t* reply = &subscription->reply;
+        ebpf_perf_event_array_map_async_query_result_t* async_query_result = &reply->async_query_result;
+        consumer = async_query_result->consumer;
+        producer = async_query_result->producer;
+        lost_count = async_query_result->lost_count;
+        if (lost_count > 0) {
+            subscription->lost_callback(subscription->callback_context, cpu_id, lost_count);
+        }
+        for (;;) {
+            auto record =
+                ebpf_perf_event_array_next_record(subscription->buffer, perf_event_array_size, consumer, producer);
+
+            if (record == nullptr) {
+                // No more records.
+                break;
+            }
+
+            subscription->sample_callback(
+                subscription->callback_context,
+                cpu_id,
+                const_cast<void*>(reinterpret_cast<const void*>(record->data)),
+                record->header.length - EBPF_OFFSET_OF(ebpf_perf_event_array_record_t, data));
+            consumer += record->header.length;
+        }
+    }
+
+    bool free_subscription = false;
+    {
+        std::scoped_lock lock{subscription->lock};
+
+        if (subscription->unsubscribed) {
+            // If the user has unsubscribed, this is the final callback. Mark the
+            // subscription context for deletion.
+            result = EBPF_CANCELED;
+            free_subscription = true;
+        } else {
+            // If still subscribed, post the next async IOCTL call while holding the lock. It is safe to do so as the
+            // async call is not blocking.
+
+            // First, register wait for the new async IOCTL operation completion.
+            result = register_wait_async_ioctl_operation(subscription->async_ioctl_completion);
+            if (result != EBPF_SUCCESS) {
+                EBPF_RETURN_RESULT(result);
+            }
+
+            // Then, post the async IOCTL.
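+            // The async query request identifies the target per-CPU ring and carries the consumer offset so that
+            // the core can release the records consumed so far.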
+            ebpf_operation_perf_event_array_map_async_query_request_t async_query_request{
+                sizeof(async_query_request),
+                ebpf_operation_id_t::EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_ASYNC_QUERY,
+                subscription->perf_event_array_map_handle,
+                cpu_id,
+                consumer};
+            memset(&subscription->reply, 0, sizeof(ebpf_operation_perf_event_array_map_async_query_reply_t));
+            result = win32_error_code_to_ebpf_result(invoke_ioctl(
+                async_query_request,
+                subscription->reply,
+                get_async_ioctl_operation_overlapped(subscription->async_ioctl_completion)));
+            if (result != EBPF_SUCCESS) {
+                if (result == EBPF_PENDING) {
+                    result = EBPF_SUCCESS;
+                } else {
+                    subscription->async_ioctl_failed = true;
+                }
+            }
+        }
+    }
+    if (free_subscription) {
+        delete subscription;
+    }
+
+    EBPF_RETURN_RESULT(result);
+}
+CATCH_NO_MEMORY_EBPF_RESULT
+
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_subscribe(
+    fd_t map_fd,
+    _Inout_opt_ void* callback_context,
+    perf_buffer_sample_fn sample_callback,
+    perf_buffer_lost_fn lost_callback,
+    _Outptr_ perf_event_array_subscription_t** subscription) NO_EXCEPT_TRY
+{
+    EBPF_LOG_ENTRY();
+    UNREFERENCED_PARAMETER(map_fd);
+    UNREFERENCED_PARAMETER(callback_context);
+    UNREFERENCED_PARAMETER(sample_callback);
+    UNREFERENCED_PARAMETER(lost_callback);
+    UNREFERENCED_PARAMETER(subscription);
+    EBPF_RETURN_RESULT(EBPF_OPERATION_NOT_SUPPORTED);
+}
+CATCH_NO_MEMORY_EBPF_RESULT
+
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_write(fd_t map_fd, _In_reads_bytes_(data_length) const void* data, size_t data_length)
+    NO_EXCEPT_TRY
+{
+    EBPF_LOG_ENTRY();
+    UNREFERENCED_PARAMETER(map_fd);
+    UNREFERENCED_PARAMETER(data);
+    UNREFERENCED_PARAMETER(data_length);
+    EBPF_RETURN_RESULT(EBPF_OPERATION_NOT_SUPPORTED);
+}
+CATCH_NO_MEMORY_EBPF_RESULT
+
+bool
+ebpf_perf_event_array_map_unsubscribe(_In_ _Post_invalid_ perf_event_array_subscription_t* subscription) NO_EXCEPT_TRY
+{
+    EBPF_LOG_ENTRY();
+    UNREFERENCED_PARAMETER(subscription);
+    EBPF_RETURN_BOOL(false);
+}
+CATCH_NO_MEMORY_BOOL
+
 _Must_inspect_result_ ebpf_result_t
 ebpf_program_test_run(fd_t program_fd, _Inout_ ebpf_test_run_options_t* options) NO_EXCEPT_TRY
 {
diff --git a/libs/execution_context/ebpf_core.c b/libs/execution_context/ebpf_core.c
index ae366920c3..a4af02016d 100644
--- a/libs/execution_context/ebpf_core.c
+++ b/libs/execution_context/ebpf_core.c
@@ -107,6 +107,10 @@ _ebpf_core_get_time_since_boot_ms();
 static uint64_t
 _ebpf_core_get_time_ms();

+static int
+_ebpf_core_perf_event_output(
+    _In_ void* ctx, _Inout_ ebpf_map_t* map, uint64_t flags, _In_reads_bytes_(length) uint8_t* data, size_t length);
+
 #define EBPF_CORE_GLOBAL_HELPER_EXTENSION_VERSION 0

 static ebpf_program_type_descriptor_t _ebpf_global_helper_program_descriptor = {
@@ -152,6 +156,8 @@ static const void* _ebpf_general_helpers[] = {
     (void*)&_ebpf_core_strlen_s,
     (void*)&_ebpf_core_get_time_since_boot_ms,
     (void*)&_ebpf_core_get_time_ms,
+    // Perf event array (perf buffer) output.
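+    // Note: the position of this entry must correspond to BPF_FUNC_perf_event_output (32) in ebpf_helper_id_t.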
+ (void*)&_ebpf_core_perf_event_output, }; static const ebpf_helper_function_addresses_t _ebpf_global_helper_function_dispatch_table = { @@ -2116,6 +2122,37 @@ _ebpf_core_protocol_ring_buffer_map_query_buffer( EBPF_RETURN_RESULT(result); } +static ebpf_result_t +_ebpf_core_protocol_perf_event_array_map_query_buffer( + _In_ const ebpf_operation_perf_event_array_map_query_buffer_request_t* request, + _Out_ ebpf_operation_perf_event_array_map_query_buffer_reply_t* reply) +{ + EBPF_LOG_ENTRY(); + + ebpf_map_t* map = NULL; + ebpf_result_t result = + EBPF_OBJECT_REFERENCE_BY_HANDLE(request->map_handle, EBPF_OBJECT_MAP, (ebpf_core_object_t**)&map); + if (result != EBPF_SUCCESS) { + goto Exit; + } + + if (ebpf_map_get_definition(map)->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + result = EBPF_INVALID_ARGUMENT; + EBPF_LOG_MESSAGE_ERROR( + EBPF_TRACELOG_LEVEL_ERROR, + EBPF_TRACELOG_KEYWORD_CORE, + "perf event array query buffer operation called on a map that is not of the perf event array type.", + result); + goto Exit; + } + result = ebpf_perf_event_array_map_query_buffer( + map, request->cpu_id, (uint8_t**)(uintptr_t*)&reply->buffer_address, &reply->consumer_offset); + +Exit: + EBPF_OBJECT_RELEASE_REFERENCE((ebpf_core_object_t*)map); + EBPF_RETURN_RESULT(result); +} + static ebpf_result_t _ebpf_core_protocol_ring_buffer_map_async_query( _In_ const ebpf_operation_ring_buffer_map_async_query_request_t* request, @@ -2207,6 +2244,85 @@ _ebpf_core_map_find_element(ebpf_map_t* map, const uint8_t* key) } } +static ebpf_result_t +_ebpf_core_protocol_perf_event_array_map_async_query( + _In_ const ebpf_operation_perf_event_array_map_async_query_request_t* request, + _Inout_updates_bytes_(reply_length) ebpf_operation_perf_event_array_map_async_query_reply_t* reply, + uint16_t reply_length, + _Inout_ void* async_context) +{ + UNREFERENCED_PARAMETER(reply_length); + + ebpf_map_t* map = NULL; + bool reference_taken = FALSE; + + ebpf_result_t result = + EBPF_OBJECT_REFERENCE_BY_HANDLE(request->map_handle, EBPF_OBJECT_MAP, (ebpf_core_object_t**)&map); + if (result != EBPF_SUCCESS) { + goto Exit; + } + reference_taken = TRUE; + + if (ebpf_map_get_definition(map)->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + result = EBPF_INVALID_ARGUMENT; + EBPF_LOG_MESSAGE_ERROR( + EBPF_TRACELOG_LEVEL_ERROR, + EBPF_TRACELOG_KEYWORD_CORE, + "perf event array async query operation called on a map that is not of the perf event array type.", + result); + goto Exit; + } + + // Return buffer already consumed by caller in previous notification. 
+    result = ebpf_perf_event_array_map_return_buffer(map, request->cpu_id, request->consumer_offset);
+    if (result != EBPF_SUCCESS) {
+        goto Exit;
+    }
+
+    reply->header.id = EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_ASYNC_QUERY;
+    reply->header.length = sizeof(ebpf_operation_perf_event_array_map_async_query_reply_t);
+    result = ebpf_perf_event_array_map_async_query(map, request->cpu_id, &reply->async_query_result, async_context);
+
+Exit:
+    if (reference_taken) {
+        EBPF_OBJECT_RELEASE_REFERENCE((ebpf_core_object_t*)map);
+    }
+    return result;
+}
+
+static ebpf_result_t
+_ebpf_core_protocol_perf_event_array_map_write_data(
+    _In_ const ebpf_operation_perf_event_array_map_write_data_request_t* request)
+{
+    ebpf_map_t* map = NULL;
+    size_t data_length = 0;
+    ebpf_result_t result =
+        EBPF_OBJECT_REFERENCE_BY_HANDLE(request->map_handle, EBPF_OBJECT_MAP, (ebpf_core_object_t**)&map);
+    if (result != EBPF_SUCCESS) {
+        goto Exit;
+    }
+    if (ebpf_map_get_definition(map)->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+        result = EBPF_INVALID_ARGUMENT;
+        EBPF_LOG_MESSAGE_ERROR(
+            EBPF_TRACELOG_LEVEL_ERROR,
+            EBPF_TRACELOG_KEYWORD_CORE,
+            "perf event array write data operation called on a map that is not of the perf event array type.",
+            result);
+        goto Exit;
+    }
+    result = ebpf_safe_size_t_subtract(
+        request->header.length,
+        EBPF_OFFSET_OF(ebpf_operation_perf_event_array_map_write_data_request_t, data),
+        &data_length);
+    if (result != EBPF_SUCCESS) {
+        goto Exit;
+    }
+    result = ebpf_perf_event_output(NULL, map, EBPF_MAP_FLAG_CURRENT_CPU, (uint8_t*)request->data, data_length);
+Exit:
+    EBPF_OBJECT_RELEASE_REFERENCE((ebpf_core_object_t*)map);
+    EBPF_RETURN_RESULT(result);
+}
+
 static int64_t
 _ebpf_core_map_update_element(ebpf_map_t* map, const uint8_t* key, const uint8_t* value, uint64_t flags)
 {
@@ -2488,6 +2604,13 @@ _ebpf_core_ring_buffer_output(
     return -ebpf_ring_buffer_map_output(map, data, length);
 }

+static int
+_ebpf_core_perf_event_output(
+    _In_ void* ctx, _Inout_ ebpf_map_t* map, uint64_t flags, _In_reads_bytes_(length) uint8_t* data, size_t length)
+{
+    return -ebpf_perf_event_output(ctx, map, flags, data, length);
+}
+
 static int
 _ebpf_core_map_push_elem(_Inout_ ebpf_map_t* map, _In_ const uint8_t* value, uint64_t flags)
 {
@@ -2627,65 +2750,62 @@ typedef struct _ebpf_protocol_handler
 #define PROTOCOL_ALL_MODES PROTOCOL_NATIVE_MODE
 #endif

-#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_NO_REPLY(OPERATION, FLAGS) \
-    { \
-        EBPF_PROTOCOL_FIXED_REQUEST_NO_REPLY, (void*)_ebpf_core_protocol_##OPERATION, \
-            sizeof(ebpf_operation_##OPERATION##_request_t), .flags.value = FLAGS \
-    }
-
-#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY(OPERATION, FLAGS) \
-    { \
-        EBPF_PROTOCOL_FIXED_REQUEST_FIXED_REPLY, (void*)_ebpf_core_protocol_##OPERATION, \
-            sizeof(ebpf_operation_##OPERATION##_request_t), sizeof(ebpf_operation_##OPERATION##_reply_t), \
-            .flags.value = FLAGS \
-    }
-
-#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_VARIABLE_REPLY(OPERATION, VARIABLE_REPLY, FLAGS) \
-    { \
-        EBPF_PROTOCOL_FIXED_REQUEST_VARIABLE_REPLY, (void*)_ebpf_core_protocol_##OPERATION, \
-            sizeof(ebpf_operation_##OPERATION##_request_t), \
-            EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_reply_t, VARIABLE_REPLY), .flags.value = FLAGS \
-    }
-
-#define DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_NO_REPLY(OPERATION, VARIABLE_REQUEST, FLAGS) \
-    { \
-        EBPF_PROTOCOL_VARIABLE_REQUEST_NO_REPLY, (void*)_ebpf_core_protocol_##OPERATION, \
-            EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), .flags.value = FLAGS \
-    }
+#define 
DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_NO_REPLY(OPERATION, FLAGS) \ + {EBPF_PROTOCOL_FIXED_REQUEST_NO_REPLY, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + sizeof(ebpf_operation_##OPERATION##_request_t), \ + .flags.value = FLAGS} + +#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY(OPERATION, FLAGS) \ + {EBPF_PROTOCOL_FIXED_REQUEST_FIXED_REPLY, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + sizeof(ebpf_operation_##OPERATION##_request_t), \ + sizeof(ebpf_operation_##OPERATION##_reply_t), \ + .flags.value = FLAGS} + +#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_VARIABLE_REPLY(OPERATION, VARIABLE_REPLY, FLAGS) \ + {EBPF_PROTOCOL_FIXED_REQUEST_VARIABLE_REPLY, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + sizeof(ebpf_operation_##OPERATION##_request_t), \ + EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_reply_t, VARIABLE_REPLY), \ + .flags.value = FLAGS} + +#define DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_NO_REPLY(OPERATION, VARIABLE_REQUEST, FLAGS) \ + {EBPF_PROTOCOL_VARIABLE_REQUEST_NO_REPLY, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \ + .flags.value = FLAGS} #define DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_FIXED_REPLY(OPERATION, VARIABLE_REQUEST, FLAGS) \ - { \ - EBPF_PROTOCOL_VARIABLE_REQUEST_FIXED_REPLY, (void*)_ebpf_core_protocol_##OPERATION, \ - EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \ - sizeof(ebpf_operation_##OPERATION##_reply_t), .flags.value = FLAGS \ - } + {EBPF_PROTOCOL_VARIABLE_REQUEST_FIXED_REPLY, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \ + sizeof(ebpf_operation_##OPERATION##_reply_t), \ + .flags.value = FLAGS} #define DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_VARIABLE_REPLY(OPERATION, VARIABLE_REQUEST, VARIABLE_REPLY, FLAGS) \ - { \ - EBPF_PROTOCOL_VARIABLE_REQUEST_VARIABLE_REPLY, (void*)_ebpf_core_protocol_##OPERATION, \ - EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \ - EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_reply_t, VARIABLE_REPLY), .flags.value = FLAGS \ - } - -#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY_ASYNC(OPERATION, FLAGS) \ - { \ - EBPF_PROTOCOL_FIXED_REQUEST_FIXED_REPLY_ASYNC, (void*)_ebpf_core_protocol_##OPERATION, \ - sizeof(ebpf_operation_##OPERATION##_request_t), sizeof(ebpf_operation_##OPERATION##_reply_t), \ - .flags.value = FLAGS \ - } - -#define DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_VARIABLE_REPLY_ASYNC( \ - OPERATION, VARIABLE_REQUEST, VARIABLE_REPLY, FLAGS) \ - { \ - EBPF_PROTOCOL_VARIABLE_REQUEST_VARIABLE_REPLY_ASYNC, (void*)_ebpf_core_protocol_##OPERATION, \ - EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \ - EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_reply_t, VARIABLE_REPLY), .flags.value = FLAGS \ - } - -#define DECLARE_PROTOCOL_HANDLER_INVALID(type) \ - { \ - type, NULL, 0, 0, .flags.value = 0 \ - } + {EBPF_PROTOCOL_VARIABLE_REQUEST_VARIABLE_REPLY, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \ + EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_reply_t, VARIABLE_REPLY), \ + .flags.value = FLAGS} + +#define DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY_ASYNC(OPERATION, FLAGS) \ + {EBPF_PROTOCOL_FIXED_REQUEST_FIXED_REPLY_ASYNC, \ + (void*)_ebpf_core_protocol_##OPERATION, \ + sizeof(ebpf_operation_##OPERATION##_request_t), \ + sizeof(ebpf_operation_##OPERATION##_reply_t), \ + .flags.value = 
FLAGS}
+
+#define DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_VARIABLE_REPLY_ASYNC( \
+    OPERATION, VARIABLE_REQUEST, VARIABLE_REPLY, FLAGS) \
+    {EBPF_PROTOCOL_VARIABLE_REQUEST_VARIABLE_REPLY_ASYNC, \
+     (void*)_ebpf_core_protocol_##OPERATION, \
+     EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_request_t, VARIABLE_REQUEST), \
+     EBPF_OFFSET_OF(ebpf_operation_##OPERATION##_reply_t, VARIABLE_REPLY), \
+     .flags.value = FLAGS}
+
+#define DECLARE_PROTOCOL_HANDLER_INVALID(type) {type, NULL, 0, 0, .flags.value = 0}

 #define ALIAS_TYPES(X, Y) \
     typedef ebpf_operation_##X##_request_t ebpf_operation_##Y##_request_t; \
@@ -2747,8 +2867,11 @@ static ebpf_protocol_handler_t _ebpf_protocol_handlers[] = {
         get_next_pinned_program_path, start_path, next_path, PROTOCOL_ALL_MODES),
     DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_NO_REPLY(bind_map, PROTOCOL_ALL_MODES),
     DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY(ring_buffer_map_query_buffer, PROTOCOL_ALL_MODES),
     DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY_ASYNC(ring_buffer_map_async_query, PROTOCOL_ALL_MODES),
     DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_NO_REPLY(ring_buffer_map_write_data, data, PROTOCOL_ALL_MODES),
+    DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY(perf_event_array_map_query_buffer, PROTOCOL_ALL_MODES),
+    DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_FIXED_REPLY_ASYNC(perf_event_array_map_async_query, PROTOCOL_ALL_MODES),
+    DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_NO_REPLY(perf_event_array_map_write_data, data, PROTOCOL_ALL_MODES),
     DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_FIXED_REPLY(load_native_module, data, PROTOCOL_NATIVE_MODE),
     DECLARE_PROTOCOL_HANDLER_FIXED_REQUEST_VARIABLE_REPLY(load_native_programs, data, PROTOCOL_NATIVE_MODE),
     DECLARE_PROTOCOL_HANDLER_VARIABLE_REQUEST_VARIABLE_REPLY_ASYNC(program_test_run, data, data, PROTOCOL_ALL_MODES),
diff --git a/libs/execution_context/ebpf_general_helpers.c b/libs/execution_context/ebpf_general_helpers.c
index 609d409ec7..51fd0f47aa 100644
--- a/libs/execution_context/ebpf_general_helpers.c
+++ b/libs/execution_context/ebpf_general_helpers.c
@@ -203,6 +203,15 @@ ebpf_helper_function_prototype_t ebpf_core_helper_function_prototype_array[] = {
      EBPF_RETURN_TYPE_INTEGER,
      {0}},
     {EBPF_HELPER_FUNCTION_PROTOTYPE_HEADER, BPF_FUNC_ktime_get_ms, "bpf_ktime_get_ms", EBPF_RETURN_TYPE_INTEGER, {0}},
+    {EBPF_HELPER_FUNCTION_PROTOTYPE_HEADER,
+     BPF_FUNC_perf_event_output,
+     "bpf_perf_event_output",
+     EBPF_RETURN_TYPE_INTEGER,
+     {EBPF_ARGUMENT_TYPE_PTR_TO_CTX,
+      EBPF_ARGUMENT_TYPE_PTR_TO_MAP,
+      EBPF_ARGUMENT_TYPE_ANYTHING,
+      EBPF_ARGUMENT_TYPE_PTR_TO_READABLE_MEM,
+      EBPF_ARGUMENT_TYPE_CONST_SIZE}},
 };

 #ifdef __cplusplus
diff --git a/libs/execution_context/ebpf_link.c b/libs/execution_context/ebpf_link.c
index ac4a210c0b..d5c02a032d 100644
--- a/libs/execution_context/ebpf_link.c
+++ b/libs/execution_context/ebpf_link.c
@@ -215,6 +215,9 @@ _ebpf_link_client_attach_provider(
         client_dispatch_table = (void*)&_ebpf_link_dispatch_table_with_context_header;
     } else {
         client_dispatch_table = (void*)&_ebpf_link_dispatch_table;
+        // TODO: Before merging perf event array support, context headers must be required.
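+        // Once context header support is mandatory, attach should fail here for extensions that do not set it: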
+        // status = STATUS_INVALID_PARAMETER;
+        // goto Done;
     }

     ebpf_lock_unlock(&link->lock, state);
diff --git a/libs/execution_context/ebpf_maps.c b/libs/execution_context/ebpf_maps.c
index 68cddb9a0d..d0dd3b2bdb 100644
--- a/libs/execution_context/ebpf_maps.c
+++ b/libs/execution_context/ebpf_maps.c
@@ -10,6 +10,7 @@
 #include "ebpf_hash_table.h"
 #include "ebpf_maps.h"
 #include "ebpf_object.h"
+#include "ebpf_perf_event_array.h"
 #include "ebpf_program.h"
 #include "ebpf_ring_buffer.h"
 #include "ebpf_tracelog.h"
@@ -247,6 +248,37 @@ typedef struct _ebpf_core_ring_buffer_map_async_query_context
     void* async_context;
 } ebpf_core_ring_buffer_map_async_query_context_t;

+typedef struct _ebpf_core_perf_event_ring
+{
+    ebpf_lock_t lock;
+    ebpf_list_entry_t async_contexts;
+} ebpf_core_perf_event_ring_t;
+
+typedef struct _ebpf_core_perf_event_array_map
+{
+    // TODO: Current placeholder copied from ring buffer map.
+    ebpf_core_map_t core_map;
+    // Flag that is set the first time an async operation is queued to the map.
+    // This flag only transitions from off -> on. When this flag is set,
+    // updates to a ring acquire that ring's lock and check its async_contexts list.
+    // Note that queueing an async operation thus causes a performance degradation
+    // for all subsequent updates, so async queries should only be allowed for administrators.
+    // Note that a single trip wire is used for the whole perf event array, so once
+    // any ring receives an async request, every ring takes the async path.
+    bool async_contexts_trip_wire;
+    ebpf_core_perf_event_ring_t rings[1];
+} ebpf_core_perf_event_array_map_t;
+
+typedef struct _ebpf_core_perf_event_array_map_async_query_context
+{
+    ebpf_list_entry_t entry;
+    ebpf_core_perf_event_array_map_t* perf_event_array_map;
+    uint32_t cpu_id;
+    ebpf_perf_event_array_map_async_query_result_t* async_query_result;
+    void* async_context;
+} ebpf_core_perf_event_array_map_async_query_context_t;
+
 /**
  * Core map structure for BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK
  * ebpf_core_circular_map_t stores an array of uint8_t* pointers. Each pointer
@@ -2291,6 +2323,254 @@ ebpf_ring_buffer_map_async_query(
     EBPF_RETURN_RESULT(result);
 }

+static void
+_ebpf_perf_event_array_map_cancel_async_query(_In_ _Frees_ptr_ void* cancel_context)
+{
+    EBPF_LOG_ENTRY();
+    ebpf_core_perf_event_array_map_async_query_context_t* context =
+        (ebpf_core_perf_event_array_map_async_query_context_t*)cancel_context;
+    ebpf_core_perf_event_array_map_t* perf_event_array_map = context->perf_event_array_map;
+    // The async context list is guarded by the per-ring lock.
+    ebpf_core_perf_event_ring_t* ring = &perf_event_array_map->rings[context->cpu_id];
+    ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
+    ebpf_list_remove_entry(&context->entry);
+    ebpf_lock_unlock(&ring->lock, state);
+    ebpf_async_complete(context->async_context, 0, EBPF_CANCELED);
+    ebpf_free(context);
+    EBPF_LOG_EXIT();
+}
+
+static _Requires_lock_held_(perf_event_array_map->rings[cpu_id].lock) void
+    _ebpf_perf_event_array_map_signal_async_query_complete(
+        _Inout_ ebpf_core_perf_event_array_map_t* perf_event_array_map, uint32_t cpu_id)
+{
+    EBPF_LOG_ENTRY();
+    // Skip if no async_contexts have ever been queued.
+    if (!perf_event_array_map->async_contexts_trip_wire) {
+        EBPF_LOG_EXIT();
+        return;
+    }
+    ebpf_core_perf_event_ring_t* ring = &perf_event_array_map->rings[cpu_id];
+
+    ebpf_core_map_t* map = &perf_event_array_map->core_map;
+    while (!ebpf_list_is_empty(&ring->async_contexts)) {
+        ebpf_core_perf_event_array_map_async_query_context_t* context =
+            EBPF_FROM_FIELD(ebpf_core_perf_event_array_map_async_query_context_t, entry, ring->async_contexts.Flink);
+        ebpf_perf_event_array_map_async_query_result_t* async_query_result = context->async_query_result;
+        async_query_result->lost_count =
+            ebpf_perf_event_array_get_reset_lost_count((ebpf_perf_event_array_t*)map->data, context->cpu_id);
+        ebpf_perf_event_array_query(
+            (ebpf_perf_event_array_t*)map->data,
+            context->cpu_id,
+            &async_query_result->consumer,
+            &async_query_result->producer);
+        ebpf_list_remove_entry(&context->entry);
+        ebpf_operation_perf_event_array_map_async_query_reply_t* reply = EBPF_FROM_FIELD(
+            ebpf_operation_perf_event_array_map_async_query_reply_t, async_query_result, async_query_result);
+        ebpf_async_complete(context->async_context, sizeof(*reply), EBPF_SUCCESS);
+        ebpf_free(context);
+        context = NULL;
+    }
+    EBPF_LOG_EXIT();
+}
+
+static void
+_delete_perf_event_array_map(_In_ _Post_invalid_ ebpf_core_map_t* map)
+{
+    EBPF_LOG_ENTRY();
+    uint32_t ring_count = ebpf_perf_event_array_get_ring_count((ebpf_perf_event_array_t*)map->data);
+    // Free the rings.
+    ebpf_perf_event_array_destroy((ebpf_perf_event_array_t*)map->data);
+
+    ebpf_core_perf_event_array_map_t* perf_event_array_map =
+        EBPF_FROM_FIELD(ebpf_core_perf_event_array_map_t, core_map, map);
+
+    // Cancel any outstanding contexts for each ring.
+    for (uint32_t cpu_id = 0; cpu_id < ring_count; cpu_id++) {
+        // Snap the async context list.
+        ebpf_list_entry_t temp_list;
+        ebpf_list_initialize(&temp_list);
+        ebpf_lock_state_t state = ebpf_lock_lock(&perf_event_array_map->rings[cpu_id].lock);
+        ebpf_list_entry_t* first_entry = perf_event_array_map->rings[cpu_id].async_contexts.Flink;
+        if (!ebpf_list_is_empty(&perf_event_array_map->rings[cpu_id].async_contexts)) {
+            ebpf_list_remove_entry(&perf_event_array_map->rings[cpu_id].async_contexts);
+            ebpf_list_append_tail_list(&temp_list, first_entry);
+        }
+        ebpf_lock_unlock(&perf_event_array_map->rings[cpu_id].lock, state);
+        // Cancel all pending async query operations.
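+        // The contexts were unlinked from the ring under its lock above, so they can be completed without holding it.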
+ for (ebpf_list_entry_t* temp_entry = temp_list.Flink; temp_entry != &temp_list; + temp_entry = temp_entry->Flink) { + ebpf_core_perf_event_array_map_async_query_context_t* context = + EBPF_FROM_FIELD(ebpf_core_perf_event_array_map_async_query_context_t, entry, temp_entry); + ebpf_async_complete(context->async_context, 0, EBPF_CANCELED); + } + } + ebpf_epoch_free(perf_event_array_map); +} + +static ebpf_result_t +_create_perf_event_array_map( + _In_ const ebpf_map_definition_in_memory_t* map_definition, + ebpf_handle_t inner_map_handle, + _Outptr_ ebpf_core_map_t** map) +{ + ebpf_result_t result = EBPF_SUCCESS; + ebpf_core_perf_event_array_map_t* perf_event_array_map = NULL; + ebpf_perf_event_array_t* perf_event_array = NULL; + uint32_t cpu_count = ebpf_get_cpu_count(); + + EBPF_LOG_ENTRY(); + + *map = NULL; + + if (inner_map_handle != ebpf_handle_invalid) { + result = EBPF_INVALID_ARGUMENT; + goto Exit; + } + + size_t perf_event_array_map_size = + EBPF_OFFSET_OF(ebpf_core_perf_event_array_map_t, rings) + cpu_count * sizeof(ebpf_core_perf_event_ring_t); + + perf_event_array_map = ebpf_epoch_allocate_with_tag(perf_event_array_map_size, EBPF_POOL_TAG_MAP); + if (perf_event_array_map == NULL) { + result = EBPF_NO_MEMORY; + goto Exit; + } + memset(perf_event_array_map, 0, perf_event_array_map_size); + + perf_event_array_map->core_map.ebpf_map_definition = *map_definition; + + ebpf_perf_event_array_opts_t opts = {0}; + result = ebpf_perf_event_array_create( + (ebpf_perf_event_array_t**)&perf_event_array_map->core_map.data, map_definition->max_entries, &opts); + if (result != EBPF_SUCCESS) { + goto Exit; + } + perf_event_array = (ebpf_perf_event_array_t*)perf_event_array_map->core_map.data; + + for (uint32_t cpu_id = 0; cpu_id < cpu_count; cpu_id++) { + ebpf_core_perf_event_ring_t* ring = &perf_event_array_map->rings[cpu_id]; + ebpf_list_initialize(&ring->async_contexts); + ebpf_lock_create(&ring->lock); + } + + *map = &perf_event_array_map->core_map; + perf_event_array = NULL; + perf_event_array_map = NULL; + +Exit: + ebpf_perf_event_array_destroy(perf_event_array); + ebpf_epoch_free(perf_event_array_map); + + EBPF_RETURN_RESULT(result); +} + +_Must_inspect_result_ ebpf_result_t +ebpf_perf_event_output( + _In_ void* ctx, _Inout_ ebpf_map_t* map, uint64_t flags, _In_reads_bytes_(length) uint8_t* data, size_t length) +{ + + ebpf_result_t result = EBPF_SUCCESS; + + EBPF_LOG_ENTRY(); + + if (ctx == NULL && (flags & EBPF_MAP_FLAG_CTXLEN_MASK) != 0) { + result = EBPF_OPERATION_NOT_SUPPORTED; + goto Exit; + } + uint32_t cpu_id; // After perf_event_array_output cpu_id contains the cpu_id we wrote to. 
+    result = ebpf_perf_event_array_output(ctx, (ebpf_perf_event_array_t*)map->data, flags, data, length, &cpu_id);
+    if (result != EBPF_SUCCESS) {
+        goto Exit;
+    }
+
+    ebpf_core_perf_event_array_map_t* perf_event_array_map =
+        EBPF_FROM_FIELD(ebpf_core_perf_event_array_map_t, core_map, map);
+
+    // The async context list of the ring just written to is guarded by that ring's lock.
+    ebpf_core_perf_event_ring_t* ring = &perf_event_array_map->rings[cpu_id];
+    ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
+    _ebpf_perf_event_array_map_signal_async_query_complete(perf_event_array_map, cpu_id);
+    ebpf_lock_unlock(&ring->lock, state);
+
+Exit:
+    EBPF_RETURN_RESULT(result);
+}
+
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_query_buffer(
+    _In_ const ebpf_map_t* map, uint32_t cpu_id, _Outptr_ uint8_t** buffer, _Out_ size_t* consumer_offset)
+{
+    size_t producer_offset;
+    ebpf_perf_event_array_query((ebpf_perf_event_array_t*)map->data, cpu_id, consumer_offset, &producer_offset);
+    return ebpf_perf_event_array_map_buffer((ebpf_perf_event_array_t*)map->data, cpu_id, buffer);
+}
+
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_return_buffer(_In_ const ebpf_map_t* map, uint32_t cpu_id, size_t consumer_offset)
+{
+    size_t producer_offset;
+    size_t old_consumer_offset;
+    size_t consumed_data_length;
+    EBPF_LOG_ENTRY();
+    ebpf_perf_event_array_query((ebpf_perf_event_array_t*)map->data, cpu_id, &old_consumer_offset, &producer_offset);
+    ebpf_result_t result = ebpf_safe_size_t_subtract(consumer_offset, old_consumer_offset, &consumed_data_length);
+    if (result != EBPF_SUCCESS) {
+        goto Exit;
+    }
+    result = ebpf_perf_event_array_return((ebpf_perf_event_array_t*)map->data, cpu_id, consumed_data_length);
+Exit:
+    EBPF_RETURN_RESULT(result);
+}
+
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_async_query(
+    _Inout_ ebpf_map_t* map,
+    uint32_t cpu_id,
+    _Inout_ ebpf_perf_event_array_map_async_query_result_t* async_query_result,
+    _Inout_ void* async_context)
+{
+    ebpf_result_t result = EBPF_PENDING;
+    EBPF_LOG_ENTRY();
+
+    ebpf_core_perf_event_array_map_t* perf_event_array_map =
+        EBPF_FROM_FIELD(ebpf_core_perf_event_array_map_t, core_map, map);
+
+    // Reject CPU IDs outside the range of rings allocated for this map.
+    if (cpu_id >= ebpf_perf_event_array_get_ring_count((ebpf_perf_event_array_t*)map->data)) {
+        EBPF_RETURN_RESULT(EBPF_INVALID_ARGUMENT);
+    }
+
+    ebpf_core_perf_event_ring_t* ring = &perf_event_array_map->rings[cpu_id];
+
+    ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock);
+
+    // Fail the async query as there is already another async query operation queued.
+    if (!ebpf_list_is_empty(&ring->async_contexts)) {
+        result = EBPF_INVALID_ARGUMENT;
+        goto Exit;
+    }
+
+    // Allocate and initialize the async query context and queue it up.
+    ebpf_core_perf_event_array_map_async_query_context_t* context =
+        ebpf_allocate_with_tag(sizeof(ebpf_core_perf_event_array_map_async_query_context_t), EBPF_POOL_TAG_ASYNC);
+    if (!context) {
+        result = EBPF_NO_MEMORY;
+        goto Exit;
+    }
+    ebpf_list_initialize(&context->entry);
+    context->perf_event_array_map = perf_event_array_map;
+    context->cpu_id = cpu_id;
+    context->async_query_result = async_query_result;
+    context->async_context = async_context;
+
+    ebpf_assert_success(
+        ebpf_async_set_cancel_callback(async_context, context, _ebpf_perf_event_array_map_cancel_async_query));
+
+    ebpf_list_insert_tail(&ring->async_contexts, &context->entry);
+    perf_event_array_map->async_contexts_trip_wire = true;
+
+    // If there is already some data available in the CPU ring, indicate the results right away.
+    ebpf_perf_event_array_query(
+        (ebpf_perf_event_array_t*)map->data, cpu_id, &async_query_result->consumer, &async_query_result->producer);
+
+    if (async_query_result->producer != async_query_result->consumer) {
+        _ebpf_perf_event_array_map_signal_async_query_complete(perf_event_array_map, cpu_id);
+    }
+
+Exit:
+    ebpf_lock_unlock(&ring->lock, state);
+
+    EBPF_RETURN_RESULT(result);
+}
+
 const ebpf_map_metadata_table_t ebpf_map_metadata_tables[] = {
     {
         BPF_MAP_TYPE_UNSPEC,
@@ -2422,6 +2702,14 @@ const ebpf_map_metadata_table_t ebpf_map_metadata_tables[] = {
         .zero_length_key = true,
         .zero_length_value = true,
     },
+    {
+        BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+        .create_map = _create_perf_event_array_map,
+        .delete_map = _delete_perf_event_array_map,
+        .zero_length_key = true,
+        .zero_length_value = true,
+        .per_cpu = true,
+    },
 };

 // ebpf_map_get_table(type) - get the metadata table for the given map type.
diff --git a/libs/execution_context/ebpf_maps.h b/libs/execution_context/ebpf_maps.h
index 287e3d21db..9d7acc3bac 100644
--- a/libs/execution_context/ebpf_maps.h
+++ b/libs/execution_context/ebpf_maps.h
@@ -244,6 +244,63 @@ extern "C"
     _Must_inspect_result_ ebpf_result_t
     ebpf_ring_buffer_map_output(_Inout_ ebpf_map_t* map, _In_reads_bytes_(length) uint8_t* data, size_t length);

+    /**
+     * @brief Get a pointer to the perf event array's shared data for a specific CPU.
+     *
+     * @param[in] map Perf event array map to query.
+     * @param[in] cpu_id CPU ID to query.
+     * @param[out] buffer Pointer to perf event array data.
+     * @param[out] consumer_offset Offset of consumer in perf event array data.
+     * @retval EBPF_SUCCESS Successfully mapped the perf event array.
+     * @retval EBPF_INVALID_ARGUMENT Unable to map the perf event array.
+     */
+    _Must_inspect_result_ ebpf_result_t
+    ebpf_perf_event_array_map_query_buffer(
+        _In_ const ebpf_map_t* map, uint32_t cpu_id, _Outptr_ uint8_t** buffer, _Out_ size_t* consumer_offset);
+
+    /**
+     * @brief Return consumed buffer back to the perf event array map.
+     *
+     * @param[in] map Perf event array map.
+     * @param[in] cpu_id CPU ID to return buffer space to.
+     * @param[in] consumer_offset Offset up to which the consumer has read data, as returned by a prior query.
+     * @retval EBPF_SUCCESS Successfully returned records to the perf event array.
+     * @retval EBPF_INVALID_ARGUMENT Unable to return records to the perf event array.
+     */
+    _Must_inspect_result_ ebpf_result_t
+    ebpf_perf_event_array_map_return_buffer(_In_ const ebpf_map_t* map, uint32_t cpu_id, size_t consumer_offset);
+
+    /**
+     * @brief Issue an asynchronous query to perf event array map.
+     *
+     * @param[in, out] map Perf event array map to issue the async query on.
+     * @param[in] cpu_id CPU ID to query.
+     * @param[in, out] async_query_result Pointer to structure for storing result of the async query.
+     * @param[in, out] async_context Async context associated with the query.
+     * @retval EBPF_SUCCESS The operation was successful.
+     * @retval EBPF_NO_MEMORY Insufficient memory to complete this operation.
+     */
+    _Must_inspect_result_ ebpf_result_t
+    ebpf_perf_event_array_map_async_query(
+        _Inout_ ebpf_map_t* map,
+        uint32_t cpu_id,
+        _Inout_ ebpf_perf_event_array_map_async_query_result_t* async_query_result,
+        _Inout_ void* async_context);
+
+    /**
+     * @brief Write out a variable sized record to the perf event array map.
+     *
+     * @param[in] ctx Program context, or NULL when invoked from the write-data IOCTL path.
+     * @param[in, out] map Pointer to map of type EBPF_MAP_TYPE_PERF_EVENT_ARRAY.
+     * @param[in] flags Flags selecting the target CPU ring.
+     * @param[in] data Data of record to write into perf event array map.
+     * @param[in] length Length of data.
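+     * @remark This function backs the bpf_perf_event_output helper; the flags select the target CPU ring
+     * (EBPF_MAP_FLAG_CURRENT_CPU or an explicit CPU index in the low 32 bits).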
+ * @retval EBPF_SUCCESS Successfully wrote record into perf event array. + * @retval EBPF_OUT_OF_SPACE Unable to output to perf event array due to inadequate space. + */ + EBPF_INLINE_HINT + _Must_inspect_result_ ebpf_result_t + ebpf_perf_event_output( + _In_ void* ctx, _Inout_ ebpf_map_t* map, uint64_t flags, _In_reads_bytes_(length) uint8_t* data, size_t length); + /** * @brief Insert an element at the end of the map (only valid for stack and queue). * diff --git a/libs/execution_context/ebpf_program.c b/libs/execution_context/ebpf_program.c index 24aed2a872..1d1382f84c 100644 --- a/libs/execution_context/ebpf_program.c +++ b/libs/execution_context/ebpf_program.c @@ -1569,6 +1569,9 @@ ebpf_program_invoke( // If context header is supported, store the execution state in the context. if (use_context_header) { ebpf_program_set_runtime_state(execution_state, context); + const ebpf_context_descriptor_t* context_descriptor = + program->extension_program_data->program_info->program_type_descriptor->context_descriptor; + ebpf_program_set_header_context_descriptor(context_descriptor, context); } // Top-level tail caller(1) + tail callees(33). @@ -2695,4 +2698,46 @@ ebpf_program_get_runtime_state(_In_ const void* program_context, _Outptr_ const // slot [0] contains the execution context state. ebpf_context_header_t* header = CONTAINING_RECORD(program_context, ebpf_context_header_t, context); *state = (ebpf_execution_context_state_t*)header->context_header[0]; +} + +void +ebpf_program_set_header_context_descriptor( + const ebpf_context_descriptor_t* context_descriptor, _Inout_ void* program_context) +{ + // slot [1] contains the context_descriptor for the program. + ebpf_context_header_t* header = CONTAINING_RECORD(program_context, ebpf_context_header_t, context); + + header->context_header[1] = (uint64_t)context_descriptor; +} + +void +ebpf_program_get_header_context_descriptor( + _In_ const void* program_context, _Outptr_ const ebpf_context_descriptor_t** context_descriptor) +{ + ebpf_context_header_t* header = CONTAINING_RECORD(program_context, ebpf_context_header_t, context); + // ebpf_program_t *program = (ebpf_program_t*)header->context_header[1]; + //*context_descriptor = + // program->extension_program_data->program_info->program_type_descriptor->context_descriptor; + + // Just storing context descriptor for testing. 
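+    // Slot [1] currently holds the context descriptor pointer directly; the commented-out code above shows the
+    // alternative of recovering it from the program object.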
+    *context_descriptor = (ebpf_context_descriptor_t*)header->context_header[1];
+}
+
+void
+ebpf_program_get_context_data(
+    _In_ const void* program_context, _Outptr_ const uint8_t** data_start, _Outptr_ const uint8_t** data_end)
+{
+    const ebpf_context_descriptor_t* context_descriptor;
+    ebpf_program_get_header_context_descriptor(program_context, &context_descriptor);
+    if (context_descriptor->data < 0 || context_descriptor->end < 0) {
+        *data_start = NULL;
+        *data_end = NULL;
+        return;
+    } else {
+        ebpf_assert(
+            (context_descriptor->data + 8) <= context_descriptor->size &&
+            (context_descriptor->end + 8) <= context_descriptor->size);
+        *data_start = *(const uint8_t**)((char*)program_context + context_descriptor->data);
+        *data_end = *(const uint8_t**)((char*)program_context + context_descriptor->end);
+    }
+}
\ No newline at end of file
diff --git a/libs/execution_context/ebpf_program.h b/libs/execution_context/ebpf_program.h
index a1d47fbf3d..419ce68634 100644
--- a/libs/execution_context/ebpf_program.h
+++ b/libs/execution_context/ebpf_program.h
@@ -455,6 +455,44 @@ extern "C"
     ebpf_program_get_runtime_state(
         _In_ const void* program_context, _Outptr_ const ebpf_execution_context_state_t** state);

+    /**
+     * @brief Set the context descriptor in the program context header.
+     * Writes a pointer to the program's context descriptor to slot [1].
+     *
+     * Note: Extension must support context headers.
+     *
+     * @param[in] context_descriptor Pointer to the context descriptor for the program.
+     * @param[in,out] program_context Pointer to the program context to set the header for.
+     */
+    void
+    ebpf_program_set_header_context_descriptor(
+        _In_ const ebpf_context_descriptor_t* context_descriptor, _Inout_ void* program_context);
+
+    /**
+     * @brief Get the context descriptor from the program context header.
+     * Slot [1] contains the context descriptor pointer.
+     *
+     * Note: Extension must support context headers.
+     *
+     * @param[in] program_context Pointer to the program context.
+     * @param[out] context_descriptor Receives a pointer to the context descriptor.
+     */
+    void
+    ebpf_program_get_header_context_descriptor(
+        _In_ const void* program_context, _Outptr_ const ebpf_context_descriptor_t** context_descriptor);
+
+    /**
+     * @brief Get the data start and end pointers from the program context.
+     *
+     * Note: Extension must support context headers.
+     *
+     * @param[in] program_context Pointer to the program context.
+     * @param[out] data_start Pointer to the start of the context data.
+     * @param[out] data_end Pointer to the end of the context data (after the last byte).
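+     *
+     * Note: If the context descriptor has negative data/end offsets, both output pointers are set to NULL.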
+     */
+    void
+    ebpf_program_get_context_data(
+        _In_ const void* program_context, _Outptr_ const uint8_t** data_start, _Outptr_ const uint8_t** data_end);
 #ifdef __cplusplus
 }
 #endif
diff --git a/libs/execution_context/ebpf_protocol.h b/libs/execution_context/ebpf_protocol.h
index 7f5d678909..5752bcbd70 100644
--- a/libs/execution_context/ebpf_protocol.h
+++ b/libs/execution_context/ebpf_protocol.h
@@ -40,6 +40,9 @@ typedef enum _ebpf_operation_id
     EBPF_OPERATION_RING_BUFFER_MAP_QUERY_BUFFER,
     EBPF_OPERATION_RING_BUFFER_MAP_ASYNC_QUERY,
     EBPF_OPERATION_RING_BUFFER_MAP_WRITE_DATA,
+    EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_QUERY_BUFFER,
+    EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_ASYNC_QUERY,
+    EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_WRITE_DATA,
     EBPF_OPERATION_LOAD_NATIVE_MODULE,
     EBPF_OPERATION_LOAD_NATIVE_PROGRAMS,
     EBPF_OPERATION_PROGRAM_TEST_RUN,
@@ -406,6 +409,44 @@ typedef struct _ebpf_operation_ring_buffer_map_write_data_request
     uint8_t data[1];
 } ebpf_operation_ring_buffer_map_write_data_request_t;

+typedef struct _ebpf_operation_perf_event_array_map_query_buffer_request
+{
+    struct _ebpf_operation_header header;
+    ebpf_handle_t map_handle;
+    uint32_t cpu_id;
+} ebpf_operation_perf_event_array_map_query_buffer_request_t;
+
+typedef struct _ebpf_operation_perf_event_array_map_query_buffer_reply
+{
+    struct _ebpf_operation_header header;
+    // Address of the user-space read-only buffer for the perf event array records.
+    uint64_t buffer_address;
+    // The current consumer offset, so that subsequent reads can start from here.
+    size_t consumer_offset;
+} ebpf_operation_perf_event_array_map_query_buffer_reply_t;
+
+typedef struct _ebpf_operation_perf_event_array_map_async_query_request
+{
+    struct _ebpf_operation_header header;
+    ebpf_handle_t map_handle;
+    uint32_t cpu_id;
+    // Offset up to which the consumer has read data so far.
+    size_t consumer_offset;
+} ebpf_operation_perf_event_array_map_async_query_request_t;
+
+typedef struct _ebpf_operation_perf_event_array_map_async_query_reply
+{
+    struct _ebpf_operation_header header;
+    ebpf_perf_event_array_map_async_query_result_t async_query_result;
+} ebpf_operation_perf_event_array_map_async_query_reply_t;
+
+typedef struct _ebpf_operation_perf_event_array_map_write_data_request
+{
+    struct _ebpf_operation_header header;
+    ebpf_handle_t map_handle;
+    uint8_t data[1];
+} ebpf_operation_perf_event_array_map_write_data_request_t;
+
 typedef struct _ebpf_operation_load_native_module_request
 {
     struct _ebpf_operation_header header;
diff --git a/libs/execution_context/unit/execution_context_unit_test.cpp b/libs/execution_context/unit/execution_context_unit_test.cpp
index 56060654a5..98ac73441e 100644
--- a/libs/execution_context/unit/execution_context_unit_test.cpp
+++ b/libs/execution_context/unit/execution_context_unit_test.cpp
@@ -8,12 +8,14 @@
 #include "ebpf_core.h"
 #include "ebpf_maps.h"
 #include "ebpf_object.h"
+#include "ebpf_perf_event_array.h"
 #include "ebpf_program.h"
 #include "ebpf_ring_buffer.h"
 #include "helpers.h"
 #include "test_helper.hpp"

 #include
+#include
 #include
 #include
@@ -417,6 +419,15 @@ TEST_CASE("map_create_invalid", "[execution_context][negative]")
             20,
         },
     },
+    {
+        "BPF_MAP_TYPE_PERF_EVENT_ARRAY",
+        {
+            BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+            4, // Key size must be 0 for perf event array.
+            20,
+            20,
+        },
+    },
     {
         "BPF_MAP_TYPE_HASH_OF_MAPS",
         {
@@ -1378,6 +1389,179 @@ TEST_CASE("ring_buffer_async_query", "[execution_context]")
     }
 }

+TEST_CASE("perf_event_array_unsupported_ops", "[execution_context][perf_event_array][negative]")
+{
+    _ebpf_core_initializer core;
+    core.initialize();
+    ebpf_map_definition_in_memory_t map_definition{BPF_MAP_TYPE_PERF_EVENT_ARRAY, 0, 0, 64 * 1024};
+    map_ptr map;
+    {
+        ebpf_map_t* local_map;
+        cxplat_utf8_string_t map_name = {0};
+        REQUIRE(
+            ebpf_map_create(&map_name, &map_definition, (uintptr_t)ebpf_handle_invalid, &local_map) == EBPF_SUCCESS);
+        map.reset(local_map);
+    }
+
+    uint32_t key = 0;
+    uint32_t value2 = 0;
+    REQUIRE(
+        ebpf_map_update_entry(map.get(), sizeof(key), reinterpret_cast<const uint8_t*>(&key), 0, nullptr, EBPF_ANY, 0) ==
+        EBPF_INVALID_ARGUMENT);
+
+    // Negative test cases.
+    REQUIRE(
+        ebpf_map_update_entry(
+            map.get(), 0, nullptr, sizeof(value2), reinterpret_cast<const uint8_t*>(&value2), EBPF_ANY, 0) ==
+        EBPF_INVALID_ARGUMENT);
+
+    REQUIRE(ebpf_map_update_entry(map.get(), 0, nullptr, 0, nullptr, EBPF_ANY, 0) == EBPF_OPERATION_NOT_SUPPORTED);
+
+    REQUIRE(ebpf_map_get_program_from_entry(map.get(), sizeof(&key), reinterpret_cast<uint8_t*>(&key)) == nullptr);
+    REQUIRE(ebpf_map_get_program_from_entry(map.get(), 0, 0) == nullptr);
+
+    REQUIRE(
+        ebpf_map_find_entry(map.get(), sizeof(key), reinterpret_cast<const uint8_t*>(&key), 0, nullptr, 0) ==
+        EBPF_INVALID_ARGUMENT);
+    REQUIRE(
+        ebpf_map_find_entry(map.get(), 0, nullptr, sizeof(value2), reinterpret_cast<uint8_t*>(&value2), 0) ==
+        EBPF_INVALID_ARGUMENT);
+
+    REQUIRE(ebpf_map_find_entry(map.get(), 0, nullptr, 0, nullptr, 0) == EBPF_OPERATION_NOT_SUPPORTED);
+    REQUIRE(ebpf_map_delete_entry(map.get(), 0, nullptr, 0) == EBPF_OPERATION_NOT_SUPPORTED);
+    REQUIRE(ebpf_map_next_key(map.get(), 0, nullptr, nullptr) == EBPF_OPERATION_NOT_SUPPORTED);
+    REQUIRE(ebpf_map_push_entry(map.get(), 0, nullptr, 0) == EBPF_OPERATION_NOT_SUPPORTED);
+    REQUIRE(ebpf_map_pop_entry(map.get(), 0, nullptr, 0) == EBPF_OPERATION_NOT_SUPPORTED);
+    REQUIRE(ebpf_map_peek_entry(map.get(), 0, nullptr, 0) == EBPF_OPERATION_NOT_SUPPORTED);
+}
+
+TEST_CASE("perf_event_array_async_query", "[execution_context][perf_event_array]")
+{
+    _ebpf_core_initializer core;
+    core.initialize();
+    ebpf_map_definition_in_memory_t map_definition{BPF_MAP_TYPE_PERF_EVENT_ARRAY, 0, 0, 64 * 1024};
+    map_ptr map;
+    {
+        ebpf_map_t* local_map;
+        cxplat_utf8_string_t map_name = {0};
+        REQUIRE(
+            ebpf_map_create(&map_name, &map_definition, (uintptr_t)ebpf_handle_invalid, &local_map) == EBPF_SUCCESS);
+        map.reset(local_map);
+    }
+
+    struct _completion
+    {
+        uint8_t* buffer;
+        uint32_t cpu_id;
+        size_t consumer_offset = 0;
+        size_t callback_count = 0;
+        size_t record_count = 0;
+        size_t norecord_count = 0;
+        size_t lost_count = 0;
+        size_t cancel_count = 0;
+        uint64_t value = 0;
+        ebpf_perf_event_array_map_async_query_result_t async_query_result = {};
+    };
+    uint32_t ring_count = ebpf_get_cpu_count();
+    std::vector<_completion> completions(ring_count);
+
+    // Map each ring and set up completion callbacks.
+    for (uint32_t cpu_id = 0; cpu_id < ring_count; cpu_id++) {
+        auto& completion = completions[cpu_id];
+        completion.cpu_id = cpu_id;
+        // Map the ring memory.
+        REQUIRE(
+            ebpf_perf_event_array_map_query_buffer(
+                map.get(), completion.cpu_id, &completion.buffer, &completion.consumer_offset) == EBPF_SUCCESS);
+
+        // Set up the completion callback.
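+        // The callback tallies record, empty-completion, lost, and cancel counts so the test can later verify that
+        // exactly one ring received the record.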
+ REQUIRE( + ebpf_async_set_completion_callback( + &completion, [](_Inout_ void* context, size_t output_buffer_length, ebpf_result_t result) { + UNREFERENCED_PARAMETER(output_buffer_length); + auto completion = reinterpret_cast<_completion*>(context); + auto async_query_result = &completion->async_query_result; + completion->callback_count++; + completion->lost_count += async_query_result->lost_count; + auto record = ebpf_perf_event_array_next_record( + completion->buffer, + sizeof(uint64_t), + async_query_result->consumer, + async_query_result->producer); + if (record == nullptr) { + completion->norecord_count++; + } else { + completion->record_count++; + completion->value = *(uint64_t*)(record->data); + } + if (result != EBPF_SUCCESS) { + REQUIRE(result == EBPF_CANCELED); + completion->cancel_count++; + } + }) == EBPF_SUCCESS); + + // Start the async query. + ebpf_result_t result = + ebpf_perf_event_array_map_async_query(map.get(), cpu_id, &completion.async_query_result, &completion); + if (result != EBPF_PENDING) { // If async query failed synchronously, reset the completion callback. + REQUIRE(ebpf_async_reset_completion_callback(&completion) == EBPF_SUCCESS); + } + REQUIRE(result == EBPF_PENDING); + } + + // Confirm none of the completions have been called yet. + for (auto& completion : completions) { + REQUIRE(completion.callback_count == 0); + } + + // Write a single record. + void* ctx = nullptr; + uint64_t value = 1; + uint64_t flags = EBPF_MAP_FLAG_CURRENT_CPU; + REQUIRE( + ebpf_perf_event_output(ctx, map.get(), flags, reinterpret_cast(&value), sizeof(value)) == + EBPF_SUCCESS); + + // Confirm that a single ring got the correct record and all other rings are empty. + size_t total_callback_count = 0; + size_t total_record_count = 0; + size_t total_norecord_count = 0; + size_t total_lost_count = 0; + size_t cancel_count = 0; + + for (auto& completion : completions) { + CAPTURE( + completion.cpu_id, + completion.record_count, + completion.norecord_count, + completion.cancel_count, + completion.lost_count); + CHECK(completion.callback_count <= 1); + CHECK(completion.lost_count == 0); + // We try cancelling each op, but only ones that haven't completed will actually cancel. + bool must_cancel = completion.callback_count == 0; + bool cancel_result = ebpf_async_cancel(&completion); + if (cancel_result == true) { + cancel_count++; + } + CHECK(cancel_result == must_cancel); + total_callback_count += completion.callback_count; + total_record_count += completion.record_count; + total_norecord_count += completion.norecord_count; + total_lost_count += completion.lost_count; + if (completion.record_count > 0) { + // This was the ring that got the record. 
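+ // (Rings that got no data only complete via the cancellation above, so
+ // they count a no-record callback instead.)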
+ CHECK(completion.record_count == 1); + CHECK(completion.value == value); + } + } + CAPTURE(ring_count, total_callback_count, total_record_count, total_norecord_count, total_lost_count, cancel_count); + REQUIRE(total_record_count == 1); + REQUIRE(total_lost_count == 0); + REQUIRE(total_norecord_count == ring_count - 1); + REQUIRE(cancel_count == ring_count - 1); +} + std::vector _program_types = { EBPF_PROGRAM_TYPE_XDP, EBPF_PROGRAM_TYPE_BIND, @@ -2221,6 +2405,37 @@ TEST_CASE("EBPF_OPERATION_RING_BUFFER_MAP_ASYNC_QUERY", "[execution_context][neg invoke_protocol(EBPF_OPERATION_RING_BUFFER_MAP_ASYNC_QUERY, request, reply, &async) == EBPF_INVALID_ARGUMENT); } +TEST_CASE("EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_QUERY_BUFFER", "[execution_context][perf_event_array][negative]") +{ + NEGATIVE_TEST_PROLOG(); + ebpf_operation_perf_event_array_map_query_buffer_request_t request; + // ebpf_operation_perf_event_array_map_query_buffer_reply_t reply; + + request.map_handle = ebpf_handle_invalid - 1; + REQUIRE(invoke_protocol(EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_QUERY_BUFFER, request) == EBPF_INVALID_OBJECT); + + request.map_handle = map_handles.begin()->second; + REQUIRE(invoke_protocol(EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_QUERY_BUFFER, request) == EBPF_INVALID_ARGUMENT); +} + +TEST_CASE("EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_ASYNC_QUERY", "[execution_context][perf_event_array][negative]") +{ + NEGATIVE_TEST_PROLOG(); + ebpf_operation_perf_event_array_map_async_query_request_t request; + ebpf_operation_perf_event_array_map_async_query_reply_t reply; + int async = 1; + + request.map_handle = ebpf_handle_invalid - 1; + REQUIRE( + invoke_protocol(EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_ASYNC_QUERY, request, reply, &async) == + EBPF_INVALID_OBJECT); + + request.map_handle = map_handles["BPF_MAP_TYPE_HASH"]; + REQUIRE( + invoke_protocol(EBPF_OPERATION_PERF_EVENT_ARRAY_MAP_ASYNC_QUERY, request, reply, &async) == + EBPF_INVALID_ARGUMENT); +} + TEST_CASE("EBPF_OPERATION_LOAD_NATIVE_MODULE short header", "[execution_context][negative]") { _ebpf_core_initializer core; diff --git a/libs/runtime/ebpf_perf_event_array.c b/libs/runtime/ebpf_perf_event_array.c new file mode 100644 index 0000000000..5cf4dbe6af --- /dev/null +++ b/libs/runtime/ebpf_perf_event_array.c @@ -0,0 +1,375 @@ +// Copyright (c) eBPF for Windows contributors +// SPDX-License-Identifier: MIT + +#include "ebpf_epoch.h" +#include "ebpf_perf_event_array.h" +#include "ebpf_perf_event_array_record.h" +#include "ebpf_platform.h" +#include "ebpf_program.h" +#include "ebpf_ring_buffer.h" +#include "ebpf_ring_buffer_record.h" +#include "ebpf_tracelog.h" + +typedef struct _ebpf_perf_ring +{ + ebpf_lock_t lock; + size_t length; + size_t consumer_offset; + size_t producer_offset; + uint8_t* shared_buffer; + ebpf_ring_descriptor_t* ring_descriptor; + size_t lost_records; + uint64_t pad; +} ebpf_perf_ring_t; +typedef struct _ebpf_perf_event_array +{ + uint32_t ring_count; + uint32_t pad1; + uint64_t pad2[7]; + ebpf_perf_ring_t rings[1]; +} ebpf_perf_event_array_t; + +static_assert(sizeof(ebpf_perf_ring_t) % EBPF_CACHE_LINE_SIZE == 0, "ebpf_perf_ring_t is not cache aligned."); +static_assert( + sizeof(ebpf_perf_event_array_t) % EBPF_CACHE_LINE_SIZE == 0, "ebpf_perf_event_array_t is not cache aligned."); + +inline static size_t +_perf_array_record_size(size_t data_size) +{ + return EBPF_OFFSET_OF(ebpf_perf_event_array_record_t, data) + data_size; +} + +inline static size_t +_perf_array_padded_size(size_t size) +{ + return (size + 7) & ~7; +} + +inline static size_t 
+_perf_array_get_length(_In_ const ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id) +{ + return perf_event_array->rings[cpu_id].length; +} + +inline static size_t +_perf_array_get_producer_offset(_In_ const ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id) +{ + const ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + return ring->producer_offset % ring->length; +} + +inline static size_t +_perf_array_get_consumer_offset(_In_ const ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id) +{ + const ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + return ring->consumer_offset % ring->length; +} + +inline static size_t +_perf_array_get_used_capacity(_In_ const ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id) +{ + const ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + ebpf_assert(ring->producer_offset >= ring->consumer_offset); + return ring->producer_offset - ring->consumer_offset; +} + +inline static void +_perf_array_advance_producer_offset(_Inout_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, size_t length) +{ + perf_event_array->rings[cpu_id].producer_offset += length; +} + +inline static void +_perf_array_advance_consumer_offset(_Inout_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, size_t length) +{ + perf_event_array->rings[cpu_id].consumer_offset += length; +} + +inline static _Ret_notnull_ ebpf_perf_event_array_record_t* +_perf_array_record_at_offset(_In_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, size_t offset) +{ + ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + return (ebpf_perf_event_array_record_t*)&ring->shared_buffer[offset % ring->length]; +} + +inline static _Ret_notnull_ ebpf_perf_event_array_record_t* +_perf_array_next_consumer_record(_In_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id) +{ + return _perf_array_record_at_offset( + perf_event_array, cpu_id, _perf_array_get_consumer_offset(perf_event_array, cpu_id)); +} + +inline static _Ret_maybenull_ ebpf_perf_event_array_record_t* +_perf_event_array_acquire_record( + _Inout_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, size_t requested_length) +{ + ebpf_perf_event_array_record_t* record = NULL; + requested_length = _perf_array_record_size(requested_length); + size_t padded_length = _perf_array_padded_size(requested_length); + ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + size_t remaining_space = ring->length - (ring->producer_offset - ring->consumer_offset); + + if (remaining_space > padded_length) { + record = _perf_array_record_at_offset( + perf_event_array, cpu_id, _perf_array_get_producer_offset(perf_event_array, cpu_id)); + _perf_array_advance_producer_offset(perf_event_array, cpu_id, padded_length); + record->header.length = (uint32_t)requested_length; + record->header.locked = 1; + record->header.discarded = 0; + } + return record; +} + +_Must_inspect_result_ ebpf_result_t +ebpf_perf_event_array_create( + _Outptr_ ebpf_perf_event_array_t** perf_event_array, size_t capacity, _In_ ebpf_perf_event_array_opts_t* opts) +{ + EBPF_LOG_ENTRY(); + UNREFERENCED_PARAMETER(opts); + ebpf_result_t result; + ebpf_perf_event_array_t* local_perf_event_array = NULL; + uint32_t ring_count = ebpf_get_cpu_count(); + size_t total_size = sizeof(ebpf_perf_event_array_t) + sizeof(ebpf_perf_ring_t) * (ring_count - 1); + + local_perf_event_array = ebpf_epoch_allocate_with_tag(total_size, EBPF_POOL_TAG_RING_BUFFER); + if (!local_perf_event_array) { + result = EBPF_NO_MEMORY; + 
goto Error; + } + local_perf_event_array->ring_count = ring_count; + + for (uint32_t i = 0; i < ring_count; i++) { + ebpf_perf_ring_t* ring = &local_perf_event_array->rings[i]; + ring->length = capacity; + ring->lost_records = 0; + + ring->ring_descriptor = ebpf_allocate_ring_buffer_memory(capacity); + if (!ring->ring_descriptor) { + result = EBPF_NO_MEMORY; + goto Error; + } + ring->shared_buffer = ebpf_ring_descriptor_get_base_address(ring->ring_descriptor); + } + + *perf_event_array = local_perf_event_array; + local_perf_event_array = NULL; + return EBPF_SUCCESS; + +Error: + if (local_perf_event_array) { + for (uint32_t i = 0; i < ring_count; i++) { + if (local_perf_event_array->rings[i].ring_descriptor) { + ebpf_free_ring_buffer_memory(local_perf_event_array->rings[i].ring_descriptor); + } + } + ebpf_epoch_free(local_perf_event_array); + } + EBPF_RETURN_RESULT(result); +} + +void +ebpf_perf_event_array_destroy(_Frees_ptr_opt_ ebpf_perf_event_array_t* perf_event_array) +{ + if (perf_event_array) { + EBPF_LOG_ENTRY(); + uint32_t ring_count = perf_event_array->ring_count; + for (uint32_t i = 0; i < ring_count; i++) { + ebpf_free_ring_buffer_memory(perf_event_array->rings[i].ring_descriptor); + } + ebpf_epoch_free(perf_event_array); + EBPF_RETURN_VOID(); + } +} + +_Must_inspect_result_ ebpf_result_t +_ebpf_perf_event_array_output( + _Inout_ ebpf_perf_event_array_t* perf_event_array, + uint32_t cpu_id, + _In_reads_bytes_(length) const uint8_t* data, + size_t length, + _In_reads_bytes_(extra_length) const uint8_t* extra_data, + size_t extra_length) +{ + ebpf_assert(cpu_id < perf_event_array->ring_count); + + ebpf_lock_state_t state = ebpf_lock_lock(&perf_event_array->rings[cpu_id].lock); + ebpf_perf_event_array_record_t* record = + _perf_event_array_acquire_record(perf_event_array, cpu_id, length + extra_length); + ebpf_result_t result = EBPF_SUCCESS; + + if (record == NULL) { + result = EBPF_OUT_OF_SPACE; + perf_event_array->rings[cpu_id].lost_records++; + goto Done; + } + + record->header.discarded = 0; + record->header.locked = 0; + memcpy(record->data, data, length); + if (extra_data != NULL) { + memcpy(record->data + length, extra_data, extra_length); + } + result = EBPF_SUCCESS; + +Done: + ebpf_lock_unlock(&perf_event_array->rings[cpu_id].lock, state); + return result; +} + +_Must_inspect_result_ ebpf_result_t +ebpf_perf_event_array_output_simple( + _Inout_ ebpf_perf_event_array_t* perf_event_array, + uint32_t cpu_id, + _In_reads_bytes_(length) uint8_t* data, + size_t length) +{ + if (cpu_id == (uint32_t)EBPF_MAP_FLAG_CURRENT_CPU) { + cpu_id = ebpf_get_current_cpu(); + } + return _ebpf_perf_event_array_output(perf_event_array, cpu_id, data, length, NULL, 0); +} + +_Must_inspect_result_ ebpf_result_t +ebpf_perf_event_array_output( + _In_ void* ctx, + _Inout_ ebpf_perf_event_array_t* perf_event_array, + uint64_t flags, + _In_reads_bytes_(length) uint8_t* data, + size_t length, + _Out_opt_ uint32_t* cpu_id) +{ + // UNREFERENCED_PARAMETER(ctx); + // ebpf_result_t result; + uint32_t _cpu_id = (flags & EBPF_MAP_FLAG_INDEX_MASK) >> EBPF_MAP_FLAG_INDEX_SHIFT; + uint32_t capture_length = (uint32_t)((flags & EBPF_MAP_FLAG_CTXLEN_MASK) >> EBPF_MAP_FLAG_CTXLEN_SHIFT); + uint32_t current_cpu = ebpf_get_current_cpu(); + const void* extra_data = NULL; + size_t extra_length = 0; + + if (_cpu_id == EBPF_MAP_FLAG_CURRENT_CPU) { + _cpu_id = current_cpu; + if (cpu_id != NULL) { + *cpu_id = _cpu_id; + } + } else if (_cpu_id != current_cpu) { + // We only support writes to the current CPU. 
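+ // (A caller-specified index in flags must equal the executing CPU;
+ // any other index is rejected below.)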
+ return EBPF_INVALID_ARGUMENT; + } else if (cpu_id != NULL) { + *cpu_id = _cpu_id; + } + + if (capture_length != 0) { + // Caller requested data capture. + ebpf_assert(ctx != NULL); + + uint8_t *ctx_data_start, *ctx_data_end; + ebpf_program_get_context_data(ctx, &ctx_data_start, &ctx_data_end); + + if (ctx_data_start == NULL || ctx_data_end == NULL) { + // No context data pointer. + return EBPF_OPERATION_NOT_SUPPORTED; + } else if ((uint64_t)(ctx_data_end - ctx_data_start) < (uint64_t)capture_length) { + // Requested capture length larger than data. + return EBPF_INVALID_ARGUMENT; + } + + extra_data = ctx_data_start; + extra_length = capture_length; + } + + return _ebpf_perf_event_array_output(perf_event_array, _cpu_id, data, length, extra_data, extra_length); +} + +uint32_t +ebpf_perf_event_array_get_ring_count(_In_ const ebpf_perf_event_array_t* perf_event_array) +{ + return perf_event_array->ring_count; +} + +size_t +ebpf_perf_event_array_get_reset_lost_count(_In_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id) +{ + ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock); + size_t lost_count = ring->lost_records; + ring->lost_records = 0; + ebpf_lock_unlock(&ring->lock, state); + return lost_count; +} + +void +ebpf_perf_event_array_query( + _In_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, _Out_ size_t* consumer, _Out_ size_t* producer) +{ + ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock); + *consumer = ring->consumer_offset; + *producer = ring->producer_offset; + ebpf_lock_unlock(&ring->lock, state); +} + +_Must_inspect_result_ ebpf_result_t +ebpf_perf_event_array_return(_Inout_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, size_t length) +{ + EBPF_LOG_ENTRY(); + length = _perf_array_padded_size(length); + + ebpf_result_t result; + ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id]; + ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock); + size_t local_length = length; + size_t offset = _perf_array_get_consumer_offset(perf_event_array, cpu_id); + + if ((length > _perf_array_get_length(perf_event_array, cpu_id)) || + length > _perf_array_get_used_capacity(perf_event_array, cpu_id)) { + EBPF_LOG_MESSAGE_UINT64_UINT64( + EBPF_TRACELOG_LEVEL_ERROR, + EBPF_TRACELOG_KEYWORD_MAP, + "ebpf_perf_event_array_return: Buffer too large", + ring->producer_offset, + ring->consumer_offset); + result = EBPF_INVALID_ARGUMENT; + goto Done; + } + + // Verify count. + while (local_length != 0) { + ebpf_perf_event_array_record_t* record = _perf_array_record_at_offset(perf_event_array, cpu_id, offset); + size_t padded_record_length = _perf_array_padded_size(record->header.length); + if (local_length < padded_record_length) { + break; + } + offset += padded_record_length; + local_length -= padded_record_length; + } + // Did it end on a record boundary? 
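+ // (local_length is the caller's length minus the whole records walked
+ // above; any remainder would split a record, so it is rejected.)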
+ if (local_length != 0) {
+ EBPF_LOG_MESSAGE_UINT64(
+ EBPF_TRACELOG_LEVEL_ERROR,
+ EBPF_TRACELOG_KEYWORD_MAP,
+ "ebpf_perf_event_array_return: Invalid buffer length",
+ local_length);
+ result = EBPF_INVALID_ARGUMENT;
+ goto Done;
+ }
+
+ _perf_array_advance_consumer_offset(perf_event_array, cpu_id, length);
+ result = EBPF_SUCCESS;
+
+Done:
+ ebpf_lock_unlock(&ring->lock, state);
+ EBPF_RETURN_RESULT(result);
+}
+
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_map_buffer(
+ _In_ const ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, _Outptr_ uint8_t** buffer)
+{
+ const ebpf_perf_ring_t* ring = &perf_event_array->rings[cpu_id];
+ *buffer = ebpf_ring_map_readonly_user(ring->ring_descriptor);
+ if (!*buffer) {
+ return EBPF_INVALID_ARGUMENT;
+ } else {
+ return EBPF_SUCCESS;
+ }
+} \ No newline at end of file
diff --git a/libs/runtime/ebpf_perf_event_array.h b/libs/runtime/ebpf_perf_event_array.h
new file mode 100644
index 0000000000..021b8e8d36
--- /dev/null
+++ b/libs/runtime/ebpf_perf_event_array.h
@@ -0,0 +1,137 @@
+// Copyright (c) eBPF for Windows contributors
+// SPDX-License-Identifier: MIT
+
+#pragma once
+
+#include "ebpf_perf_event_array_record.h"
+#include "ebpf_shared_framework.h"
+
+CXPLAT_EXTERN_C_BEGIN
+
+typedef struct _ebpf_perf_event_array ebpf_perf_event_array_t;
+typedef struct _ebpf_perf_event_array_opts
+{
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+ uint64_t flags;
+} ebpf_perf_event_array_opts_t;
+#define perf_event_array_opts__last_field sz
+
+typedef enum _perf_event_array_flags
+{
+ PERF_ARRAY_FLAG_AUTO_CALLBACK = (uint64_t)1 << 0 /* Automatically invoke callback for each record */
+} perf_event_array_flags_t;
+
+/**
+ * @brief Allocate a perf event array with one ring per CPU, each of the given capacity.
+ *
+ * @param[out] perf_event_array On success, points to the allocated perf event array.
+ * @param[in] capacity Size in bytes of each per-CPU ring buffer.
+ * @param[in] opts Perf event array options (currently unused; may be NULL).
+ * @retval EBPF_SUCCESS Successfully allocated perf event array.
+ * @retval EBPF_NO_MEMORY Unable to allocate perf event array.
+ */
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_create(
+ _Outptr_ ebpf_perf_event_array_t** perf_event_array, size_t capacity, _In_ ebpf_perf_event_array_opts_t* opts);
+
+/**
+ * @brief Free a perf event array.
+ *
+ * @param[in] perf_event_array Perf event array to free.
+ */
+void
+ebpf_perf_event_array_destroy(_Frees_ptr_opt_ ebpf_perf_event_array_t* perf_event_array);
+
+/**
+ * @brief Write out a variable sized record to the given CPU's ring in the perf event array.
+ *
+ * @param[in, out] perf_event_array Perf event array to write to.
+ * @param[in] cpu_id CPU ring to write to, or (uint32_t)EBPF_MAP_FLAG_CURRENT_CPU for the current CPU.
+ * @param[in] data Data to copy into record.
+ * @param[in] length Length of data to copy.
+ * @retval EBPF_SUCCESS Successfully wrote record to the perf event array.
+ * @retval EBPF_OUT_OF_SPACE Unable to output to the perf event array due to inadequate space.
+ */
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_output_simple(
+ _Inout_ ebpf_perf_event_array_t* perf_event_array,
+ uint32_t cpu_id,
+ _In_reads_bytes_(length) uint8_t* data,
+ size_t length);
+
+/**
+ * @brief Write out a variable sized record to the perf event array.
+ *
+ * @param[in] ctx Program context; read when the flags request context data capture.
+ * @param[in, out] perf_event_array Perf event array to write to.
+ * @param[in] flags Perf event output flags (target CPU index and optional context capture length).
+ * @param[in] data Data to copy into record.
+ * @param[in] length Length of data to copy.
+ * @param[out] cpu_id CPU ring that was written to (optional).
+ * @retval EBPF_SUCCESS Successfully wrote record to the perf event array.
+ * @retval EBPF_OUT_OF_SPACE Unable to output to the perf event array due to inadequate space.
+ * @retval EBPF_INVALID_ARGUMENT Invalid CPU index or capture length in flags.
+ * @retval EBPF_OPERATION_NOT_SUPPORTED Context capture was requested but the context has no data pointers.
+ */
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_output(
+ _In_ void* ctx,
+ _Inout_ ebpf_perf_event_array_t* perf_event_array,
+ uint64_t flags,
+ _In_reads_bytes_(length) uint8_t* data,
+ size_t length,
+ _Out_opt_ uint32_t* cpu_id);
+
+/**
+ * @brief Get the number of rings in the perf event array.
+ * @param[in] perf_event_array Perf event array to query.
+ * @return Number of rings in the perf event array.
+ */
+uint32_t
+ebpf_perf_event_array_get_ring_count(_In_ const ebpf_perf_event_array_t* perf_event_array);
+
+/**
+ * @brief Get the number of dropped records in a perf event ring and reset the count.
+ * @param[in] perf_event_array Perf event array to query.
+ * @param[in] cpu_id CPU ring to query.
+ * @return Number of dropped records in the ring.
+ */
+size_t
+ebpf_perf_event_array_get_reset_lost_count(_In_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id);
+
+/**
+ * @brief Query the current consumer and producer offsets of a CPU's ring.
+ *
+ * @param[in] perf_event_array Perf event array to query.
+ * @param[in] cpu_id CPU ring to query.
+ * @param[out] consumer Offset of the first record that can be consumed.
+ * @param[out] producer Offset at which the next record will be produced.
+ */
+void
+ebpf_perf_event_array_query(
+ _In_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, _Out_ size_t* consumer, _Out_ size_t* producer);
+
+/**
+ * @brief Mark one or more records in a CPU's ring as returned to the ring.
+ *
+ * @param[in, out] perf_event_array Perf event array to update.
+ * @param[in] cpu_id CPU ring to return records to.
+ * @param[in] length Length in bytes to return to the ring buffer; must end on a record boundary.
+ * @retval EBPF_SUCCESS Successfully returned records to the ring buffer.
+ * @retval EBPF_INVALID_ARGUMENT Unable to return records to the ring buffer.
+ */
+_Must_inspect_result_ ebpf_result_t
+ebpf_perf_event_array_return(_Inout_ ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, size_t length);
+
+/**
+ * @brief Get a read-only user-space mapping of a CPU ring's data buffer.
+ *
+ * @param[in] perf_event_array Perf event array to query.
+ * @param[in] cpu_id CPU ring to query.
+ * @param[out] buffer On success, pointer to the read-only mapping of the ring's data.
+ * @retval EBPF_SUCCESS Successfully mapped the ring buffer.
+ * @retval EBPF_INVALID_ARGUMENT Unable to map the ring buffer.
+ */ +_Must_inspect_result_ ebpf_result_t +ebpf_perf_event_array_map_buffer( + _In_ const ebpf_perf_event_array_t* perf_event_array, uint32_t cpu_id, _Outptr_ uint8_t** buffer); + +CXPLAT_EXTERN_C_END diff --git a/libs/runtime/kernel/platform_kernel.vcxproj b/libs/runtime/kernel/platform_kernel.vcxproj index 08f88a29f0..aac620a010 100644 --- a/libs/runtime/kernel/platform_kernel.vcxproj +++ b/libs/runtime/kernel/platform_kernel.vcxproj @@ -17,6 +17,7 @@ + @@ -45,6 +46,7 @@ + diff --git a/libs/runtime/kernel/platform_kernel.vcxproj.filters b/libs/runtime/kernel/platform_kernel.vcxproj.filters index 8fcd53718e..1e7b5d1a22 100644 --- a/libs/runtime/kernel/platform_kernel.vcxproj.filters +++ b/libs/runtime/kernel/platform_kernel.vcxproj.filters @@ -43,6 +43,9 @@ Source Files + + Source Files + Source Files @@ -132,6 +135,9 @@ Header Files + + Header Files + Header Files diff --git a/libs/runtime/unit/platform_unit_test.cpp b/libs/runtime/unit/platform_unit_test.cpp index 5ec5849237..cb33543240 100644 --- a/libs/runtime/unit/platform_unit_test.cpp +++ b/libs/runtime/unit/platform_unit_test.cpp @@ -10,8 +10,10 @@ #include "ebpf_epoch.h" #include "ebpf_hash_table.h" #include "ebpf_nethooks.h" +#include "ebpf_perf_event_array.h" #include "ebpf_pinning_table.h" #include "ebpf_platform.h" +#include "ebpf_program.h" #include "ebpf_program_types.h" #include "ebpf_random.h" #include "ebpf_ring_buffer.h" @@ -200,6 +202,23 @@ typedef class _ebpf_epoch_scope bool in_epoch; } ebpf_epoch_scope_t; +struct scoped_cpu_affinity +{ + scoped_cpu_affinity(uint32_t i) : old_affinity_mask{} + { + affinity_set = ebpf_set_current_thread_cpu_affinity(i, &old_affinity_mask) == EBPF_SUCCESS; + REQUIRE(affinity_set); + } + ~scoped_cpu_affinity() + { + if (affinity_set) { + ebpf_restore_current_thread_cpu_affinity(&old_affinity_mask); + } + } + GROUP_AFFINITY old_affinity_mask; + bool affinity_set = false; +}; + TEST_CASE("hash_table_test", "[platform]") { std::vector key_1(13); @@ -1146,6 +1165,471 @@ TEST_CASE("ring_buffer_reserve_submit_discard", "[platform]") ring_buffer = nullptr; } +const static size_t PERF_RECORD_HEADER_SIZE = EBPF_OFFSET_OF(ebpf_perf_event_array_record_t, data); + +size_t +_perf_record_size(size_t data_size) +{ + return data_size + PERF_RECORD_HEADER_SIZE; +} + +size_t +_perf_pad_size(size_t size) +{ + return (size + 7) & ~7; +} + +TEST_CASE("context_descriptor_header", "[platform][perf_event_array]") +{ + // Confirm context descriptor header in program context works as expected. + + struct context_t + { + uint8_t* data; + uint8_t* data_end; + }; + // full context includes EBPF_CONTEXT_HEADER plus the program accessible portion. + struct full_context_t + { + EBPF_CONTEXT_HEADER; + context_t ctx; + } context; + + // ctx points to the bpf-program accessible portion (just after the header). + void* ctx = &context.ctx; + + // The context descriptor tells the platform where to find the data pointers. 
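+ // (ebpf_program_set_header_context_descriptor stores the descriptor in the
+ // hidden header; ebpf_program_get_context_data then uses it to resolve the
+ // data/data_end fields relative to the program-visible ctx pointer.)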
+ ebpf_context_descriptor_t context_descriptor = { + sizeof(context_t), EBPF_OFFSET_OF(context_t, data), EBPF_OFFSET_OF(context_t, data_end), -1}; + ebpf_program_set_header_context_descriptor(&context_descriptor, ctx); + + const ebpf_context_descriptor_t* test_ctx_descriptor; + ebpf_program_get_header_context_descriptor(ctx, &test_ctx_descriptor); + REQUIRE(test_ctx_descriptor == &context_descriptor); + + const uint8_t *data_start, *data_end; + + context_descriptor = { + sizeof(context.ctx), EBPF_OFFSET_OF(context_t, data), EBPF_OFFSET_OF(context_t, data_end), -1}; + context.ctx.data = (uint8_t*)((void*)0x0123456789abcdef); + context.ctx.data_end = (uint8_t*)((void*)0xfedcba9876543210); + ebpf_program_get_context_data(ctx, &data_start, &data_end); + REQUIRE(data_start == context.ctx.data); + REQUIRE(data_end == context.ctx.data_end); +} + +/** + * @brief Run a test on perf_event_output using the given parameters. + * + * Assumes the perf_event_array is already created and mapped. + * + * Pins the current thread to the target cpu_id during the test. + * + * @param[in] perf_event_array Pointer to the perf_event_array. + * @param[in] buffer Pointer to the mapped buffer for cpu_id. + * @param[in] size Size of the mapped buffer. + * @param[in] cpu_id CPU id to use (and temporarily pin to). + * @param[in] flags perf_event_output flags to use. + * @param[in] data Data to write to the perf_event_array. + * @param[in] length Length of the data to write. + * @param[in] ctx_data data pointer in the simulated program context. + * @param[in] ctx_data_length Length of the program context data (<0 if no data pointer, <-1 if no ctx header). + * @param[in] expected_result Expected result of the perf_event_output call. + * @param[in] consume Whether to return the space to buffer after the test. + */ +void +_test_perf_event_output( + ebpf_perf_event_array_t* perf_event_array, + uint8_t* buffer, + size_t size, + uint32_t cpu_id, + uint64_t flags, + uint8_t* data, + size_t length, + uint8_t* ctx_data, + int64_t ctx_data_length, + ebpf_result_t expected_result, + bool consume = true) +{ + // context_t - Simple program context with data pointers. + struct context_t + { + uint8_t* data = (uint8_t*)((void*)42); + uint8_t* data_end = (uint8_t*)((void*)47); + uint8_t ctx_extra[8]; + }; + // full_context_t - Context with header (needed to find context descriptor). + struct full_context_t + { + EBPF_CONTEXT_HEADER; + context_t ctx; + } full_context; + // ctx points to the bpf-program accessible portion (just after the header). + context_t* ctx = &full_context.ctx; + + // Put some data in the program context (which we should never see in the output). + for (int i = 0; i < sizeof(ctx->ctx_extra); i++) { + ctx->ctx_extra[i] = static_cast(192 + i % 64); + } + + // Initialize context descriptor for the test program context. + ebpf_context_descriptor_t context_descriptor = { + sizeof(context_t), EBPF_OFFSET_OF(context_t, data), EBPF_OFFSET_OF(context_t, data_end), -1}; + + full_context.context_header[1] = (uint64_t)&context_descriptor; + ebpf_program_set_header_context_descriptor(&context_descriptor, ctx); + + // ctx_data_length cases: + // <-1: No context header (unsafe to use with capture length in flags). + // -1: No context data pointer. + // >=0: Data pointer in context. + + // If ctx_data_length is < -1, then there is no context header. 
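+ // (That case zeroes full_context.context_header[1] below, simulating a
+ // context allocated without the EBPF_CONTEXT_HEADER prefix.)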
+ // perf_event_output has no way to know if the context header is present or not, + // so only programs where the extension has context header support can call + // perf_event_output with the CTXLEN field set in the flags. + + if (ctx_data_length <= -2) { // -2: No ctx header (do NOT use -2 with capture length specified). + full_context.context_header[1] = 0; + ctx_data = nullptr; + ctx_data_length = 0; + } else if (ctx_data_length < 0) { // -1: No ctx data pointer (capture length returns error). + ctx_data = nullptr; + ctx_data_length = 0; + context_descriptor.data = -1; + context_descriptor.end = -1; + } else { + const uint8_t *data_start, *data_end; + ebpf_program_get_context_data(ctx, &data_start, &data_end); + REQUIRE(data_start == ctx->data); + REQUIRE(data_end == ctx->data_end); + } + ctx->data = ctx_data; + ctx->data_end = ctx_data + ctx_data_length; + + bool use_current_cpu = (flags & EBPF_MAP_FLAG_INDEX_MASK) == EBPF_MAP_FLAG_CURRENT_CPU; + size_t capture_length = (flags & EBPF_MAP_FLAG_CTXLEN_MASK) >> EBPF_MAP_FLAG_CTXLEN_SHIFT; + + // Capture the relevant test parameters. + CAPTURE(cpu_id, use_current_cpu, size, ctx_data_length, length, capture_length, expected_result, consume); + + // perf_event_array only allows writing to the current cpu, so we pin to the requested cpu for the test. + scoped_cpu_affinity affinity(cpu_id); + + size_t old_consumer, old_producer; + size_t new_consumer, new_producer; + ebpf_perf_event_array_query(perf_event_array, cpu_id, &old_consumer, &old_producer); + CAPTURE( + ctx, + flags, + data, + ctx_data, + old_consumer, + old_producer, + ctx->data, + ctx->data_end, + context_descriptor.size, + context_descriptor.data, + context_descriptor.end); + REQUIRE(ebpf_perf_event_array_output(ctx, perf_event_array, flags, data, length, NULL) == expected_result); + + ebpf_perf_event_array_query(perf_event_array, cpu_id, &new_consumer, &new_producer); + + CAPTURE(old_consumer, old_producer, new_consumer, new_producer); + + if (expected_result == EBPF_SUCCESS) { + // Verify the new producer offset (padded to 8 bytes). + REQUIRE(new_producer == old_producer + _perf_pad_size(_perf_record_size(length + capture_length))); + + // Verify the record just written. + auto record = ebpf_perf_event_array_next_record(buffer, size, new_consumer, new_producer); + REQUIRE(record != nullptr); + REQUIRE(record->header.length == _perf_record_size(length + capture_length)); + REQUIRE(memcmp(record->data, data, length) == 0); + REQUIRE(memcmp(record->data + length, ctx_data, capture_length) == 0); + + if (consume) { + REQUIRE(ebpf_perf_event_array_return(perf_event_array, cpu_id, record->header.length) == EBPF_SUCCESS); + size_t final_consumer, final_producer; + ebpf_perf_event_array_query(perf_event_array, cpu_id, &final_consumer, &final_producer); + CAPTURE(final_consumer, final_producer); + REQUIRE((final_producer - final_consumer) == (old_producer - old_consumer)); + REQUIRE(final_producer == new_producer); + } else { + size_t final_consumer, final_producer; + ebpf_perf_event_array_query(perf_event_array, cpu_id, &final_consumer, &final_producer); + REQUIRE(final_consumer == new_consumer); + REQUIRE(final_producer == new_producer); + } + } else { + // Verify that the producer and consumer offsets have not changed. 
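+ // (A failed output must not reserve ring space or move either offset.)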
+ REQUIRE(new_consumer == old_consumer); + REQUIRE(new_producer == old_producer); + } +} + +TEST_CASE("perf_event_output", "[platform][perf_event_array]") +{ + _test_helper test_helper; + test_helper.initialize(); + scoped_cpu_affinity affinity(0); // Pin to cpu 0 for the test. + size_t consumer; + size_t producer; + ebpf_perf_event_array_t* perf_event_array; + ebpf_perf_event_array_opts_t* opts = nullptr; + + uint8_t* buffer; + std::vector data(10); + size_t size = 64 * 1024; + void* ctx = nullptr; + uint32_t cpu_id = 0; + uint64_t flags = EBPF_MAP_FLAG_CURRENT_CPU; + + REQUIRE(ebpf_perf_event_array_create(&perf_event_array, size, opts) == EBPF_SUCCESS); + REQUIRE(ebpf_perf_event_array_map_buffer(perf_event_array, cpu_id, &buffer) == EBPF_SUCCESS); + + ebpf_perf_event_array_query(perf_event_array, cpu_id, &consumer, &producer); + + // Ring is empty. + REQUIRE(producer == consumer); + REQUIRE(consumer == 0); + + REQUIRE(ebpf_perf_event_array_output(ctx, perf_event_array, flags, data.data(), data.size(), NULL) == EBPF_SUCCESS); + ebpf_perf_event_array_query(perf_event_array, cpu_id, &consumer, &producer); + + // Ring is not empty. + REQUIRE(producer == _perf_pad_size(_perf_record_size(data.size()))); + REQUIRE(consumer == 0); + + auto record = ebpf_perf_event_array_next_record(buffer, size, consumer, producer); + REQUIRE(record != nullptr); + REQUIRE(record->header.length == _perf_record_size(data.size())); + + REQUIRE(ebpf_perf_event_array_return(perf_event_array, cpu_id, record->header.length) == EBPF_SUCCESS); + + ebpf_perf_event_array_query(perf_event_array, cpu_id, &consumer, &producer); + REQUIRE(consumer == producer); + REQUIRE(producer == _perf_pad_size(_perf_record_size(data.size()))); + + record = ebpf_perf_event_array_next_record(buffer, size, consumer, producer); + REQUIRE(record == nullptr); + + size_t write_count = 0; + + data.resize(1023); + while (ebpf_perf_event_array_output(ctx, perf_event_array, flags, data.data(), data.size(), NULL) == EBPF_SUCCESS) { + if (++write_count > 1000) { + INFO("Too many writes to perf_event_array."); + REQUIRE(false); + } + } + + ebpf_perf_event_array_query(perf_event_array, cpu_id, &consumer, &producer); + REQUIRE(ebpf_perf_event_array_return(perf_event_array, cpu_id, (producer - consumer) % size) == EBPF_SUCCESS); + + data.resize((size - _perf_record_size(0) - 1) & ~7); // remaining space rounded down to multiple of 8 + // Fill ring. + REQUIRE(ebpf_perf_event_array_output(ctx, perf_event_array, flags, data.data(), data.size(), NULL) == EBPF_SUCCESS); + + ebpf_perf_event_array_destroy(perf_event_array); + perf_event_array = nullptr; +} + +TEST_CASE("perf_event_output_percpu", "[platform][perf_event_array]") +{ + _test_helper test_helper; + test_helper.initialize(); + size_t consumer; + size_t producer; + ebpf_perf_event_array_t* perf_event_array; + ebpf_perf_event_array_opts_t* opts = nullptr; + + uint8_t* buffer; + std::vector data(10); + size_t size = 64 * 1024; + void* ctx = nullptr; + uint64_t flags = EBPF_MAP_FLAG_CURRENT_CPU; + + REQUIRE(ebpf_perf_event_array_create(&perf_event_array, size, opts) == EBPF_SUCCESS); + + uint32_t cpu_count = ebpf_get_cpu_count(); + for (uint32_t cpu_id = 0; cpu_id < cpu_count; cpu_id++) { + // Set CPU affinity to the current CPU. + scoped_cpu_affinity affinity(cpu_id); + + // Output an event. + REQUIRE( + ebpf_perf_event_array_output(ctx, perf_event_array, flags, data.data(), data.size(), NULL) == EBPF_SUCCESS); + + // Query all CPU buffers and ensure only the current CPU has data. 
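+ // (The write above used EBPF_MAP_FLAG_CURRENT_CPU while pinned to cpu_id,
+ // so only this CPU's producer offset should have advanced.)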
+ for (uint32_t query_cpu_id = 0; query_cpu_id < cpu_count; query_cpu_id++) { + REQUIRE(ebpf_perf_event_array_map_buffer(perf_event_array, query_cpu_id, &buffer) == EBPF_SUCCESS); + ebpf_perf_event_array_query(perf_event_array, query_cpu_id, &consumer, &producer); + + if (query_cpu_id == cpu_id) { + // The current CPU should have the data. + REQUIRE((producer - consumer) == _perf_pad_size(_perf_record_size(data.size()))); + } else { + // Other CPUs should not have data. + REQUIRE(producer == consumer); + } + } + + // Return the data. + REQUIRE(ebpf_perf_event_array_return(perf_event_array, cpu_id, _perf_record_size(data.size())) == EBPF_SUCCESS); + } + + ebpf_perf_event_array_destroy(perf_event_array); + perf_event_array = nullptr; +} + +TEST_CASE("perf_event_output_capture", "[platform][perf_event_array]") +{ + _test_helper test_helper; + test_helper.initialize(); + ebpf_perf_event_array_t* perf_event_array; + ebpf_perf_event_array_opts_t* opts = nullptr; + + std::vector buffers; + std::vector data(1024); + std::vector ctx_data(1024); + // Initialize data. + for (int i = 0; i < data.size(); i++) { + data[i] = static_cast(0 + i % 64); + } + for (int i = 0; i < ctx_data.size(); i++) { + ctx_data[i] = static_cast(64 + i % 64); + } + + size_t size = 64 * 1024; + + REQUIRE(ebpf_perf_event_array_create(&perf_event_array, size, opts) == EBPF_SUCCESS); + uint32_t ring_count = ebpf_perf_event_array_get_ring_count(perf_event_array); + + for (uint32_t i = 0; i < ring_count; i++) { + uint8_t* buffer; + REQUIRE(ebpf_perf_event_array_map_buffer(perf_event_array, i, &buffer) == EBPF_SUCCESS); + buffers.push_back(buffer); + + size_t consumer, producer; + ebpf_perf_event_array_query(perf_event_array, i, &consumer, &producer); + + // Ensure ring is empty. + REQUIRE(producer == consumer); + REQUIRE(consumer == 0); + } + + struct _test_params + { + uint32_t cpu_id; + bool use_current_cpu; + int64_t ctx_data_length; + size_t data_length; + uint32_t capture_length; + ebpf_result_t expected_result; + const char* test_string; + } test_params[] = { +#define STRINGIZE2(x) #x +#define STRINGIZE(x) STRINGIZE2(x) +#define PERF_TEST_CASE(cpu, use_current_cpu, ctx_data_len, data_len, capture_len, expected) \ + {cpu, \ + use_current_cpu, \ + ctx_data_len, \ + data_len, \ + capture_len, \ + expected, \ + "Line " STRINGIZE(__LINE__) ": {" #cpu ", " #use_current_cpu ", " #ctx_data_len ", " #data_len ", " #capture_len \ + ", " #expected "}"} + // Tests with no context header. + // - Note: Context headers are now required for all extensions, + // so these tests just validate that without CTXLEN the context header isn't used. + PERF_TEST_CASE(0, true, -2, 0, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, -2, 1, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, true, -2, 8, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, false, -2, 10, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, false, -2, 1024, 0, EBPF_SUCCESS), + // Auto CPU tests with no ctx_data. + PERF_TEST_CASE(0, true, -1, 0, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, true, -1, 0, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, -1, 1, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, -1, 8, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, -1, 10, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, -1, 1024, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, true, -1, 10, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, true, -1, 1024, 0, EBPF_SUCCESS), + // Manual CPU selection tests (no ctx_data). + PERF_TEST_CASE(0, false, -1, 10, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, false, -1, 10, 0, EBPF_SUCCESS), + // Empty ctx_data tests. 
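+ // (ctx_data_length 0 leaves valid but empty data pointers; any nonzero
+ // capture against them fails, see the invalid capture cases below.)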
+ PERF_TEST_CASE(0, true, 0, 10, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, 0, 1024, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, true, 0, 1024, 0, EBPF_SUCCESS), + // Tests with ctx_data but no capture request. + PERF_TEST_CASE(0, true, 8, 10, 0, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, 8, 1024, 0, EBPF_SUCCESS), + PERF_TEST_CASE(1, false, 8, 1024, 0, EBPF_SUCCESS), + // Tests with no data but with capture. + PERF_TEST_CASE(0, true, 8, 0, 8, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, 1024, 0, 8, EBPF_SUCCESS), + PERF_TEST_CASE(0, false, 1024, 0, 1024, EBPF_SUCCESS), + // Tests with data and capture. + PERF_TEST_CASE(0, true, 8, 10, 8, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, 1024, 1024, 8, EBPF_SUCCESS), + PERF_TEST_CASE(0, true, 1024, 1024, 1024, EBPF_SUCCESS), + PERF_TEST_CASE(1, true, 1024, 1024, 8, EBPF_SUCCESS), + PERF_TEST_CASE(1, false, 1024, 1024, 8, EBPF_SUCCESS), + // Invalid data length tests. + PERF_TEST_CASE(0, true, 0, size, 0, EBPF_OUT_OF_SPACE), + PERF_TEST_CASE(0, true, 0, size + 1, 0, EBPF_OUT_OF_SPACE), + // Invalid capture requests. + PERF_TEST_CASE(0, true, -1, 10, 1, EBPF_OPERATION_NOT_SUPPORTED), + PERF_TEST_CASE(0, true, 0, 10, 1, EBPF_INVALID_ARGUMENT), + PERF_TEST_CASE(1, true, 0, 10, 1, EBPF_INVALID_ARGUMENT), + PERF_TEST_CASE(1, false, 0, 10, 1, EBPF_INVALID_ARGUMENT), + PERF_TEST_CASE(0, false, 10, 10, 11, EBPF_INVALID_ARGUMENT), +#undef PERF_TEST_CASE +#undef STRINGIZE +#undef STRINGIZE2 + }; + size_t test_count = sizeof(test_params) / sizeof(test_params[0]); + + // Run the tests, verifying before and after state for each call to perf_event_output. + for (int test_index = 0; test_index < test_count; test_index++) { + auto* test = &test_params[test_index]; + const char* test_string = test->test_string; + CAPTURE(test_index, test_string); + uint64_t test_flags = test->use_current_cpu ? EBPF_MAP_FLAG_CURRENT_CPU : test->cpu_id; + if (test->capture_length > 0) { + test_flags |= ((uint64_t)test->capture_length << EBPF_MAP_FLAG_CTXLEN_SHIFT) & EBPF_MAP_FLAG_CTXLEN_MASK; + } + _test_perf_event_output( + perf_event_array, + buffers[test->cpu_id], + size, + test->cpu_id, + test_flags, + data.data(), + test->data_length, + ctx_data.data(), + test->ctx_data_length, + test->expected_result); + } + + // Ensure all rings are empty. + for (uint32_t cpu_id = 0; cpu_id < ring_count; cpu_id++) { + CAPTURE(cpu_id); + size_t consumer, producer; + ebpf_perf_event_array_query(perf_event_array, cpu_id, &consumer, &producer); + // Ensure ring is empty. 
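+ // (Each successful case consumed its record, via _test_perf_event_output's
+ // consume=true default, so producer and consumer should now match.)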
+ CHECK(producer == consumer);
+ auto record = ebpf_perf_event_array_next_record(buffers[cpu_id], size, consumer, producer);
+ CHECK(record == nullptr);
+ }
+
+ ebpf_perf_event_array_destroy(perf_event_array);
+ perf_event_array = nullptr;
+}
+
 TEST_CASE("error codes", "[platform]")
 {
 for (ebpf_result_t result = EBPF_SUCCESS; result < EBPF_RESULT_COUNT; result = (ebpf_result_t)(result + 1)) {
diff --git a/libs/runtime/user/platform_user.vcxproj b/libs/runtime/user/platform_user.vcxproj
index 4da43dd5b6..c05f13f17f 100644
--- a/libs/runtime/user/platform_user.vcxproj
+++ b/libs/runtime/user/platform_user.vcxproj
@@ -18,6 +18,7 @@
+
@@ -34,6 +35,7 @@
+
diff --git a/libs/runtime/user/platform_user.vcxproj.filters b/libs/runtime/user/platform_user.vcxproj.filters
index e79658e741..ae23bf96ce 100644
--- a/libs/runtime/user/platform_user.vcxproj.filters
+++ b/libs/runtime/user/platform_user.vcxproj.filters
@@ -55,6 +55,9 @@
 Source Files
+
+ Source Files
+
 Source Files
@@ -111,6 +114,9 @@
 Header Files
+
+ Header Files
+
 Header Files
diff --git a/libs/shared/ebpf_perf_event_array_record.h b/libs/shared/ebpf_perf_event_array_record.h
new file mode 100644
index 0000000000..fac27ac2eb
--- /dev/null
+++ b/libs/shared/ebpf_perf_event_array_record.h
@@ -0,0 +1,39 @@
+// Copyright (c) eBPF for Windows contributors
+// SPDX-License-Identifier: MIT
+#pragma once
+#include "cxplat.h"
+
+CXPLAT_EXTERN_C_BEGIN
+
+typedef struct _ebpf_perf_event_array_record
+{
+ struct
+ {
+ uint8_t locked : 1;
+ uint8_t discarded : 1;
+ uint32_t length : 30;
+ } header;
+ uint8_t data[1];
+} ebpf_perf_event_array_record_t;
+
+/**
+ * @brief Locate the next record in the ring buffer's data buffer. The
+ * consumer offset is not advanced.
+
+ * @param[in] buffer Pointer to the start of the ring buffer's data buffer.
+ * @param[in] buffer_length Length of the ring buffer's data buffer.
+ * @param[in] consumer Consumer offset.
+ * @param[in] producer Producer offset.
+ * @return Pointer to the next record or NULL if no more records.
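+ * @note The returned record is not removed from the ring; runtime-side
+ * consumers release it afterwards by passing header.length to
+ * ebpf_perf_event_array_return(), as the unit tests do.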
+ */ +inline const ebpf_perf_event_array_record_t* +ebpf_perf_event_array_next_record(_In_ const uint8_t* buffer, size_t buffer_length, size_t consumer, size_t producer) +{ + ebpf_assert(producer >= consumer); + if (producer == consumer) { + return NULL; + } + return (ebpf_perf_event_array_record_t*)(buffer + consumer % buffer_length); +} + +CXPLAT_EXTERN_C_END diff --git a/libs/shared/kernel/shared_kernel.vcxproj b/libs/shared/kernel/shared_kernel.vcxproj index d6a152c808..0b58789773 100644 --- a/libs/shared/kernel/shared_kernel.vcxproj +++ b/libs/shared/kernel/shared_kernel.vcxproj @@ -15,6 +15,7 @@ + diff --git a/libs/shared/kernel/shared_kernel.vcxproj.filters b/libs/shared/kernel/shared_kernel.vcxproj.filters index cacccd623b..6b910ac8e2 100644 --- a/libs/shared/kernel/shared_kernel.vcxproj.filters +++ b/libs/shared/kernel/shared_kernel.vcxproj.filters @@ -30,6 +30,9 @@ Header Files + + Header Files + Header Files diff --git a/libs/shared/user/shared_user.vcxproj b/libs/shared/user/shared_user.vcxproj index 2dff1e424a..8962cf1e1d 100644 --- a/libs/shared/user/shared_user.vcxproj +++ b/libs/shared/user/shared_user.vcxproj @@ -8,6 +8,7 @@ + diff --git a/libs/shared/user/shared_user.vcxproj.filters b/libs/shared/user/shared_user.vcxproj.filters index cacccd623b..6b910ac8e2 100644 --- a/libs/shared/user/shared_user.vcxproj.filters +++ b/libs/shared/user/shared_user.vcxproj.filters @@ -30,6 +30,9 @@ Header Files + + Header Files + Header Files diff --git a/tests/sample/bindmonitor_perf_event_array.c b/tests/sample/bindmonitor_perf_event_array.c new file mode 100644 index 0000000000..bd6c0820d6 --- /dev/null +++ b/tests/sample/bindmonitor_perf_event_array.c @@ -0,0 +1,39 @@ +// Copyright (c) eBPF for Windows contributors +// SPDX-License-Identifier: MIT + +// Whenever this sample program changes, bpf2c_tests will fail unless the +// expected files in tests\bpf2c_tests\expected are updated. The following +// script can be used to regenerate the expected files: +// generate_expected_bpf2c_output.ps1 +// +// Usage: +// .\scripts\generate_expected_bpf2c_output.ps1 +// Example: +// .\scripts\generate_expected_bpf2c_output.ps1 .\x64\Debug\ + +#include "bpf_helpers.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 64 * 1024); +} process_map SEC(".maps"); + +SEC("bind") +bind_action_t +bind_monitor(bind_md_t* ctx) +{ + uint64_t flags = (1ULL << 32) - 1; + switch (ctx->operation) { + case BIND_OPERATION_BIND: + if (ctx->app_id_end > ctx->app_id_start) { + (void)bpf_perf_event_output( + ctx, &process_map, flags, ctx->app_id_start, ctx->app_id_end - ctx->app_id_start); + } + break; + default: + break; + } + + return BIND_PERMIT; +}
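+
+// Note: (1ULL << 32) - 1 sets the low 32 bits of flags to all ones, which the
+// runtime treats as EBPF_MAP_FLAG_CURRENT_CPU (write to the current CPU's
+// ring); the upper CTXLEN bits are zero, so no context capture is requested.
+// A hedged sketch of a capture variant (illustrative only: `capture_length`
+// is a hypothetical name, and this assumes the EBPF_MAP_FLAG_CTXLEN_*
+// constants are in scope for BPF programs):
+//
+//     uint64_t capture_flags =
+//         ((1ULL << 32) - 1) |
+//         (((uint64_t)capture_length << EBPF_MAP_FLAG_CTXLEN_SHIFT) & EBPF_MAP_FLAG_CTXLEN_MASK);
+//     (void)bpf_perf_event_output(
+//         ctx, &process_map, capture_flags, ctx->app_id_start, ctx->app_id_end - ctx->app_id_start);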