From 37613367a007b6340d5e40d0fbb900f6851a2eb2 Mon Sep 17 00:00:00 2001 From: Anurag Saxena <43585259+saxena-anurag@users.noreply.github.com> Date: Fri, 15 Mar 2024 17:16:14 -0700 Subject: [PATCH] Fix ring_buf issues (#3354) * fix * add tests * code cleanup * fix tests * cr comments * cr comments --- libs/api/ebpf_api.cpp | 2 +- libs/runtime/ebpf_ring_buffer.c | 29 ++++++-- libs/runtime/ebpf_ring_buffer.h | 2 +- libs/shared/ebpf_ring_buffer_record.h | 1 + tests/api_test/api_test.cpp | 73 ++++++++++++++++++- .../expected/bindmonitor_ringbuf_dll.c | 6 +- .../expected/bindmonitor_ringbuf_raw.c | 6 +- .../expected/bindmonitor_ringbuf_sys.c | 6 +- tests/sample/bindmonitor_ringbuf.c | 2 +- 9 files changed, 108 insertions(+), 19 deletions(-) diff --git a/libs/api/ebpf_api.cpp b/libs/api/ebpf_api.cpp index 00ab929e22..ec9be180a1 100644 --- a/libs/api/ebpf_api.cpp +++ b/libs/api/ebpf_api.cpp @@ -4177,7 +4177,7 @@ _ebpf_ring_buffer_map_async_query_completion(_Inout_ void* completion_context) N // Async IOCTL operation returned with success status. Read the ring buffer records and indicate it to the // subscriber. - size_t ring_buffer_size; + size_t ring_buffer_size = 0; uint32_t dummy; result = _get_map_descriptor_properties( diff --git a/libs/runtime/ebpf_ring_buffer.c b/libs/runtime/ebpf_ring_buffer.c index 7fedec637e..abc1db4bc1 100644 --- a/libs/runtime/ebpf_ring_buffer.c +++ b/libs/runtime/ebpf_ring_buffer.c @@ -34,6 +34,13 @@ _ring_get_consumer_offset(_In_ const ebpf_ring_buffer_t* ring) return ring->consumer_offset % ring->length; } +inline static size_t +_ring_get_used_capacity(_In_ const ebpf_ring_buffer_t* ring) +{ + ebpf_assert(ring->producer_offset >= ring->consumer_offset); + return ring->producer_offset - ring->consumer_offset; +} + inline static void _ring_advance_producer_offset(_Inout_ ebpf_ring_buffer_t* ring, size_t length) { @@ -146,23 +153,30 @@ ebpf_ring_buffer_output(_Inout_ ebpf_ring_buffer_t* ring, _In_reads_bytes_(lengt } void -ebpf_ring_buffer_query(_In_ const ebpf_ring_buffer_t* ring, _Out_ size_t* consumer, _Out_ size_t* producer) +ebpf_ring_buffer_query(_In_ ebpf_ring_buffer_t* ring, _Out_ size_t* consumer, _Out_ size_t* producer) { + ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock); *consumer = ring->consumer_offset; *producer = ring->producer_offset; + ebpf_lock_unlock(&ring->lock, state); } _Must_inspect_result_ ebpf_result_t ebpf_ring_buffer_return(_Inout_ ebpf_ring_buffer_t* ring, size_t length) { + EBPF_LOG_ENTRY(); ebpf_result_t result; ebpf_lock_state_t state = ebpf_lock_lock(&ring->lock); size_t local_length = length; size_t offset = _ring_get_consumer_offset(ring); - // Check if length is valid. - if ((length > _ring_get_length(ring)) || - (length + _ring_get_consumer_offset(ring) > _ring_get_producer_offset(ring))) { + if ((length > _ring_get_length(ring)) || length > _ring_get_used_capacity(ring)) { + EBPF_LOG_MESSAGE_UINT64_UINT64( + EBPF_TRACELOG_LEVEL_ERROR, + EBPF_TRACELOG_KEYWORD_MAP, + "ebpf_ring_buffer_return: Buffer too large", + ring->producer_offset, + ring->consumer_offset); result = EBPF_INVALID_ARGUMENT; goto Done; } @@ -178,6 +192,11 @@ ebpf_ring_buffer_return(_Inout_ ebpf_ring_buffer_t* ring, size_t length) } // Did it end on a record boundary? if (local_length != 0) { + EBPF_LOG_MESSAGE_UINT64( + EBPF_TRACELOG_LEVEL_ERROR, + EBPF_TRACELOG_KEYWORD_MAP, + "ebpf_ring_buffer_return: Invalid buffer length", + local_length); result = EBPF_INVALID_ARGUMENT; goto Done; } @@ -187,7 +206,7 @@ ebpf_ring_buffer_return(_Inout_ ebpf_ring_buffer_t* ring, size_t length) Done: ebpf_lock_unlock(&ring->lock, state); - return result; + EBPF_RETURN_RESULT(result); } _Must_inspect_result_ ebpf_result_t diff --git a/libs/runtime/ebpf_ring_buffer.h b/libs/runtime/ebpf_ring_buffer.h index 9a8343dc21..4a72d81ce1 100644 --- a/libs/runtime/ebpf_ring_buffer.h +++ b/libs/runtime/ebpf_ring_buffer.h @@ -49,7 +49,7 @@ ebpf_ring_buffer_output(_Inout_ ebpf_ring_buffer_t* ring_buffer, _In_reads_bytes * @param[out] producer Offset of the next buffer to be produced. */ void -ebpf_ring_buffer_query(_In_ const ebpf_ring_buffer_t* ring_buffer, _Out_ size_t* consumer, _Out_ size_t* producer); +ebpf_ring_buffer_query(_In_ ebpf_ring_buffer_t* ring_buffer, _Out_ size_t* consumer, _Out_ size_t* producer); /** * @brief Mark one or more records in the ring buffer as returned to the ring. diff --git a/libs/shared/ebpf_ring_buffer_record.h b/libs/shared/ebpf_ring_buffer_record.h index 019eb24ef1..8d5dc11c92 100644 --- a/libs/shared/ebpf_ring_buffer_record.h +++ b/libs/shared/ebpf_ring_buffer_record.h @@ -29,6 +29,7 @@ typedef struct _ebpf_ring_buffer_record inline const ebpf_ring_buffer_record_t* ebpf_ring_buffer_next_record(_In_ const uint8_t* buffer, size_t buffer_length, size_t consumer, size_t producer) { + ebpf_assert(producer >= consumer); if (producer == consumer) { return NULL; } diff --git a/tests/api_test/api_test.cpp b/tests/api_test/api_test.cpp index 5fcfa9b1b9..57b7f9d409 100644 --- a/tests/api_test/api_test.cpp +++ b/tests/api_test/api_test.cpp @@ -1121,6 +1121,7 @@ TEST_CASE("load_native_program_invalid5", "[native][negative]") { _load_invalid_program("invalid_maps3.sys", EBPF_EXECUTION_NATIVE, -EINVAL); } +#endif // _DEBUG TEST_CASE("ioctl_stress", "[stress]") { @@ -1211,7 +1212,7 @@ TEST_CASE("ioctl_stress", "[stress]") } } - // Wait for 10 seconds + // Wait for 60 seconds std::this_thread::sleep_for(std::chrono::seconds(60)); stop_requested = true; @@ -1228,4 +1229,72 @@ TEST_CASE("ioctl_stress", "[stress]") bpf_object__close(object); } -#endif \ No newline at end of file +TEST_CASE("test_ringbuffer_wraparound", "[stress]") +{ + // Load bindmonitor_ringbuf.sys. + struct bpf_object* object = nullptr; + fd_t program_fd; + uint32_t event_count = 0; + std::string app_id = "api_test.exe"; + uint32_t thread_count = 2; + native_module_helper_t native_helper; + native_helper.initialize("bindmonitor_ringbuf", EBPF_EXECUTION_NATIVE); + + REQUIRE( + _program_load_helper( + native_helper.get_file_name().c_str(), BPF_PROG_TYPE_BIND, EBPF_EXECUTION_NATIVE, &object, &program_fd) == + 0); + + // Get fd of process_map map. + fd_t process_map_fd = bpf_object__find_map_fd_by_name(object, "process_map"); + + uint32_t max_entries = bpf_map__max_entries(bpf_object__find_map_by_name(object, "process_map")); + uint32_t max_iterations = static_cast(10 * (max_entries / app_id.size())); + printf("max_iterations: %d\n", max_iterations); + REQUIRE(max_iterations % thread_count == 0); + uint32_t iterations_per_thread = max_iterations / thread_count; + + // Subscribe to the ring buffer. + auto ring = ring_buffer__new( + process_map_fd, + [](void* ctx, void*, size_t) { + (*((uint32_t*)ctx))++; + return 0; + }, + &event_count, + nullptr); + + // Create 2 threads that invoke the program to trigger ring buffer events. + std::vector threads; + for (uint32_t i = 0; i < thread_count; i++) { + threads.emplace_back([&]() { + bind_md_t ctx = {}; + bpf_test_run_opts opts = {}; + opts.ctx_in = &ctx; + opts.ctx_size_in = sizeof(ctx); + opts.ctx_out = &ctx; + opts.ctx_size_out = sizeof(ctx); + opts.data_in = app_id.data(); + opts.data_size_in = static_cast(app_id.size()); + opts.data_out = app_id.data(); + opts.data_size_out = static_cast(app_id.size()); + + for (uint32_t i = 0; i < iterations_per_thread; i++) { + int result = bpf_prog_test_run_opts(program_fd, &opts); + REQUIRE(result == 0); + } + }); + } + + // Wait for threads to complete. + for (auto& t : threads) { + t.join(); + } + REQUIRE(event_count == max_iterations); + + // Unsubscribe from the ring buffer. + ring_buffer__free(ring); + + // Clean up. + bpf_object__close(object); +} \ No newline at end of file diff --git a/tests/bpf2c_tests/expected/bindmonitor_ringbuf_dll.c b/tests/bpf2c_tests/expected/bindmonitor_ringbuf_dll.c index 66ec50fd54..1580177d48 100644 --- a/tests/bpf2c_tests/expected/bindmonitor_ringbuf_dll.c +++ b/tests/bpf2c_tests/expected/bindmonitor_ringbuf_dll.c @@ -45,7 +45,7 @@ static map_entry_t _maps[] = { BPF_MAP_TYPE_RINGBUF, // Type of map. 0, // Size in bytes of a map key. 0, // Size in bytes of a map value. - 262144, // Maximum number of entries allowed in the map. + 65536, // Maximum number of entries allowed in the map. 0, // Inner map index. LIBBPF_PIN_NONE, // Pinning type for the map. 7, // Identifier for a map template. @@ -111,7 +111,7 @@ bind_monitor(void* context) if (r2 != IMMEDIATE(0)) #line 26 "sample/bindmonitor_ringbuf.c" goto label_1; - // EBPF_OP_LDXDW pc=2 dst=r2 src=r1 offset=0 imm=0 + // EBPF_OP_LDXDW pc=2 dst=r2 src=r1 offset=0 imm=0 #line 28 "sample/bindmonitor_ringbuf.c" r2 = *(uint64_t*)(uintptr_t)(r1 + OFFSET(0)); // EBPF_OP_LDXDW pc=3 dst=r3 src=r1 offset=8 imm=0 @@ -122,7 +122,7 @@ bind_monitor(void* context) if (r2 >= r3) #line 28 "sample/bindmonitor_ringbuf.c" goto label_1; - // EBPF_OP_SUB64_REG pc=5 dst=r3 src=r2 offset=0 imm=0 + // EBPF_OP_SUB64_REG pc=5 dst=r3 src=r2 offset=0 imm=0 #line 29 "sample/bindmonitor_ringbuf.c" r3 -= r2; // EBPF_OP_LDDW pc=6 dst=r1 src=r0 offset=0 imm=0 diff --git a/tests/bpf2c_tests/expected/bindmonitor_ringbuf_raw.c b/tests/bpf2c_tests/expected/bindmonitor_ringbuf_raw.c index dbcceb11a2..0fd79e883a 100644 --- a/tests/bpf2c_tests/expected/bindmonitor_ringbuf_raw.c +++ b/tests/bpf2c_tests/expected/bindmonitor_ringbuf_raw.c @@ -19,7 +19,7 @@ static map_entry_t _maps[] = { BPF_MAP_TYPE_RINGBUF, // Type of map. 0, // Size in bytes of a map key. 0, // Size in bytes of a map value. - 262144, // Maximum number of entries allowed in the map. + 65536, // Maximum number of entries allowed in the map. 0, // Inner map index. LIBBPF_PIN_NONE, // Pinning type for the map. 7, // Identifier for a map template. @@ -85,7 +85,7 @@ bind_monitor(void* context) if (r2 != IMMEDIATE(0)) #line 26 "sample/bindmonitor_ringbuf.c" goto label_1; - // EBPF_OP_LDXDW pc=2 dst=r2 src=r1 offset=0 imm=0 + // EBPF_OP_LDXDW pc=2 dst=r2 src=r1 offset=0 imm=0 #line 28 "sample/bindmonitor_ringbuf.c" r2 = *(uint64_t*)(uintptr_t)(r1 + OFFSET(0)); // EBPF_OP_LDXDW pc=3 dst=r3 src=r1 offset=8 imm=0 @@ -96,7 +96,7 @@ bind_monitor(void* context) if (r2 >= r3) #line 28 "sample/bindmonitor_ringbuf.c" goto label_1; - // EBPF_OP_SUB64_REG pc=5 dst=r3 src=r2 offset=0 imm=0 + // EBPF_OP_SUB64_REG pc=5 dst=r3 src=r2 offset=0 imm=0 #line 29 "sample/bindmonitor_ringbuf.c" r3 -= r2; // EBPF_OP_LDDW pc=6 dst=r1 src=r0 offset=0 imm=0 diff --git a/tests/bpf2c_tests/expected/bindmonitor_ringbuf_sys.c b/tests/bpf2c_tests/expected/bindmonitor_ringbuf_sys.c index 69ff793b2f..db62b78c5f 100644 --- a/tests/bpf2c_tests/expected/bindmonitor_ringbuf_sys.c +++ b/tests/bpf2c_tests/expected/bindmonitor_ringbuf_sys.c @@ -180,7 +180,7 @@ static map_entry_t _maps[] = { BPF_MAP_TYPE_RINGBUF, // Type of map. 0, // Size in bytes of a map key. 0, // Size in bytes of a map value. - 262144, // Maximum number of entries allowed in the map. + 65536, // Maximum number of entries allowed in the map. 0, // Inner map index. LIBBPF_PIN_NONE, // Pinning type for the map. 7, // Identifier for a map template. @@ -246,7 +246,7 @@ bind_monitor(void* context) if (r2 != IMMEDIATE(0)) #line 26 "sample/bindmonitor_ringbuf.c" goto label_1; - // EBPF_OP_LDXDW pc=2 dst=r2 src=r1 offset=0 imm=0 + // EBPF_OP_LDXDW pc=2 dst=r2 src=r1 offset=0 imm=0 #line 28 "sample/bindmonitor_ringbuf.c" r2 = *(uint64_t*)(uintptr_t)(r1 + OFFSET(0)); // EBPF_OP_LDXDW pc=3 dst=r3 src=r1 offset=8 imm=0 @@ -257,7 +257,7 @@ bind_monitor(void* context) if (r2 >= r3) #line 28 "sample/bindmonitor_ringbuf.c" goto label_1; - // EBPF_OP_SUB64_REG pc=5 dst=r3 src=r2 offset=0 imm=0 + // EBPF_OP_SUB64_REG pc=5 dst=r3 src=r2 offset=0 imm=0 #line 29 "sample/bindmonitor_ringbuf.c" r3 -= r2; // EBPF_OP_LDDW pc=6 dst=r1 src=r0 offset=0 imm=0 diff --git a/tests/sample/bindmonitor_ringbuf.c b/tests/sample/bindmonitor_ringbuf.c index 166e4c86d0..3a65fa18d2 100644 --- a/tests/sample/bindmonitor_ringbuf.c +++ b/tests/sample/bindmonitor_ringbuf.c @@ -16,7 +16,7 @@ struct { __uint(type, BPF_MAP_TYPE_RINGBUF); - __uint(max_entries, 256 * 1024); + __uint(max_entries, 64 * 1024); } process_map SEC(".maps"); SEC("bind")