From acf85eb9abb3de4863fe9350a6cdddee71c9be44 Mon Sep 17 00:00:00 2001 From: black-sliver <59490463+black-sliver@users.noreply.github.com> Date: Wed, 12 Jun 2024 18:54:59 +0200 Subject: [PATCH] Speedups: remove dependency on c++ (#2796) * Speedups: remove dependency on c++ * Speedups: intset: handle malloc failing * Speedups: intset: fix corner case for int64 on 32bit systems original idea was to only use bucket->val if int(-1) # this is all 0xff... adding 1 results in 0, but it's not negative +# configure INTSET for player +cdef extern from *: + """ + #define INTSET_NAME ap_player_set + #define INTSET_TYPE uint32_t // has to match ap_player_t + """ + +# create INTSET for player +cdef extern from "intset.h": + """ + #undef INTSET_NAME + #undef INTSET_TYPE + """ + ctypedef struct ap_player_set: + pass + + ap_player_set* ap_player_set_new(size_t bucket_count) nogil + void ap_player_set_free(ap_player_set* set) nogil + bint ap_player_set_add(ap_player_set* set, ap_player_t val) nogil + bint ap_player_set_contains(ap_player_set* set, ap_player_t val) nogil + cdef struct LocationEntry: # layout is so that @@ -185,7 +206,7 @@ cdef class LocationStore: def find_item(self, slots: Set[int], seeked_item_id: int) -> Generator[Tuple[int, int, int, int, int], None, None]: cdef ap_id_t item = seeked_item_id cdef ap_player_t receiver - cdef std_set[ap_player_t] receivers + cdef ap_player_set* receivers cdef size_t slot_count = len(slots) if slot_count == 1: # specialized implementation for single slot @@ -197,13 +218,20 @@ cdef class LocationStore: yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags elif slot_count: # generic implementation with lookup in set - for receiver in slots: - receivers.insert(receiver) - with nogil: - for entry in self.entries[:self.entry_count]: - if entry.item == item and receivers.count(entry.receiver): - with gil: - yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags + receivers = 
ap_player_set_new(min(1023, slot_count)) # limit top level struct to 16KB + if not receivers: + raise MemoryError() + try: + for receiver in slots: + if not ap_player_set_add(receivers, receiver): + raise MemoryError() + with nogil: + for entry in self.entries[:self.entry_count]: + if entry.item == item and ap_player_set_contains(receivers, entry.receiver): + with gil: + yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags + finally: + ap_player_set_free(receivers) def get_for_player(self, slot: int) -> Dict[int, Set[int]]: cdef ap_player_t receiver = slot diff --git a/_speedups.pyxbld b/_speedups.pyxbld index e1fe19b2efc6..974eaed03b6a 100644 --- a/_speedups.pyxbld +++ b/_speedups.pyxbld @@ -1,8 +1,10 @@ -# This file is required to get pyximport to work with C++. -# Switching from std::set to a pure C implementation is still on the table to simplify everything. +# This file is used when doing pyximport +import os def make_ext(modname, pyxfilename): from distutils.extension import Extension return Extension(name=modname, sources=[pyxfilename], - language='c++') + depends=["intset.h"], + include_dirs=[os.getcwd()], + language="c") diff --git a/intset.h b/intset.h new file mode 100644 index 000000000000..fac84fb6f890 --- /dev/null +++ b/intset.h @@ -0,0 +1,135 @@ +/* A specialized unordered_set implementation for literals, where bucket_count + * is defined at initialization rather than increased automatically. + */ +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#ifndef INTSET_NAME +#error "Please #define INTSET_NAME ... before including intset.h" +#endif + +#ifndef INTSET_TYPE +#error "Please #define INTSET_TYPE ... 
before including intset.h" +#endif + +/* macros to generate unique names from INTSET_NAME */ +#ifndef INTSET_CONCAT +#define INTSET_CONCAT_(a, b) a ## b +#define INTSET_CONCAT(a, b) INTSET_CONCAT_(a, b) +#define INTSET_FUNC_(a, b) INTSET_CONCAT(a, _ ## b) +#endif + +#define INTSET_FUNC(name) INTSET_FUNC_(INTSET_NAME, name) +#define INTSET_BUCKET INTSET_CONCAT(INTSET_NAME, Bucket) +#define INTSET_UNION INTSET_CONCAT(INTSET_NAME, Union) + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4200) +#endif + + +typedef struct { + size_t count; + union INTSET_UNION { + INTSET_TYPE val; + INTSET_TYPE *data; + } v; +} INTSET_BUCKET; + +typedef struct { + size_t bucket_count; + INTSET_BUCKET buckets[]; +} INTSET_NAME; + +static INTSET_NAME *INTSET_FUNC(new)(size_t buckets) +{ + size_t i, size; + INTSET_NAME *set; + + if (buckets < 1) + buckets = 1; + if ((SIZE_MAX - sizeof(INTSET_NAME)) / sizeof(INTSET_BUCKET) < buckets) + return NULL; + size = sizeof(INTSET_NAME) + buckets * sizeof(INTSET_BUCKET); + set = (INTSET_NAME*)malloc(size); + if (!set) + return NULL; + memset(set, 0, size); /* gcc -fanalyzer does not understand this sets all buckets' count to 0 */ + for (i = 0; i < buckets; i++) { + set->buckets[i].count = 0; + } + set->bucket_count = buckets; + return set; +} + +static void INTSET_FUNC(free)(INTSET_NAME *set) +{ + size_t i; + if (!set) + return; + for (i = 0; i < set->bucket_count; i++) { + if (set->buckets[i].count > 1) + free(set->buckets[i].v.data); + } + free(set); +} + +static bool INTSET_FUNC(contains)(INTSET_NAME *set, INTSET_TYPE val) +{ + size_t i; + INTSET_BUCKET* bucket = &set->buckets[(size_t)val % set->bucket_count]; + if (bucket->count == 1) + return bucket->v.val == val; + for (i = 0; i < bucket->count; ++i) { + if (bucket->v.data[i] == val) + return true; + } + return false; +} + +static bool INTSET_FUNC(add)(INTSET_NAME *set, INTSET_TYPE val) +{ + INTSET_BUCKET* bucket; + + if (INTSET_FUNC(contains)(set, val)) + return true; 
/* ok */ + + bucket = &set->buckets[(size_t)val % set->bucket_count]; + if (bucket->count == 0) { + bucket->v.val = val; + bucket->count = 1; + } else if (bucket->count == 1) { + INTSET_TYPE old = bucket->v.val; + bucket->v.data = (INTSET_TYPE*)malloc(2 * sizeof(INTSET_TYPE)); + if (!bucket->v.data) { + bucket->v.val = old; + return false; /* error */ + } + bucket->v.data[0] = old; + bucket->v.data[1] = val; + bucket->count = 2; + } else { + size_t new_bucket_size; + INTSET_TYPE* new_bucket_data; + + new_bucket_size = (bucket->count + 1) * sizeof(INTSET_TYPE); + new_bucket_data = (INTSET_TYPE*)realloc(bucket->v.data, new_bucket_size); + if (!new_bucket_data) + return false; /* error */ + bucket->v.data = new_bucket_data; + bucket->v.data[bucket->count++] = val; + } + return true; /* success */ +} + + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#undef INTSET_FUNC +#undef INTSET_BUCKET +#undef INTSET_UNION diff --git a/test/cpp/CMakeLists.txt b/test/cpp/CMakeLists.txt new file mode 100644 index 000000000000..927b7494dac4 --- /dev/null +++ b/test/cpp/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.5) +project(ap-cpp-tests) + +enable_testing() + +find_package(GTest REQUIRED) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_definitions("/source-charset:utf-8") + set(CMAKE_CXX_FLAGS_DEBUG "/MTd") + set(CMAKE_CXX_FLAGS_RELEASE "/MT") +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + # enable static analysis for gcc + add_compile_options(-fanalyzer -Werror) + # disable stuff that gets triggered by googletest + add_compile_options(-Wno-analyzer-malloc-leak) + # enable asan for gcc + add_compile_options(-fsanitize=address) + add_link_options(-fsanitize=address) +endif () + +add_executable(test_default) + +target_include_directories(test_default + PRIVATE + ${GTEST_INCLUDE_DIRS} +) + +target_link_libraries(test_default + ${GTEST_BOTH_LIBRARIES} +) + +add_test( + NAME test_default + COMMAND test_default +) + +set_property( + TEST test_default + 
PROPERTY ENVIRONMENT "ASAN_OPTIONS=allocator_may_return_null=1" +) + +file(GLOB ITEMS *) +foreach(item ${ITEMS}) + if(IS_DIRECTORY ${item} AND EXISTS ${item}/CMakeLists.txt) + message(${item}) + add_subdirectory(${item}) + endif() +endforeach() diff --git a/test/cpp/README.md b/test/cpp/README.md new file mode 100644 index 000000000000..792b9be77e72 --- /dev/null +++ b/test/cpp/README.md @@ -0,0 +1,32 @@ +# C++ tests + +Test framework for C and C++ code in AP. + +## Adding a Test + +### GoogleTest + +Adding GoogleTests is as simple as creating a directory with +* one or more `test_*.cpp` files that define tests using + [GoogleTest API](https://google.github.io/googletest/) +* a `CMakeLists.txt` that adds the .cpp files to the `test_default` target using + [target_sources](https://cmake.org/cmake/help/latest/command/target_sources.html) + +### CTest + +If either GoogleTest is not suitable for the test or the build flags / sources / libraries are incompatible, +you can add another CTest to the project using `add_executable` and `add_test`, similar to how it's done for `test_default`. + +## Running Tests + +* Install [CMake](https://cmake.org/). +* Build and/or install GoogleTest and make sure + [CMake can find it](https://cmake.org/cmake/help/latest/module/FindGTest.html), or + [create a parent `CMakeLists.txt` that fetches GoogleTest](https://google.github.io/googletest/quickstart-cmake.html). +* Enter the directory with the top-most `CMakeLists.txt` and run + ```sh + mkdir build + cmake -S . 
-B build/ -DCMAKE_BUILD_TYPE=Release + cmake --build build/ --config Release && \ + ctest --test-dir build/ -C Release --output-on-failure + ``` diff --git a/test/cpp/intset/CMakeLists.txt b/test/cpp/intset/CMakeLists.txt new file mode 100644 index 000000000000..175e0bd0b9e8 --- /dev/null +++ b/test/cpp/intset/CMakeLists.txt @@ -0,0 +1,4 @@ +target_sources(test_default + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/test_intset.cpp +) diff --git a/test/cpp/intset/test_intset.cpp b/test/cpp/intset/test_intset.cpp new file mode 100644 index 000000000000..2f85bea960c4 --- /dev/null +++ b/test/cpp/intset/test_intset.cpp @@ -0,0 +1,105 @@ +#include <limits> +#include <cstdint> +#include <gtest/gtest.h> + +// uint32Set +#define INTSET_NAME uint32Set +#define INTSET_TYPE uint32_t +#include "../../../intset.h" +#undef INTSET_NAME +#undef INTSET_TYPE + +// int64Set +#define INTSET_NAME int64Set +#define INTSET_TYPE int64_t +#include "../../../intset.h" + + +TEST(IntsetTest, ZeroBuckets) +{ + // trying to allocate with zero buckets has to either fail or be functioning + uint32Set *set = uint32Set_new(0); + if (!set) + return; // failed -> OK + + EXPECT_FALSE(uint32Set_contains(set, 1)); + EXPECT_TRUE(uint32Set_add(set, 1)); + EXPECT_TRUE(uint32Set_contains(set, 1)); + uint32Set_free(set); +} + +TEST(IntsetTest, Duplicate) +{ + // adding the same number again can't fail + uint32Set *set = uint32Set_new(2); + ASSERT_TRUE(set); + EXPECT_TRUE(uint32Set_add(set, 0)); + EXPECT_TRUE(uint32Set_add(set, 0)); + EXPECT_TRUE(uint32Set_contains(set, 0)); + uint32Set_free(set); +} + +TEST(IntsetTest, SetAllocFailure) +{ + // try to allocate 100TB of RAM, should fail and return NULL + if (sizeof(size_t) < 8) + GTEST_SKIP() << "Alloc error not testable on 32bit"; + int64Set *set = int64Set_new(6250000000000ULL); + EXPECT_FALSE(set); + int64Set_free(set); +} + +TEST(IntsetTest, SetAllocOverflow) +{ + // try to overflow argument passed to malloc + int64Set *set = int64Set_new(std::numeric_limits<size_t>::max()); + EXPECT_FALSE(set); + 
int64Set_free(set); +} + +TEST(IntsetTest, NullFree) +{ + // free(NULL) should not try to free buckets + uint32Set_free(NULL); + int64Set_free(NULL); +} + +TEST(IntsetTest, BucketRealloc) +{ + // add a couple of values to the same bucket to test growing the bucket + uint32Set* set = uint32Set_new(1); + ASSERT_TRUE(set); + EXPECT_FALSE(uint32Set_contains(set, 0)); + EXPECT_TRUE(uint32Set_add(set, 0)); + EXPECT_TRUE(uint32Set_contains(set, 0)); + for (uint32_t i = 1; i < 32; ++i) { + EXPECT_TRUE(uint32Set_add(set, i)); + EXPECT_TRUE(uint32Set_contains(set, i - 1)); + EXPECT_TRUE(uint32Set_contains(set, i)); + EXPECT_FALSE(uint32Set_contains(set, i + 1)); + } + uint32Set_free(set); +} + +TEST(IntSet, Max) +{ + constexpr auto n = std::numeric_limits<uint32_t>::max(); + uint32Set *set = uint32Set_new(1); + ASSERT_TRUE(set); + EXPECT_FALSE(uint32Set_contains(set, n)); + EXPECT_TRUE(uint32Set_add(set, n)); + EXPECT_TRUE(uint32Set_contains(set, n)); + uint32Set_free(set); +} + +TEST(InsetTest, Negative) +{ + constexpr auto n = std::numeric_limits<int64_t>::min(); + static_assert(n < 0, "n not negative"); + int64Set *set = int64Set_new(3); + ASSERT_TRUE(set); + EXPECT_FALSE(int64Set_contains(set, n)); + EXPECT_TRUE(int64Set_add(set, n)); + EXPECT_TRUE(int64Set_contains(set, n)); + int64Set_free(set); +} diff --git a/test/netutils/test_location_store.py b/test/netutils/test_location_store.py index a7f117255faa..f3e83989bea4 100644 --- a/test/netutils/test_location_store.py +++ b/test/netutils/test_location_store.py @@ -1,4 +1,5 @@ # Tests for _speedups.LocationStore and NetUtils._LocationStore +import os import typing import unittest import warnings @@ -7,6 +8,8 @@ State = typing.Dict[typing.Tuple[int, int], typing.Set[int]] RawLocations = typing.Dict[int, typing.Dict[int, typing.Tuple[int, int, int]]] +ci = bool(os.environ.get("CI")) # always set in GitHub actions + sample_data: RawLocations = { 1: { 11: (21, 2, 7), @@ -24,6 +27,9 @@ 3: { 9: (99, 4, 0), }, + 5: { + 9: (99, 5, 0), + } } 
empty_state: State = { @@ -45,14 +51,14 @@ class TestLocationStore(unittest.TestCase): store: typing.Union[LocationStore, _LocationStore] def test_len(self) -> None: - self.assertEqual(len(self.store), 4) + self.assertEqual(len(self.store), 5) self.assertEqual(len(self.store[1]), 3) def test_key_error(self) -> None: with self.assertRaises(KeyError): _ = self.store[0] with self.assertRaises(KeyError): - _ = self.store[5] + _ = self.store[6] locations = self.store[1] # no Exception with self.assertRaises(KeyError): _ = locations[7] @@ -71,7 +77,7 @@ def test_get(self) -> None: self.assertEqual(self.store[1].get(10, (None, None, None)), (None, None, None)) def test_iter(self) -> None: - self.assertEqual(sorted(self.store), [1, 2, 3, 4]) + self.assertEqual(sorted(self.store), [1, 2, 3, 4, 5]) self.assertEqual(len(self.store), len(sample_data)) self.assertEqual(list(self.store[1]), [11, 12, 13]) self.assertEqual(len(self.store[1]), len(sample_data[1])) @@ -85,13 +91,26 @@ def test_items(self) -> None: self.assertEqual(sorted(self.store[1].items())[0][1], self.store[1][11]) def test_find_item(self) -> None: + # empty player set self.assertEqual(sorted(self.store.find_item(set(), 99)), []) + # no such player, single + self.assertEqual(sorted(self.store.find_item({6}, 99)), []) + # no such player, set + self.assertEqual(sorted(self.store.find_item({7, 8, 9}, 99)), []) + # no such item self.assertEqual(sorted(self.store.find_item({3}, 1)), []) - self.assertEqual(sorted(self.store.find_item({5}, 99)), []) + # valid matches self.assertEqual(sorted(self.store.find_item({3}, 99)), [(4, 9, 99, 3, 0)]) self.assertEqual(sorted(self.store.find_item({3, 4}, 99)), [(3, 9, 99, 4, 0), (4, 9, 99, 3, 0)]) + self.assertEqual(sorted(self.store.find_item({2, 3, 4}, 99)), + [(3, 9, 99, 4, 0), (4, 9, 99, 3, 0)]) + # test hash collision in set + self.assertEqual(sorted(self.store.find_item({3, 5}, 99)), + [(4, 9, 99, 3, 0), (5, 9, 99, 5, 0)]) + 
self.assertEqual(sorted(self.store.find_item(set(range(2048)), 13)), + [(1, 13, 13, 1, 0)]) def test_get_for_player(self) -> None: self.assertEqual(self.store.get_for_player(3), {4: {9}}) @@ -196,18 +215,20 @@ def setUp(self) -> None: super().setUp() -@unittest.skipIf(LocationStore is _LocationStore, "_speedups not available") +@unittest.skipIf(LocationStore is _LocationStore and not ci, "_speedups not available") class TestSpeedupsLocationStore(Base.TestLocationStore): """Run base method tests for cython implementation.""" def setUp(self) -> None: + self.assertFalse(LocationStore is _LocationStore, "Failed to load _speedups") self.store = LocationStore(sample_data) super().setUp() -@unittest.skipIf(LocationStore is _LocationStore, "_speedups not available") +@unittest.skipIf(LocationStore is _LocationStore and not ci, "_speedups not available") class TestSpeedupsLocationStoreConstructor(Base.TestLocationStoreConstructor): """Run base constructor tests and tests the additional constraints for cython implementation.""" def setUp(self) -> None: + self.assertFalse(LocationStore is _LocationStore, "Failed to load _speedups") self.type = LocationStore super().setUp()