From 159256675604ee16aed9623a46b419b90b763735 Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Wed, 18 Sep 2024 21:01:44 +0100 Subject: [PATCH 1/3] Copy symbol handles to avoid exposing address/name map iterators This will reduce the chance of iterator invalidation issues. --- src/ccc/mdebug_importer.cpp | 20 ++++++------- src/ccc/symbol_database.cpp | 46 +++++++++++++++++++++++------- src/ccc/symbol_database.h | 27 ++++-------------- test/ccc/symbol_database_tests.cpp | 18 ++++++------ 4 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/ccc/mdebug_importer.cpp b/src/ccc/mdebug_importer.cpp index a179a86..ab5c927 100644 --- a/src/ccc/mdebug_importer.cpp +++ b/src/ccc/mdebug_importer.cpp @@ -348,7 +348,7 @@ static Result resolve_type_name( CCC_ASSERT(source_file); auto handle = source_file->stabs_type_number_to_handle.find(unresolved_stabs->stabs_type_number); if(handle != source_file->stabs_type_number_to_handle.end()) { - type_name.data_type_handle = handle->second.value; + type_name.data_type_handle = handle->second; type_name.is_forward_declared = false; type_name.unresolved_stabs.reset(); return Result(); @@ -359,10 +359,10 @@ static Result resolve_type_name( // its name instead. This happens when a type is forward declared but not // defined in a given translation unit. if(!unresolved_stabs->type_name.empty()) { - for(auto& name_handle : database.data_types.handles_from_name(unresolved_stabs->type_name)) { - DataType* data_type = database.data_types.symbol_from_handle(name_handle.second); + for(DataTypeHandle& handle : database.data_types.handles_from_name(unresolved_stabs->type_name)) { + DataType* data_type = database.data_types.symbol_from_handle(handle); if(data_type && group.is_in_group(*data_type)) { - type_name.data_type_handle = name_handle.second.value; + type_name.data_type_handle = handle; type_name.is_forward_declared = true; type_name.unresolved_stabs.reset(); return Result(); @@ -412,7 +412,7 @@ static Result resolve_type_name( (*forward_declared_type)->set_type(std::move(forward_declared_node)); (*forward_declared_type)->not_defined_in_any_translation_unit = true; - type_name.data_type_handle = (*forward_declared_type)->handle().value; + type_name.data_type_handle = (*forward_declared_type)->handle(); type_name.is_forward_declared = true; type_name.unresolved_stabs.reset(); @@ -545,18 +545,18 @@ static void detect_duplicate_functions(SymbolDatabase& database, const SymbolGro // translation units. FunctionHandle best_handle; u32 best_offset = UINT32_MAX; - for(const auto& [address, handle] : functions_with_same_address) { + for(FunctionHandle handle : functions_with_same_address) { ccc::Function* function = database.functions.symbol_from_handle(handle); if(!function || !group.is_in_group(*function) || function->mangled_name() != test_function.mangled_name()) { continue; } - if(address - source_file_address < best_offset) { + if(test_function.address().value - source_file_address < best_offset) { if(best_handle.valid()) { duplicate_functions.emplace_back(best_handle); } best_handle = function->handle(); - best_offset = address - source_file_address; + best_offset = test_function.address().value - source_file_address; } else { duplicate_functions.emplace_back(function->handle()); } @@ -643,8 +643,8 @@ void fill_in_pointers_to_member_function_definitions(SymbolDatabase& database) type_name = qualified_name.substr(0, name_separator_pos - 1); } - for(const auto& name_handle : database.data_types.handles_from_name(type_name)) { - DataType* data_type = database.data_types.symbol_from_handle(name_handle.second); + for(DataTypeHandle handle : database.data_types.handles_from_name(type_name)) { + DataType* data_type = database.data_types.symbol_from_handle(handle); if(!data_type || !data_type->type() || data_type->type()->descriptor != ast::STRUCT_OR_UNION) { continue; } diff --git a/src/ccc/symbol_database.cpp b/src/ccc/symbol_database.cpp index bdce24a..ca50b0a 100644 --- a/src/ccc/symbol_database.cpp +++ b/src/ccc/symbol_database.cpp @@ -104,22 +104,40 @@ typename SymbolList::ConstIterator SymbolList::end() con } template -typename SymbolList::AddressToHandleMapIterators SymbolList::handles_from_starting_address(Address address) const +std::vector> SymbolList::handles_from_starting_address(Address address) const { - auto iterators = m_address_to_handle.equal_range(address.value); - return {iterators.first, iterators.second}; + std::vector> handles; + + auto [begin, end] = m_address_to_handle.equal_range(address.value); + for(auto iterator = begin; iterator != end; iterator++) { + handles.emplace_back(iterator->second); + } + + return handles; } template -typename SymbolList::AddressToHandleMapIterators SymbolList::handles_from_address_range(AddressRange range) const +std::vector> SymbolList::handles_from_address_range(AddressRange range) const { + std::vector> handles; + + typename AddressToHandleMap::const_iterator begin, end; if(range.low.valid()) { - return {m_address_to_handle.lower_bound(range.low.value), m_address_to_handle.lower_bound(range.high.value)}; + begin = m_address_to_handle.lower_bound(range.low.value); + end = m_address_to_handle.lower_bound(range.high.value); } else if(range.high.valid()) { - return {m_address_to_handle.begin(), m_address_to_handle.lower_bound(range.high.value)}; + begin = m_address_to_handle.begin(); + end = m_address_to_handle.lower_bound(range.high.value); } else { - return {m_address_to_handle.end(), m_address_to_handle.end()}; + begin = m_address_to_handle.end(); + end = m_address_to_handle.end(); + } + + for(auto iterator = begin; iterator != end; iterator++) { + handles.emplace_back(iterator->second); } + + return handles; } template @@ -134,10 +152,16 @@ SymbolHandle SymbolList::first_handle_from_starting_addr } template -typename SymbolList::NameToHandleMapIterators SymbolList::handles_from_name(const std::string& name) const +std::vector> SymbolList::handles_from_name(const std::string& name) const { - auto iterators = m_name_to_handle.equal_range(name); - return {iterators.first, iterators.second}; + std::vector> handles; + + auto [begin, end] = m_name_to_handle.equal_range(name); + for(auto iterator = begin; iterator != end; iterator++) { + handles.emplace_back(iterator->second); + } + + return handles; } template @@ -929,7 +953,7 @@ Result SymbolDatabase::create_data_type_if_unique( // Types with this name have previously been processed, we need to // figure out if this one matches any of the previous ones. bool match = false; - for(auto [key, existing_type_handle] : types_with_same_name) { + for(DataTypeHandle existing_type_handle : types_with_same_name) { DataType* existing_type = data_types.symbol_from_handle(existing_type_handle); CCC_ASSERT(existing_type); diff --git a/src/ccc/symbol_database.h b/src/ccc/symbol_database.h index 0458576..71c6baa 100644 --- a/src/ccc/symbol_database.h +++ b/src/ccc/symbol_database.h @@ -107,32 +107,14 @@ class SymbolList { Iterator end(); ConstIterator end() const; - using AddressToHandleMap = std::multimap>; - using NameToHandleMap = std::multimap>; - - template - class Iterators { - public: - Iterators(Iterator b, Iterator e) - : m_begin(b), m_end(e) {} - Iterator begin() const { return m_begin; } - Iterator end() const { return m_end; } - protected: - Iterator m_begin; - Iterator m_end; - }; - - using AddressToHandleMapIterators = Iterators; - using NameToHandleMapIterators = Iterators; - // Lookup symbols by their address. - AddressToHandleMapIterators handles_from_starting_address(Address address) const; - AddressToHandleMapIterators handles_from_address_range(AddressRange range) const; + std::vector> handles_from_starting_address(Address address) const; + std::vector> handles_from_address_range(AddressRange range) const; SymbolHandle first_handle_from_starting_address(Address address) const; SymbolHandle first_handle_after_address(Address address) const; // Lookup symbols by their name. - NameToHandleMapIterators handles_from_name(const std::string& name) const; + std::vector> handles_from_name(const std::string& name) const; SymbolHandle first_handle_from_name(const std::string& name) const; // Find a symbol with an address range that contains the provided address. @@ -217,6 +199,9 @@ class SymbolList { void link_name_map(SymbolType& symbol); void unlink_name_map(SymbolType& symbol); + using AddressToHandleMap = std::multimap>; + using NameToHandleMap = std::multimap>; + std::vector m_symbols; AddressToHandleMap m_address_to_handle; NameToHandleMap m_name_to_handle; diff --git a/test/ccc/symbol_database_tests.cpp b/test/ccc/symbol_database_tests.cpp index 89eca7c..2e18903 100644 --- a/test/ccc/symbol_database_tests.cpp +++ b/test/ccc/symbol_database_tests.cpp @@ -57,22 +57,22 @@ TEST(CCCSymbolDatabase, HandlesFromAddressRange) EXPECT_EQ(empty_after_open.begin(), empty_after_open.end()); auto last = database.functions.handles_from_address_range(AddressRange(19, 30)); - EXPECT_EQ(last.begin()->second, handles[19]); + EXPECT_EQ(*last.begin(), handles[19]); auto last_open = database.functions.handles_from_address_range(AddressRange(19, Address())); - EXPECT_EQ(last_open.begin()->second, handles[19]); + EXPECT_EQ(*last_open.begin(), handles[19]); auto first_half = database.functions.handles_from_address_range(AddressRange(5, 15)); - EXPECT_EQ(first_half.begin()->second, handles[10]); - EXPECT_EQ(first_half.end()->second, handles[15]); + EXPECT_EQ(*first_half.begin(), handles[10]); + EXPECT_EQ(*(--first_half.end()), handles[15]); auto second_half = database.functions.handles_from_address_range(AddressRange(15, 25)); - EXPECT_EQ(second_half.begin()->second, handles[15]); - EXPECT_EQ((--second_half.end())->second, handles[19]); + EXPECT_EQ(*second_half.begin(), handles[15]); + EXPECT_EQ(*(--second_half.end()), handles[19]); auto whole_range = database.functions.handles_from_address_range(AddressRange(10, 20)); - EXPECT_EQ(whole_range.begin()->second, handles[10]); - EXPECT_EQ((--whole_range.end())->second, handles[19]); + EXPECT_EQ(*whole_range.begin(), handles[10]); + EXPECT_EQ(*(--whole_range.end()), handles[19]); } TEST(CCCSymbolDatabase, HandleFromStartingAddress) @@ -424,7 +424,7 @@ TEST(CCCSymbolDatabase, DeduplicateWobblyTypedefs) EXPECT_EQ(++handles.begin(), handles.end()); // Validate that we can lookup the struct and that it has a single field which is a type name. - DataType* chosen_type = database.data_types.symbol_from_handle(handles.begin()->second); + DataType* chosen_type = database.data_types.symbol_from_handle(*handles.begin()); ASSERT_TRUE(chosen_type && chosen_type->type() && chosen_type->type()->descriptor == ast::STRUCT_OR_UNION); ast::StructOrUnion& chosen_struct = chosen_type->type()->as(); ASSERT_EQ(chosen_struct.fields.size(), 1); From 968a907dfb0aaf32eca739ea613f1a43d78c5002 Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Wed, 18 Sep 2024 21:11:57 +0100 Subject: [PATCH 2/3] Fix HandlesFromAddressRange test --- src/ccc/util.h | 2 +- test/ccc/symbol_database_tests.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccc/util.h b/src/ccc/util.h index fc2cc84..aeb2bf1 100644 --- a/src/ccc/util.h +++ b/src/ccc/util.h @@ -270,7 +270,7 @@ struct AddressRange { Address high; AddressRange() {} - AddressRange(Address address) : low(address), high(address) {} + AddressRange(Address address) : low(address), high(address.value + 1) {} AddressRange(Address l, Address h) : low(l), high(h) {} friend auto operator<=>(const AddressRange& lhs, const AddressRange& rhs) = default; diff --git a/test/ccc/symbol_database_tests.cpp b/test/ccc/symbol_database_tests.cpp index 2e18903..87cdaa3 100644 --- a/test/ccc/symbol_database_tests.cpp +++ b/test/ccc/symbol_database_tests.cpp @@ -64,7 +64,7 @@ TEST(CCCSymbolDatabase, HandlesFromAddressRange) auto first_half = database.functions.handles_from_address_range(AddressRange(5, 15)); EXPECT_EQ(*first_half.begin(), handles[10]); - EXPECT_EQ(*(--first_half.end()), handles[15]); + EXPECT_EQ(*(--first_half.end()), handles[14]); auto second_half = database.functions.handles_from_address_range(AddressRange(15, 25)); EXPECT_EQ(*second_half.begin(), handles[15]); From abf86fa6c043e0f322b86d5408a6d07a54f6d18d Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Wed, 18 Sep 2024 21:13:20 +0100 Subject: [PATCH 3/3] Remove misleading AddressRange constructor --- src/ccc/util.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ccc/util.h b/src/ccc/util.h index aeb2bf1..72826f4 100644 --- a/src/ccc/util.h +++ b/src/ccc/util.h @@ -270,7 +270,6 @@ struct AddressRange { Address high; AddressRange() {} - AddressRange(Address address) : low(address), high(address.value + 1) {} AddressRange(Address l, Address h) : low(l), high(h) {} friend auto operator<=>(const AddressRange& lhs, const AddressRange& rhs) = default;