From 31e4588b5375877df23e3765464a449ac0e4b1ad Mon Sep 17 00:00:00 2001 From: chaoticgd <43898262+chaoticgd@users.noreply.github.com> Date: Wed, 30 Oct 2024 19:48:20 +0000 Subject: [PATCH] Add initial DWARF implementation --- CMakeLists.txt | 2 + src/ccc/dwarf_section.cpp | 503 +++++++++++++++++++++++++++++++++++++ src/ccc/dwarf_section.h | 214 ++++++++++++++++ src/ccc/elf.cpp | 18 +- src/ccc/elf.h | 2 + src/ccc/elf_symtab.cpp | 4 +- src/ccc/mdebug_section.cpp | 10 +- src/ccc/sndll.cpp | 9 +- src/ccc/symbol_file.cpp | 4 +- src/ccc/symbol_table.cpp | 59 ++++- src/ccc/symbol_table.h | 27 +- src/ccc/util.h | 30 ++- src/stdump.cpp | 4 +- 13 files changed, 857 insertions(+), 29 deletions(-) create mode 100644 src/ccc/dwarf_section.cpp create mode 100644 src/ccc/dwarf_section.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a1865fa..f3928b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,8 @@ add_library(ccc STATIC src/ccc/data_refinement.h src/ccc/dependency.cpp src/ccc/dependency.h + src/ccc/dwarf_section.cpp + src/ccc/dwarf_section.h src/ccc/elf.cpp src/ccc/elf.h src/ccc/elf_symtab.cpp diff --git a/src/ccc/dwarf_section.cpp b/src/ccc/dwarf_section.cpp new file mode 100644 index 0000000..85f3d84 --- /dev/null +++ b/src/ccc/dwarf_section.cpp @@ -0,0 +1,503 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#include "dwarf_section.h" + +#include "importer_flags.h" + +namespace ccc::dwarf { + +Value::Value() = default; + +Value::Value(const Value& rhs) +{ + memcpy(this, &rhs, sizeof(Value)); +} + +Value::~Value() = default; + +Value& Value::operator=(const Value& rhs) +{ + memcpy(this, &rhs, sizeof(Value)); + return *this; +} + +bool Value::valid() const +{ + return form_to_string(m_form) != nullptr; +} + +Value Value::from_address(u32 address) +{ + Value result; + result.m_form = FORM_ADDR; + result.m_value.address = address; + return result; +} + +Value Value::from_reference(u32 reference) +{ + Value result; + result.m_form = FORM_REF; + result.m_value.reference = reference; + return result; +} + +Value Value::from_constant(u64 constant) +{ + Value result; + result.m_form = FORM_DATA8; + result.m_value.constant = constant; + return result; +} + +Value Value::from_block(std::span block) +{ + Value result; + result.m_form = FORM_BLOCK4; + result.m_value.block.begin = block.data(); + result.m_value.block.end = block.data() + block.size(); + return result; +} + +Value Value::from_string(const char* string) +{ + Value result; + result.m_form = FORM_STRING; + result.m_value.string = string; + return result; +} + +u32 Value::address() const +{ + CCC_ASSERT(m_form == FORM_ADDR); + return m_value.address; +} + +u32 Value::reference() const +{ + CCC_ASSERT(m_form == FORM_REF); + return m_value.reference; +} + +u64 Value::constant() const +{ + CCC_ASSERT(m_form == FORM_DATA8); + return m_value.constant; +} + +std::span Value::block() const +{ + CCC_ASSERT(m_form == FORM_BLOCK4); + return std::span(m_value.block.begin, m_value.block.end); +} + +const char* Value::string() const +{ + CCC_ASSERT(m_form == FORM_STRING); + return m_value.string; +} + +// ***************************************************************************** + +Result> DIE::parse(std::span debug, u32 offset, u32 importer_flags) +{ + DIE die; + + die.m_debug = debug; + die.m_offset = offset; + + std::optional length = copy_unaligned(debug, offset); + CCC_CHECK(length.has_value(), "Cannot read length for die at 0x%x.", offset); + die.m_length = *length; + offset += sizeof(u32); + + if (die.m_length < 8) { + return std::optional(std::nullopt); + } + + std::optional tag = copy_unaligned(debug, offset); + CCC_CHECK(tag.has_value(), "Cannot read tag for die at 0x%x.", offset); + die.m_tag = static_cast(*tag); + offset += sizeof(u16); + + die.m_importer_flags = importer_flags; + + return std::optional(die); +} + +RequiredAttributes DIE::require_attributes(std::span input) +{ + RequiredAttributes output; + + for (u32 i = 0; i < static_cast(input.size()); i++) { + RequiredAttribute& attribute = output.emplace(input[i].attribute, input[i]).first->second; + attribute.index = i; + } + + return output; +} + +Result> DIE::first_child() const +{ + u32 sibling_offset = 0; + u32 offset = m_offset + 6; + while (offset < m_offset + m_length) { + Result attribute = parse_attribute(offset); + CCC_RETURN_IF_ERROR(attribute); + + if (attribute->attribute == AT_sibling && attribute->form == FORM_REF) { + sibling_offset = attribute->value.reference(); + } + } + + if (m_offset + m_length == sibling_offset) { + return std::optional(std::nullopt); + } + + return DIE::parse(m_debug, m_offset + m_length, m_importer_flags); +} + +Result> DIE::sibling() const +{ + u32 offset = m_offset + 6; + while (offset < m_offset + m_length) { + Result attribute = parse_attribute(offset); + CCC_RETURN_IF_ERROR(attribute); + + if (attribute->attribute == AT_sibling && attribute->form == FORM_REF) { + return DIE::parse(m_debug, attribute->value.reference(), m_importer_flags); + } + } + + return std::optional(std::nullopt); +} + +Tag DIE::tag() const +{ + return m_tag; +} + +Result DIE::attributes(std::span output, const RequiredAttributes& required) const +{ + u32 offset = m_offset + 6; + while (offset < m_offset + m_length) { + Result attribute = parse_attribute(offset); + CCC_RETURN_IF_ERROR(attribute); + + auto iterator = required.find(attribute->attribute); + if (iterator == required.end()) { + continue; + } + + CCC_CHECK(iterator->second.valid_forms & 1 << (attribute->form), + "Attribute %s has an unexpected form %s.", + form_to_string(attribute->form), + attribute_to_string(attribute->attribute)); + + *output[iterator->second.index] = std::move(attribute->value); + } + + return Result(); +} + +Result> DIE::all_attributes() const +{ + std::vector result; + + u32 offset = m_offset + 6; + while (offset < m_offset + m_length) { + Result attribute = parse_attribute(offset); + CCC_RETURN_IF_ERROR(attribute); + + result.emplace_back(std::move(*attribute)); + } + + return result; +} + +#define ATTRIBUTE_PARSER_CHECK(condition, message) \ + CCC_CHECK(condition, message " at 0x%x inside DIE at 0x%x.", offset, m_offset); +#define ATTRIBUTE_PARSER_CHECK_ARG(condition, message, arg) \ + CCC_CHECK(condition, message " at 0x%x inside DIE at 0x%x.", arg, offset, m_offset); + +Result DIE::parse_attribute(u32& offset) const +{ + AttributeTuple result; + + const std::optional name = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(name.has_value(), "Cannot read attribute name"); + offset += sizeof(u16); + + u8 form = *name & 0xf; + ATTRIBUTE_PARSER_CHECK_ARG(form_to_string(form) != nullptr, "Unknown attribute form 0x%hhx", form); + + u16 attribute = *name >> 4; + bool known_attribute = attribute_to_string(attribute); + if (!known_attribute) { + const char* uknown_attribute_error_message = + "Unknown user attribute name 0x%03hx at 0x%x inside DIE at 0x%x."; + if ((m_importer_flags & STRICT_PARSING) == 0 && attribute >= AT_lo_user && attribute <= AT_hi_user) { + CCC_WARN(uknown_attribute_error_message, *name, offset, m_offset); + } else { + return CCC_FAILURE(uknown_attribute_error_message, *name, offset, m_offset); + } + } + + result.form = static_cast
(form); + result.attribute = static_cast(attribute); + + switch (form) { + case FORM_ADDR: { + std::optional address = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(address.has_value(), "Cannot read address attribute"); + result.value = Value::from_address(*address); + offset += sizeof(u32); + break; + } + case FORM_REF: { + std::optional reference = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(reference.has_value(), "Cannot read reference attribute"); + result.value = Value::from_reference(*reference); + offset += sizeof(u32); + break; + } + case FORM_BLOCK2: { + std::optional size = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(size.has_value(), "Cannot read block attribute size"); + offset += sizeof(u16); + + ATTRIBUTE_PARSER_CHECK((u64) offset + *size <= m_debug.size(), "Cannot read block attribute data"); + result.value = Value::from_block(m_debug.subspan(offset, *size)); + offset += *size; + + break; + } + case FORM_BLOCK4: { + std::optional size = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(size.has_value(), "Cannot read block attribute size"); + offset += sizeof(u32); + + ATTRIBUTE_PARSER_CHECK((u64) offset + *size <= m_debug.size(), "Cannot read block attribute data"); + result.value = Value::from_block(m_debug.subspan(offset, *size)); + offset += *size; + + break; + } + case FORM_DATA2: { + std::optional constant = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(constant.has_value(), "Cannot read constant attribute"); + result.value = Value::from_constant(*constant); + offset += sizeof(u16); + break; + } + case FORM_DATA4: { + std::optional constant = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(constant.has_value(), "Cannot read constant attribute"); + result.value = Value::from_constant(*constant); + offset += sizeof(u32); + break; + } + case FORM_DATA8: { + std::optional constant = copy_unaligned(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(constant.has_value(), "Cannot read constant attribute"); + result.value = Value::from_constant(*constant); + offset += sizeof(u64); + break; + } + case FORM_STRING: { + const char* string = get_string(m_debug, offset); + ATTRIBUTE_PARSER_CHECK(string, "Cannot read string attribute"); + result.value = Value::from_string(string); + offset += strlen(string) + 1; + break; + } + } + + return result; +} + + +// ***************************************************************************** + +SectionReader::SectionReader(std::span debug, std::span line) + : m_debug(debug), m_line(line) {} + +Result SectionReader::first_die(u32 importer_flags) const +{ + Result> die = DIE::parse(m_debug, 0, importer_flags); + CCC_RETURN_IF_ERROR(die); + CCC_CHECK(die->has_value(), "DIE at offset 0x0 is null."); + return **die; +} + +static void indent(FILE* out, s32 depth) +{ + for (s32 i = 0; i < depth; i++) { + fputc('\t', out); + } +} + +Result SectionReader::print_dies(FILE* out, DIE die, s32 depth) const +{ + std::optional current_die = std::move(die); + + while (current_die.has_value()) { + indent(out, depth); + fprintf(out, "%s ", tag_to_string(current_die->tag())); + + Result result = print_attributes(out, *current_die); + CCC_RETURN_IF_ERROR(result); + + Result> child = current_die->first_child(); + CCC_RETURN_IF_ERROR(child); + + if (*child != std::nullopt) { + Result child_result = print_dies(out, **child, depth + 1); + CCC_RETURN_IF_ERROR(child_result); + } + + Result> next = current_die->sibling(); + CCC_RETURN_IF_ERROR(next); + current_die = *next; + } + + return Result(); +} + +Result SectionReader::print_attributes(FILE* out, const DIE& die) const +{ + Result> attributes = die.all_attributes(); + CCC_RETURN_IF_ERROR(attributes); + + for (const auto& [attribute, form, value] : *attributes) { + fprintf(out, "%s=", attribute_to_string(attribute)); + switch (form) { + case FORM_ADDR: fprintf(out, "0x%x", value.address()); break; + case FORM_REF: fprintf(out, "DIE@0x%x", value.reference()); break; + case FORM_BLOCK2: fprintf(out, "(block2)"); break; + case FORM_BLOCK4: fprintf(out, "(block4)"); break; + case FORM_DATA2: fprintf(out, "0x%hx", (short) value.constant()); break; + case FORM_DATA4: fprintf(out, "0x%x", (int) value.constant()); break; + case FORM_DATA8: fprintf(out, "0x%llx", (long long) value.constant()); break; + case FORM_STRING: fprintf(out, "\"%s\"", value.string()); break; + } + fprintf(out, " "); + } + fprintf(out, "\n"); + + return Result(); +} + +const char* tag_to_string(u32 tag) +{ + switch (tag) { + case TAG_padding: return "padding"; + case TAG_array_type: return "array_type"; + case TAG_class_type: return "class_type"; + case TAG_entry_point: return "entry_point"; + case TAG_enumeration_type: return "enumeration_type"; + case TAG_formal_parameter: return "formal_parameter"; + case TAG_global_subroutine: return "global_subroutine"; + case TAG_global_variable: return "global_variable"; + case TAG_label: return "label"; + case TAG_lexical_block: return "lexical_block"; + case TAG_local_variable: return "local_variable"; + case TAG_member: return "member"; + case TAG_pointer_type: return "pointer_type"; + case TAG_reference_type: return "reference_type"; + case TAG_compile_unit: return "compile_unit"; + case TAG_string_type: return "string_type"; + case TAG_structure_type: return "structure_type"; + case TAG_subroutine: return "subroutine"; + case TAG_subroutine_type: return "subroutine_type"; + case TAG_typedef: return "typedef"; + case TAG_union_type: return "union_type"; + case TAG_unspecified_parameters: return "unspecified_parameters"; + case TAG_variant: return "variant"; + case TAG_common_block: return "common_block"; + case TAG_common_inclusion: return "common_inclusion"; + case TAG_inheritance: return "inheritance"; + case TAG_inlined_subroutine: return "inlined_subroutine"; + case TAG_module: return "module"; + case TAG_ptr_to_member_type: return "ptr_to_member_type"; + case TAG_set_type: return "set_type"; + case TAG_subrange_type: return "subrange_type"; + case TAG_with_stmt: return "with_stmt"; + case TAG_format_label: return "format_label"; + case TAG_namelist: return "namelist"; + case TAG_function_template: return "function_template"; + case TAG_class_template: return "class_template"; + } + + return "unknown"; +} + +const char* form_to_string(u32 form) +{ + switch (form) { + case FORM_ADDR: return "addr"; + case FORM_REF: return "ref"; + case FORM_BLOCK2: return "block2"; + case FORM_BLOCK4: return "block4"; + case FORM_DATA2: return "data2"; + case FORM_DATA4: return "data4"; + case FORM_DATA8: return "data8"; + case FORM_STRING: return "string"; + } + + return nullptr; +} + +const char* attribute_to_string(u32 attribute) +{ + switch (attribute) { + case AT_sibling: return "sibling"; + case AT_location: return "location"; + case AT_name: return "name"; + case AT_fund_type: return "fund_type"; + case AT_mod_fund_type: return "mod_fund_type"; + case AT_user_def_type: return "user_def_type"; + case AT_mod_u_d_type: return "mod_u_d_type"; + case AT_ordering: return "ordering"; + case AT_subscr_data: return "subscr_data"; + case AT_byte_size: return "byte_size"; + case AT_bit_offset: return "bit_offset"; + case AT_bit_size: return "bit_size"; + case AT_element_list: return "element_list"; + case AT_stmt_list: return "stmt_list"; + case AT_low_pc: return "low_pc"; + case AT_high_pc: return "high_pc"; + case AT_language: return "language"; + case AT_member: return "member"; + case AT_discr: return "discr"; + case AT_discr_value: return "discr_value"; + case AT_string_length: return "string_length"; + case AT_common_reference: return "common_reference"; + case AT_comp_dir: return "comp_dir"; + case AT_const_value: return "const_value"; + case AT_containing_type: return "containing_type"; + case AT_default_value: return "default_value"; + case AT_friends: return "friends"; + case AT_inline: return "inline"; + case AT_is_optional: return "is_optional"; + case AT_lower_bound: return "lower_bound"; + case AT_program: return "program"; + case AT_private: return "private"; + case AT_producer: return "producer"; + case AT_protected: return "protected"; + case AT_prototyped: return "prototyped"; + case AT_public: return "public"; + case AT_pure_virtual: return "pure_virtual"; + case AT_return_addr: return "return_addr"; + case AT_specification: return "specification"; + case AT_start_scope: return "start_scope"; + case AT_stride_size: return "stride_size"; + case AT_upper_bound: return "upper_bound"; + case AT_virtual: return "virtual"; + } + + return nullptr; +} + +} diff --git a/src/ccc/dwarf_section.h b/src/ccc/dwarf_section.h new file mode 100644 index 0000000..4431ddd --- /dev/null +++ b/src/ccc/dwarf_section.h @@ -0,0 +1,214 @@ +// This file is part of the Chaos Compiler Collection. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "util.h" + +#include + +namespace ccc::dwarf { + +enum Tag : u16 { + TAG_padding = 0x0000, + TAG_array_type = 0x0001, + TAG_class_type = 0x0002, + TAG_entry_point = 0x0003, + TAG_enumeration_type = 0x0004, + TAG_formal_parameter = 0x0005, + TAG_global_subroutine = 0x0006, + TAG_global_variable = 0x0007, + TAG_label = 0x000a, + TAG_lexical_block = 0x000b, + TAG_local_variable = 0x000c, + TAG_member = 0x000d, + TAG_pointer_type = 0x000f, + TAG_reference_type = 0x0010, + TAG_compile_unit = 0x0011, + TAG_string_type = 0x0012, + TAG_structure_type = 0x0013, + TAG_subroutine = 0x0014, + TAG_subroutine_type = 0x0015, + TAG_typedef = 0x0016, + TAG_union_type = 0x0017, + TAG_unspecified_parameters = 0x0018, + TAG_variant = 0x0019, + TAG_common_block = 0x001a, + TAG_common_inclusion = 0x001b, + TAG_inheritance = 0x001c, + TAG_inlined_subroutine = 0x001d, + TAG_module = 0x001e, + TAG_ptr_to_member_type = 0x001f, + TAG_set_type = 0x0020, + TAG_subrange_type = 0x0021, + TAG_with_stmt = 0x0022, + + /* GNU extensions */ + + TAG_format_label = 0x8000, /* for FORTRAN 77 and Fortran 90 */ + TAG_namelist = 0x8001, /* For Fortran 90 */ + TAG_function_template = 0x8002, /* for C++ */ + TAG_class_template = 0x8003 /* for C++ */ +}; + +enum Form { + FORM_ADDR = 0x1, + FORM_REF = 0x2, + FORM_BLOCK2 = 0x3, + FORM_BLOCK4 = 0x4, + FORM_DATA2 = 0x5, + FORM_DATA4 = 0x6, + FORM_DATA8 = 0x7, + FORM_STRING = 0x8, +}; + +enum Attribute { + AT_sibling = 0x001, + AT_location = 0x002, + AT_name = 0x003, + AT_fund_type = 0x005, + AT_mod_fund_type = 0x006, + AT_user_def_type = 0x007, + AT_mod_u_d_type = 0x008, + AT_ordering = 0x009, + AT_subscr_data = 0x00a, + AT_byte_size = 0x00b, + AT_bit_offset = 0x00c, + AT_bit_size = 0x00d, + AT_element_list = 0x00f, + AT_stmt_list = 0x010, + AT_low_pc = 0x011, + AT_high_pc = 0x012, + AT_language = 0x013, + AT_member = 0x014, + AT_discr = 0x015, + AT_discr_value = 0x016, + AT_string_length = 0x019, + AT_common_reference = 0x01a, + AT_comp_dir = 0x01b, + AT_const_value = 0x01c, + AT_containing_type = 0x01d, + AT_default_value = 0x01e, + AT_friends = 0x01f, + AT_inline = 0x020, + AT_is_optional = 0x021, + AT_lower_bound = 0x022, + AT_program = 0x023, + AT_private = 0x024, + AT_producer = 0x025, + AT_protected = 0x026, + AT_prototyped = 0x027, + AT_public = 0x028, + AT_pure_virtual = 0x029, + AT_return_addr = 0x02a, + AT_specification = 0x02b, + AT_start_scope = 0x02c, + AT_stride_size = 0x02e, + AT_upper_bound = 0x02f, + AT_virtual = 0x030, + AT_lo_user = 0x200, + AT_hi_user = 0x3ff +}; + +// The value of an attribute. +class Value { +public: + Value(); + Value(const Value& rhs); + ~Value(); + Value& operator=(const Value& rhs); + + bool valid() const; + + static Value from_address(u32 address); + static Value from_reference(u32 reference); + static Value from_constant(u64 constant); + static Value from_block(std::span block); + static Value from_string(const char* string); + + u32 address() const; + u32 reference() const; + u64 constant() const; + std::span block() const; + const char* string() const; + +protected: + u8 m_form = 0; + union { + u32 address; + u32 reference; + u64 constant; + struct { + const u8* begin; + const u8* end; + } block; + const char* string; + } m_value; +}; + +struct AttributeTuple { + Attribute attribute; + Form form; + Value value; +}; + +struct RequiredAttribute { + Attribute attribute; + u32 valid_forms; + u32 index; +}; + +using RequiredAttributes = std::map; + +// Represents a Debugging Information Entry. Intended to be used to +// incrementally parse a .debug section. +class DIE { +public: + // Parse a single DIE. Will return std::nullopt for padding entries smaller + // than 8 bytes. + static Result> parse(std::span debug, u32 offset, u32 importer_flags); + + // Generate a map of attributes to read, to be used for parsing attributes. + static RequiredAttributes require_attributes(std::span input); + + Result> first_child() const; + Result> sibling() const; + + Tag tag() const; + + // Parse the attributes, and output the ones specified by the required parameter. + Result attributes(std::span output, const RequiredAttributes& required) const; + + // Parse the attributes, and output them all in order. + Result> all_attributes() const; + +protected: + // Parse a single attribute and advance the offset. + Result parse_attribute(u32& offset) const; + + std::span m_debug; + u32 m_offset; + u32 m_length; + Tag m_tag; + u32 m_importer_flags; +}; + +class SectionReader { +public: + SectionReader(std::span debug, std::span line); + + Result first_die(u32 importer_flags) const; + + Result print_dies(FILE* out, DIE die, s32 depth) const; + Result print_attributes(FILE* out, const DIE& die) const; + +protected: + std::span m_debug; + std::span m_line; +}; + +const char* tag_to_string(u32 tag); +const char* form_to_string(u32 form); +const char* attribute_to_string(u32 attribute); + +} diff --git a/src/ccc/elf.cpp b/src/ccc/elf.cpp index 8e1c976..6aeb022 100644 --- a/src/ccc/elf.cpp +++ b/src/ccc/elf.cpp @@ -26,21 +26,21 @@ Result ElfFile::parse(std::vector image) ElfFile elf; elf.image = std::move(image); - const ElfIdentHeader* ident = get_packed(elf.image, 0); + const ElfIdentHeader* ident = get_unaligned(elf.image, 0); CCC_CHECK(ident, "ELF ident header out of range."); CCC_CHECK(ident->magic == CCC_FOURCC("\x7f\x45\x4c\x46"), "Not an ELF file."); CCC_CHECK(ident->e_class == ElfIdentClass::B32, "Wrong ELF class (not 32 bit)."); - const ElfFileHeader* header = get_packed(elf.image, sizeof(ElfIdentHeader)); + const ElfFileHeader* header = get_unaligned(elf.image, sizeof(ElfIdentHeader)); CCC_CHECK(header, "ELF file header out of range."); elf.file_header = *header; - const ElfSectionHeader* shstr_section_header = get_packed(elf.image, header->shoff + header->shstrndx * sizeof(ElfSectionHeader)); + const ElfSectionHeader* shstr_section_header = get_unaligned(elf.image, header->shoff + header->shstrndx * sizeof(ElfSectionHeader)); CCC_CHECK(shstr_section_header, "ELF section name header out of range."); for (u32 i = 0; i < header->shnum; i++) { u64 header_offset = header->shoff + i * sizeof(ElfSectionHeader); - const ElfSectionHeader* section_header = get_packed(elf.image, header_offset); + const ElfSectionHeader* section_header = get_unaligned(elf.image, header_offset); CCC_CHECK(section_header, "ELF section header out of range."); const char* name = get_string(elf.image, shstr_section_header->offset + section_header->name); @@ -53,7 +53,7 @@ Result ElfFile::parse(std::vector image) for (u32 i = 0; i < header->phnum; i++) { u64 header_offset = header->phoff + i * sizeof(ElfProgramHeader); - const ElfProgramHeader* program_header = get_packed(elf.image, header_offset); + const ElfProgramHeader* program_header = get_unaligned(elf.image, header_offset); CCC_CHECK(program_header, "ELF program header out of range."); elf.segments.emplace_back(*program_header); @@ -234,6 +234,14 @@ const ElfSection* ElfFile::lookup_section(const char* name) const return nullptr; } +Result> ElfFile::section_contents(const ElfSection& section) const +{ + CCC_CHECK((u64) section.header.offset + section.header.size <= image.size(), + "Section '%s' out of range.", section.name.c_str()); + + return std::span(image).subspan(section.header.offset, section.header.size); +} + std::optional ElfFile::file_offset_to_virtual_address(u32 file_offset) const { for (const ElfProgramHeader& segment : segments) { diff --git a/src/ccc/elf.h b/src/ccc/elf.h index 4afd521..5935104 100644 --- a/src/ccc/elf.h +++ b/src/ccc/elf.h @@ -136,6 +136,8 @@ struct ElfFile { static std::optional parse_link_once_section_name(const std::string& section_name); const ElfSection* lookup_section(const char* name) const; + Result> section_contents(const ElfSection& section) const; + std::optional file_offset_to_virtual_address(u32 file_offset) const; // Find the program header for the segment that contains the entry point. diff --git a/src/ccc/elf_symtab.cpp b/src/ccc/elf_symtab.cpp index adda99c..674f228 100644 --- a/src/ccc/elf_symtab.cpp +++ b/src/ccc/elf_symtab.cpp @@ -60,7 +60,7 @@ Result import_symbols( const DemanglerFunctions& demangler) { for (u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) { - const Symbol* symbol = get_packed(symtab, i * sizeof(Symbol)); + const Symbol* symbol = get_unaligned(symtab, i * sizeof(Symbol)); CCC_ASSERT(symbol); Address address; @@ -153,7 +153,7 @@ Result print_symbol_table(FILE* out, std::span symtab, std::span fprintf(out, " Num: Value Size Type Bind Vis Ndx Name\n"); for (u32 i = 0; i < symtab.size() / sizeof(Symbol); i++) { - const Symbol* symbol = get_packed(symtab, i * sizeof(Symbol)); + const Symbol* symbol = get_unaligned(symtab, i * sizeof(Symbol)); CCC_ASSERT(symbol); const char* type = symbol_type_to_string(symbol->type()); diff --git a/src/ccc/mdebug_section.cpp b/src/ccc/mdebug_section.cpp index 42f7ff1..23ad7ad 100644 --- a/src/ccc/mdebug_section.cpp +++ b/src/ccc/mdebug_section.cpp @@ -103,7 +103,7 @@ Result SymbolTableReader::init(std::span elf, s32 section_offset m_elf = elf; m_section_offset = section_offset; - m_hdrr = get_packed(m_elf, m_section_offset); + m_hdrr = get_unaligned(m_elf, m_section_offset); CCC_CHECK(m_hdrr != nullptr, "MIPS debug section header out of bounds."); CCC_CHECK(m_hdrr->magic == 0x7009, "Invalid symbolic header."); @@ -129,7 +129,7 @@ Result SymbolTableReader::parse_file(s32 index) const File file; u64 fd_offset = m_hdrr->file_descriptors_offset + index * sizeof(FileDescriptor); - const FileDescriptor* fd_header = get_packed(m_elf, fd_offset + m_fudge_offset); + const FileDescriptor* fd_header = get_unaligned(m_elf, fd_offset + m_fudge_offset); CCC_CHECK(fd_header != nullptr, "MIPS debug file descriptor out of bounds."); CCC_CHECK(fd_header->f_big_endian() == 0, "Not little endian or bad file descriptor table."); @@ -146,7 +146,7 @@ Result SymbolTableReader::parse_file(s32 index) const for (s64 j = 0; j < fd_header->symbol_count; j++) { u64 rel_symbol_offset = (fd_header->isym_base + j) * sizeof(SymbolHeader); u64 symbol_offset = m_hdrr->local_symbols_offset + rel_symbol_offset + m_fudge_offset; - const SymbolHeader* symbol_header = get_packed(m_elf, symbol_offset); + const SymbolHeader* symbol_header = get_unaligned(m_elf, symbol_offset); CCC_CHECK(symbol_header != nullptr, "Symbol header out of bounds."); s32 strings_offset = m_hdrr->local_strings_offset + fd_header->strings_offset + m_fudge_offset; @@ -168,7 +168,7 @@ Result SymbolTableReader::parse_file(s32 index) const for (s64 i = 0; i < fd_header->procedure_descriptor_count; i++) { u64 rel_procedure_offset = (fd_header->ipd_first + i) * sizeof(ProcedureDescriptor); u64 procedure_offset = m_hdrr->procedure_descriptors_offset + rel_procedure_offset + m_fudge_offset; - const ProcedureDescriptor* procedure_descriptor = get_packed(m_elf, procedure_offset); + const ProcedureDescriptor* procedure_descriptor = get_unaligned(m_elf, procedure_offset); CCC_CHECK(procedure_descriptor != nullptr, "Procedure descriptor out of bounds."); CCC_CHECK(procedure_descriptor->symbol_index < file.symbols.size(), "Symbol index out of bounds."); @@ -188,7 +188,7 @@ Result> SymbolTableReader::parse_external_symbols() const std::vector external_symbols; for (s64 i = 0; i < m_hdrr->external_symbols_count; i++) { u64 sym_offset = m_hdrr->external_symbols_offset + i * sizeof(ExternalSymbolHeader); - const ExternalSymbolHeader* external_header = get_packed(m_elf, sym_offset + m_fudge_offset); + const ExternalSymbolHeader* external_header = get_unaligned(m_elf, sym_offset + m_fudge_offset); CCC_CHECK(external_header != nullptr, "External header out of bounds."); Result sym = get_symbol(external_header->symbol, m_elf, m_hdrr->external_strings_offset + m_fudge_offset); diff --git a/src/ccc/sndll.cpp b/src/ccc/sndll.cpp index ad995a4..e486a3f 100644 --- a/src/ccc/sndll.cpp +++ b/src/ccc/sndll.cpp @@ -54,18 +54,19 @@ static const char* sndll_symbol_type_to_string(SNDLLSymbolType type); Result parse_sndll_file(std::span image, Address address, SNDLLType type) { - const u32* magic = get_packed(image, 0); + const std::optional magic = copy_unaligned(image, 0); + CCC_CHECK(magic.has_value(), "File too small."); CCC_CHECK((*magic & 0xffffff) == CCC_FOURCC("SNR\00"), "Not a SNDLL %s.", address.valid() ? "section" : "file"); char version = *magic >> 24; switch (version) { case '1': { - const SNDLLHeaderV1* header = get_packed(image, 0); + const SNDLLHeaderV1* header = get_unaligned(image, 0); CCC_CHECK(header, "File too small to contain SNDLL V1 header."); return parse_sndll_common(image, address, type, header->common, SNDLL_V1); } case '2': { - const SNDLLHeaderV2* header = get_packed(image, 0); + const SNDLLHeaderV2* header = get_unaligned(image, 0); CCC_CHECK(header, "File too small to contain SNDLL V2 header."); return parse_sndll_common(image, address, type, header->common, SNDLL_V2); } @@ -95,7 +96,7 @@ static Result parse_sndll_common( for (u32 i = 0; i < common.symbol_count; i++) { u32 symbol_offset = common.symbols - address.get_or_zero() + i * sizeof(SNDLLSymbolHeader); - const SNDLLSymbolHeader* symbol_header = get_packed(image, symbol_offset); + const SNDLLSymbolHeader* symbol_header = get_unaligned(image, symbol_offset); CCC_CHECK(symbol_header, "SNDLL symbol out of range."); const char* string = nullptr; diff --git a/src/ccc/symbol_file.cpp b/src/ccc/symbol_file.cpp index 00e7a43..b785a66 100644 --- a/src/ccc/symbol_file.cpp +++ b/src/ccc/symbol_file.cpp @@ -7,8 +7,8 @@ namespace ccc { Result> parse_symbol_file(std::vector image, std::string file_name) { - const u32* magic = get_packed(image, 0); - CCC_CHECK(magic, "File too small."); + std::optional magic = copy_unaligned(image, 0); + CCC_CHECK(magic.has_value(), "File too small."); std::unique_ptr symbol_file; diff --git a/src/ccc/symbol_table.cpp b/src/ccc/symbol_table.cpp index 1a62aca..fcfd3d8 100644 --- a/src/ccc/symbol_table.cpp +++ b/src/ccc/symbol_table.cpp @@ -5,6 +5,7 @@ #include "elf.h" #include "elf_symtab.h" +#include "dwarf_section.h" #include "mdebug_importer.h" #include "mdebug_section.h" #include "sndll.h" @@ -13,6 +14,7 @@ namespace ccc { const std::vector SYMBOL_TABLE_FORMATS = { {MDEBUG, "mdebug", ".mdebug"}, + {DWARF, "dwarf", ".debug"}, {SYMTAB, "symtab", ".symtab"}, {SNDLL, "sndll", ".sndata"} }; @@ -58,8 +60,25 @@ Result> create_elf_symbol_table( symbol_table = std::make_unique(elf.image, (s32) section.header.offset); break; } + case DWARF: { + const ElfSection* debug_section = elf.lookup_section(".debug"); + CCC_CHECK(debug_section, "No .debug section."); + + const ElfSection* line_section = elf.lookup_section(".line"); + CCC_CHECK(line_section, "No .line section."); + + Result> debug = elf.section_contents(*debug_section); + CCC_RETURN_IF_ERROR(debug); + + Result> line = elf.section_contents(*line_section); + CCC_RETURN_IF_ERROR(line); + + symbol_table = std::make_unique(*debug, *line); + + break; + } case SYMTAB: { - CCC_CHECK(section.header.offset + section.header.size <= elf.image.size(), + CCC_CHECK((u64) section.header.offset + section.header.size <= elf.image.size(), "Section '%s' out of range.", section.name.c_str()); std::span data = std::span(elf.image).subspan(section.header.offset, section.header.size); @@ -68,7 +87,7 @@ Result> create_elf_symbol_table( "Section '%s' has out of range link field.", section.name.c_str()); const ElfSection& linked_section = elf.sections[section.header.link]; - CCC_CHECK(linked_section.header.offset + linked_section.header.size <= elf.image.size(), + CCC_CHECK((u64) linked_section.header.offset + linked_section.header.size <= elf.image.size(), "Linked section '%s' out of range.", linked_section.name.c_str()); std::span linked_data = std::span(elf.image).subspan( linked_section.header.offset, linked_section.header.size); @@ -78,7 +97,7 @@ Result> create_elf_symbol_table( break; } case SNDLL: { - CCC_CHECK(section.header.offset + section.header.size <= elf.image.size(), + CCC_CHECK((u64) section.header.offset + section.header.size <= elf.image.size(), "Section '%s' out of range.", section.name.c_str()); std::span data = std::span(elf.image).subspan(section.header.offset, section.header.size); @@ -184,6 +203,40 @@ Result MdebugSymbolTable::print_symbols(FILE* out, u32 flags) const return Result(); } +// ***************************************************************************** + +DwarfSymbolTable::DwarfSymbolTable(std::span debug, std::span line) + : m_debug(debug), m_line(line) {} + +const char* DwarfSymbolTable::name() const +{ + return "DWARF Symbol Table"; +} + +Result DwarfSymbolTable::import( + SymbolDatabase& database, + const SymbolGroup& group, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt) const +{ + return Result(); +} + +Result DwarfSymbolTable::print_headers(FILE* out) const +{ + return Result(); +} + +Result DwarfSymbolTable::print_symbols(FILE* out, u32 flags) const +{ + dwarf::SectionReader reader(m_debug, m_line); + + Result first_die = reader.first_die(NO_IMPORTER_FLAGS); + CCC_RETURN_IF_ERROR(first_die); + + return reader.print_dies(out, std::move(*first_die), 0); +} // ***************************************************************************** diff --git a/src/ccc/symbol_table.h b/src/ccc/symbol_table.h index 2002ab0..217e2f9 100644 --- a/src/ccc/symbol_table.h +++ b/src/ccc/symbol_table.h @@ -11,10 +11,12 @@ namespace ccc { // Determine which symbol tables are present in a given file. +// A lower number means the symbol table will be imported first. enum SymbolTableFormat { MDEBUG = 0, // The infamous Third Eye symbol table. - SYMTAB = 1, // Standard ELF symbol table. - SNDLL = 2 // SNDLL dynamic linker symbol table. + DWARF = 1, // DWARF symbol table. + SYMTAB = 2, // Standard ELF symbol table. + SNDLL = 3 // SNDLL dynamic linker symbol table. }; struct SymbolTableFormatInfo { @@ -98,6 +100,27 @@ class MdebugSymbolTable : public SymbolTable { s32 m_section_offset; }; +class DwarfSymbolTable : public SymbolTable { +public: + DwarfSymbolTable(std::span debug, std::span line); + + const char* name() const override; + + Result import( + SymbolDatabase& database, + const SymbolGroup& group, + u32 importer_flags, + const DemanglerFunctions& demangler, + const std::atomic_bool* interrupt) const override; + + Result print_headers(FILE* out) const override; + Result print_symbols(FILE* out, u32 flags) const override; + +protected: + std::span m_debug; + std::span m_line; +}; + class SymtabSymbolTable : public SymbolTable { public: SymtabSymbolTable(std::span symtab, std::span strtab); diff --git a/src/ccc/util.h b/src/ccc/util.h index f78dcfe..df7834c 100644 --- a/src/ccc/util.h +++ b/src/ccc/util.h @@ -208,13 +208,35 @@ void warn_impl(const char* source_file, int source_line, const char* format, Arg #endif template -const T* get_packed(std::span bytes, u64 offset) +const T* get_aligned(std::span bytes, u64 offset) { - if (offset + sizeof(T) <= bytes.size()) { - return reinterpret_cast(&bytes[offset]); - } else { + if (offset + sizeof(T) > bytes.size() || offset % alignof(T) != 0) { return nullptr; } + + return reinterpret_cast(&bytes[offset]); +} + +template +const T* get_unaligned(std::span bytes, u64 offset) +{ + if (offset + sizeof(T) > bytes.size()) { + return nullptr; + } + + return reinterpret_cast(&bytes[offset]); +} + +template +const std::optional copy_unaligned(std::span bytes, u64 offset) +{ + if (offset + sizeof(T) > bytes.size()) { + return std::nullopt; + } + + T value; + memcpy(&value, &bytes[offset], sizeof(T)); + return value; } const char* get_string(std::span bytes, u64 offset); diff --git a/src/stdump.cpp b/src/stdump.cpp index fa80adf..8c46a58 100644 --- a/src/stdump.cpp +++ b/src/stdump.cpp @@ -158,8 +158,8 @@ static void identify_symbol_tables_in_file(FILE* out, u32* totals, u32* unknown_ Result> file = platform::read_binary_file(file_path); CCC_EXIT_IF_ERROR(file); - const u32* fourcc = get_packed(*file, 0); - if (!fourcc) { + const std::optional fourcc = copy_unaligned(*file, 0); + if (!fourcc.has_value()) { fprintf(out, " file too small\n"); return; }