From 2b61d3c9c93da868fef3be9e0635f29ea6004590 Mon Sep 17 00:00:00 2001 From: sudoBash418 Date: Sun, 1 Sep 2024 23:43:38 -0600 Subject: [PATCH] Add support for symbolicating APK/ZIP-embedded libraries on Android By default, modern Android build tools will store native libraries uncompressed, and the [loader][1] will map them directly from the APK (instead of the package manager extracting them on installation). This commit adds support for symbolicating these embedded libraries. To avoid parsing ZIP structures, the offset of the library within the archive is determined via /proc/self/maps. [1]: https://cs.android.com/search?q=open_library_in_zipfile&ss=android%2Fplatform%2Fsuperproject%2Fmain --- Cargo.lock | 1 + Cargo.toml | 3 ++ src/symbolize/gimli.rs | 23 ++++++------ src/symbolize/gimli/elf.rs | 35 ++++++++++++++++++ src/symbolize/gimli/libs_dl_iterate_phdr.rs | 36 ++++++++++++++++--- src/symbolize/gimli/mmap_unix.rs | 4 +-- .../gimli/parse_running_mmaps_unix.rs | 4 +++ 7 files changed, 89 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8f4388b0e..11338ab0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,6 +40,7 @@ dependencies = [ "dylib-dep", "libc", "libloading", + "memchr", "miniz_oxide", "object", "rustc-demangle", diff --git a/Cargo.toml b/Cargo.toml index 00e9b246f..668fd5eb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,9 @@ miniz_oxide = { version = "0.8", default-features = false } addr2line = { version = "0.24.0", default-features = false } libc = { version = "0.2.156", default-features = false } +[target.'cfg(target_os = "android")'.dependencies] +memchr = { version = "2.7", default-features = false } + [target.'cfg(not(all(windows, target_env = "msvc", not(target_vendor = "uwp"))))'.dependencies.object] version = "0.36.0" default-features = false diff --git a/src/symbolize/gimli.rs b/src/symbolize/gimli.rs index 8c7051d4d..7502d2c73 100644 --- a/src/symbolize/gimli.rs +++ b/src/symbolize/gimli.rs @@ -187,7 +187,7 @@ 
impl<'data> Context<'data> { fn mmap(path: &Path) -> Option { let file = File::open(path).ok()?; let len = file.metadata().ok()?.len().try_into().ok()?; - unsafe { Mmap::map(&file, len) } + unsafe { Mmap::map(&file, len, 0) } } cfg_if::cfg_if! { @@ -269,6 +269,8 @@ struct Cache { struct Library { name: OsString, + #[cfg(target_os = "android")] + zip_offset: usize, #[cfg(target_os = "aix")] /// On AIX, the library mmapped can be a member of a big-archive file. /// For example, with a big-archive named libfoo.a containing libbar.so, @@ -295,17 +297,16 @@ struct LibrarySegment { len: usize, } -#[cfg(target_os = "aix")] fn create_mapping(lib: &Library) -> Option { - let name = &lib.name; - let member_name = &lib.member_name; - Mapping::new(name.as_ref(), member_name) -} - -#[cfg(not(target_os = "aix"))] -fn create_mapping(lib: &Library) -> Option { - let name = &lib.name; - Mapping::new(name.as_ref()) + cfg_if::cfg_if! { + if #[cfg(target_os = "aix")] { + Mapping::new(lib.name.as_ref(), &lib.member_name) + } else if #[cfg(target_os = "android")] { + Mapping::new_android(lib.name.as_ref(), lib.zip_offset) + } else { + Mapping::new(lib.name.as_ref()) + } + } } // unsafe because this is required to be externally synchronized diff --git a/src/symbolize/gimli/elf.rs b/src/symbolize/gimli/elf.rs index 906a30054..6112cefcb 100644 --- a/src/symbolize/gimli/elf.rs +++ b/src/symbolize/gimli/elf.rs @@ -43,6 +43,41 @@ impl Mapping { }) } + /// On Android, shared objects can be loaded directly from a + /// ZIP archive. For example, an app may load a library from + /// `/data/app/com.example/base.apk!/lib/x86_64/mylib.so` + /// + /// For one of these "ZIP-embedded" libraries, `zip_offset` will be + /// non-zero (see [super::libs_dl_iterate_phdr]). 
+    #[cfg(target_os = "android")]
+    pub fn new_android(path: &Path, zip_offset: usize) -> Option<Mapping> {
+        fn map_embedded_library(path: &Path, zip_offset: usize) -> Option<Mapping> {
+            // get path of ZIP archive (delimited by `!/`)
+            let raw_path = path.as_os_str().as_bytes();
+            let zip_path = memchr::memmem::find(raw_path, b"!/").map(|match_index| {
+                Path::new(OsStr::from_bytes(raw_path.split_at(match_index).0))
+            })?;
+
+            let file = fs::File::open(zip_path).ok()?;
+            let len: usize = file.metadata().ok()?.len().try_into().ok()?;
+            let rest = len.checked_sub(zip_offset)?; // `None` if the offset lies past EOF
+            // NOTE: we map the remainder of the entire archive instead of just the library so we don't have to determine its length
+            // NOTE: mmap will fail if `zip_offset` is not page-aligned
+            let map = unsafe { super::mmap::Mmap::map(&file, rest, zip_offset) }?;
+
+            Mapping::mk(map, |map, stash| {
+                Context::new(stash, Object::parse(&map)?, None, None)
+            })
+        }
+
+        // if ZIP offset is non-zero, try mapping as a ZIP-embedded library
+        if zip_offset > 0 {
+            map_embedded_library(path, zip_offset).or_else(|| Self::new(path))
+        } else {
+            Self::new(path)
+        }
+    }
+
     /// Load debuginfo from an external debug file.
fn new_debug(original_path: &Path, path: PathBuf, crc: Option) -> Option { let map = super::mmap(&path)?; diff --git a/src/symbolize/gimli/libs_dl_iterate_phdr.rs b/src/symbolize/gimli/libs_dl_iterate_phdr.rs index e15750ec4..cdad96cad 100644 --- a/src/symbolize/gimli/libs_dl_iterate_phdr.rs +++ b/src/symbolize/gimli/libs_dl_iterate_phdr.rs @@ -9,12 +9,21 @@ use super::mystd::os::unix::prelude::*; use super::{Library, LibrarySegment, OsString, Vec}; use core::slice; +struct CallbackData { + ret: Vec, + #[cfg(target_os = "android")] + maps: Option>, +} pub(super) fn native_libraries() -> Vec { - let mut ret = Vec::new(); + let mut cb_data = CallbackData { + ret: Vec::new(), + #[cfg(target_os = "android")] + maps: super::parse_running_mmaps::parse_maps().ok(), + }; unsafe { - libc::dl_iterate_phdr(Some(callback), core::ptr::addr_of_mut!(ret).cast()); + libc::dl_iterate_phdr(Some(callback), core::ptr::addr_of_mut!(cb_data).cast()); } - return ret; + cb_data.ret } fn infer_current_exe(base_addr: usize) -> OsString { @@ -50,7 +59,11 @@ unsafe extern "C" fn callback( let dlpi_phdr = unsafe { (*info).dlpi_phdr }; let dlpi_phnum = unsafe { (*info).dlpi_phnum }; // SAFETY: We assured this. 
- let libs = unsafe { &mut *vec.cast::>() }; + let CallbackData { + ret: libs, + #[cfg(target_os = "android")] + maps, + } = unsafe { &mut *vec.cast::() }; // most implementations give us the main program first let is_main = libs.is_empty(); // we may be statically linked, which means we are main and mostly one big blob of code @@ -73,6 +86,19 @@ unsafe extern "C" fn callback( OsStr::from_bytes(unsafe { CStr::from_ptr(dlpi_name) }.to_bytes()).to_owned() } }; + #[cfg(target_os = "android")] + let zip_offset = { + // only check for ZIP-embedded file if we have data from /proc/self/maps + maps.as_ref().and_then(|maps| { + // check if file is embedded within a ZIP archive by searching for `!/` + memchr::memmem::find(name.as_bytes(), b"!/").and_then(|_| { + // find MapsEntry matching library's base address + maps.iter() + .find(|m| m.ip_matches(dlpi_addr as usize)) + .map(|m| m.offset()) + }) + }) + }; let headers = if dlpi_phdr.is_null() || dlpi_phnum == 0 { &[] } else { @@ -81,6 +107,8 @@ unsafe extern "C" fn callback( }; libs.push(Library { name, + #[cfg(target_os = "android")] + zip_offset: zip_offset.unwrap_or(0), segments: headers .iter() .map(|header| LibrarySegment { diff --git a/src/symbolize/gimli/mmap_unix.rs b/src/symbolize/gimli/mmap_unix.rs index 261ffc1d8..551328e22 100644 --- a/src/symbolize/gimli/mmap_unix.rs +++ b/src/symbolize/gimli/mmap_unix.rs @@ -15,14 +15,14 @@ pub struct Mmap { } impl Mmap { - pub unsafe fn map(file: &File, len: usize) -> Option { + pub unsafe fn map(file: &File, len: usize, offset: usize) -> Option { let ptr = mmap64( ptr::null_mut(), len, libc::PROT_READ, libc::MAP_PRIVATE, file.as_raw_fd(), - 0, + offset as i64, ); if ptr == libc::MAP_FAILED { return None; diff --git a/src/symbolize/gimli/parse_running_mmaps_unix.rs b/src/symbolize/gimli/parse_running_mmaps_unix.rs index 5d4b34675..b7062de0e 100644 --- a/src/symbolize/gimli/parse_running_mmaps_unix.rs +++ b/src/symbolize/gimli/parse_running_mmaps_unix.rs @@ -76,6 +76,10 @@ 
impl MapsEntry { pub(super) fn ip_matches(&self, ip: usize) -> bool { self.address.0 <= ip && ip < self.address.1 } + + pub(super) fn offset(&self) -> usize { + self.offset + } } impl FromStr for MapsEntry {