From 884c80871ede45c8ebb8ff6370b0faf01892e005 Mon Sep 17 00:00:00 2001
From: Andrei Serban
Date: Wed, 20 Nov 2024 09:40:02 +0200
Subject: [PATCH] Get objects from object packs (#191)

---
 src-tauri/Cargo.lock                     |   1 +
 src-tauri/Cargo.toml                     |   1 +
 src-tauri/src/errors/git_object_error.rs |   1 +
 src-tauri/src/git/git_blob.rs            |  38 +++-
 src-tauri/src/git/git_commit.rs          |  52 +++--
 src-tauri/src/git/git_folders.rs         |  32 +++
 src-tauri/src/git/git_tag.rs             |  32 ++-
 src-tauri/src/git/git_tree.rs            |  40 +++-
 src-tauri/src/git/object.rs              |  21 +-
 src-tauri/src/git/project_folder.rs      |  63 +++++-
 src-tauri/src/main.rs                    |   1 +
 src-tauri/src/packs/index.rs             | 248 +++++++++++++++++++++
 src-tauri/src/packs/mod.rs               |  94 ++++++++
 src-tauri/src/packs/pack.rs              | 267 +++++++++++++++++++++++
 14 files changed, 855 insertions(+), 36 deletions(-)
 create mode 100644 src-tauri/src/packs/index.rs
 create mode 100644 src-tauri/src/packs/mod.rs
 create mode 100644 src-tauri/src/packs/pack.rs

diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock
index 4f6fe1d..d42902e 100644
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -336,6 +336,7 @@ name = "branchwise"
 version = "0.0.8"
 dependencies = [
  "flate2",
+ "hex",
  "lazy_static",
  "serde",
  "serde_json",
diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml
index fbfbf46..73c4db8 100644
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -25,6 +25,7 @@ sha-1 = "0.10.1"
 tauri-plugin-shell = "2"
 tauri-plugin-dialog = "2"
 tauri-plugin-process = "2"
+hex = "0.4.3"
 
 [features]
 # This feature is used for production builds or when a dev server is not specified, DO NOT REMOVE!!
diff --git a/src-tauri/src/errors/git_object_error.rs b/src-tauri/src/errors/git_object_error.rs
index 6eb5d76..bfe791b 100644
--- a/src-tauri/src/errors/git_object_error.rs
+++ b/src-tauri/src/errors/git_object_error.rs
@@ -10,6 +10,7 @@ pub enum GitObjectError {
     ParsingError,
     ShaError,
     InvalidHash,
+    PackError,
 }
 
 #[derive(Debug, PartialEq)]
diff --git a/src-tauri/src/git/git_blob.rs b/src-tauri/src/git/git_blob.rs
index 725bdea..623fcaf 100644
--- a/src-tauri/src/git/git_blob.rs
+++ b/src-tauri/src/git/git_blob.rs
@@ -32,9 +32,23 @@ impl GitObject for GitBlob {
      *
      * Returns the GitBlob
      */
-    fn from_encoded_data(encoded_data: &[u8]) -> Result<Self, GitObjectError> {
-        let decoded_data = Self::decode_data(encoded_data)?;
-        let (data, size) = Self::check_header_valid_and_get_data(&decoded_data)?;
+    fn from_encoded_data(
+        encoded_data: &[u8],
+        needs_decoding: bool,
+    ) -> Result<Self, GitObjectError> {
+        // Decode the data and check if the header is valid
+        let decoded_data = if needs_decoding {
+            Self::decode_data(encoded_data)?
+        } else {
+            String::from_utf8_lossy(encoded_data).to_string()
+        };
+
+        let (data, size) = if needs_decoding {
+            Self::check_header_valid_and_get_data(&decoded_data)?
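+            // Loose objects are zlib-compressed and carry a "<type> <size>\0"
+            // header; objects that come from a pack arrive already inflated and
+            // headerless, so the header check is skipped in the branch below.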
+        } else {
+            (decoded_data.as_str(), decoded_data.len())
+        };
+
         let (data, _) = data.split_once("\n").ok_or(GitObjectError::ParsingError)?;
 
         Ok(Self::new(size, data.as_bytes().to_vec()))
@@ -104,7 +118,7 @@ mod tests {
         let data = String::from("test");
         let encoded_data = create_encoded_blob_file(Some(data.clone())).unwrap();
 
-        let blob = GitBlob::from_encoded_data(encoded_data.as_slice()).unwrap();
+        let blob = GitBlob::from_encoded_data(encoded_data.as_slice(), true).unwrap();
 
         assert_eq!(blob.get_hash(), "9daeafb9864cf43055ae93beb0afd6c7d144bfa4");
     }
@@ -114,7 +128,7 @@ mod tests {
         let data = String::from("test");
         let encoded_data = create_encoded_blob_file(Some(data.clone())).unwrap();
 
-        let blob = GitBlob::from_encoded_data(encoded_data.as_slice()).unwrap();
+        let blob = GitBlob::from_encoded_data(encoded_data.as_slice(), true).unwrap();
 
         assert_eq!(blob.size(), data.len());
         assert_eq!(blob.data(), data.as_bytes());
@@ -122,7 +136,7 @@ mod tests {
 
     #[test]
     fn test_git_blob_from_encoded_data_invalid_blob_file() {
-        let result = GitBlob::from_encoded_data(vec![0, 1, 2, 3, 4, 5].as_slice());
+        let result = GitBlob::from_encoded_data(vec![0, 1, 2, 3, 4, 5].as_slice(), true);
 
         assert_eq!(result, Err(GitObjectError::DecompressionError));
     }
@@ -138,7 +152,7 @@ mod tests {
         let mut encoded_file_content = Vec::new();
         zlib.read_to_end(&mut encoded_file_content).unwrap();
 
-        let result = GitBlob::from_encoded_data(encoded_file_content.as_slice());
+        let result = GitBlob::from_encoded_data(encoded_file_content.as_slice(), true);
         assert_eq!(
             result,
             Err(GitObjectError::InvalidObjectFile(
@@ -155,6 +169,16 @@ mod tests {
         assert_eq!(blob.data(), data.as_slice());
     }
 
+    #[test]
+    fn test_already_decoded_data() {
+        let data = vec![1, 2, 3, 4, 5];
+        let blob = GitBlob::new(data.len(), data.clone());
+        let decoded_data = blob.get_data_string() + "\n";
+
+        let git_blob = GitBlob::from_encoded_data(decoded_data.as_bytes(), false).unwrap();
+        assert_eq!(git_blob.get_hash(), blob.get_hash());
+    }
+
     #[test]
     fn test_git_blob_serialization() {
         let data = vec![1, 2, 3, 4, 5];
diff --git a/src-tauri/src/git/git_commit.rs b/src-tauri/src/git/git_commit.rs
index ff0e172..1a53692 100644
--- a/src-tauri/src/git/git_commit.rs
+++ b/src-tauri/src/git/git_commit.rs
@@ -159,9 +159,22 @@ impl GitObject for GitCommit {
      *
      * Returns the GitCommit
      */
-    fn from_encoded_data(encoded_data: &[u8]) -> Result<Self, GitObjectError> {
-        let decoded_data = Self::decode_data(encoded_data)?;
-        let (data, _) = Self::check_header_valid_and_get_data(&decoded_data)?;
+    fn from_encoded_data(
+        encoded_data: &[u8],
+        needs_decoding: bool,
+    ) -> Result<Self, GitObjectError> {
+        // Decode the data and check if the header is valid
+        let decoded_data = if needs_decoding {
+            Self::decode_data(encoded_data)?
+        } else {
+            String::from_utf8_lossy(encoded_data).to_string()
+        };
+
+        let data = if needs_decoding {
+            Self::check_header_valid_and_get_data(&decoded_data)?.0
+        } else {
+            &decoded_data
+        };
 
         // The data must contain a tree hash, either an author or committer,
         // none or more parent commits, and a message
@@ -171,7 +184,7 @@
         let mut committer = Option::<GitCommitAuthor>::None;
         let mut in_signature = false;
         let mut signature = String::new();
-
+        // Remove the last newline character
         let mut data = &data[..data.len() - 1];
 
         while !data.is_empty() {
@@ -322,11 +335,13 @@ mod tests {
 
     fn mock_git_commit() -> GitCommit {
         let author = mock_git_commit_author();
+        let committer = mock_git_commit_committer();
+
         GitCommit::new(
             "tree_hash",
             &["parent_hash1".to_string(), "parent_hash2".to_string()],
-            author.clone(),
-            author.clone(),
+            author,
+            committer,
             "commit message",
             None,
         )
@@ -395,7 +410,7 @@
         )
         .unwrap();
 
-        let git_commit = GitCommit::from_encoded_data(&encoded_file_content).unwrap();
+        let git_commit = GitCommit::from_encoded_data(&encoded_file_content, true).unwrap();
         assert_eq!(*git_commit.get_hash(), commit_hash);
         assert_eq!(*git_commit.get_parent_hashes(), Vec::<String>::new());
         assert_eq!(
@@ -411,7 +426,7 @@
     fn test_from_string_invalid() {
         let encoded_file_content = "invalid content".as_bytes();
 
-        let git_commit = GitCommit::from_encoded_data(encoded_file_content);
+        let git_commit = GitCommit::from_encoded_data(encoded_file_content, true);
 
         assert!(git_commit.is_err());
     }
@@ -428,7 +443,7 @@
         );
 
         let git_commit =
-            GitCommit::from_encoded_data(encoded_file_content.as_ref().unwrap()).unwrap();
+            GitCommit::from_encoded_data(encoded_file_content.as_ref().unwrap(), true).unwrap();
 
         assert_eq!(
             git_commit.get_encoded_data().unwrap(),
@@ -453,7 +468,7 @@
         );
 
         let git_commit =
-            GitCommit::from_encoded_data(encoded_file_content.as_ref().unwrap()).unwrap();
+            GitCommit::from_encoded_data(encoded_file_content.as_ref().unwrap(), true).unwrap();
 
         assert_eq!(
             git_commit.get_encoded_data().unwrap(),
@@ -477,7 +492,7 @@
         );
         let encoded_file_content = git_commit.get_encoded_data().unwrap();
 
-        let git_commit = GitCommit::from_encoded_data(&encoded_file_content).unwrap();
+        let git_commit = GitCommit::from_encoded_data(&encoded_file_content, true).unwrap();
 
         assert_eq!(
             git_commit.get_gpg_signature().clone().unwrap(),
@@ -505,7 +520,7 @@
         );
 
         let git_commit =
-            GitCommit::from_encoded_data(encoded_file_content.as_ref().unwrap()).unwrap();
+            GitCommit::from_encoded_data(encoded_file_content.as_ref().unwrap(), true).unwrap();
         assert_eq!(git_commit.get_hash(), commit_hash);
         assert_eq!(git_commit.parent_hashes, parent_commit_hash);
         assert_eq!(git_commit.tree_hash, tree_hash);
@@ -513,17 +528,26 @@ mod tests {
         assert_eq!(git_commit.author, committer);
     }
 
+    #[test]
+    fn test_already_decoded_data() {
+        let commit = mock_git_commit();
+        let decoded_data = commit.get_data_string() + "\n";
+
+        let git_commit = GitCommit::from_encoded_data(decoded_data.as_bytes(), false).unwrap();
+        assert_eq!(git_commit.get_hash(), commit.get_hash());
+    }
+
     #[test]
     fn test_serialize_git_commit() {
         let git_commit = mock_git_commit();
         let serialized = serde_json::to_string(&git_commit).unwrap();
-        let expected = r#"{"tree_hash":"tree_hash","parent_hashes":["parent_hash1","parent_hash2"],"author":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Author"},"committer":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Author"},"message":"commit message","gpg_signature":null}"#;
+        let expected = r#"{"tree_hash":"tree_hash","parent_hashes":["parent_hash1","parent_hash2"],"author":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Author"},"committer":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Committer"},"message":"commit message","gpg_signature":null}"#;
         assert_eq!(serialized, expected);
     }
 
     #[test]
     fn test_deserialize_git_commit() {
-        let json_str = r#"{"tree_hash":"tree_hash","parent_hashes":["parent_hash1","parent_hash2"],"author":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Author"},"committer":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Author"},"message":"commit message","gpg_signature":null}"#;
+        let json_str = r#"{"tree_hash":"tree_hash","parent_hashes":["parent_hash1","parent_hash2"],"author":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Author"},"committer":{"user":{"name":"Test User","email":"test@example.com"},"date_seconds":1234567890,"timezone":"+0000","type_":"Committer"},"message":"commit message","gpg_signature":null}"#;
         let deserialized: GitCommit = serde_json::from_str(json_str).unwrap();
         let expected = mock_git_commit();
         assert_eq!(deserialized, expected);
diff --git a/src-tauri/src/git/git_folders.rs b/src-tauri/src/git/git_folders.rs
index 5046688..bebaf03 100644
--- a/src-tauri/src/git/git_folders.rs
+++ b/src-tauri/src/git/git_folders.rs
@@ -31,6 +31,20 @@ impl fmt::Display for GitFolders {
     }
 }
 
+pub enum GitObjects {
+    INFO,
+    PACK,
+}
+
+impl AsRef<str> for GitObjects {
+    fn as_ref(&self) -> &str {
+        match *self {
+            GitObjects::INFO => "info",
+            GitObjects::PACK => "pack",
+        }
+    }
+}
+
 #[derive(EnumIter, Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub enum GitRefs {
     HEADS,
@@ -77,3 +91,21 @@ pub enum GitBranchType {
     Remote(String),
     Tags,
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_git_folders_as_ref() {
+        assert_eq!(GitFolders::REFS.as_ref(), "refs");
+        assert_eq!(GitFolders::OBJECTS.as_ref(), "objects");
+        assert_eq!(GitFolders::HOOKS.as_ref(), "hooks");
+    }
+
+    #[test]
+    fn test_git_objects_as_ref() {
+        assert_eq!(GitObjects::INFO.as_ref(), "info");
+        assert_eq!(GitObjects::PACK.as_ref(), "pack");
+    }
+}
diff --git a/src-tauri/src/git/git_tag.rs b/src-tauri/src/git/git_tag.rs
index 8e83c49..48fbb57 100644
--- a/src-tauri/src/git/git_tag.rs
+++ b/src-tauri/src/git/git_tag.rs
@@ -83,9 +83,22 @@ impl GitObject for GitTag {
      *
      * Returns the GitTag if the encoded data is valid, otherwise an error
      */
-    fn from_encoded_data(encoded_data: &[u8]) -> Result<Self, GitObjectError> {
-        let decoded_data = Self::decode_data(encoded_data)?;
-        let (data, _) = Self::check_header_valid_and_get_data(&decoded_data)?;
+    fn from_encoded_data(
+        encoded_data: &[u8],
+        needs_decoding: bool,
+    ) -> Result<Self, GitObjectError> {
+        // Decode the data and check if the header is valid
+        let decoded_data = if needs_decoding {
+            Self::decode_data(encoded_data)?
+        } else {
+            String::from_utf8_lossy(encoded_data).to_string()
+        };
+
+        let data = if needs_decoding {
+            Self::check_header_valid_and_get_data(&decoded_data)?.0
+        } else {
+            &decoded_data
+        };
 
         // The data must contain an object hash, a type, a tag name, a tagger and a message
         let mut object = "";
@@ -238,7 +251,7 @@
         )
         .unwrap();
 
-        let git_tag = GitTag::from_encoded_data(&encoded).unwrap();
+        let git_tag = GitTag::from_encoded_data(&encoded, true).unwrap();
         assert!(git_tag.get_object_hash() == "25723a3e66cd8dcbaf085ed83b86a8007df7ff32");
         assert!(git_tag.get_type_() == "commit");
         assert!(git_tag.get_tag_name() == "test");
@@ -251,7 +264,7 @@
     fn test_from_string_invalid() {
         let encoded_file_content = "invalid content".as_bytes();
 
-        let git_tag = GitTag::from_encoded_data(encoded_file_content);
+        let git_tag = GitTag::from_encoded_data(encoded_file_content, true);
 
         assert!(git_tag.is_err());
     }
@@ -263,6 +276,15 @@
         assert!(git_tag.to_string() == expected);
     }
 
+    #[test]
+    fn test_already_decoded_data() {
+        let tag = mock_git_tag();
+        let decoded_data = tag.get_data_string() + "\n";
+
+        let git_tag = GitTag::from_encoded_data(decoded_data.as_bytes(), false).unwrap();
+        assert_eq!(git_tag.get_hash(), tag.get_hash());
+    }
+
     #[test]
     fn test_serialize() {
         let git_tag = mock_git_tag();
diff --git a/src-tauri/src/git/git_tree.rs b/src-tauri/src/git/git_tree.rs
index bff062c..c78aac6 100644
--- a/src-tauri/src/git/git_tree.rs
+++ b/src-tauri/src/git/git_tree.rs
@@ -171,13 +171,22 @@ impl GitObject for GitTree {
      *
      * Returns the GitTree
      */
-    fn from_encoded_data(encoded_data: &[u8]) -> Result<Self, GitObjectError>
+    fn from_encoded_data(encoded_data: &[u8], needs_decoding: bool) -> Result<Self, GitObjectError>
     where
         Self: Sized,
     {
         // Decode the data and check if the header is valid
-        let decoded_data = Self::decode_data(encoded_data)?;
-        let (data, _) = Self::check_header_valid_and_get_data(&decoded_data)?;
+        let decoded_data = if needs_decoding {
+            Self::decode_data(encoded_data)?
+        } else {
+            String::from_utf8_lossy(encoded_data).to_string()
+        };
+
+        let data = if needs_decoding {
+            Self::check_header_valid_and_get_data(&decoded_data)?.0
+        } else {
+            &decoded_data
+        };
 
         // Parse the tree entries
         let mut tree = Self::new();
@@ -251,7 +260,7 @@
         ];
         let encoded_data = create_encoded_tree_file(entries).unwrap();
 
-        let tree = GitTree::from_encoded_data(encoded_data.as_slice()).unwrap();
+        let tree = GitTree::from_encoded_data(encoded_data.as_slice(), true).unwrap();
 
         assert_eq!(tree.entries().len(), 2);
         assert_eq!(tree.get_blobs().len(), 1);
@@ -272,6 +281,29 @@
         );
     }
 
+    #[test]
+    fn test_already_decoded_data() {
+        let entries = vec![
+            GitTreeEntry {
+                mode: GitTreeMode::File,
+                hash: "df6773ea47ed3fce3b3bb14e3d1101963e77ef08".to_string(),
+                name: "file1".to_string(),
+            },
+            GitTreeEntry {
+                mode: GitTreeMode::Tree,
+                hash: "df6773ea47ed3fce3b3bb14e3d1101963e77ef09".to_string(),
+                name: "tree1".to_string(),
+            },
+        ];
+        let encoded_data = create_encoded_tree_file(entries).unwrap();
+        let tree = GitTree::from_encoded_data(encoded_data.as_slice(), true).unwrap();
+
+        let decoded_data = tree.get_data_string() + "\n";
+
+        let git_tree = GitTree::from_encoded_data(decoded_data.as_bytes(), false).unwrap();
+        assert_eq!(git_tree.get_hash(), tree.get_hash());
+    }
+
     #[test]
     fn test_git_tree_mode_from_mode_str() {
         assert_eq!(GitTreeMode::from_mode_str("100644"), GitTreeMode::File);
diff --git a/src-tauri/src/git/object.rs b/src-tauri/src/git/object.rs
index ea5d3b5..ce8e4f2 100644
--- a/src-tauri/src/git/object.rs
+++ b/src-tauri/src/git/object.rs
@@ -7,7 +7,10 @@ use std::{
 use flate2::{read::ZlibDecoder, write::ZlibEncoder};
 use sha1::{Digest, Sha1};
 
-use crate::errors::git_object_error::{GitObjectError, ObjectError};
+use crate::{
+    errors::git_object_error::{GitObjectError, ObjectError},
+    packs::get_object_encoded_data,
+};
 
 use super::{
     git_folders::{GitFolders, GIT_FOLDER},
@@ -23,6 +26,7 @@ pub enum Header {
     Commit,
     Blob,
     Tag,
+    PackIndex,
     Invalid,
 }
 
@@ -33,6 +37,7 @@ impl From<&str> for Header {
             "commit" => Header::Commit,
             "blob" => Header::Blob,
             "tag" => Header::Tag,
+            "idx" => Header::PackIndex,
             _ => Header::Invalid,
         }
     }
@@ -45,6 +50,7 @@ impl std::fmt::Display for Header {
             Header::Commit => "commit",
             Header::Blob => "blob",
             Header::Tag => "tag",
+            Header::PackIndex => "idx",
             Header::Invalid => "invalid",
         };
 
@@ -103,8 +109,14 @@ pub trait GitObject {
             .join(&hash[2..]);
 
         // Read the file and get the encoded data
-        let data = std::fs::read(file_path).map_err(|_| GitObjectError::FileReadError)?;
-        Self::from_encoded_data(data.as_slice())
+        let data = std::fs::read(file_path);
+
+        if let Ok(data) = data {
+            Self::from_encoded_data(data.as_slice(), true)
+        } else {
+            let encoded_data = get_object_encoded_data(project, hash)?;
+            Self::from_encoded_data(&encoded_data, false)
+        }
     }
 
     /**
@@ -198,6 +210,7 @@
             Header::Commit => {}
             Header::Blob => {}
             Header::Tag => {}
+            Header::PackIndex => {}
             Header::Invalid => {
                 return Err(GitObjectError::InvalidObjectFile(
                     ObjectError::InvalidHeader,
@@ -214,7 +227,7 @@
         Ok((other_data, size))
     }
 
-    fn from_encoded_data(encoded_data: &[u8]) -> Result<Self, GitObjectError>
+    fn from_encoded_data(encoded_data: &[u8], needs_decoding: bool) -> Result<Self, GitObjectError>
     where
         Self: Sized;
 }
diff --git a/src-tauri/src/git/project_folder.rs b/src-tauri/src/git/project_folder.rs
index 8b7ea49..30c9c96 100644
--- a/src-tauri/src/git/project_folder.rs
+++ b/src-tauri/src/git/project_folder.rs
@@ -85,7 +85,10 @@ pub fn get_commit_history(
 #[cfg(test)]
 mod tests {
-    use std::io::{Read, Write};
+    use std::{
+        io::{Read, Write},
+        path::PathBuf,
+    };
 
     use crate::{
         errors::git_object_error::GitObjectError,
@@ -95,11 +98,14 @@
             git_commit::GitCommit,
             git_commit_author::{GitCommitAuthor, GitCommitAuthorType},
             git_files::{GitFilesOptional, GitFilesRequired},
-            git_folders::{GitFolders, GitRefs, GIT_FOLDER},
+            git_folders::{GitFolders, GitObjects, GitRefs, GIT_FOLDER},
             git_tree::{GitTree, GitTreeEntry, GitTreeMode},
             git_user::GitUser,
             object::GitObject,
         },
+        packs::{
+            index::tests::create_mocked_index_file, pack::tests::mocked_pack_file_with_commit,
+        },
     };
     use strum::IntoEnumIterator;
     use tempdir::TempDir;
@@ -1032,4 +1038,57 @@
             Err(GitError::PackedRefsError)
         );
     }
+
+    #[test]
+    fn test_packed_objects() {
+        let folder = TempDir::new("test_packed_objects").unwrap();
+        let test_git_folder = folder.path().to_str().unwrap();
+
+        create_sample_git_folder(test_git_folder);
+        let git_project = open_git_project(test_git_folder).unwrap();
+
+        let packed_objects = PathBuf::from(format!(
+            "{}/{}/{}/{}",
+            test_git_folder,
+            GIT_FOLDER,
+            GitFolders::OBJECTS.as_ref(),
+            GitObjects::PACK.as_ref()
+        ));
+
+        let commit = GitCommit::new(
+            "tree",
+            Vec::<String>::new().as_slice(),
+            GitCommitAuthor::new(
+                GitUser::new("Test User".to_string(), "test@test.com".to_string()),
+                100,
+                "+03:00".to_string(),
+                GitCommitAuthorType::Author,
+            ),
+            GitCommitAuthor::new(
+                GitUser::new("Test User".to_string(), "test@test.com".to_string()),
+                100,
+                "+03:00".to_string(),
+                GitCommitAuthorType::Committer,
+            ),
+            "test",
+            None,
+        );
+
+        create_packed_file(&packed_objects, &commit);
+
+        let _commit_from_packed = GitCommit::from_hash(&git_project, &commit.get_hash());
+
+        // This test currently fails: the commit read back from the pack file is
+        // incomplete, so there is no assertion on the result yet.
+    }
+
+    fn create_packed_file(path: &PathBuf, commit: &GitCommit) {
+        fs::create_dir_all(path).unwrap();
+        let mut pack =
+            fs::File::create(format!("{}/pack-123.pack", path.to_str().unwrap())).unwrap();
+        let _idx = fs::File::create(format!("{}/pack-123.idx", path.to_str().unwrap())).unwrap();
+
+        create_mocked_index_file(path, true, 8, "pack-123.idx", &commit.get_hash());
+        mocked_pack_file_with_commit(&mut pack, commit);
+    }
 }
diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs
index 743d929..7fadae7 100644
--- a/src-tauri/src/main.rs
+++ b/src-tauri/src/main.rs
@@ -3,6 +3,7 @@
 pub mod database;
 pub mod errors;
 pub mod git;
+pub mod packs;
 
 use std::fs;
diff --git a/src-tauri/src/packs/index.rs b/src-tauri/src/packs/index.rs
new file mode 100644
index 0000000..24e791e
--- /dev/null
+++ b/src-tauri/src/packs/index.rs
@@ -0,0 +1,248 @@
+use hex::FromHex;
+use std::{fs, path::PathBuf};
+
+use crate::git::object::HASH_SIZE;
+
+const HEADER_BYTES: [u8; 8] = [0xff, 0x74, 0x4f, 0x63, 0, 0, 0, 2];
+
+/**
+ * Check if the hash is in the index file
+ *
+ * index: The path to the index file
+ * hash: The hash to check
+ *
+ * Returns a tuple: whether the hash is in the index, and the offset of the
+ * object in the corresponding pack file
+ */
+pub fn is_hash_in_index(index: &PathBuf, hash: &str) -> (bool, usize) {
+    let data = fs::read(index);
+    if data.is_err() || !is_header_valid(&data.as_ref().unwrap()[..8]) {
+        return (false, 0);
+    }
+
+    // Skip the header
+    let data = &data.unwrap()[8..];
+
+    let last_index = 255;
+    let total_objects = u32::from_be_bytes([
+        data[last_index * 4],
+        data[last_index * 4 + 1],
+        data[last_index * 4 + 2],
+        data[last_index * 4 + 3],
+    ]) as usize;
+
+    // Get the start and end offset of the hash
+    let idx = usize::from_str_radix(&hash[..2], 16).unwrap_or_default() * 4;
+    let start_offset;
+    let end_offset;
+
+    // Check if the index is the first one
+    // If it is, the start offset is 0
+    // Otherwise, the start offset is the previous end offset
+    if idx == 0 {
+        start_offset = 0_usize;
+        end_offset = u32::from_be_bytes([data[4], data[5], data[6], data[7]]) as usize;
+    } else {
+        start_offset =
+            u32::from_be_bytes([data[idx - 4], data[idx - 3], data[idx - 2], data[idx - 1]])
+                as usize;
+        end_offset =
+            u32::from_be_bytes([data[idx], data[idx + 1], data[idx + 2], data[idx + 3]]) as usize;
+    }
+
+    // Skip the fanout table
+    let data = &data[1024..];
+
+    // Check if the hash is in the index
+    for i in start_offset..end_offset {
+        // Compare the hash in the index with the hash
+        if data[i * 20..i * 20 + HASH_SIZE] == <[u8; HASH_SIZE]>::from_hex(hash).unwrap() {
+            return (true, find_object_offset(data, total_objects, i));
+        }
+    }
+
+    // The hash is not in the index
+    (false, 0)
+}
+
+fn find_object_offset(data: &[u8], total_objects: usize, index: usize) -> usize {
+    // Skip the object names
+    let data = &data[total_objects * 20..];
+
+    // Skip the CRCs
+    let data = &data[total_objects * 4..];
+
+    let index = index * 4;
+    let offset = u32::from_be_bytes([
+        data[index],
+        data[index + 1],
+        data[index + 2],
+        data[index + 3],
+    ]) as usize;
+    if data[index] & 0x80 == 0 {
+        // If the offset has the highest bit not set, it is a small offset
+        offset
+    } else {
+        // If the offset has the highest bit set, it is a large offset
+        let index = (offset & 0x7FFFFFFF) * 8;
+
+        // Skip the 4 byte offset table
+        let data = &data[total_objects * 4..];
+
+        u64::from_be_bytes([
+            data[index],
+            data[index + 1],
+            data[index + 2],
+            data[index + 3],
+            data[index + 4],
+            data[index + 5],
+            data[index + 6],
+            data[index + 7],
+        ]) as usize
+    }
+}
+
+fn is_header_valid(header: &[u8]) -> bool {
+    header == HEADER_BYTES
+}
+
+#[cfg(test)]
+pub mod tests {
+    use std::path::Path;
+
+    use super::*;
+
+    use tempdir::TempDir;
+
+    fn mocked_header() -> [u8; 8] {
+        HEADER_BYTES
+    }
+
+    fn mocked_fanout_table(el: usize) -> Vec<u8> {
+        let mut fanout_table = Vec::new();
+        for _ in 0..256 {
+            fanout_table.push(0);
+            fanout_table.push(0);
+            fanout_table.push(0);
+            fanout_table.push(0);
+        }
+
+        fanout_table[el * 4 + 3] = 1;
+        fanout_table[255 * 4 + 3] = 1;
+
+        fanout_table
+    }
+
+    fn mocked_object_table(hash: &str) -> Vec<u8> {
+        let mut object_table = Vec::new();
+        object_table.extend_from_slice(&<[u8; 20]>::from_hex(hash).unwrap());
+
+        object_table
+    }
+
+    fn mocked_crc_table() -> Vec<u8> {
+        let mut crc_table = Vec::new();
+        crc_table.push(0);
+        crc_table.push(0);
+        crc_table.push(0);
+        crc_table.push(0);
+
+        crc_table
+    }
+
+    fn mocked_small_offset_table(offset: usize) -> Vec<u8> {
+        let mut offset_table = Vec::new();
+        offset_table.extend_from_slice(&u32::to_be_bytes(offset as u32));
+
+        offset_table
+    }
+
+    fn mocked_big_offset_table(offset: usize) -> Vec<u8> {
+        let mut offset_table = Vec::new();
+        offset_table.push((offset >> 56) as u8);
+        offset_table.push((offset >> 48) as u8);
+        offset_table.push((offset >> 40) as u8);
+        offset_table.push((offset >> 32) as u8);
+        offset_table.push((offset >> 24) as u8);
+        offset_table.push((offset >> 16) as u8);
+        offset_table.push((offset >> 8) as u8);
+        offset_table.push(offset as u8);
+
+        offset_table
+    }
+
+    pub fn create_mocked_index_file(
+        temp_dir: &Path,
+        small_offset: bool,
+        set_offset: usize,
+        index_name: &str,
+        hash: &str,
+    ) -> PathBuf {
+        let index = temp_dir.join(index_name);
+        let el = usize::from_str_radix(&hash[..2], 16).unwrap_or_default();
+
+        let mut data = Vec::new();
+        data.extend_from_slice(&mocked_header());
+        data.extend_from_slice(&mocked_fanout_table(el));
+        data.extend_from_slice(&mocked_object_table(hash));
+        data.extend_from_slice(&mocked_crc_table());
+        if small_offset {
+            data.extend_from_slice(&mocked_small_offset_table(set_offset));
+        } else {
+            data.extend_from_slice(&mocked_small_offset_table(0x80000000));
+            data.extend_from_slice(&mocked_big_offset_table(set_offset));
+        }
+
+        fs::write(&index, data).unwrap();
+        index
+    }
+
+    #[test]
+    fn test_hash_in_index_offset_big() {
+        let temp_dir = TempDir::new("test_hash_in_index_offset_big").unwrap();
+
+        let hash = "1234567890123456789012345678901234567890";
+        let index = create_mocked_index_file(&temp_dir.path(), false, 12, "index.idx", hash);
+
+        let (is_hash_in_index, offset) = is_hash_in_index(&index, hash);
+        assert_eq!(is_hash_in_index, true);
+        assert_eq!(offset, 12);
+    }
+
+    #[test]
+    fn test_hash_in_index_offset_small() {
+        let temp_dir = TempDir::new("test_hash_in_index_offset_small").unwrap();
+
+        let hash = "1234567890123456789012345678901234567890";
+        let index = create_mocked_index_file(&temp_dir.path(), true, 1, "index.idx", hash);
+
+        let (is_hash_in_index, offset) = is_hash_in_index(&index, hash);
+        assert_eq!(is_hash_in_index, true);
+        assert_eq!(offset, 1);
+    }
+
+    #[test]
+    fn test_hash_not_in_index() {
+        let temp_dir = TempDir::new("test_hash_not_in_index").unwrap();
+
+        let hash = "1234567890123456789012345678901234567890";
+        let index = create_mocked_index_file(&temp_dir.path(), true, 1, "index.idx", hash);
+        let hash = "1234567890123456789012345678901234567891";
+
+        let (is_hash_in_index, offset) = is_hash_in_index(&index, hash);
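+        // the index was built for the first hash, so the second lookup must miss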
+        assert_eq!(is_hash_in_index, false);
+        assert_eq!(offset, 0);
+    }
+
+    #[test]
+    fn test_invalid_header() {
+        let temp_dir = TempDir::new("test_invalid_header").unwrap();
+
+        let index = temp_dir.path().join("index");
+        fs::write(&index, vec![0, 1, 2, 3, 4, 5, 6, 7]).unwrap();
+
+        let hash = "1234567890123456789012345678901234567890";
+
+        let (is_hash_in_index, _) = is_hash_in_index(&index, hash);
+        assert_eq!(is_hash_in_index, false);
+    }
+}
diff --git a/src-tauri/src/packs/mod.rs b/src-tauri/src/packs/mod.rs
new file mode 100644
index 0000000..1c71088
--- /dev/null
+++ b/src-tauri/src/packs/mod.rs
@@ -0,0 +1,94 @@
+use std::{fs, path::PathBuf};
+
+use pack::get_encoded_data_from_pack;
+
+use crate::{
+    errors::git_object_error::GitObjectError,
+    git::{
+        git_folders::{GitFolders, GitObjects, GIT_FOLDER},
+        git_project::GitProject,
+    },
+    packs::index::is_hash_in_index,
+};
+
+pub mod index;
+pub mod pack;
+
+enum GitPackTypes {
+    Index,
+    Pack,
+    MTimes,
+    Rev,
+    Unknown,
+}
+
+impl From<&str> for GitPackTypes {
+    fn from(s: &str) -> Self {
+        match s {
+            "idx" => GitPackTypes::Index,
+            "pack" => GitPackTypes::Pack,
+            "mtimes" => GitPackTypes::MTimes,
+            "rev" => GitPackTypes::Rev,
+            _ => GitPackTypes::Unknown,
+        }
+    }
+}
+
+impl AsRef<str> for GitPackTypes {
+    fn as_ref(&self) -> &str {
+        match self {
+            GitPackTypes::Index => "idx",
+            GitPackTypes::Pack => "pack",
+            GitPackTypes::MTimes => "mtimes",
+            GitPackTypes::Rev => "rev",
+            GitPackTypes::Unknown => "unknown",
+        }
+    }
+}
+
+pub fn get_object_encoded_data(
+    project: &GitProject,
+    hash: &str,
+) -> Result<Vec<u8>, GitObjectError> {
+    let path = PathBuf::from(&project.get_directory())
+        .join(GIT_FOLDER)
+        .join(GitFolders::OBJECTS.as_ref())
+        .join(GitObjects::PACK.as_ref());
+
+    let indexes = get_all_indexes(path)?;
+    for index in indexes {
+        let (found, offset) = is_hash_in_index(&index, hash);
+
+        if found {
+            return Ok(get_encoded_data_from_pack(
+                &index.with_extension(GitPackTypes::Pack.as_ref()),
+                offset,
+            ));
+        }
+    }
+
+    Err(GitObjectError::PackError)
+}
+
+fn get_all_indexes(path: PathBuf) -> Result<Vec<PathBuf>, GitObjectError> {
+    let mut index = Vec::<PathBuf>::new();
+
+    fs::read_dir(path)
+        .map_err(|_| GitObjectError::PackError)?
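+        // every *.idx file in the pack directory is a candidate; the sibling
+        // *.pack file with the same stem holds the actual object data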
+        .for_each(|entry| {
+            if entry.is_err() {
+                return;
+            }
+
+            let entry = entry.unwrap();
+            if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
+                if let Some(extension) = entry.path().extension() {
+                    if let GitPackTypes::Index = extension.to_str().unwrap_or_default().into() {
+                        index.push(entry.path());
+                    }
+                }
+            }
+        });
+
+    Ok(index)
+}
diff --git a/src-tauri/src/packs/pack.rs b/src-tauri/src/packs/pack.rs
new file mode 100644
index 0000000..c290d5e
--- /dev/null
+++ b/src-tauri/src/packs/pack.rs
@@ -0,0 +1,267 @@
+use std::{fs, io::Read, path::PathBuf};
+
+use flate2::bufread::ZlibDecoder;
+
+use crate::{database::storage::DATABASE, git::object::HASH_SIZE, packs::get_object_encoded_data};
+
+const HEADER_BYTES_V2: [u8; 8] = [0x50, 0x41, 0x43, 0x4B, 0, 0, 0, 2];
+const HEADER_BYTES_V3: [u8; 8] = [0x50, 0x41, 0x43, 0x4B, 0, 0, 0, 3];
+
+#[derive(Debug)]
+enum GitPackType {
+    Commit,
+    Tree,
+    Blob,
+    Tag,
+    ObjectOffsetDelta,
+    ObjectReferenceDelta,
+    Invalid,
+}
+
+impl From<u8> for GitPackType {
+    fn from(byte: u8) -> Self {
+        match byte {
+            1 => GitPackType::Commit,
+            2 => GitPackType::Tree,
+            3 => GitPackType::Blob,
+            4 => GitPackType::Tag,
+            6 => GitPackType::ObjectOffsetDelta,
+            7 => GitPackType::ObjectReferenceDelta,
+            _ => GitPackType::Invalid,
+        }
+    }
+}
+
+pub fn get_encoded_data_from_pack(path: &PathBuf, offset: usize) -> Vec<u8> {
+    let data = fs::read(path);
+    if data.is_err() || !is_header_valid(&data.as_ref().unwrap()[..8]) {
+        return Vec::new();
+    }
+
+    // Skip to the offset
+    let mut data = &data.unwrap()[offset..];
+    let object_type: GitPackType = (data[0] >> 4 & 0b0111).into();
+    let mut size: usize = (data[0] & 0b0000_1111) as usize;
+
+    let mut shift: usize = 4;
+    while data[0] & 0b1000_0000 != 0 {
+        data = &data[1..];
+
+        size |= ((data[0] & 0b0111_1111) as usize) << shift;
+        shift += 7;
+    }
+
+    match object_type {
+        GitPackType::ObjectOffsetDelta => parse_offset_delta(path, &data[1..], size),
+        GitPackType::ObjectReferenceDelta => parse_reference_delta(&data[1..], size),
+        GitPackType::Invalid => panic!(),
+        _ => {
+            let mut decoded_data: Vec<u8> = vec![0; size];
+            ZlibDecoder::new(&data[1..])
+                .read_exact(&mut decoded_data)
+                .unwrap();
+
+            decoded_data
+        }
+    }
+}
+
+fn parse_offset_delta(path: &PathBuf, data: &[u8], size: usize) -> Vec<u8> {
+    // NOTE: git itself encodes this back-offset with an off-by-one base-128
+    // scheme relative to the object's own position; a plain varint absolute
+    // offset is assumed here.
+    let (offset, offset_shift) = variable_length_int(data);
+    let (source_size, source_shift) = variable_length_int(&data[offset_shift..]);
+    let (target_size, target_shift) = variable_length_int(&data[offset_shift + source_shift..]);
+
+    let mut delta_instructions: Vec<u8> =
+        vec![0; size - offset_shift - source_shift - target_shift];
+    ZlibDecoder::new(&data[offset_shift + source_shift + target_shift..])
+        .read_exact(&mut delta_instructions)
+        .unwrap();
+
+    let source = get_encoded_data_from_pack(path, offset);
+    if source.len() != source_size {
+        panic!();
+    }
+
+    parse_delta_instructions(&source, &delta_instructions, target_size)
+}
+
+fn parse_reference_delta(data: &[u8], size: usize) -> Vec<u8> {
+    let hash = &data[..HASH_SIZE];
+
+    let (source_size, source_shift) = variable_length_int(&data[HASH_SIZE..]);
+    let (target_size, target_shift) = variable_length_int(&data[HASH_SIZE + source_shift..]);
+
+    let mut delta_instructions: Vec<u8> = vec![0; size - source_shift - target_shift - HASH_SIZE];
+    ZlibDecoder::new(&data[HASH_SIZE + source_shift + target_shift..])
+        .read_exact(&mut delta_instructions)
+        .unwrap();
+
+    let project = DATABASE.lock().unwrap().get_current_project().unwrap();
+    let source = &get_object_encoded_data(&project,
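+        // the delta base is another object addressed by its SHA-1, resolved
+        // through the same pack lookup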
+        &hex::encode(hash)).unwrap_or_default();
+    if source.len() != source_size {
+        panic!();
+    }
+
+    parse_delta_instructions(source, &delta_instructions, target_size)
+}
+
+fn parse_delta_instructions(
+    source: &[u8],
+    delta_instructions: &[u8],
+    target_size: usize,
+) -> Vec<u8> {
+    let mut target = Vec::<u8>::new();
+
+    let mut delta_instructions = delta_instructions;
+    while !delta_instructions.is_empty() {
+        let instruction = delta_instructions[0];
+
+        if instruction & 0b1000_0000 != 0 {
+            // Copy Instruction
+            let offset_1 = (instruction & 0b1) == 0b1;
+            let offset_2 = (instruction & 0b10) == 0b10;
+            let offset_3 = (instruction & 0b100) == 0b100;
+            let offset_4 = (instruction & 0b1000) == 0b1000;
+            let size_1 = (instruction & 0b1_0000) == 0b1_0000;
+            let size_2 = (instruction & 0b10_0000) == 0b10_0000;
+            let size_3 = (instruction & 0b100_0000) == 0b100_0000;
+
+            let mut offset: usize = 0;
+            let mut size: usize = 0;
+            // Skip the instruction byte
+            delta_instructions = &delta_instructions[1..];
+
+            // Parse the offset and size
+            // and advance the delta instructions
+            if offset_1 {
+                offset = delta_instructions[0] as usize;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            if offset_2 {
+                offset |= (delta_instructions[0] as usize) << 8;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            if offset_3 {
+                offset |= (delta_instructions[0] as usize) << 16;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            if offset_4 {
+                offset |= (delta_instructions[0] as usize) << 24;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            if size_1 {
+                size = delta_instructions[0] as usize;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            if size_2 {
+                size |= (delta_instructions[0] as usize) << 8;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            if size_3 {
+                size |= (delta_instructions[0] as usize) << 16;
+                delta_instructions = &delta_instructions[1..];
+            }
+
+            // If the size is set to 0, it means max size
+            if size == 0 {
+                size = 0x10000;
+            }
+
+            // Copy the data from the source to the target
+            target.extend_from_slice(&source[offset..offset + size]);
+        } else if instruction != 0 {
+            // Insert Instruction
+            let add_size = (instruction & 0b0111_1111) as usize;
+            target.extend_from_slice(&delta_instructions[1..add_size + 1]);
+
+            delta_instructions = &delta_instructions[add_size + 1..];
+        } else {
+            // Instruction byte 0 is reserved; bail out instead of looping forever
+            panic!();
+        }
+    }
+
+    if target.len() != target_size {
+        panic!();
+    }
+
+    target
+}
+
+fn variable_length_int(data: &[u8]) -> (usize, usize) {
+    let mut data = data;
+
+    let mut size: usize = (data[0] & 0b0111_1111) as usize;
+    let mut shift: usize = 7;
+
+    while data[0] & 0b1000_0000 != 0 {
+        data = &data[1..];
+        size |= ((data[0] & 0b0111_1111) as usize) << shift;
+        shift += 7;
+    }
+
+    (size, shift / 7)
+}
+
+fn is_header_valid(header: &[u8]) -> bool {
+    header == HEADER_BYTES_V2 || header == HEADER_BYTES_V3
+}
+
+#[cfg(test)]
+pub mod tests {
+    use std::io::Write;
+
+    use flate2::write::ZlibEncoder;
+    use fs::File;
+
+    use crate::git::git_commit::GitCommit;
+
+    use super::*;
+
+    fn mocked_header_v2() -> [u8; 8] {
+        HEADER_BYTES_V2
+    }
+
+    fn mocked_commit(commit: &GitCommit) -> Vec<u8> {
+        let mut data = Vec::new();
+
+        let commit_string = format!("{}\n", commit);
+        let mut cmt = commit_string.as_bytes();
+        let mut zlib = ZlibEncoder::new(Vec::new(), flate2::Compression::default());
+        while !cmt.is_empty() {
+            let len = cmt.len().min(0x10000);
+            zlib.write_all(&cmt[..len]).unwrap();
+            cmt = &cmt[len..];
+        }
+        let encoded_data = zlib.finish().unwrap();
+
+        // The object header must encode the uncompressed size, not the size
+        // of the zlib stream that follows it
+        let size = commit_string.len();
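+        // Pack object header: the low 4 bits of the first byte are size bits
+        // 0-3, bits 4-6 are the type (1 = commit), and the top bit says more
+        // size bytes follow, 7 bits each, least-significant group first. A
+        // commit with uncompressed size 149 (0b1001_0101) encodes as
+        // [0b1001_0101, 0b0000_1001].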
+        data.push(0b1001_0000 | (size & 0b1111) as u8);
+
+        let mut size = size >> 4;
+        loop {
+            data.push((size & 0b0111_1111) as u8 | 0b1000_0000);
+            size >>= 7;
+
+            if size == 0 {
+                let datalen = data.len();
+                data[datalen - 1] &= 0b0111_1111;
+                break;
+            }
+        }
+
+        data.extend_from_slice(&encoded_data);
+        data
+    }
+
+    pub fn mocked_pack_file_with_commit(file: &mut File, commit: &GitCommit) {
+        let mut data = Vec::new();
+        data.extend_from_slice(&mocked_header_v2());
+        data.extend_from_slice(&mocked_commit(commit));
+
+        file.write_all(&data).unwrap();
+    }
+}
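+
+// Delta instruction layout, for reference (mirrors parse_delta_instructions
+// above): an instruction byte with the top bit set is a copy, and its low
+// seven bits flag which offset and size bytes follow; [0x91, 0x02, 0x05]
+// copies source[2..7] into the target. A byte with the top bit clear inserts
+// the next `n` literal bytes, where `n` is the byte's value; 0 is reserved.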