From 944cdef946d07c3771032460944e4b9dd81551ff Mon Sep 17 00:00:00 2001 From: Sander Date: Tue, 10 Dec 2024 08:26:45 +0400 Subject: [PATCH] eval-cache: track env vars during caching --- .../20241210011111_create-env-input.sql | 16 + devenv-eval-cache/src/command.rs | 301 +++++++++++++----- devenv-eval-cache/src/db.rs | 281 ++++++++++------ devenv-eval-cache/src/lib.rs | 4 +- devenv-eval-cache/src/op.rs | 24 +- devenv/src/cnix.rs | 2 +- devenv/src/devenv.rs | 16 +- 7 files changed, 443 insertions(+), 201 deletions(-) create mode 100644 devenv-eval-cache/migrations/20241210011111_create-env-input.sql diff --git a/devenv-eval-cache/migrations/20241210011111_create-env-input.sql b/devenv-eval-cache/migrations/20241210011111_create-env-input.sql new file mode 100644 index 000000000..572675380 --- /dev/null +++ b/devenv-eval-cache/migrations/20241210011111_create-env-input.sql @@ -0,0 +1,16 @@ +-- Rename table for file inputs +ALTER TABLE file_path +RENAME TO file_input; + +ALTER TABLE cmd_input_path +RENAME COLUMN file_path_id TO file_input_id; + +CREATE TABLE env_input ( + id INTEGER NOT NULL PRIMARY KEY, + cached_cmd_id INTEGER NOT NULL, + name TEXT NOT NULL, + content_hash CHAR(64) NOT NULL, + updated_at INTEGER NOT NULL DEFAULT (strftime ('%s', 'now')), + FOREIGN KEY (cached_cmd_id) REFERENCES cached_cmd (id) ON DELETE CASCADE, + UNIQUE (cached_cmd_id, name) +); diff --git a/devenv-eval-cache/src/command.rs b/devenv-eval-cache/src/command.rs index 023e57362..efb509995 100644 --- a/devenv-eval-cache/src/command.rs +++ b/devenv-eval-cache/src/command.rs @@ -6,7 +6,7 @@ use std::path::{Path, PathBuf}; use std::process::{self, Command, Stdio}; use std::time::{SystemTime, UNIX_EPOCH}; use thiserror::Error; -use tracing::{debug, info, trace}; +use tracing::{debug, trace}; use crate::{ db, hash, @@ -144,28 +144,47 @@ impl<'a> CachedCommand<'a> { }); } - let mut sources = ops - .into_iter() - .filter_map(|op| op.owned_source()) - .filter(|source| { - !self - .excluded_paths - .iter() - .any(|path| source.starts_with(path)) - }) - .collect::>(); + let mut env_inputs = Vec::new(); + let mut sources = Vec::new(); + + for op in ops.into_iter() { + if let Op::GetEnv { name } = op { + if let Ok(env_input) = EnvInputDesc::new(name) { + env_inputs.push(env_input); + } + continue; + } + + match op { + Op::CopiedSource { source, .. } + | Op::EvaluatedFile { source } + | Op::ReadFile { source } + | Op::ReadDir { source } + | Op::TrackedPath { source } + if !self + .excluded_paths + .iter() + .any(|path| source.starts_with(path)) => + { + sources.push(source); + } + _ => {} + } + } // Watch additional paths sources.extend_from_slice(&self.extra_paths); - let file_path_futures = sources + let file_input_futures = sources .into_iter() .map(|source| { - tokio::task::spawn_blocking(move || FilePath::new(source).map_err(CommandError::Io)) + tokio::task::spawn_blocking(move || { + FileInputDesc::new(source).map_err(CommandError::Io) + }) }) .collect::>(); - let mut file_paths = join_all(file_path_futures) + let file_inputs = join_all(file_input_futures) .await .into_iter() .flatten() @@ -173,23 +192,24 @@ impl<'a> CachedCommand<'a> { .filter_map(Result::ok) .collect::>(); - file_paths.sort_by(|a, b| a.path.cmp(&b.path)); - file_paths.dedup(); + let mut inputs = file_inputs + .into_iter() + .map(Input::File) + .chain(env_inputs.into_iter().map(Input::Env)) + .collect::>(); - let input_hash = hash::digest( - &file_paths - .iter() - .map(|p| p.content_hash.clone()) - .collect::(), - ); + inputs.sort(); + inputs.dedup(); - let _ = db::insert_command_with_files( + let input_hash = Input::compute_input_hash(&inputs); + + let _ = db::insert_command_with_inputs( self.pool, &raw_cmd, &cmd_hash, &input_hash, &stdout, - &file_paths, + &inputs, ) .await .map_err(CommandError::Sqlx)?; @@ -198,7 +218,7 @@ impl<'a> CachedCommand<'a> { status, stdout, stderr, - paths: file_paths, + inputs, }) } } @@ -216,19 +236,61 @@ pub struct Output { pub stdout: Vec, /// The data that the process wrote to stderr. pub stderr: Vec, - /// A list of paths that the command depends on and their hashes. - pub paths: Vec, + /// A list of inputs that the command depends on and their hashes. + pub inputs: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub enum Input { + File(FileInputDesc), + Env(EnvInputDesc), } -#[derive(Clone, Debug, PartialEq)] -pub struct FilePath { +impl Input { + pub fn content_hash(&self) -> &str { + match self { + Self::File(desc) => &desc.content_hash, + Self::Env(desc) => &desc.content_hash, + } + } + + pub fn compute_input_hash(inputs: &[Self]) -> String { + inputs + .iter() + .map(|input| input.content_hash()) + .collect::() + } + + pub fn partition_refs(inputs: &[Self]) -> (Vec<&FileInputDesc>, Vec<&EnvInputDesc>) { + let mut file_inputs = Vec::new(); + let mut env_inputs = Vec::new(); + + for input in inputs { + match input { + Self::File(desc) => file_inputs.push(desc), + Self::Env(desc) => env_inputs.push(desc), + } + } + + (file_inputs, env_inputs) + } +} + +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd)] +pub struct FileInputDesc { pub path: PathBuf, pub is_directory: bool, pub content_hash: String, pub modified_at: SystemTime, } -impl FilePath { +impl Ord for FileInputDesc { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.path.cmp(&other.path) + } +} + +impl FileInputDesc { pub fn new(path: PathBuf) -> Result { let is_directory = path.is_dir(); let content_hash = if is_directory { @@ -250,8 +312,59 @@ impl FilePath { } } -impl From for FilePath { - fn from(row: db::FilePathRow) -> Self { +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd)] +pub struct EnvInputDesc { + pub name: String, + pub content_hash: String, +} + +impl Ord for EnvInputDesc { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.name.cmp(&other.name) + } +} + +impl EnvInputDesc { + pub fn new(name: String) -> Result { + let value = std::env::var(&name).unwrap_or_default(); + let content_hash = hash::digest(&value); + Ok(Self { name, content_hash }) + } +} + +impl Input { + pub fn from_path(path: PathBuf) -> Result { + let file = FileInputDesc::new(path)?; + Ok(Self::File(file)) + } + + pub fn from_env_var(name: String) -> Result { + let env = EnvInputDesc::new(name)?; + Ok(Self::Env(env)) + } + + pub fn to_identifier(&self) -> String { + match self { + Self::File(file) => file.path.to_string_lossy().to_string(), + Self::Env(env) => format!("${}", env.name), + } + } +} + +impl From for Input { + fn from(row: db::FileInputRow) -> Self { + Self::File(row.into()) + } +} + +impl From for Input { + fn from(row: db::EnvInputRow) -> Self { + Self::Env(row.into()) + } +} + +impl From for FileInputDesc { + fn from(row: db::FileInputRow) -> Self { Self { path: row.path, is_directory: row.is_directory, @@ -261,6 +374,15 @@ impl From for FilePath { } } +impl From for EnvInputDesc { + fn from(row: db::EnvInputRow) -> Self { + Self { + name: row.name, + content_hash: row.content_hash, + } + } +} + /// Try to fetch the cached output for a hashed command. /// /// Returns the cached output if the command has been cached and none of the file dependencies have @@ -274,21 +396,27 @@ async fn query_cached_output( .map_err(CommandError::Sqlx)?; if let Some(cmd) = cached_cmd { - let mut files = db::get_files_by_command_id(pool, cmd.id) + let files = db::get_files_by_command_id(pool, cmd.id) .await .map_err(CommandError::Sqlx)?; - files.sort_by(|a, b| a.path.cmp(&b.path)); - files.dedup(); + let envs = db::get_envs_by_command_id(pool, cmd.id) + .await + .map_err(CommandError::Sqlx)?; + + use std::sync::Arc; + let mut inputs = files + .into_iter() + .map(Input::from) + .chain(envs.into_iter().map(Input::from)) + .collect::>(); + + inputs.sort(); + inputs.dedup(); let mut should_refresh = false; - let new_input_hash = hash::digest( - &files - .iter() - .map(|f| f.content_hash.clone()) - .collect::(), - ); + let new_input_hash = Input::compute_input_hash(&inputs); // Hash of input hashes do not match if cmd.input_hash != new_input_hash { @@ -300,24 +428,36 @@ async fn query_cached_output( should_refresh = true; } + let inputs = Arc::new(inputs); + if !should_refresh { let mut set = tokio::task::JoinSet::new(); - for file in &files { - let file = file.clone(); - set.spawn_blocking(|| check_file_state(file)); + for (index, _) in inputs.iter().enumerate() { + let inputs = Arc::clone(&inputs); + set.spawn_blocking(move || match &inputs[index] { + Input::File(file) => { + let res = check_file_state(&file); + (index, res) + } + Input::Env(env) => { + let res = check_env_state(&env); + (index, res) + } + }); } while let Some(res) = set.join_next().await { - if let Ok(Ok(file_state)) = res { + if let Ok((index, Ok(file_state))) = res { match file_state { - FileState::MetadataModified { - modified_at, path, .. - } => { - // TODO: batch with query builder? - db::update_file_modified_at(pool, path, modified_at) - .await - .map_err(CommandError::Sqlx)?; + FileState::MetadataModified { modified_at, .. } => { + if let Input::File(file) = &inputs[index] { + // TODO: batch with query builder? + let path = file.path.to_string_lossy().to_string(); + db::update_file_modified_at(pool, path, modified_at) + .await + .map_err(CommandError::Sqlx)?; + } } FileState::Modified { .. } => { should_refresh = true; @@ -345,7 +485,7 @@ async fn query_cached_output( status: process::ExitStatus::default(), stdout: cmd.output, stderr: Vec::new(), - paths: files.into_iter().map(FilePath::from).collect(), + inputs: Arc::try_unwrap(inputs).unwrap_or_else(|arc| (*arc).clone()), })) } } else { @@ -379,39 +519,35 @@ fn extract_op_from_log_line(log: &InternalLog) -> Option { #[allow(dead_code)] enum FileState { /// The file has not been modified since it was last cached. - Unchanged { path: PathBuf }, + Unchanged, /// The file's metadata, i.e. timestamp, has changed, but its content remains the same. - MetadataModified { - path: PathBuf, - modified_at: SystemTime, - }, + MetadataModified { modified_at: SystemTime }, /// The file's contents have been modified. Modified { - path: PathBuf, new_hash: String, modified_at: SystemTime, }, /// The file no longer exists in the file system. - Removed { path: PathBuf }, + Removed, } -fn check_file_state(file: db::FilePathRow) -> io::Result { +fn check_file_state(file: &FileInputDesc) -> io::Result { let metadata = match std::fs::metadata(&file.path) { Ok(metadata) => metadata, // Fix - Err(_) => return Ok(FileState::Removed { path: file.path }), + Err(_) => return Ok(FileState::Removed), }; let modified_at = metadata.modified().and_then(truncate_to_seconds)?; if modified_at == file.modified_at { // File has not been modified - return Ok(FileState::Unchanged { path: file.path }); + return Ok(FileState::Unchanged); } // mtime has changed, check if content has changed let new_hash = if file.is_directory { if !metadata.is_dir() { - return Ok(FileState::Removed { path: file.path }); + return Ok(FileState::Removed); } let paths = std::fs::read_dir(&file.path)? @@ -425,20 +561,35 @@ fn check_file_state(file: db::FilePathRow) -> io::Result { if new_hash == file.content_hash { // File touched but hash unchanged - Ok(FileState::MetadataModified { - path: file.path, - modified_at, - }) + Ok(FileState::MetadataModified { modified_at }) } else { // Hash has changed, return new hash Ok(FileState::Modified { - path: file.path, new_hash, modified_at, }) } } +fn check_env_state(env: &EnvInputDesc) -> io::Result { + let value = std::env::var(&env.name); + + if let Err(std::env::VarError::NotPresent) = value { + return Ok(FileState::Removed); + } + + let new_hash = hash::digest(&value.unwrap_or("".into())); + + if new_hash != env.content_hash { + Ok(FileState::Modified { + new_hash, + modified_at: SystemTime::now(), + }) + } else { + Ok(FileState::Unchanged) + } +} + fn truncate_to_seconds(time: SystemTime) -> io::Result { let duration_since_epoch = time .duration_since(UNIX_EPOCH) @@ -455,7 +606,7 @@ mod test { use std::io::Write; use tempdir::TempDir; - fn create_file_row(dir: &TempDir, content: &[u8]) -> db::FilePathRow { + fn create_file_row(dir: &TempDir, content: &[u8]) -> db::FileInputRow { let file_path = dir.path().join("test_file.txt"); let mut file = File::create(&file_path).unwrap(); file.write_all(content).unwrap(); @@ -465,7 +616,7 @@ mod test { let truncated_modified_at = truncate_to_seconds(modified_at).unwrap(); let content_hash = hash::compute_file_hash(&file_path).unwrap(); - db::FilePathRow { + db::FileInputRow { path: file_path, is_directory: false, content_hash, @@ -480,7 +631,7 @@ mod test { let file_row = create_file_row(&temp_dir, b"Hello, World!"); assert!(matches!( - check_file_state(file_row), + check_file_state(&file_row.into()), Ok(FileState::Unchanged { .. }) )); } @@ -500,7 +651,7 @@ mod test { drop(file); assert!(matches!( - check_file_state(file_row), + check_file_state(&file_row.into()), Ok(FileState::MetadataModified { .. }) )); } @@ -517,7 +668,7 @@ mod test { file.write_all(b"Modified content").unwrap(); assert!(matches!( - check_file_state(file_row), + check_file_state(&file_row.into()), Ok(FileState::Modified { .. }) )); } @@ -531,7 +682,7 @@ mod test { std::fs::remove_file(&file_row.path).unwrap(); assert!(matches!( - check_file_state(file_row), + check_file_state(&file_row.into()), Ok(FileState::Removed { .. }) )); } diff --git a/devenv-eval-cache/src/db.rs b/devenv-eval-cache/src/db.rs index 0432f134a..9d6b16f8b 100644 --- a/devenv-eval-cache/src/db.rs +++ b/devenv-eval-cache/src/db.rs @@ -1,4 +1,4 @@ -use super::command::FilePath; +use super::command::{EnvInputDesc, FileInputDesc, Input}; use sqlx::sqlite::{Sqlite, SqliteConnectOptions, SqliteJournalMode, SqliteRow, SqliteSynchronous}; use sqlx::{Acquire, Row, SqlitePool}; use std::ffi::OsStr; @@ -83,14 +83,14 @@ where Ok(record) } -pub async fn insert_command_with_files<'a, A>( +pub async fn insert_command_with_inputs<'a, A>( conn: A, raw_cmd: &str, cmd_hash: &str, input_hash: &str, output: &[u8], - paths: &[FilePath], -) -> Result<(i64, Vec), sqlx::Error> + inputs: &[Input], +) -> Result<(i64, Vec, Vec), sqlx::Error> where A: Acquire<'a, Database = Sqlite>, { @@ -99,11 +99,16 @@ where delete_command(&mut tx, cmd_hash).await?; let command_id = insert_command(&mut tx, raw_cmd, cmd_hash, input_hash, output).await?; - let file_ids = insert_files(&mut tx, paths, command_id).await?; + + // Partition and extract file and env inputs + let (file_inputs, env_inputs) = Input::partition_refs(inputs); + + let file_ids = insert_file_inputs(&mut tx, &file_inputs, command_id).await?; + let env_ids = insert_env_inputs(&mut tx, &env_inputs, command_id).await?; tx.commit().await?; - Ok((command_id, file_ids)) + Ok((command_id, file_ids, env_ids)) } async fn insert_command<'a, A>( @@ -174,9 +179,9 @@ where Ok(()) } -async fn insert_files<'a, A>( +async fn insert_file_inputs<'a, A>( conn: A, - paths: &[FilePath], + file_inputs: &[&FileInputDesc], command_id: i64, ) -> Result, sqlx::Error> where @@ -184,8 +189,8 @@ where { let mut conn = conn.acquire().await?; - let file_path_query = r#" - INSERT INTO file_path (path, is_directory, content_hash, modified_at) + let insert_file_input = r#" + INSERT INTO file_input (path, is_directory, content_hash, modified_at) VALUES (?, ?, ?, ?) ON CONFLICT (path) DO UPDATE SET content_hash = excluded.content_hash, @@ -195,16 +200,16 @@ where RETURNING id "#; - let mut file_ids = Vec::with_capacity(paths.len()); - for FilePath { + let mut file_ids = Vec::with_capacity(file_inputs.len()); + for FileInputDesc { path, is_directory, content_hash, modified_at, - } in paths + } in file_inputs { let modified_at = modified_at.duration_since(UNIX_EPOCH).unwrap().as_secs() as i64; - let id: i64 = sqlx::query(file_path_query) + let id: i64 = sqlx::query(insert_file_input) .bind(path.to_path_buf().into_os_string().as_bytes()) .bind(is_directory) .bind(content_hash) @@ -216,9 +221,9 @@ where } let cmd_input_path_query = r#" - INSERT INTO cmd_input_path (cached_cmd_id, file_path_id) + INSERT INTO cmd_input_path (cached_cmd_id, file_input_id) VALUES (?, ?) - ON CONFLICT (cached_cmd_id, file_path_id) DO NOTHING + ON CONFLICT (cached_cmd_id, file_input_id) DO NOTHING "#; for &file_id in &file_ids { @@ -231,11 +236,46 @@ where Ok(file_ids) } -/// The row type for the `file_path` table. +async fn insert_env_inputs<'a, A>( + conn: A, + env_inputs: &[&EnvInputDesc], + command_id: i64, +) -> Result, sqlx::Error> +where + A: Acquire<'a, Database = Sqlite>, +{ + let mut conn = conn.acquire().await?; + + let insert_env_input = r#" + INSERT INTO env_input (cached_cmd_id, name, content_hash) + VALUES (?, ?, ?) + ON CONFLICT (cached_cmd_id, name) DO UPDATE + SET content_hash = excluded.content_hash, + updated_at = strftime('%s', 'now') + RETURNING id + "#; + + let mut env_input_ids = Vec::with_capacity(env_inputs.len()); + for EnvInputDesc { name, content_hash } in env_inputs { + let id: i64 = sqlx::query(insert_env_input) + .bind(command_id) + .bind(name) + .bind(content_hash) + .fetch_one(&mut *conn) + .await? + .get(0); + env_input_ids.push(id); + } + + Ok(env_input_ids) +} + +/// The row type for the `file_input` table. #[derive(Clone, Debug, PartialEq)] -pub struct FilePathRow { +pub struct FileInputRow { /// A path pub path: PathBuf, + /// Whether the path is a directory pub is_directory: bool, /// The hash of the file's content pub content_hash: String, @@ -245,7 +285,7 @@ pub struct FilePathRow { pub updated_at: SystemTime, } -impl sqlx::FromRow<'_, SqliteRow> for FilePathRow { +impl sqlx::FromRow<'_, SqliteRow> for FileInputRow { fn from_row(row: &SqliteRow) -> Result { let path: &[u8] = row.get("path"); let is_directory: bool = row.get("is_directory"); @@ -262,15 +302,29 @@ impl sqlx::FromRow<'_, SqliteRow> for FilePathRow { } } +#[derive(Clone, Debug, PartialEq)] +pub struct EnvInputRow { + pub name: String, + pub content_hash: String, +} + +impl sqlx::FromRow<'_, SqliteRow> for EnvInputRow { + fn from_row(row: &SqliteRow) -> Result { + let name: String = row.get("name"); + let content_hash: String = row.get("content_hash"); + Ok(Self { name, content_hash }) + } +} + pub async fn get_files_by_command_id( pool: &SqlitePool, command_id: i64, -) -> Result, sqlx::Error> { +) -> Result, sqlx::Error> { let files = sqlx::query_as( r#" - SELECT fp.path, fp.is_directory, fp.content_hash, fp.modified_at, fp.updated_at - FROM file_path fp - JOIN cmd_input_path cip ON fp.id = cip.file_path_id + SELECT f.path, f.is_directory, f.content_hash, f.modified_at, f.updated_at + FROM file_input f + JOIN cmd_input_path cip ON f.id = cip.file_input_id WHERE cip.cached_cmd_id = ? "#, ) @@ -284,12 +338,12 @@ pub async fn get_files_by_command_id( pub async fn get_files_by_command_hash( pool: &SqlitePool, command_hash: &str, -) -> Result, sqlx::Error> { +) -> Result, sqlx::Error> { let files = sqlx::query_as( r#" - SELECT fp.path, fp.is_directory, fp.content_hash, fp.modified_at, fp.updated_at - FROM file_path fp - JOIN cmd_input_path cip ON fp.id = cip.file_path_id + SELECT f.path, f.is_directory, f.content_hash, f.modified_at, f.updated_at + FROM file_input f + JOIN cmd_input_path cip ON f.id = cip.file_input_id JOIN cached_cmd cc ON cip.cached_cmd_id = cc.id WHERE cc.cmd_hash = ? "#, @@ -301,6 +355,43 @@ pub async fn get_files_by_command_hash( Ok(files) } +pub async fn get_envs_by_command_id( + pool: &SqlitePool, + command_id: i64, +) -> Result, sqlx::Error> { + let files = sqlx::query_as( + r#" + SELECT e.name, e.content_hash, e.updated_at + FROM env_input e + WHERE e.cached_cmd_id = ? + "#, + ) + .bind(command_id) + .fetch_all(pool) + .await?; + + Ok(files) +} + +pub async fn get_envs_by_command_hash( + pool: &SqlitePool, + command_hash: &str, +) -> Result, sqlx::Error> { + let files = sqlx::query_as( + r#" + SELECT e.name, e.content_hash, e.updated_at + FROM env_input e + JOIN cached_cmd cc ON e.cached_cmd_id = cc.id + WHERE cc.cmd_hash = ? + "#, + ) + .bind(command_hash) + .fetch_all(pool) + .await?; + + Ok(files) +} + pub async fn update_file_modified_at>( pool: &SqlitePool, path: P, @@ -310,7 +401,7 @@ pub async fn update_file_modified_at>( sqlx::query( r#" - UPDATE file_path + UPDATE file_input SET modified_at = ?, updated_at = strftime('%s', 'now') WHERE path = ? "#, @@ -326,11 +417,11 @@ pub async fn update_file_modified_at>( pub async fn delete_unreferenced_files(pool: &SqlitePool) -> Result { let result = sqlx::query( r#" - DELETE FROM file_path + DELETE FROM file_input WHERE NOT EXISTS ( SELECT 1 FROM cmd_input_path - WHERE cmd_input_path.file_path_id = file_path.id + WHERE cmd_input_path.file_input_id = file_input.id ) "#, ) @@ -353,29 +444,29 @@ mod tests { let cmd_hash = hash::digest(raw_cmd); let output = b"Hello, world!"; let modified_at = SystemTime::now(); - let paths = vec![ - FilePath { + let inputs = vec![ + Input::File(FileInputDesc { path: "/path/to/file1".into(), is_directory: false, content_hash: "hash1".to_string(), modified_at, - }, - FilePath { + }), + Input::File(FileInputDesc { path: "/path/to/file2".into(), is_directory: false, content_hash: "hash2".to_string(), modified_at, - }, + }), ]; let input_hash = hash::digest( - &paths + &inputs .iter() - .map(|fp| fp.content_hash.clone()) + .map(|fp| fp.content_hash()) .collect::(), ); - let (command_id, file_ids) = - insert_command_with_files(&pool, raw_cmd, &cmd_hash, &input_hash, output, &paths) + let (command_id, file_ids, _) = + insert_command_with_inputs(&pool, raw_cmd, &cmd_hash, &input_hash, output, &inputs) .await .unwrap(); @@ -404,62 +495,66 @@ mod tests { let cmd_hash1 = hash::digest(raw_cmd1); let output1 = b"Hello, world!"; let modified_at = SystemTime::now(); - let paths1 = vec![ - FilePath { + let inputs1 = vec![ + Input::File(FileInputDesc { path: "/path/to/file1".into(), is_directory: false, content_hash: "hash1".to_string(), modified_at, - }, - FilePath { + }), + Input::File(FileInputDesc { path: "/path/to/file2".into(), is_directory: false, content_hash: "hash2".to_string(), modified_at, - }, + }), ]; - let input_hash1 = hash::digest( - &paths1 - .iter() - .map(|p| p.content_hash.clone()) - .collect::(), - ); - - let (command_id1, file_ids1) = - insert_command_with_files(&pool, raw_cmd1, &cmd_hash1, &input_hash1, output1, &paths1) - .await - .unwrap(); + let input_hash1 = + hash::digest(&inputs1.iter().map(|p| p.content_hash()).collect::()); + + let (command_id1, file_ids1, _) = insert_command_with_inputs( + &pool, + raw_cmd1, + &cmd_hash1, + &input_hash1, + output1, + &inputs1, + ) + .await + .unwrap(); // Second command let raw_cmd2 = "nix-build -A goodbye"; let cmd_hash2 = hash::digest(raw_cmd2); let output2 = b"Goodbye, world!"; let modified_at = SystemTime::now(); - let paths2 = vec![ - FilePath { + let inputs2 = vec![ + Input::File(FileInputDesc { path: "/path/to/file2".into(), is_directory: false, content_hash: "hash2".to_string(), modified_at, - }, - FilePath { + }), + Input::File(FileInputDesc { path: "/path/to/file3".into(), is_directory: false, content_hash: "hash3".to_string(), modified_at, - }, + }), ]; - let input_hash2 = hash::digest( - &paths2 - .iter() - .map(|p| p.content_hash.clone()) - .collect::(), - ); - - let (command_id2, file_ids2) = - insert_command_with_files(&pool, raw_cmd2, &cmd_hash2, &input_hash2, output2, &paths2) - .await - .unwrap(); + let input_hash2 = + hash::digest(&inputs2.iter().map(|p| p.content_hash()).collect::()); + + let (command_id2, file_ids2, _) = insert_command_with_inputs( + &pool, + raw_cmd2, + &cmd_hash2, + &input_hash2, + output2, + &inputs2, + ) + .await + .unwrap(); // Verify first command let retrieved_command1 = get_command_by_hash(&pool, &cmd_hash1) @@ -499,56 +594,48 @@ mod tests { let cmd_hash = hash::digest(raw_cmd); let output = b"Hello, world!"; let modified_at = SystemTime::now(); - let paths1 = vec![ - FilePath { + let inputs1 = vec![ + Input::File(FileInputDesc { path: "/path/to/file1".into(), is_directory: false, content_hash: "hash1".to_string(), modified_at, - }, - FilePath { + }), + Input::File(FileInputDesc { path: "/path/to/file2".into(), is_directory: false, content_hash: "hash2".to_string(), modified_at, - }, + }), ]; - let input_hash = hash::digest( - &paths1 - .iter() - .map(|p| p.content_hash.clone()) - .collect::(), - ); + let input_hash = + hash::digest(&inputs1.iter().map(|p| p.content_hash()).collect::()); - let (_command_id1, file_ids1) = - insert_command_with_files(&pool, raw_cmd, &cmd_hash, &input_hash, output, &paths1) + let (_command_id1, file_ids1, _) = + insert_command_with_inputs(&pool, raw_cmd, &cmd_hash, &input_hash, output, &inputs1) .await .unwrap(); // Second command - let paths2 = vec![ - FilePath { + let inputs2 = vec![ + Input::File(FileInputDesc { path: "/path/to/file2".into(), is_directory: false, content_hash: "hash2".to_string(), modified_at, - }, - FilePath { + }), + Input::File(FileInputDesc { path: "/path/to/file3".into(), is_directory: false, content_hash: "hash3".to_string(), modified_at, - }, + }), ]; - let input_hash2 = hash::digest( - &paths2 - .iter() - .map(|p| p.content_hash.clone()) - .collect::(), - ); + let input_hash2 = + hash::digest(&inputs2.iter().map(|p| p.content_hash()).collect::()); - let (command_id2, file_ids2) = - insert_command_with_files(&pool, raw_cmd, &cmd_hash, &input_hash2, output, &paths2) + let (command_id2, file_ids2, _) = + insert_command_with_inputs(&pool, raw_cmd, &cmd_hash, &input_hash2, output, &inputs2) .await .unwrap(); diff --git a/devenv-eval-cache/src/lib.rs b/devenv-eval-cache/src/lib.rs index 3d910152d..a55ab1e3c 100644 --- a/devenv-eval-cache/src/lib.rs +++ b/devenv-eval-cache/src/lib.rs @@ -4,5 +4,7 @@ pub(crate) mod hash; pub mod internal_log; pub mod op; -pub use command::{supports_eval_caching, CachedCommand, Output}; +pub use command::{ + supports_eval_caching, CachedCommand, EnvInputDesc, FileInputDesc, Input, Output, +}; pub use db::setup_db; diff --git a/devenv-eval-cache/src/op.rs b/devenv-eval-cache/src/op.rs index 1c0c6ab0a..bbc7b1356 100644 --- a/devenv-eval-cache/src/op.rs +++ b/devenv-eval-cache/src/op.rs @@ -1,7 +1,7 @@ use crate::internal_log::InternalLog; use regex::Regex; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; /// A sum-type of filesystem operations that we can extract from the Nix logs. #[derive(Clone, Debug, PartialEq)] @@ -70,28 +70,6 @@ impl Op { _ => None, } } - - pub fn source(&self) -> Option<&Path> { - match self { - Op::CopiedSource { source, .. } => Some(source.as_path()), - Op::EvaluatedFile { source } => Some(source.as_path()), - Op::ReadFile { source } => Some(source.as_path()), - Op::ReadDir { source } => Some(source.as_path()), - Op::TrackedPath { source } => Some(source.as_path()), - _ => None, - } - } - - pub fn owned_source(self) -> Option { - match self { - Op::CopiedSource { source, .. } => Some(source), - Op::EvaluatedFile { source } => Some(source), - Op::ReadFile { source } => Some(source), - Op::ReadDir { source } => Some(source), - Op::TrackedPath { source } => Some(source), - _ => None, - } - } } #[cfg(test)] diff --git a/devenv/src/cnix.rs b/devenv/src/cnix.rs index 89506bca6..fa8a2738e 100644 --- a/devenv/src/cnix.rs +++ b/devenv/src/cnix.rs @@ -404,7 +404,7 @@ impl<'a> Nix<'a> { status: output.status, stdout: output.stdout, stderr: output.stderr, - paths: vec![], + inputs: vec![], } }; diff --git a/devenv/src/devenv.rs b/devenv/src/devenv.rs index 464dd2bf4..853217856 100644 --- a/devenv/src/devenv.rs +++ b/devenv/src/devenv.rs @@ -1,4 +1,4 @@ -use super::{cli, cnix, config, log, tasks}; +use super::{cli, cnix, config, tasks}; use clap::crate_version; use cli_table::Table; use cli_table::{print_stderr, WithTitle}; @@ -846,11 +846,19 @@ impl Devenv { let span = tracing::info_span!("building_shell", devenv.user_message = "Building shell",); let env = self.nix.dev_env(json, &gc_root).instrument(span).await?; + use devenv_eval_cache::command::{FileInputDesc, Input}; std::fs::write( - self.devenv_dotfile.join("input-paths.txt"), - env.paths + // TODO: update direnvrc to use this + self.devenv_dotfile.join("cache-inputs.txt"), + env.inputs .iter() - .map(|fp| fp.path.to_string_lossy()) + .filter_map(|input| match input { + Input::File(FileInputDesc { path, .. }) => { + Some(path.to_string_lossy().to_string()) + } + // TODO: update direnvrc to handle env vars + _ => None, + }) .collect::>() .join("\n"), )