diff --git a/src/lib.rs b/src/lib.rs
index fa1d6b0..a132759 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,11 +12,66 @@ pub mod parser;
 /// the `test-data` in the crate root directory, next to `src`.
 #[cfg(test)]
 mod formats {
-    use crate::model::Root;
-    use crate::parser::Ksy;
+    use crate::model::{ImportLoader, Package};
+    use crate::parser::{Import, Ksy};
     use std::fs::File;
+    use std::io;
+    use std::path::{Path, PathBuf};
     use test_generator::test_resources;
 
+    #[derive(Debug)]
+    // Values of the enumeration are not used directly, but are still valuable if a test fails
+    #[allow(dead_code)]
+    enum LoaderError {
+        Io(String, io::Error),
+        Parse(serde_yml::Error),
+    }
+
+    fn to_path(mut base: PathBuf, import: &Import) -> PathBuf {
+        for comp in &import.components {
+            base.push(comp);
+        }
+        base.set_extension("ksy");
+        base
+    }
+
+    struct FileLoader {
+        /// Bases for absolute imports
+        abs_roots: Vec<PathBuf>,
+    }
+    impl ImportLoader for FileLoader {
+        type Id = PathBuf;
+        type Error = LoaderError;
+
+        fn new_id(&mut self, mut base: Self::Id, import: &Import) -> Self::Id {
+            if import.absolute {
+                for base in self.abs_roots.iter().cloned() {
+                    let path = to_path(base, import);
+                    // Required for tests which expect that absolute import will look into several places
+                    // test-data/formats/imports_abs.ksy
+                    // test-data/formats/imports_abs_abs.ksy
+                    // test-data/formats/imports_abs_rel.ksy
+                    // test-data/formats/ks_path/for_abs_imports/imported_and_abs.ksy
+                    if path.exists() {
+                        return path;
+                    }
+                }
+                // In tests we should find file in one of the provided roots
+                panic!("cannot find file for {import}");
+            } else {
+                // Remove name of the file from which we are imported
+                base.pop();
+                to_path(base, import)
+            }
+        }
+
+        fn load(&mut self, id: Self::Id) -> Result<Ksy, Self::Error> {
+            let display = id.display().to_string();
+            let file = File::open(id).map_err(|e| LoaderError::Io(display, e))?;
+            serde_yml::from_reader(file).map_err(LoaderError::Parse)
+        }
+    }
+
     #[test_resources("formats/**/*.ksy")]
     #[test_resources("test-data/formats/**/*.ksy")]
     fn parse(resource: &str) {
@@ -44,7 +99,19 @@ mod formats {
             return;
         }
 
-        let _ = Root::validate(&ksy).expect(&format!("incorrect KSY {}", resource));
+        let id = Path::new(resource).to_path_buf();
+        // Directory with `ksc` crate
+        let ksc_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
+
+        let package = Package::new(id, ksy, FileLoader {
+            abs_roots: vec![
+                ksc_dir.join("formats"),
+                ksc_dir.join("test-data").join("formats"),
+                ksc_dir.join("test-data").join("formats").join("ks_path"),
+            ],
+        }).expect(&format!("invalid imports in {}", resource));
+
+        package.validate().expect(&format!("incorrect KSY {}", resource));
     }
 
     #[test_resources("test-data/formats_err/**/*.ksy")]
diff --git a/src/model/mod.rs b/src/model/mod.rs
index 488ae12..44458df 100644
--- a/src/model/mod.rs
+++ b/src/model/mod.rs
@@ -2,6 +2,8 @@
 //! Unlike structures from [`parser`] module that module contains validated
 //! structures, that represent only valid constructs of kaitai struct language.
 //!
+//! The entry point is a [`Package`] struct.
+//!
 //! [`parser`]: crate::parser
 
 use std::cmp;
@@ -24,11 +26,13 @@ use crate::parser::expressions::{parse_process, parse_type_ref, AttrType};
 mod r#enum;
 pub mod expressions;
 mod name;
+mod package;
 mod r#type;
 
 pub use name::{
     EnumName, EnumPath, EnumValueName, FieldName, Name, OptionalName, ParamName, SeqName, TypeName,
 };
+pub use package::{ImportLoader, Package};
 pub use r#enum::Enum;
 pub use r#type::{Root, UserType, UserTypeRef};
 
diff --git a/src/model/package.rs b/src/model/package.rs
new file mode 100644
index 0000000..2db60b0
--- /dev/null
+++ b/src/model/package.rs
@@ -0,0 +1,203 @@
+use std::collections::{HashSet, VecDeque};
+use std::hash::Hash;
+
+use crate::error::ModelError;
+use crate::model::Root;
+use crate::parser::{Import, Ksy};
+
+/// Loader used to handle imported kaitai types in `meta.imports` section.
+pub trait ImportLoader {
+    /// The identifier of a file to load. Used to cache information about already
+    /// loaded files to prevent double loading. Inferred using [`Self::new_id`]
+    /// from a file which imports another file and import specification.
+    ///
+    /// File-based implementations usually will use `std::path::PathBuf` for that.
+    type Id: Clone + Hash + Eq;
+    /// The type of error that loader could return during reading imported file.
+    type Error;
+
+    /// Derives id of imported file from the import specification and ID of a file
+    /// that imports it.
+    ///
+    /// This method will be called at least the same count as [`Self::load`], but
+    /// not each call of this method will be followed by [`Self::load`].
+    ///
+    /// # Parameters
+    /// - `base`: ID of a file which imports another file, described by the `import` parameter
+    /// - `import`: specification of the imported file as written in a KSY
+    ///
+    /// Returns an ID of a file to load. The file by that ID does not necessarily exist,
+    /// the returned ID is only the pointer where the loader is expected to find it.
+    /// Actual checks should be performed in the [`Self::load`] method.
+    fn new_id(&mut self, base: Self::Id, import: &Import) -> Self::Id;
+
+    /// Perform loading of one imported file (entry from `meta.imports[i]`).
+    /// This method may be called not for every call to [`Self::new_id`].
+    ///
+    /// # Parameters
+    /// - `id`: ID of the file to load, previously generated by the [`Self::new_id`] method.
+    fn load(&mut self, id: Self::Id) -> Result<Ksy, Self::Error>;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Package contains information about the main kaitai struct file and all directly
+/// and indirectly imported files. Validation of a package produces model that is used
+/// by code generators.
+#[derive(Debug, Clone)]
+pub struct Package {
+    /// The list of files that represents one translation unit -- a set of files
+    /// that should be processed together because they are linked by import relations.
+    ///
+    /// Usually this list is filled automatically by creating a package using [`Self::new`] method.
+    pub files: Vec<Ksy>,
+}
+impl Package {
+    /// Recursively loads all imported files using the provided loader,
+    /// returning a package with all loaded files.
+    ///
+    /// Returns the first error returned by the loader.
+    pub fn new<L>(mut id: L::Id, mut ksy: Ksy, mut loader: L) -> Result<Self, L::Error>
+    where
+        L: ImportLoader,
+    {
+        let mut to_load = VecDeque::new();
+        let mut loaded = HashSet::new();
+        let mut files = Vec::new();
+
+        'external: loop {
+            if let Some(imports) = &ksy.meta.imports {
+                for import in imports {
+                    let new_id = loader.new_id(id.clone(), import);
+                    // If such file was already loaded, do not try to load it again
+                    if new_id != id && !loaded.contains(&new_id) {
+                        to_load.push_back(new_id);
+                    }
+                }
+            }
+            loaded.insert(id);
+            files.push(ksy);
+
+            while let Some(new_id) = to_load.pop_front() {
+                // Even when we filter already loaded files before insert into `to_load`,
+                // we still can insert duplicated entries to `to_load` because we can have
+                // two files, that requested loading of the same file, but which have not
+                // yet tried to load
+                if loaded.contains(&new_id) {
+                    continue;
+                }
+                id = new_id;
+                ksy = loader.load(id.clone())?;
+                continue 'external;
+            }
+            break;
+        }
+
+        Ok(Self { files })
+    }
+
+    /// Performs validation of a set of KS files and creates a list of models for them.
+    pub fn validate(self) -> Result<Vec<Root>, ModelError> {
+        self.files.iter().map(Root::validate).collect()
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[test]
+fn import() {
+    use pretty_assertions::assert_eq;
+    use std::collections::{BTreeMap, HashSet};
+
+    const KSY1: &str = "
+    meta:
+      id: ksy1
+      imports:
+        - ksy2
+        - nested/ksy3
+    ";
+    const KSY2: &str = "
+    meta:
+      id: ksy2
+      imports:
+        - /ksy1
+        - /nested/ksy3
+    ";
+    const KSY3: &str = "
+    meta:
+      id: ksy3
+      imports:
+        - ../ksy1
+        - ../ksy2
+        - ksy4
+    ";
+    const KSY4: &str = "
+    meta:
+      id: ksy4
+    ";
+
+    struct SimpleLoader<'s> {
+        fs: BTreeMap<&'static str, &'static str>,
+        already_read: &'s mut HashSet<String>,
+    }
+    impl<'s> ImportLoader for SimpleLoader<'s> {
+        type Id = Vec<String>;
+        type Error = serde_yml::Error;
+
+        fn new_id(&mut self, mut base: Self::Id, import: &Import) -> Self::Id {
+            if import.absolute {
+                // Start from scratch
+                base = Vec::new();
+            } else {
+                // Remove name of the file from which we are imported
+                base.pop();
+            }
+            // Add relative path
+            for comp in &import.components {
+                if comp == ".." {
+                    base.pop();
+                } else {
+                    base.push(comp.into());
+                }
+            }
+            base
+        }
+
+        fn load(&mut self, id: Self::Id) -> Result<Ksy, Self::Error> {
+            let key = id.join("/");
+            let ksy = self.fs.get(key.as_str()).expect(&format!("{key} was not found in File System"));
+            // loader should not be called twice for already processed ID
+            assert_eq!(
+                self.already_read.insert(key.clone()),
+                true,
+                "{key} read more that once: {:?}",
+                self.already_read
+            );
+
+            serde_yml::from_str(ksy)
+        }
+    }
+
+    let start_id = "ksy1".to_string();
+    let ksy1: Ksy = serde_yml::from_str(KSY1).expect("`ksy1` not loaded");
+
+    // list of already read files
+    let mut already_read = HashSet::new();
+    already_read.insert(start_id.clone());
+
+    // Test File System with set of files (ID -> file content)
+    let loader = SimpleLoader {
+        fs: BTreeMap::from_iter([
+            ("ksy1", KSY1),
+            ("ksy2", KSY2),
+            ("nested/ksy3", KSY3),
+            ("nested/ksy4", KSY4),
+        ]),
+        already_read: &mut already_read,
+    };
+
+    let package = Package::new(vec![start_id], ksy1, loader).unwrap();
+
+    assert_eq!(package.files.len(), 4);
+    assert_eq!(already_read.len(), 4);
+}