Skip to content

Commit

Permalink
Add a Package conception served as a set of linked KSYs and implement…
Browse files Browse the repository at this point in the history
… imports loading
  • Loading branch information
Mingun committed Oct 1, 2024
1 parent e0fbffa commit 2e03ed6
Show file tree
Hide file tree
Showing 3 changed files with 277 additions and 3 deletions.
73 changes: 70 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,66 @@ pub mod parser;
/// the `test-data` in the crate root directory, next to `src`.
#[cfg(test)]
mod formats {
use crate::model::Root;
use crate::parser::Ksy;
use crate::model::{ImportLoader, Package};
use crate::parser::{Import, Ksy};
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
use test_generator::test_resources;

/// Error produced by the file-based test loader while loading an imported KSY file.
#[derive(Debug)]
// The payloads of the variants are never read directly, but they are still
// valuable in the `Debug` output when a test fails
#[allow(dead_code)]
enum LoaderError {
/// The file could not be opened. Holds the displayed path of the file and
/// the underlying I/O error.
Io(String, io::Error),
/// The file was opened, but its content could not be deserialized as a KSY.
Parse(serde_yml::Error),
}

/// Builds the path of an imported file.
///
/// Every component of the `import` is appended to `base` as a separate path
/// segment, then the extension of the resulting path is set to `ksy`.
fn to_path(mut base: PathBuf, import: &Import) -> PathBuf {
    // `PathBuf::extend` pushes each item in order, exactly like a manual loop
    base.extend(&import.components);
    base.set_extension("ksy");
    base
}

/// Test implementation of [`ImportLoader`] that resolves imports to `.ksy` files
/// on disk and uses the path of a file as its ID.
struct FileLoader {
/// Bases for absolute imports. They are probed in order and the first one
/// under which the imported file exists is used.
abs_roots: Vec<PathBuf>,
}
impl ImportLoader for FileLoader {
    type Id = PathBuf;
    type Error = LoaderError;

    /// Resolves an `import` to the path of the file to load.
    ///
    /// Relative imports are resolved against the directory of the importing
    /// file (`base`). Absolute imports are resolved against each of the
    /// `abs_roots` in turn, and the first path that exists on disk is returned.
    ///
    /// # Panics
    /// Panics if an absolute import is not found under any of the `abs_roots`.
    fn new_id(&mut self, mut base: Self::Id, import: &Import) -> Self::Id {
        if !import.absolute {
            // Remove name of the file from which we are imported
            base.pop();
            return to_path(base, import);
        }

        // Required for tests which expect that absolute import will look into several places
        // test-data/formats/imports_abs.ksy
        // test-data/formats/imports_abs_abs.ksy
        // test-data/formats/imports_abs_rel.ksy
        // test-data/formats/ks_path/for_abs_imports/imported_and_abs.ksy
        self.abs_roots
            .iter()
            .map(|root| to_path(root.clone(), import))
            .find(|candidate| candidate.exists())
            // In tests we should find file in one of the provided roots
            .unwrap_or_else(|| panic!("cannot find file for {import}"))
    }

    /// Opens the file at path `id` and deserializes its content as a KSY.
    ///
    /// # Errors
    /// - [`LoaderError::Io`] with the displayed path if the file cannot be opened
    /// - [`LoaderError::Parse`] if the content cannot be deserialized
    fn load(&mut self, id: Self::Id) -> Result<Ksy, Self::Error> {
        let file = match File::open(&id) {
            Ok(file) => file,
            Err(err) => return Err(LoaderError::Io(id.display().to_string(), err)),
        };
        match serde_yml::from_reader(file) {
            Ok(ksy) => Ok(ksy),
            Err(err) => Err(LoaderError::Parse(err)),
        }
    }
}

#[test_resources("formats/**/*.ksy")]
#[test_resources("test-data/formats/**/*.ksy")]
fn parse(resource: &str) {
Expand Down Expand Up @@ -44,7 +99,19 @@ mod formats {
return;
}

let _ = Root::validate(&ksy).expect(&format!("incorrect KSY {}", resource));
let id = Path::new(resource).to_path_buf();
// Directory with `ksc` crate
let ksc_dir = Path::new(env!("CARGO_MANIFEST_DIR"));

let package = Package::new(id, ksy, FileLoader {
abs_roots: vec![
ksc_dir.join("formats"),
ksc_dir.join("test-data").join("formats"),
ksc_dir.join("test-data").join("formats").join("ks_path"),
],
}).expect(&format!("invalid imports in {}", resource));

package.validate().expect(&format!("incorrect KSY {}", resource));
}

#[test_resources("test-data/formats_err/**/*.ksy")]
Expand Down
4 changes: 4 additions & 0 deletions src/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//! Unlike the structures from the [`parser`] module, this module contains validated
//! structures that represent only valid constructs of the Kaitai Struct language.
//!
//! The entry point is a [`Package`] struct.
//!
//! [`parser`]: crate::parser
use std::cmp;
Expand All @@ -24,11 +26,13 @@ use crate::parser::expressions::{parse_process, parse_type_ref, AttrType};
mod r#enum;
pub mod expressions;
mod name;
mod package;
mod r#type;

pub use name::{
EnumName, EnumPath, EnumValueName, FieldName, Name, OptionalName, ParamName, SeqName, TypeName,
};
pub use package::{ImportLoader, Package};
pub use r#enum::Enum;
pub use r#type::{Root, UserType, UserTypeRef};

Expand Down
203 changes: 203 additions & 0 deletions src/model/package.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
use std::collections::{HashSet, VecDeque};
use std::hash::Hash;

use crate::error::ModelError;
use crate::model::Root;
use crate::parser::{Import, Ksy};

/// Loader used to handle imported kaitai types in the `meta.imports` section.
pub trait ImportLoader {
/// The identifier of a file to load. Used to cache information about already
/// loaded files to prevent double loading. Inferred using [`Self::new_id`]
/// from the ID of the file which imports another file and the import specification.
///
/// File-based implementations usually will use `std::path::PathBuf` for that.
type Id: Clone + Hash + Eq;
/// The type of error that the loader could return while reading an imported file.
type Error;

/// Derives the ID of an imported file from the import specification and the ID
/// of the file that imports it.
///
/// This method will be called at least as many times as [`Self::load`], but
/// not every call of this method will be followed by a call to [`Self::load`].
///
/// # Parameters
/// - `base`: ID of a file which imports another file, described by the `import` parameter
/// - `import`: specification of the imported file as written in a KSY
///
/// Returns an ID of a file to load. The file with that ID does not necessarily exist,
/// the returned ID is only a pointer to where the loader is expected to find it.
/// Actual checks should be performed in the [`Self::load`] method.
fn new_id(&mut self, base: Self::Id, import: &Import) -> Self::Id;

/// Performs loading of one imported file (an entry from `meta.imports[i]`).
/// This method may not be called for every call to [`Self::new_id`].
///
/// # Parameters
/// - `id`: ID of the file to load, previously generated by the [`Self::new_id`] method.
///
/// # Errors
/// Returns an error if the file cannot be loaded. `Package::new` stops at
/// the first such error and returns it to its caller.
fn load(&mut self, id: Self::Id) -> Result<Ksy, Self::Error>;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Package contains information about the main kaitai struct file and all directly
/// and indirectly imported files. Validation of a package produces the models that
/// are used by code generators.
#[derive(Debug, Clone)]
pub struct Package {
/// The list of files that represents one translation unit -- a set of files
/// that should be processed together because they are linked by import relations.
///
/// Usually this list is filled automatically by creating a package using the
/// [`Self::new`] method. In that case the first element is the main file and
/// the imported files follow in the order in which they were loaded.
pub files: Vec<Ksy>,
}
impl Package {
    /// Builds a package from the main file and every file that it imports,
    /// directly or indirectly, using the provided loader.
    ///
    /// Files are visited in breadth-first order and each ID is loaded at most once,
    /// so cyclic imports are handled.
    ///
    /// # Parameters
    /// - `id`: ID of the main file, used as a base to resolve its imports
    /// - `ksy`: already parsed content of the main file
    /// - `loader`: resolves import specifications to IDs and loads files by ID
    ///
    /// # Errors
    /// Returns the first error returned by the loader.
    pub fn new<L>(mut id: L::Id, mut ksy: Ksy, mut loader: L) -> Result<Self, L::Error>
    where
        L: ImportLoader,
    {
        let mut pending = VecDeque::new();
        let mut visited = HashSet::new();
        let mut files = Vec::new();

        loop {
            // Schedule all imports of the current file
            if let Some(imports) = &ksy.meta.imports {
                for import in imports {
                    let candidate = loader.new_id(id.clone(), import);
                    // Skip self-imports and files which already were loaded
                    let is_self = candidate == id;
                    if !(is_self || visited.contains(&candidate)) {
                        pending.push_back(candidate);
                    }
                }
            }
            visited.insert(id);
            files.push(ksy);

            // The queue still can contain duplicates: two files could request the
            // same import before any of them has loaded it. Because of that, skip
            // entries that were loaded in the meantime
            let next = loop {
                match pending.pop_front() {
                    Some(queued) if visited.contains(&queued) => {}
                    other => break other,
                }
            };

            match next {
                Some(next_id) => {
                    ksy = loader.load(next_id.clone())?;
                    id = next_id;
                }
                // Nothing left to load
                None => break,
            }
        }

        Ok(Self { files })
    }

    /// Validates every file of the package and returns the list of models, one
    /// for each file, in the same order as in [`Self::files`].
    ///
    /// # Errors
    /// Returns the first validation error encountered.
    pub fn validate(self) -> Result<Vec<Root>, ModelError> {
        let mut roots = Vec::with_capacity(self.files.len());
        for ksy in &self.files {
            roots.push(Root::validate(ksy)?);
        }
        Ok(roots)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Checks that [`Package::new`] loads every file of a cyclic import graph
/// exactly once, using an in-memory loader instead of the real file system.
#[test]
fn import() {
use pretty_assertions::assert_eq;
use std::collections::{BTreeMap, HashSet};

// The main file: imports `ksy2` and `nested/ksy3` using relative imports
const KSY1: &str = "
meta:
id: ksy1
imports:
- ksy2
- nested/ksy3
";
// Imports the main file back and `nested/ksy3`, both using absolute imports
const KSY2: &str = "
meta:
id: ksy2
imports:
- /ksy1
- /nested/ksy3
";
// Imports both files from the parent level and its sibling `ksy4`
const KSY3: &str = "
meta:
id: ksy3
imports:
- ../ksy1
- ../ksy2
- ksy4
";
// A leaf file without imports
const KSY4: &str = "
meta:
id: ksy4
";

// Loader over an in-memory map which records every ID that it was asked to load
struct SimpleLoader<'s> {
// Test "file system": `/`-joined ID -> file content
fs: BTreeMap<&'static str, &'static str>,
// Keys of the files that already were read; owned by the test so that it
// can be inspected after the loader was consumed
already_read: &'s mut HashSet<String>,
}
impl<'s> ImportLoader for SimpleLoader<'s> {
// An ID is the list of path components of a file
type Id = Vec<String>;
type Error = serde_yml::Error;

fn new_id(&mut self, mut base: Self::Id, import: &Import) -> Self::Id {
if import.absolute {
// Start from scratch
base = Vec::new();
} else {
// Remove name of the file from which we are imported
base.pop();
}
// Add relative path
for comp in &import.components {
if comp == ".." {
// Step one level up instead of storing the `..` component
base.pop();
} else {
base.push(comp.into());
}
}
base
}

fn load(&mut self, id: Self::Id) -> Result<Ksy, Self::Error> {
let key = id.join("/");
let ksy = self.fs.get(key.as_str()).expect(&format!("{key} was not found in File System"));
// loader should not be called twice for already processed ID
assert_eq!(
self.already_read.insert(key.clone()),
true,
"{key} read more that once: {:?}",
self.already_read
);

serde_yml::from_str(ksy)
}
}

let start_id = "ksy1".to_string();
let ksy1: Ksy = serde_yml::from_str(KSY1).expect("`ksy1` not loaded");

// list of already read files; the main file is parsed above and not by the
// loader, so it is registered here and any attempt to load it again will
// trigger the assertion in `load`
let mut already_read = HashSet::new();
already_read.insert(start_id.clone());

// Test File System with set of files (ID -> file content)
let loader = SimpleLoader {
fs: BTreeMap::from_iter([
("ksy1", KSY1),
("ksy2", KSY2),
("nested/ksy3", KSY3),
("nested/ksy4", KSY4),
]),
already_read: &mut already_read,
};

let package = Package::new(vec![start_id], ksy1, loader).unwrap();

// All four files are part of the package and each of them was read once
assert_eq!(package.files.len(), 4);
assert_eq!(already_read.len(), 4);
}

0 comments on commit 2e03ed6

Please sign in to comment.