Skip to content

Commit

Permalink
Merge pull request #126 from y1zhou/feat/read-options
Browse files Browse the repository at this point in the history
Add  ReadOptions implementation
  • Loading branch information
douweschulte authored Jun 21, 2024
2 parents a21b17c + e82ec7f commit 1298c60
Show file tree
Hide file tree
Showing 29 changed files with 2,533 additions and 309 deletions.
13 changes: 10 additions & 3 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use pdbtbx::*;
use std::fs::File;
use std::io::prelude::*;
use std::io::BufWriter;
use std::time::{Duration, Instant};

use pdbtbx::*;

fn main() {
// Setup the data needed
let pdb_names = vec![
Expand All @@ -19,8 +20,11 @@ fn main() {
("big", "example-pdbs/pTLS-6484.cif"),
];
let mut models = Vec::with_capacity(pdb_names.len());
let parser = ReadOptions::default()
.set_level(crate::StrictnessLevel::Loose)
.set_format(Format::Pdb);
for (name, path) in &pdb_names {
models.push((*name, open_pdb(path, StrictnessLevel::Loose).unwrap().0))
models.push((*name, parser.read(path).unwrap().0))
}
let mut results = Vec::new();

Expand Down Expand Up @@ -56,7 +60,10 @@ fn main() {
}

fn bench_open(filename: &str) {
let (_pdb, _errors) = open(filename, StrictnessLevel::Loose).unwrap();
let (_pdb, _errors) = ReadOptions::default()
.set_level(crate::StrictnessLevel::Loose)
.read(filename)
.unwrap();
}

fn bench_transformation(mut pdb: PDB) {
Expand Down
1,960 changes: 1,960 additions & 0 deletions example-pdbs/rosetta_model.cif

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion examples/selection.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
use pdbtbx::*;

fn main() {
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
let (pdb, _errors) = ReadOptions::default()
.set_level(StrictnessLevel::Loose)
.set_format(Format::Pdb)
.read("example-pdbs/1ubq.pdb")
.unwrap();

// Two ways of selecting the following atom in the PDB file, the first search can be somewhat faster
// because it can discard other chains which the second search has to test.
Expand Down
24 changes: 14 additions & 10 deletions examples/sphere.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use pdbtbx::*;
use rayon::iter::ParallelIterator;

use pdbtbx::*;

fn main() {
atom_sphere();
residue_sphere();
find_clashes();
let (pdb, _errors) = ReadOptions::new()
.set_level(StrictnessLevel::Loose)
.set_format(Format::Pdb)
.read("example-pdbs/1ubq.pdb")
.unwrap();

atom_sphere(&pdb);
residue_sphere(&pdb);
find_clashes(&pdb);
}

/// Find all Atoms in a sphere around a single origin Atom with a user-defined radius
/// This is using the features `rstar` and `rayon`.
fn atom_sphere() {
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
fn atom_sphere(pdb: &PDB) {
let (origin_id, radius): (usize, f64) = (12, 3.5);

// Leverage parallel searching
Expand All @@ -31,8 +37,7 @@ fn atom_sphere() {
/// Find all Atoms belonging to a Residue that has at least one Atom within a sphere of
/// user-defined origin and radius.
/// This is using the features `rstar` and `rayon`.
fn residue_sphere() {
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
fn residue_sphere(pdb: &PDB) {
let (origin_id, radius): (usize, f64) = (12, 3.5);

let sphere_origin = pdb
Expand Down Expand Up @@ -72,8 +77,7 @@ fn residue_sphere() {
/// Results for Atoms within the same Residue are excluded as well as those from the C and N Atoms
/// constituting the peptide bond of neighbouring amino acids.
/// Also, Atoms are not counted twice.
fn find_clashes() {
let (pdb, _errors) = open_pdb("example-pdbs/1ubq.pdb", StrictnessLevel::Loose).unwrap();
fn find_clashes(pdb: &PDB) {
let tree = pdb.create_hierarchy_rtree();

let mut clashing_atoms = Vec::new();
Expand Down
10 changes: 7 additions & 3 deletions examples/waterbox.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use pdbtbx::*;
use std::env;
use std::path::Path;
use std::time::Instant;

use pdbtbx::*;

fn main() {
let filename = env::current_dir()
.unwrap()
Expand All @@ -23,8 +24,11 @@ fn main() {
fn create_waterbox(size: (f64, f64, f64)) -> PDB {
let now = Instant::now();

let (mut liquid, _errors) =
open_pdb("example-pdbs/liquid.pdb", StrictnessLevel::Loose).unwrap();
let (mut liquid, _errors) = ReadOptions::new()
.set_level(StrictnessLevel::Loose)
.set_format(Format::Pdb)
.read("example-pdbs/liquid.pdb")
.unwrap();

let time = now.elapsed();

Expand Down
3 changes: 1 addition & 2 deletions src/error/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl Context {
} else {
Context::Line {
linenumber: pos.line,
line: pos.text.lines().into_iter().next().unwrap().to_string(),
line: pos.text.lines().next().unwrap().to_string(),
offset: 0,
length: 3,
}
Expand All @@ -132,7 +132,6 @@ impl Context {
lines: start
.text
.lines()
.into_iter()
.take(end.line - start.line)
.map(ToString::to_string)
.collect::<Vec<String>>(),
Expand Down
9 changes: 3 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@
//!
//! ```rust
//! use pdbtbx::*;
//! let (mut pdb, _errors) = pdbtbx::open(
//! "example-pdbs/1ubq.pdb",
//! StrictnessLevel::Medium
//! ).unwrap();
//! let (mut pdb, _errors) = pdbtbx::open("example-pdbs/1ubq.pdb").unwrap();
//!
//! pdb.remove_atoms_by(|atom| atom.element() == Some(&Element::H)); // Remove all H atoms
//!
Expand Down Expand Up @@ -65,15 +62,15 @@
doc = r##"
```rust
use pdbtbx::*;
let (mut pdb, _errors) = pdbtbx::open("example-pdbs/1ubq.pdb", pdbtbx::StrictnessLevel::Medium).unwrap();
let (mut pdb, _errors) = pdbtbx::open("example-pdbs/1ubq.pdb").unwrap();
// You can loop over all atoms within 3.5 Å of a specific atom
// Note: The `locate_within_distance` method takes a squared distance
let tree = pdb.create_atom_rtree();
for atom in tree.locate_within_distance(pdb.atom(42).unwrap().pos(), 3.5 * 3.5) {
println!("{}", atom);
}
// You can even get information about the hierarchy of these atoms
// You can even get information about the hierarchy of these atoms
// (the chain, residue and conformer that contain this atom)
let tree = pdb.create_hierarchy_rtree();
let mut total = 0;
Expand Down
169 changes: 30 additions & 139 deletions src/read/general.rs
Original file line number Diff line number Diff line change
@@ -1,52 +1,29 @@
use std::io::{BufRead, Read, Seek};

use super::*;
use crate::error::*;
use crate::structs::PDB;
use crate::StrictnessLevel;

#[cfg(feature = "compression")]
use super::mmcif::open_mmcif_bufread;
#[cfg(feature = "compression")]
use flate2::read::GzDecoder;
#[cfg(feature = "compression")]
use std::fs;
use super::*;

/// Standard return type for reading a file.
pub type ReadResult = std::result::Result<(PDB, Vec<PDBError>), Vec<PDBError>>;
pub type ReadResult = Result<(PDB, Vec<PDBError>), Vec<PDBError>>;

/// Open an atomic data file, either PDB or mmCIF/PDBx. The correct type will be
/// determined based on the file extension. This function is equivalent to
/// [`ReadOptions::read()`] with default options, apart from the `level` which
/// can be set by the `level` parameter.
/// Open an atomic data file, either PDB or mmCIF/PDBx.
///
/// This function is equivalent to [`ReadOptions::read()`] with default options.
/// The correct type will be determined based on the file extension.
/// Gzipped files can also be opened directly if file extensions are
/// `.pdb.gz`, `.pdb1.gz`, `.mmcif.gz`, or `.cif.gz`.
///
/// # Errors
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
///
/// # Related
/// If you want to open a file from memory see [`open_raw`]. There are also functions to open a specified file type directly
/// see [`crate::open_pdb`] and [`crate::open_mmcif`] respectively.
pub fn open(filename: impl AsRef<str>, level: StrictnessLevel) -> ReadResult {
open_with_options(filename, &ReadOptions::new().set_level(level))
}

/// Opens a file based on the given options.
pub(in crate::read) fn open_with_options(
filename: impl AsRef<str>,
options: &ReadOptions,
) -> ReadResult {
if check_extension(&filename, "pdb") {
open_pdb(filename, options.level)
} else if check_extension(&filename, "cif") {
open_mmcif(filename, options.level)
} else {
Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Incorrect extension",
"Could not determine the type of the given file, make it .pdb or .cif",
Context::show(filename.as_ref()),
)])
}
/// If you want to open a file from memory see [`ReadOptions::read_raw`].
/// The file type can be set explicitly with [`ReadOptions::set_format`].
/// These functions are useful if you are using a non-standard compression algorithm or way of
/// storing the data.
pub fn open(filename: impl AsRef<str>) -> ReadResult {
// Thin convenience wrapper: every option is left at its default. Callers that need a
// different strictness level or an explicit format build a `ReadOptions` themselves.
ReadOptions::default().read(filename)
}

/// Open a compressed atomic data file, either PDB or mmCIF/PDBx. The correct type will be
Expand All @@ -56,104 +33,20 @@ pub(in crate::read) fn open_with_options(
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
///
/// # Related
/// If you want to open a file from memory see [`open_raw`], [`crate::open_pdb_raw`] and [`crate::open_mmcif_bufread`].
/// If you want to open a file from memory see [`ReadOptions::read_raw`].
/// The file type can be set explicitly with [`ReadOptions::set_format`].
/// These functions are useful if you are using a non-standard compression algorithm or way of
/// storing the data.
#[cfg(feature = "compression")]
#[deprecated(
since = "0.12.0",
note = "Please use `ReadOptions::default().set_decompress(true).read(filename)` instead"
)]
pub fn open_gz(filename: impl AsRef<str>, level: StrictnessLevel) -> ReadResult {
let filename = filename.as_ref();

if check_extension(filename, "gz") {
// open a decompression stream
let file = fs::File::open(filename).map_err(|_| {
vec![PDBError::new(
ErrorLevel::BreakingError,
"Could not open file",
"Could not open the given file, make sure it exists and you have the correct permissions",
Context::show(filename),
)]
})?;

let decompressor = GzDecoder::new(file);

let reader = std::io::BufReader::new(decompressor);

if check_extension(&filename[..filename.len() - 3], "pdb") {
open_pdb_raw(reader, Context::show(filename), level)
} else if check_extension(&filename[..filename.len() - 3], "cif") {
open_mmcif_bufread(reader, level)
} else {
Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Incorrect extension",
"Could not determine the type of the given file, make it .pdb.gz or .cif.gz",
Context::show(filename),
)])
}
} else {
Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Incorrect extension",
"Could not determine the type of the given file, make it .pdb.gz or .cif.gz",
Context::show(filename),
)])
}
}

/// Open a stream with either PDB or mmCIF data. The distinction is made on the start of the first line.
/// If it starts with `HEADER` it is a PDB file, if it starts with `data_` it is a mmCIF file.
///
/// # Errors
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
/// It returns a breaking error if the buffer could not be read, the file type could not be determined from the first line, or there was a breaking error in the file itself.
/// See the `PDBError` for more details.
///
/// # Related
/// If you want to open a file see [`open`]. There are also functions to open a specified file type directly
/// see [`crate::open_pdb_raw`] and [`crate::open_mmcif_raw`] respectively.
pub fn open_raw<T: std::io::Read + std::io::Seek>(
mut input: std::io::BufReader<T>,
level: StrictnessLevel,
) -> ReadResult {
let mut first_line = String::new();
if input.read_line(&mut first_line).is_err() {
return Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Buffer could not be read",
"The buffer provided to `open_raw` could not be read.",
Context::None,
)]);
}
if input.rewind().is_err() {
return Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Buffer could not be read",
"The buffer provided to `open_raw` could not be rewound to the start.",
Context::None,
)]);
}
if first_line.starts_with("HEADER") {
open_pdb_raw(input, Context::None, level)
} else if first_line.starts_with("data_") {
let mut contents = String::new();
if input.read_to_string(&mut contents).is_ok() {
open_mmcif_raw(&contents, level)
} else {
Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Buffer could not be read",
"The buffer provided to `open_raw` could not be read to end.",
Context::show(&first_line),
)])
}
} else {
Err(vec![PDBError::new(
ErrorLevel::BreakingError,
"Could not determine file type",
"Could not determine the type of the given file, make it .pdb or .cif",
Context::show(&first_line),
)])
}
ReadOptions::default()
.set_level(level)
.guess_format(filename.as_ref())
.read(filename)
}

#[cfg(test)]
Expand All @@ -162,19 +55,17 @@ mod tests {

#[test]
fn open_invalid() {
assert!(open("file.png", StrictnessLevel::Medium).is_err());
assert!(open("file.mmcif", StrictnessLevel::Medium).is_err());
assert!(open("file.pdbml", StrictnessLevel::Medium).is_err());
assert!(open("file.pd", StrictnessLevel::Medium).is_err());
assert!(open("file.png").is_err());
assert!(open("file.mmcif").is_err());
assert!(open("file.pdbml").is_err());
assert!(open("file.pd").is_err());
}

#[test]
fn open_not_existing() {
let pdb =
open("file.pdb", StrictnessLevel::Medium).expect_err("This file should not exist.");
let pdb = open("file.pdb").expect_err("This file should not exist.");
assert_eq!(pdb[0].short_description(), "Could not open file");
let cif =
open("file.cif", StrictnessLevel::Medium).expect_err("This file should not exist.");
let cif = open("file.cif").expect_err("This file should not exist.");
assert_eq!(cif[0].short_description(), "Could not open file");
}
}
Loading

0 comments on commit 1298c60

Please sign in to comment.