Skip to content

Commit

Permalink
Unicode-normalize manifest and file paths before comparing; fixes issue
Browse files Browse the repository at this point in the history
  • Loading branch information
dmoles committed Dec 4, 2022
1 parent a80afee commit 267e8bf
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ResourceBundle;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -26,23 +27,36 @@ abstract public class AbstractPayloadFileExistsInManifestsVistor extends SimpleF

/**
* constructor must be called before using!
*
*
* @param ignoreHiddenFiles Should hidden files be ignored
*/
public AbstractPayloadFileExistsInManifestsVistor(final boolean ignoreHiddenFiles) {
super();
this.ignoreHiddenFiles = ignoreHiddenFiles;
}

@Override
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
FileVisitResult result = FileVisitResult.CONTINUE;

if(ignoreHiddenFiles && PathUtils.isHidden(dir)){
logger.debug(messages.getString("skipping_hidden_file"), dir);
result = FileVisitResult.SKIP_SUBTREE;
}

return result;
}

/**
* Returns true if the path exists in the provided set of manifest paths, false otherwise.
* @param path The file path.
* @param manifestPaths The manifest paths.
* @return true if the path exists, false otherwise.
*/
protected static boolean inManifest(final Path path, final Set<Path> manifestPaths) {
final String normalizedPath = ManifestVerifier.toNormalizedString(path);
return manifestPaths.stream().anyMatch(
(mp) -> ManifestVerifier.toNormalizedString(mp).equals(normalizedPath)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,19 @@ public enum ManifestVerifier {; //using enum to enforce singleton
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

/**
* Verify that all the files in the payload directory are listed in the payload manifest and
* Verify that all the files in the payload directory are listed in the payload manifest and
* all files listed in all manifests exist.
*
*
* @param bag the bag which contains the manifests to check
* @param ignoreHiddenFiles to include hidden files when checking
*
*
* @throws IOException if there is an error while reading a file from the filesystem
* @throws MaliciousPathException if a path is outside the bag
* @throws InvalidBagitFileFormatException if a manifest is not formatted correctly
* @throws FileNotInPayloadDirectoryException if a file listed in a manifest is not in the payload directory
*/
public static void verifyManifests(final Bag bag, final boolean ignoreHiddenFiles)throws IOException{

final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
checkAllFilesListedInManifestExist(allFilesListedInManifests);

Expand All @@ -54,12 +54,21 @@ public static void verifyManifests(final Bag bag, final boolean ignoreHiddenFile
}
}

/**
* Returns the path as a String in {{java.text.Normalizer.Form#NFD}} (canonical) normalized form.
* @param path the path to normalize
* @return String the normalized string
*/
static String toNormalizedString(final Path path) {
return Normalizer.normalize(path.toString(), Normalizer.Form.NFD);
}

/*
* get the full path (absolute) of all the files listed in all the manifests
*/
private static Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException {
logger.debug(messages.getString("all_files_in_manifests"));

final Set<Path> filesListedInManifests = new HashSet<>();

try(DirectoryStream<Path> directoryStream = Files.newDirectoryStream(bag.getTagFileDir(), new ManifestFilter())){
Expand All @@ -78,7 +87,7 @@ private static Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOEx
*/
private static void checkAllFilesListedInManifestExist(final Set<Path> files) {
logger.info(messages.getString("check_all_files_in_manifests_exist"));

for (final Path file : files) {
if(!Files.exists(file)){
if(existsNormalized(file)){
Expand All @@ -91,21 +100,21 @@ private static void checkAllFilesListedInManifestExist(final Set<Path> files) {
}
}
}

/**
* if a file is parially normalized or of a different normalization then the manifest specifies it will fail the existence test.
* This method checks for that by normalizing what is on disk with the normalized filename and see if they match.
*
*
* @return true if the normalized filename matches one on disk in the specified folder
*/
private static boolean existsNormalized(final Path file){
boolean existsNormalized = false;
final String normalizedFile = Normalizer.normalize(file.toString(), Normalizer.Form.NFD);
final String normalizedFile = toNormalizedString(file);
final Path parent = file.getParent();
if(parent != null){
try(DirectoryStream<Path> files = Files.newDirectoryStream(parent)){
for(final Path fileToCheck : files){
final String normalizedFileToCheck = Normalizer.normalize(fileToCheck.toString(), Normalizer.Form.NFD);
final String normalizedFileToCheck = toNormalizedString(fileToCheck);
if(normalizedFile.equals(normalizedFileToCheck)){
existsNormalized = true;
break;
Expand All @@ -116,7 +125,7 @@ private static boolean existsNormalized(final Path file){
logger.error(messages.getString("error_reading_normalized_file"), parent, normalizedFile, e);
}
}

return existsNormalized;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.github.jscancella.verify.internal;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -29,7 +30,7 @@ public final class PayloadFileExistsInAllManifestsVistor extends AbstractPayload

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is in one of the manifests.
*
*
* @param manifests the set of manifests to check
* @param rootDir the root directory of the bag
* @param ignoreHiddenFiles if the checker should ignore hidden files or not
Expand All @@ -41,16 +42,16 @@ public PayloadFileExistsInAllManifestsVistor(final Set<Manifest> manifests, fina
}

@Override
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs){
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) throws IOException {
if(Files.isRegularFile(path)){
for(final Manifest manifest : manifests){
final Set<Path> relativePaths = manifest
.getEntries().stream()
.map(entry -> entry.getRelativeLocation())
.collect(Collectors.toSet());
final Path relativePath = rootDir.relativize(path);
if(!relativePaths.contains(relativePath)){

if(!inManifest(relativePath, relativePaths)){
final String formattedMessage = messages.getString("file_not_in_manifest_error");
throw new FileNotInManifestException(MessageFormatter.format(formattedMessage, path, manifest.getBagitAlgorithmName()).getMessage());
}
Expand All @@ -59,4 +60,5 @@ public FileVisitResult visitFile(final Path path, final BasicFileAttributes attr
logger.debug(messages.getString("file_in_all_manifests"), path);
return FileVisitResult.CONTINUE;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public final class PayloadFileExistsInAtLeastOneManifestVistor extends AbstractP

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is in one of the manifests.
*
*
* @param filesListedInManifests the set of files listed in all the manifests
* @param ignoreHiddenFiles if the checker should ignore hidden files or not
*/
Expand All @@ -42,7 +42,7 @@ public FileVisitResult visitFile(final Path path, final BasicFileAttributes attr
logger.debug(messages.getString("skipping_hidden_file"), path);
}
else {
if(Files.isRegularFile(path) && !filesListedInManifests.contains(path.toAbsolutePath())){
if(Files.isRegularFile(path) && !inManifest(path.toAbsolutePath(), filesListedInManifests)){
final String formattedMessage = messages.getString("file_not_in_any_manifest_error");
throw new FileNotInManifestException(MessageFormatter.format(formattedMessage, path).getMessage());
}
Expand Down
61 changes: 34 additions & 27 deletions src/test/java/com/github/jscancella/verify/BagVeriferTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,134 +25,141 @@ public class BagVeriferTest extends TempFolderTest {
Security.addProvider(new BouncyCastleProvider());
}
}

@AfterAll
public static void tearDown() {
BagitChecksumNameMapping.clear("sha3256");
}

private Path rootDir = Paths.get(new File("src/test/resources/bags/v0_97/bag").toURI());

@Test
public void testValidWhenHiddenFolderNotIncluded() throws Exception{
Path copyDir = copyBagToTempFolder(rootDir);
Files.createDirectory(copyDir.resolve("data").resolve(".someHiddenFolder"));
TestUtils.makeFilesHiddenOnWindows(copyDir);

Bag bag = Bag.read(copyDir);
bag.isValid(true);
}

@Test
public void testValidWithHiddenFile() throws Exception{
Path copyDir = copyBagToTempFolder(rootDir);
Files.createFile(copyDir.resolve("data").resolve(".someHiddenFile"));
TestUtils.makeFilesHiddenOnWindows(copyDir);

Bag bag = Bag.read(copyDir);
bag.isValid(true);
}

@Test
public void testInvalidWithHiddenFile() throws Exception{
Path copyDir = copyBagToTempFolder(rootDir);
Files.createFile(copyDir.resolve("data").resolve(".someHiddenFile"));
TestUtils.makeFilesHiddenOnWindows(copyDir);

Bag bag = Bag.read(copyDir);
Assertions.assertThrows(FileNotInManifestException.class, () -> { bag.isValid(false); });
}


@Test
public void testValidWithAccentedCharactersInFilename() throws Exception{
Path bagDir = Paths.get("src", "test", "resources", "bagWithAccentedCharactersInFilename");
Bag bag = Bag.read(bagDir);
bag.isValid(true);
}

@Test
public void testMD5Bag() throws Exception{
Path bagDir = Paths.get("src", "test", "resources", "md5Bag");
Bag bag = Bag.read(bagDir);
bag.isValid(true);
}

@Test
public void testSHA1Bag() throws Exception{
Path bagDir = Paths.get("src", "test", "resources", "sha1Bag");
Bag bag = Bag.read(bagDir);
bag.isValid(true);
}

@Test
public void testSHA224Bag() throws Exception{
Path bagDir = Paths.get("src", "test", "resources", "sha224Bag");
Bag bag = Bag.read(bagDir);
bag.isValid(true);
}

@Test
public void testSHA256Bag() throws Exception{
Path bagDir = Paths.get("src", "test", "resources", "sha256Bag");
Bag bag = Bag.read(bagDir);
bag.isValid(true);
}

@Test
public void testSHA512Bag() throws Exception{
Path bagDir = Paths.get("src", "test", "resources", "sha512Bag");
Bag bag = Bag.read(bagDir);
bag.isValid(true);
}

@Test
public void testVersion0_97IsValid() throws Exception{
Bag bag = Bag.read(rootDir);

bag.isValid(true);
}

@Test
public void testIsComplete() throws Exception{
Bag bag = Bag.read(rootDir);

bag.isComplete(true);
}

@Test
public void testCorruptPayloadFile() throws Exception{
rootDir = Paths.get(new File("src/test/resources/corruptPayloadFile").toURI());
Bag bag = Bag.read(rootDir);

Assertions.assertThrows(CorruptChecksumException.class, () -> { bag.isValid(true); });
}

@Test
public void testCorruptTagFile() throws Exception{
rootDir = Paths.get(new File("src/test/resources/corruptTagFile").toURI());
Bag bag = Bag.read(rootDir);

Assertions.assertThrows(CorruptChecksumException.class, () -> { bag.isValid(true); });
}

@Test
public void testThrowsNoSuchAlgorithmExceptionWhenNotInHasherMap() throws Exception{
Path sha3BagDir = Paths.get(getClass().getClassLoader().getResource("sha3Bag").toURI());

Assertions.assertThrows(NoSuchBagitAlgorithmException.class, () -> { Bag.read(sha3BagDir); });
}

@Test
public void testAddSHA3SupportViaInterface() throws Exception{
boolean successful = BagitChecksumNameMapping.add("sha3256", SHA3Hasher.class);
Assertions.assertTrue(successful);

Path sha3BagDir = Paths.get(new File("src/test/resources/sha3Bag").toURI());
Bag bag = Bag.read(sha3BagDir);
Assertions.assertTrue(bag.isValid(true));
}

/*
* Technically valid but highly discouraged
*/
@Test
public void testManifestsWithLeadingDotSlash() throws Exception{
Path bagPath = Paths.get(new File("src/test/resources/bag-with-leading-dot-slash-in-manifest").toURI());
Bag bag = Bag.read(bagPath);

Assertions.assertTrue(bag.isValid(true));
}
}
Loading

0 comments on commit 267e8bf

Please sign in to comment.