From 48e64c7b961e6f1faf6dfdf1a88ed553b55819c5 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 26 Apr 2024 17:28:22 +0200 Subject: [PATCH] Make openSinglePieceOrSplitZimFile better handling single/multi part opening. Now, it tries to open as single piece only if not ending by `.zimaa`. If it cannot find the file, or if ending with `.zimaa`, it tries multi zim part. In any case, FileImpl constructor check for size and throw a ZimFileFormatError is file's size doesn't correspond to what is the header. --- src/file_compound.cpp | 16 ++++++++++++---- src/file_compound.h | 2 +- src/fileimpl.cpp | 13 +++++++++---- test/archive.cpp | 27 ++++++++++++++++++++++++++- 4 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/file_compound.cpp b/src/file_compound.cpp index 763de65f..b9784143 100644 --- a/src/file_compound.cpp +++ b/src/file_compound.cpp @@ -41,12 +41,20 @@ void FileCompound::addPart(FilePart* fpart) _fsize += fpart->size(); } -std::shared_ptr FileCompound::openSinglePieceOrSplitZimFile(const std::string& filename) { +std::shared_ptr FileCompound::openSinglePieceOrSplitZimFile(std::string filename) { + std::shared_ptr fileCompound; + if (filename.size() > 6 && filename.substr(filename.size()-6) == ".zimaa") { + filename.resize(filename.size()-2); + } else { try { - return std::make_shared(filename); - } catch (...) { - return std::make_shared(filename, FileCompound::MultiPartToken::Multi); + fileCompound = std::make_shared(filename); + } catch(...) { } } + + if ( !fileCompound ) { + fileCompound = std::make_shared(filename, FileCompound::MultiPartToken::Multi); + } + return fileCompound; } FileCompound::FileCompound(const std::string& filename): diff --git a/src/file_compound.h b/src/file_compound.h index 5fe94159..a49de886 100644 --- a/src/file_compound.h +++ b/src/file_compound.h @@ -57,7 +57,7 @@ class FileCompound : private std::map { enum class MultiPartToken { Multi }; public: // functions - static std::shared_ptr openSinglePieceOrSplitZimFile(const std::string& filename); + static std::shared_ptr openSinglePieceOrSplitZimFile(std::string filename); explicit FileCompound(const std::string& filename); explicit FileCompound(const std::string& filename, MultiPartToken token); diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 2341febd..91132f89 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -207,6 +207,15 @@ class Grouping throw ZimFileFormatError("error reading zim-file header."); } + // This can happen for several reasons: + // - Zim file is corrupted (corrupted header) + // - Zim file is too small (ongoing download, truncated file...) + // - Zim file is embedded at beginning of another file (and we try to open the file as a zim file) + // If open through a FdInput, size should be set in FdInput. + if (header.hasChecksum() && (header.getChecksumPos() + 16) != size_type(zimReader->size())) { + throw ZimFileFormatError("Zim file(s) is of bad size or corrupted."); + } + auto pathPtrReader = sectionSubReader(*zimReader, "Dirent pointer table", offset_t(header.getPathPtrPos()), @@ -297,10 +306,6 @@ class Grouping throw ZimFileFormatError("last cluster offset larger than file size; file corrupt"); } } - - if (header.hasChecksum() && header.getChecksumPos() != (getFilesize().v-16) ) { - throw ZimFileFormatError("Checksum position is not valid"); - } } offset_type FileImpl::getMimeListEndUpperLimit() const diff --git a/test/archive.cpp b/test/archive.cpp index c7143fad..c0241a2f 100644 --- a/test/archive.cpp +++ b/test/archive.cpp @@ -264,6 +264,31 @@ TEST(ZimArchive, openRealZimArchive) } } +TEST(ZimArchive, openSplitZimArchive) +{ + const char* fname = "wikibooks_be_all_nopic_2017-02_splitted.zim"; + + for (auto& testfile: getDataFilePath(fname)) { + const TestContext ctx{ {"path", testfile.path+"aa" } }; + std::unique_ptr archive; + EXPECT_NO_THROW( archive.reset(new zim::Archive(testfile.path+"aa")) ) << ctx; + if ( archive ) { + EXPECT_TRUE( archive->check() ) << ctx; + } + } +} + +TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive) +{ + const char* fname = "wikibooks_be_all_nopic_2017-02.zim"; + + for (auto& testfile: getDataFilePath(fname)) { + const TestContext ctx{ {"path", testfile.path+"aa" } }; + std::unique_ptr archive; + EXPECT_THROW( archive.reset(new zim::Archive(testfile.path+"aa")), std::runtime_error) << ctx; + } +} + TEST(ZimArchive, randomEntry) { const char* const zimfiles[] = { @@ -434,7 +459,7 @@ TEST(ZimArchive, validate) TEST_BROKEN_ZIM_NAME( "invalid.invalid_checksumpos.zim", - "Checksum position is not valid\n" + "Zim file(s) is of bad size or corrupted.\n" ); TEST_BROKEN_ZIM_NAME(