Skip to content

Commit

Permalink
Make openSinglePieceOrSplitZimFile better handling single/multi part …
Browse files Browse the repository at this point in the history
…opening.

Now, it tries to open the file as a single piece only if the filename does not end with `.zimaa`.
If that fails (for instance because the file cannot be found), or if the filename ends with `.zimaa`, it tries to open it as a multi-part zim file.

In any case, the FileImpl constructor checks the size and throws a
ZimFileFormatError if the file's size doesn't correspond to what is in the header.
  • Loading branch information
mgautierfr committed Apr 30, 2024
1 parent 926d862 commit 48e64c7
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 10 deletions.
16 changes: 12 additions & 4 deletions src/file_compound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,20 @@ void FileCompound::addPart(FilePart* fpart)
_fsize += fpart->size();
}

std::shared_ptr<FileCompound> FileCompound::openSinglePieceOrSplitZimFile(const std::string& filename) {
std::shared_ptr<FileCompound> FileCompound::openSinglePieceOrSplitZimFile(std::string filename) {
std::shared_ptr<FileCompound> fileCompound;
if (filename.size() > 6 && filename.substr(filename.size()-6) == ".zimaa") {
filename.resize(filename.size()-2);
} else {
try {
return std::make_shared<FileCompound>(filename);
} catch (...) {
return std::make_shared<FileCompound>(filename, FileCompound::MultiPartToken::Multi);
fileCompound = std::make_shared<FileCompound>(filename);
} catch(...) { }
}

if ( !fileCompound ) {
fileCompound = std::make_shared<FileCompound>(filename, FileCompound::MultiPartToken::Multi);
}
return fileCompound;
}

FileCompound::FileCompound(const std::string& filename):
Expand Down
2 changes: 1 addition & 1 deletion src/file_compound.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class FileCompound : private std::map<Range, FilePart*, less_range> {
enum class MultiPartToken { Multi };

public: // functions
static std::shared_ptr<FileCompound> openSinglePieceOrSplitZimFile(const std::string& filename);
static std::shared_ptr<FileCompound> openSinglePieceOrSplitZimFile(std::string filename);
explicit FileCompound(const std::string& filename);
explicit FileCompound(const std::string& filename, MultiPartToken token);

Expand Down
13 changes: 9 additions & 4 deletions src/fileimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,15 @@ class Grouping
throw ZimFileFormatError("error reading zim-file header.");
}

// This can happen for several reasons:
// - Zim file is corrupted (corrupted header)
// - Zim file is too small (ongoing download, truncated file...)
// - Zim file is embedded at beginning of another file (and we try to open the file as a zim file)
// If opened through a FdInput, the size should be set in the FdInput.
if (header.hasChecksum() && (header.getChecksumPos() + 16) != size_type(zimReader->size())) {
throw ZimFileFormatError("Zim file(s) is of bad size or corrupted.");
}

auto pathPtrReader = sectionSubReader(*zimReader,
"Dirent pointer table",
offset_t(header.getPathPtrPos()),
Expand Down Expand Up @@ -297,10 +306,6 @@ class Grouping
throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
}
}

if (header.hasChecksum() && header.getChecksumPos() != (getFilesize().v-16) ) {
throw ZimFileFormatError("Checksum position is not valid");
}
}

offset_type FileImpl::getMimeListEndUpperLimit() const
Expand Down
27 changes: 26 additions & 1 deletion test/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,31 @@ TEST(ZimArchive, openRealZimArchive)
}
}

// Checks that an archive can be opened by passing the data file path with
// "aa" appended (i.e. a name ending in ".zimaa"), and that the resulting
// archive passes its own integrity check.
// NOTE(review): presumably the test data provides the split parts
// (".zimaa", ".zimab", ...) next to this base name — confirm against the
// test data set.
TEST(ZimArchive, openSplitZimArchive)
{
const char* fname = "wikibooks_be_all_nopic_2017-02_splitted.zim";

for (auto& testfile: getDataFilePath(fname)) {
// The context is streamed into failure messages so the offending path is visible.
const TestContext ctx{ {"path", testfile.path+"aa" } };
std::unique_ptr<zim::Archive> archive;
// Construction must not throw when given the "...zimaa" path.
EXPECT_NO_THROW( archive.reset(new zim::Archive(testfile.path+"aa")) ) << ctx;
// Only run check() if construction succeeded; otherwise archive is still null.
if ( archive ) {
EXPECT_TRUE( archive->check() ) << ctx;
}
}
}

// Checks that appending "aa" to the path of this zim file makes opening
// fail: constructing the archive from the "...zimaa" path is expected to
// throw std::runtime_error.
// NOTE(review): presumably this archive is stored as a single file with no
// ".zimaa" part on disk, so the test guards against silently falling back
// to the plain ".zim" file — confirm against the test data set.
TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive)
{
const char* fname = "wikibooks_be_all_nopic_2017-02.zim";

for (auto& testfile: getDataFilePath(fname)) {
// The context is streamed into failure messages so the offending path is visible.
const TestContext ctx{ {"path", testfile.path+"aa" } };
std::unique_ptr<zim::Archive> archive;
// Opening through the "...zimaa" name must raise instead of yielding an archive.
EXPECT_THROW( archive.reset(new zim::Archive(testfile.path+"aa")), std::runtime_error) << ctx;
}
}

TEST(ZimArchive, randomEntry)
{
const char* const zimfiles[] = {
Expand Down Expand Up @@ -434,7 +459,7 @@ TEST(ZimArchive, validate)

TEST_BROKEN_ZIM_NAME(
"invalid.invalid_checksumpos.zim",
"Checksum position is not valid\n"
"Zim file(s) is of bad size or corrupted.\n"
);

TEST_BROKEN_ZIM_NAME(
Expand Down

0 comments on commit 48e64c7

Please sign in to comment.