Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize checksum calculation on verify #861

Merged
merged 1 commit into from
Mar 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions src/fileimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
*
*/

#define CHUNK_SIZE 1024
#include "fileimpl.h"
#include <zim/error.h>
#include <zim/tools.h>
Expand Down Expand Up @@ -547,28 +548,45 @@ class Grouping

struct zim_MD5_CTX md5ctx;
zim_MD5Init(&md5ctx);


unsigned char ch[CHUNK_SIZE];
offset_type checksumPos = header.getChecksumPos();
offset_type currentPos = 0;
offset_type toRead = checksumPos;

for(auto part = zimFile->begin();
part != zimFile->end();
part++) {
std::ifstream stream(part->second->filename(), std::ios_base::in|std::ios_base::binary);

char ch;
for(/*NOTHING*/ ; currentPos < checksumPos && stream.get(ch).good(); currentPos++) {
zim_MD5Update(&md5ctx, reinterpret_cast<const uint8_t*>(&ch), 1);
while(toRead>=CHUNK_SIZE && stream.read(reinterpret_cast<char*>(ch),CHUNK_SIZE).good()) {
zim_MD5Update(&md5ctx, ch, CHUNK_SIZE);
toRead-=CHUNK_SIZE;
}

// If the stream is still good, the previous `while` exited because
// `toRead < CHUNK_SIZE`: read the remaining `toRead` bytes now; they are
// processed just below.
// Otherwise, the `while` exited because a full `CHUNK_SIZE` read did not
// succeed (typically the end of this part was reached), and the bytes that
// were read still have to be processed before moving on to the next part.
if(stream.good()){
aryanA101a marked this conversation as resolved.
Show resolved Hide resolved
stream.read(reinterpret_cast<char*>(ch),toRead);
}

// Feed the bytes returned by the last read (`stream.gcount()` of them) into
// the checksum. This covers the leftover data at the end of the checksummed
// range or at the end of the current part.
zim_MD5Update(&md5ctx, ch, stream.gcount());
toRead-=stream.gcount();

if (stream.bad()) {
perror("error while reading file");
return false;
}
if (currentPos == checksumPos) {
if (!toRead) {
break;
}
}

if (currentPos != checksumPos) {
if (toRead) {
return false;
}

Expand All @@ -580,7 +598,6 @@ class Grouping
{
return false;
}

return true;
}

Expand Down
Loading