Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be able to open zim archive from several fds. #860

Merged
merged 8 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions include/zim/archive.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ namespace zim
/** Archive constructor.
*
* Construct an archive from a file descriptor.
* Fd is used only at Archive creation.
* Ownership of the fd is not taken and it must be closed by caller.
*
* Note: This function is not available under Windows.
*
Expand All @@ -108,16 +110,30 @@ namespace zim
*
* Construct an archive from a descriptor of a file with an embedded ZIM
* archive inside.
* Fd is used only at Archive creation.
* Ownership of the fd is not taken and it must be closed by caller.
*
* Note: This function is not available under Windows.
*
* @param fd The descriptor of a seekable file with a continuous segment
* representing a complete ZIM archive.
* @param offset The offset of the ZIM archive relative to the beginning
* of the file (rather than the current position associated with fd).
* @param size The size of the ZIM archive.
* @param fd An FdInput (struct) holding the fd (int), offset (offset_type) and size (size_type)
* that reference a contiguous segment representing a complete ZIM archive.
*/
Archive(int fd, offset_type offset, size_type size);
explicit Archive(FdInput fd);

/** Archive constructor.
*
* Construct an archive from several file descriptors.
* Each part may be embedded in a file.
* Fds are used only at Archive creation.
* Ownership of the fds is not taken and they must be closed by caller.
* The same fd (int) may be used in several FdInput entries if the parts belong to the same file.
*
* Note: This function is not available under Windows.
*
* @param fds A vector of FdInput (struct), each holding the fd (int), offset (offset_type) and size (size_type),
* that together reference the series of segments representing a complete ZIM archive.
*/
explicit Archive(const std::vector<FdInput>& fds);
#endif

/** Return the filename of the zim file.
Expand Down
14 changes: 14 additions & 0 deletions include/zim/zim.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ namespace zim
// An offset.
typedef uint64_t offset_type;

// Describes a segment of an already opened file: the descriptor of the
// file, plus the position and the length of the segment inside it.
struct FdInput {
  // Descriptor of the open file holding the data.
  int fd;

  // Absolute position, within the file behind `fd`, at which the data starts.
  offset_type offset;

  // Length of the data.
  size_type size;

  // Builds a FdInput from its three components, stored exactly as given.
  FdInput(int fd, offset_type offset, size_type size)
    : fd{fd},
      offset{offset},
      size{size}
  {}
};

enum class Compression
{
None = 1,
Expand Down
2 changes: 1 addition & 1 deletion scripts/download_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import tarfile
import sys

TEST_DATA_VERSION = "0.3"
TEST_DATA_VERSION = "0.5"
mgautierfr marked this conversation as resolved.
Show resolved Hide resolved
ARCHIVE_URL_TEMPL = "https://github.com/openzim/zim-testing-suite/releases/download/v{version}/zim-testing-suite-{version}.tar.gz"

if __name__ == "__main__":
Expand Down
2 changes: 0 additions & 2 deletions src/_dirent.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@
#include <string>
#include <zim/zim.h>
#include <exception>
#include <memory>

#include "zim_types.h"
#include "debug.h"

namespace zim
{
Expand Down
8 changes: 6 additions & 2 deletions src/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,12 @@ namespace zim
: m_impl(new FileImpl(fd))
{ }

Archive::Archive(int fd, offset_type offset, size_type size)
: m_impl(new FileImpl(fd, offset_t(offset), zsize_t(size)))
// Construct an archive from the single file segment described by `fd`.
// The FdInput is handed as is to the FileImpl constructor, which does the work.
Archive::Archive(FdInput fd)
: m_impl(new FileImpl(fd))
{ }

// Construct an archive from the list of file segments given in `fds`.
// The vector is handed as is to the FileImpl constructor, which does the work.
Archive::Archive(const std::vector<FdInput>& fds)
: m_impl(new FileImpl(fds))
{ }
#endif

Expand Down
6 changes: 1 addition & 5 deletions src/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,10 @@
*/

#include "buffer.h"
#include "debug.h"

#include <sys/stat.h>
#include <cstdio>
#include <cstdlib>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <sstream>

#ifndef _WIN32
# include <sys/mman.h>
Expand Down
5 changes: 0 additions & 5 deletions src/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,10 @@
#ifndef ZIM_BUFFER_H_
#define ZIM_BUFFER_H_

#include <cstddef>
#include <exception>
#include <memory>
#include <iostream>

#include "config.h"
#include "zim_types.h"
#include "endian_tools.h"
#include "debug.h"
#include <zim/blob.h>

namespace zim {
Expand Down
1 change: 1 addition & 0 deletions src/bufferstreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define ZIM_BUFFERSTREAMER_H

#include "debug.h"
#include "endian_tools.h"

#include <string.h>

Expand Down
3 changes: 0 additions & 3 deletions src/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,12 @@
#include <zim/blob.h>
#include <zim/error.h>
#include "buffer_reader.h"
#include "endian_tools.h"
#include "bufferstreamer.h"
#include "decoderstreamreader.h"
#include "rawstreamreader.h"
#include <algorithm>
#include <stdlib.h>
#include <sstream>

#include "compression.h"
#include "log.h"

#include "config.h"
Expand Down
3 changes: 0 additions & 3 deletions src/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,11 @@
#include <zim/zim.h>
#include "buffer.h"
#include "zim_types.h"
#include "file_reader.h"
#include <iosfwd>
#include <vector>
#include <memory>
#include <mutex>

#include "zim_types.h"
#include "zim/error.h"

namespace zim
{
Expand Down
8 changes: 5 additions & 3 deletions src/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@
#ifndef _LIBZIM_COMPRESSION_
#define _LIBZIM_COMPRESSION_

#include <vector>
#include "string.h"
#include "reader.h"

#include "file_reader.h"
#include <zim/error.h>

#include "config.h"
Expand All @@ -36,6 +34,10 @@
#include "zim_types.h"
#include "constants.h"

#include <cstring>
#include <vector>
#include <memory>

//#define DEB(X) std::cerr << __func__ << " " << X << std::endl ;
#define DEB(X)

Expand Down
2 changes: 0 additions & 2 deletions src/dirent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#include <zim/error.h>
#include "buffer.h"
#include "bufferstreamer.h"
#include "endian_tools.h"
#include "log.h"
#include <algorithm>
#include <cstring>

log_define("zim.dirent")
Expand Down
1 change: 0 additions & 1 deletion src/dirent_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#define ZIM_DIRENT_ACCESSOR_H

#include "zim_types.h"
#include "debug.h"
#include "lrucache.h"

#include <memory>
Expand Down
1 change: 0 additions & 1 deletion src/dirent_lookup.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include <algorithm>
#include <map>
#include <mutex>
#include <vector>
#include <cassert>

namespace zim
Expand Down
4 changes: 2 additions & 2 deletions src/endian_tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
#ifndef ENDIAN_H
#define ENDIAN_H

#include <algorithm>
#include <iostream>
#include <zim/zim.h>

#include <cstddef>

namespace zim
{

Expand Down
2 changes: 0 additions & 2 deletions src/entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
#include <zim/entry.h>
#include <zim/error.h>
#include <zim/item.h>
#include "_dirent.h"
#include "fileimpl.h"
#include "file_part.h"
#include "log.h"

#include <sstream>
Expand Down
17 changes: 16 additions & 1 deletion src/file_compound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
*/

#include "file_compound.h"
#include "buffer.h"

#include <errno.h>
#include <string.h>
Expand Down Expand Up @@ -77,6 +76,22 @@ FileCompound::FileCompound(int fd):
{
addPart(new FilePart(fd));
}

// Build a compound holding exactly one part: the file segment described by `fd`.
FileCompound::FileCompound(FdInput fd)
  : _filename(),
    _fsize(0)
{
  FilePart* const singlePart = new FilePart(fd);
  addPart(singlePart);
}

// Build a compound from several file segments: one part is added per entry
// of `fds`, following the order in which the entries appear in the vector.
FileCompound::FileCompound(const std::vector<FdInput>& fds)
  : _filename(),
    _fsize(0)
{
  for (const FdInput& input : fds) {
    addPart(new FilePart(input));
  }
}
#endif

FileCompound::~FileCompound() {
Expand Down
5 changes: 3 additions & 2 deletions src/file_compound.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
#include "zim_types.h"
#include "debug.h"
#include <map>
#include <memory>
#include <cstdio>
#include <vector>

namespace zim {

Expand Down Expand Up @@ -60,6 +59,8 @@ class FileCompound : private std::map<Range, FilePart*, less_range> {

#ifndef _WIN32
explicit FileCompound(int fd);
explicit FileCompound(FdInput fd);
explicit FileCompound(const std::vector<FdInput>& fds);
#endif

~FileCompound();
Expand Down
24 changes: 20 additions & 4 deletions src/file_part.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#define ZIM_FILE_PART_H_

#include <string>
#include <cstdio>
#include <memory>

#include <zim/zim.h>
Expand All @@ -32,21 +31,36 @@

namespace zim {

/** A part of file.
*
* `FilePart` references a part (section) of a physical file.
* Most of the time, `FilePart` references the whole file (m_offset==0 and m_size==m_fhandle->getSize()),
* but in some situations it references only a part of the file.
* This is the case on Android, where the zim file is split into several parts that are stored
* uncompressed in a "resource" (zip) archive.
*/
class FilePart {
typedef DEFAULTFS FS;

public:
using FDSharedPtr = std::shared_ptr<FS::FD>;

public:
FilePart(const std::string& filename) :
explicit FilePart(const std::string& filename) :
m_filename(filename),
m_fhandle(std::make_shared<FS::FD>(FS::openFile(filename))),
m_offset(0),
m_size(m_fhandle->getSize()) {}

#ifndef _WIN32
FilePart(int fd) :
explicit FilePart(int fd) :
FilePart(getFilePathFromFD(fd)) {}

explicit FilePart(FdInput fdInput):
m_filename(getFilePathFromFD(fdInput.fd)),
m_fhandle(std::make_shared<FS::FD>(FS::openFile(m_filename))),
m_offset(fdInput.offset),
m_size(fdInput.size) {}
#endif

~FilePart() = default;
Expand All @@ -55,13 +69,15 @@ class FilePart {
const FDSharedPtr& shareable_fhandle() const { return m_fhandle; };

zsize_t size() const { return m_size; };
offset_t offset() const { return m_offset; }
bool fail() const { return !m_size; };
bool good() const { return bool(m_size); };

private:
const std::string m_filename;
FDSharedPtr m_fhandle;
zsize_t m_size;
offset_t m_offset;
zsize_t m_size; // The size of the part (which starts at m_offset)
};

};
Expand Down
Loading
Loading