Skip to content

Commit

Permalink
Merge pull request #860 from openzim/archive_fds
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Feb 27, 2024
2 parents 121e3af + 57fa6f1 commit 65bf7a8
Show file tree
Hide file tree
Showing 37 changed files with 161 additions and 99 deletions.
28 changes: 22 additions & 6 deletions include/zim/archive.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ namespace zim
/** Archive constructor.
*
* Construct an archive from a file descriptor.
* Fd is used only at Archive creation.
* Ownership of the fd is not taken and it must be closed by caller.
*
* Note: This function is not available under Windows.
*
Expand All @@ -108,16 +110,30 @@ namespace zim
*
* Construct an archive from a descriptor of a file with an embedded ZIM
* archive inside.
* Fd is used only at Archive creation.
* Ownership of the fd is not taken and it must be closed by caller.
*
* Note: This function is not available under Windows.
*
* @param fd The descriptor of a seekable file with a continuous segment
* representing a complete ZIM archive.
* @param offset The offset of the ZIM archive relative to the beginning
* of the file (rather than the current position associated with fd).
* @param size The size of the ZIM archive.
* @param fd An FdInput holding the fd (int), offset (offset_type) and size (size_type)
* that reference a contiguous segment representing a complete ZIM archive.
*/
Archive(int fd, offset_type offset, size_type size);
explicit Archive(FdInput fd);

/** Archive constructor.
*
* Construct an archive from several file descriptors.
* Each part may be embedded in a file.
* Fds are used only at Archive creation.
* Ownership of the fds is not taken and they must be closed by caller.
* The same fd (int) may appear in several FdInput entries when the parts belong to the same file.
*
* Note: This function is not available under Windows.
*
* @param fds A vector of FdInput, each holding the fd (int), offset (offset_type) and size (size_type)
* of one segment; together the segments represent a complete ZIM archive.
*/
explicit Archive(const std::vector<FdInput>& fds);
#endif

/** Return the filename of the zim file.
Expand Down
14 changes: 14 additions & 0 deletions include/zim/zim.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ namespace zim
// An offset.
typedef uint64_t offset_type;

/** Description of a data segment located inside an already opened file.
 *
 * Groups a file descriptor with the position and the length of the
 * segment of interest inside that file.
 */
struct FdInput {
  /// An open file descriptor.
  int fd;

  /// Absolute offset, inside `fd`, of the data this FdInput refers to.
  offset_type offset;

  /// Size (length) of the data this FdInput refers to.
  size_type size;

  /// Builds a FdInput from its three components, stored unchanged.
  FdInput(int fd, offset_type offset, size_type size)
    : fd{fd},
      offset{offset},
      size{size}
  {}
};

enum class Compression
{
None = 1,
Expand Down
2 changes: 1 addition & 1 deletion scripts/download_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import tarfile
import sys

TEST_DATA_VERSION = "0.3"
TEST_DATA_VERSION = "0.5"
ARCHIVE_URL_TEMPL = "https://github.com/openzim/zim-testing-suite/releases/download/v{version}/zim-testing-suite-{version}.tar.gz"

if __name__ == "__main__":
Expand Down
2 changes: 0 additions & 2 deletions src/_dirent.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@
#include <string>
#include <zim/zim.h>
#include <exception>
#include <memory>

#include "zim_types.h"
#include "debug.h"

namespace zim
{
Expand Down
8 changes: 6 additions & 2 deletions src/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,12 @@ namespace zim
: m_impl(new FileImpl(fd))
{ }

Archive::Archive(int fd, offset_type offset, size_type size)
: m_impl(new FileImpl(fd, offset_t(offset), zsize_t(size)))
// Construct an archive from a single segment (fd, offset, size) of an
// already opened file. The FdInput is handed as-is to a newly created
// FileImpl, which becomes the implementation object held in m_impl.
Archive::Archive(FdInput fd)
: m_impl(new FileImpl(fd))
{ }

// Construct an archive from several segments, each described by a FdInput
// (fd, offset, size). The whole vector is handed as-is to a newly created
// FileImpl, which becomes the implementation object held in m_impl.
Archive::Archive(const std::vector<FdInput>& fds)
: m_impl(new FileImpl(fds))
{ }
#endif

Expand Down
6 changes: 1 addition & 5 deletions src/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,10 @@
*/

#include "buffer.h"
#include "debug.h"

#include <sys/stat.h>
#include <cstdio>
#include <cstdlib>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <sstream>

#ifndef _WIN32
# include <sys/mman.h>
Expand Down
5 changes: 0 additions & 5 deletions src/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,10 @@
#ifndef ZIM_BUFFER_H_
#define ZIM_BUFFER_H_

#include <cstddef>
#include <exception>
#include <memory>
#include <iostream>

#include "config.h"
#include "zim_types.h"
#include "endian_tools.h"
#include "debug.h"
#include <zim/blob.h>

namespace zim {
Expand Down
1 change: 1 addition & 0 deletions src/bufferstreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define ZIM_BUFFERSTREAMER_H

#include "debug.h"
#include "endian_tools.h"

#include <string.h>

Expand Down
3 changes: 0 additions & 3 deletions src/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,12 @@
#include <zim/blob.h>
#include <zim/error.h>
#include "buffer_reader.h"
#include "endian_tools.h"
#include "bufferstreamer.h"
#include "decoderstreamreader.h"
#include "rawstreamreader.h"
#include <algorithm>
#include <stdlib.h>
#include <sstream>

#include "compression.h"
#include "log.h"

#include "config.h"
Expand Down
3 changes: 0 additions & 3 deletions src/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,11 @@
#include <zim/zim.h>
#include "buffer.h"
#include "zim_types.h"
#include "file_reader.h"
#include <iosfwd>
#include <vector>
#include <memory>
#include <mutex>

#include "zim_types.h"
#include "zim/error.h"

namespace zim
{
Expand Down
8 changes: 5 additions & 3 deletions src/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@
#ifndef _LIBZIM_COMPRESSION_
#define _LIBZIM_COMPRESSION_

#include <vector>
#include "string.h"
#include "reader.h"

#include "file_reader.h"
#include <zim/error.h>

#include "config.h"
Expand All @@ -36,6 +34,10 @@
#include "zim_types.h"
#include "constants.h"

#include <cstring>
#include <vector>
#include <memory>

//#define DEB(X) std::cerr << __func__ << " " << X << std::endl ;
#define DEB(X)

Expand Down
2 changes: 0 additions & 2 deletions src/dirent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#include <zim/error.h>
#include "buffer.h"
#include "bufferstreamer.h"
#include "endian_tools.h"
#include "log.h"
#include <algorithm>
#include <cstring>

log_define("zim.dirent")
Expand Down
1 change: 0 additions & 1 deletion src/dirent_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#define ZIM_DIRENT_ACCESSOR_H

#include "zim_types.h"
#include "debug.h"
#include "lrucache.h"

#include <memory>
Expand Down
1 change: 0 additions & 1 deletion src/dirent_lookup.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include <algorithm>
#include <map>
#include <mutex>
#include <vector>
#include <cassert>

namespace zim
Expand Down
4 changes: 2 additions & 2 deletions src/endian_tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
#ifndef ENDIAN_H
#define ENDIAN_H

#include <algorithm>
#include <iostream>
#include <zim/zim.h>

#include <cstddef>

namespace zim
{

Expand Down
2 changes: 0 additions & 2 deletions src/entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
#include <zim/entry.h>
#include <zim/error.h>
#include <zim/item.h>
#include "_dirent.h"
#include "fileimpl.h"
#include "file_part.h"
#include "log.h"

#include <sstream>
Expand Down
17 changes: 16 additions & 1 deletion src/file_compound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
*/

#include "file_compound.h"
#include "buffer.h"

#include <errno.h>
#include <string.h>
Expand Down Expand Up @@ -77,6 +76,22 @@ FileCompound::FileCompound(int fd):
{
addPart(new FilePart(fd));
}

// Build a compound made of a single part described by `fd`
// (descriptor, offset, size).
// The filename is left empty and _fsize starts at 0 before the part is added.
// NOTE(review): presumably addPart() accounts for the part size in _fsize and
// the compound becomes responsible for deleting the FilePart - confirm.
FileCompound::FileCompound(FdInput fd):
_filename(),
_fsize(0)
{
addPart(new FilePart(fd));
}

// Build a compound from a series of parts, one per FdInput, added in the
// order in which they appear in `fds`.
// The filename is left empty and _fsize starts at 0 before any part is added.
FileCompound::FileCompound(const std::vector<FdInput>& fds):
  _filename(),
  _fsize(0)
{
  for (const FdInput& partInput : fds) {
    // One FilePart per (fd, offset, size) description.
    addPart(new FilePart(partInput));
  }
}
#endif

FileCompound::~FileCompound() {
Expand Down
5 changes: 3 additions & 2 deletions src/file_compound.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
#include "zim_types.h"
#include "debug.h"
#include <map>
#include <memory>
#include <cstdio>
#include <vector>

namespace zim {

Expand Down Expand Up @@ -60,6 +59,8 @@ class FileCompound : private std::map<Range, FilePart*, less_range> {

#ifndef _WIN32
explicit FileCompound(int fd);
explicit FileCompound(FdInput fd);
explicit FileCompound(const std::vector<FdInput>& fds);
#endif

~FileCompound();
Expand Down
24 changes: 20 additions & 4 deletions src/file_part.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#define ZIM_FILE_PART_H_

#include <string>
#include <cstdio>
#include <memory>

#include <zim/zim.h>
Expand All @@ -32,21 +31,36 @@

namespace zim {

/** A part of a file.
*
* `FilePart` references a part (section) of a physical file.
* Most of the time, `FilePart` references the whole file (m_offset==0 and m_size==m_fhandle->getSize()),
* but in some situations it references only a part of the file.
* This is the case on Android, where the zim file is split into several parts that are stored
* without compression in a "resource" (zip) archive.
*/
class FilePart {
typedef DEFAULTFS FS;

public:
using FDSharedPtr = std::shared_ptr<FS::FD>;

public:
FilePart(const std::string& filename) :
explicit FilePart(const std::string& filename) :
m_filename(filename),
m_fhandle(std::make_shared<FS::FD>(FS::openFile(filename))),
m_offset(0),
m_size(m_fhandle->getSize()) {}

#ifndef _WIN32
FilePart(int fd) :
explicit FilePart(int fd) :
FilePart(getFilePathFromFD(fd)) {}

explicit FilePart(FdInput fdInput):
m_filename(getFilePathFromFD(fdInput.fd)),
m_fhandle(std::make_shared<FS::FD>(FS::openFile(m_filename))),
m_offset(fdInput.offset),
m_size(fdInput.size) {}
#endif

~FilePart() = default;
Expand All @@ -55,13 +69,15 @@ class FilePart {
const FDSharedPtr& shareable_fhandle() const { return m_fhandle; };

zsize_t size() const { return m_size; };
offset_t offset() const { return m_offset; }
bool fail() const { return !m_size; };
bool good() const { return bool(m_size); };

private:
const std::string m_filename;
FDSharedPtr m_fhandle;
zsize_t m_size;
offset_t m_offset;
zsize_t m_size; // The total size of the part (which starts at m_offset)
};

};
Expand Down
Loading

0 comments on commit 65bf7a8

Please sign in to comment.