Skip to content

Commit

Permalink
Rename *Url* symbols to *Path*.
Browse files Browse the repository at this point in the history
A dirent's "url" is not really a URL. It is a u8 array which serves
as the main "key".
The specification says that this u8 array stores a UTF-8 encoded string,
and we use this key as a path.

The public API already uses path semantics, so rename all internal symbols
from url to path.

Fix #868
  • Loading branch information
mgautierfr committed Mar 29, 2024
1 parent 96afb38 commit 54a3e6c
Show file tree
Hide file tree
Showing 22 changed files with 112 additions and 108 deletions.
2 changes: 1 addition & 1 deletion include/zim/zim.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ namespace zim
CHECKSUM,

/**
* Checks that offsets in UrlPtrList are valid.
* Checks that offsets in PathPtrList are valid.
*/
DIRENT_PTRS,

Expand Down
16 changes: 8 additions & 8 deletions src/_dirent.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ namespace zim

char ns;
std::string title;
std::string url;
std::string path;
std::string parameter;

public:
Expand Down Expand Up @@ -79,15 +79,15 @@ namespace zim
entry_index_t getRedirectIndex() const { return isRedirect() ? redirectIndex : entry_index_t(0); }

char getNamespace() const { return ns; }
const std::string& getTitle() const { return title.empty() ? url : title; }
const std::string& getUrl() const { return url; }
std::string getLongUrl() const;
const std::string &getTitle() const { return title.empty() ? path : title; }
const std::string &getPath() const { return path; }
std::string getLongPath() const;
const std::string& getParameter() const { return parameter; }

size_t getDirentSize() const
{
size_t ret = (isRedirect() ? 12 : 16) + url.size() + parameter.size() + 2;
if (title != url)
size_t ret = (isRedirect() ? 12 : 16) + path.size() + parameter.size() + 2;
if (title != path)
ret += title.size();
return ret;
}
Expand All @@ -97,10 +97,10 @@ namespace zim
title = title_;
}

void setUrl(char ns_, const std::string& url_)
void setPath(char ns_, const std::string &path_)
{
ns = ns_;
url = url_;
path = path_;
}

void setParameter(const std::string& parameter_)
Expand Down
2 changes: 1 addition & 1 deletion src/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ namespace zim
auto end = m_impl->getNamespaceEndOffset('M');
for (auto idx=start; idx!=end; idx++) {
auto dirent = m_impl->getDirent(idx);
ret.push_back(dirent->getUrl());
ret.push_back(dirent->getPath());
}
return ret;
}
Expand Down
24 changes: 12 additions & 12 deletions src/dirent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,21 @@ namespace zim
dirent.setItem(mimeType, cluster_index_t(clusterNumber), blob_index_t(blobNumber));
}

std::string url;
std::string path;
std::string title;
std::string parameter;

log_debug("read url, title and parameters");
log_debug("read path, title and parameters");

size_type url_size = strnlen(
size_type path_size = strnlen(
reader.current(),
reader.left().v - extraLen
);
if (url_size >= reader.left().v) {
if (path_size >= reader.left().v) {
return false;
}
url = std::string(reader.current(), url_size);
reader.skip(zsize_t(url_size+1));
path = std::string(reader.current(), path_size);
reader.skip(zsize_t(path_size + 1));

size_type title_size = strnlen(
reader.current(),
Expand All @@ -106,7 +106,7 @@ namespace zim
return false;
}
parameter = std::string(reader.current(), extraLen);
dirent.setUrl(ns, url);
dirent.setPath(ns, path);
dirent.setTitle(title);
dirent.setParameter(parameter);
return true;
Expand All @@ -120,12 +120,12 @@ namespace zim
}

// We don't know the size of the dirent because it depends on the size of
// the title, url and extra parameters.
// the title, path and extra parameters.
// This is a pity but we have no choice.
// We cannot take a buffer of the size of the file, it would be really
// inefficient. Let's try, catch and retry while choosing a smart value
// for the buffer size. Most dirents will be "Article" entries (header's size
// == 16) without extra parameters. Let's hope that url + title size will
// == 16) without extra parameters. Let's hope that path + title size will
// be < 256 and if not try again with a bigger size.

size_t bufferSize(std::min(size_type(256), mp_zimReader->size().v-offset.v));
Expand All @@ -139,12 +139,12 @@ namespace zim
}
}

std::string Dirent::getLongUrl() const
std::string Dirent::getLongPath() const
{
log_trace("Dirent::getLongUrl()");
log_trace("Dirent::getLongPath()");
log_debug("namespace=" << getNamespace() << " title=" << getTitle());

return std::string(1, getNamespace()) + '/' + getUrl();
return std::string(1, getNamespace()) + '/' + getPath();
}

}
9 changes: 6 additions & 3 deletions src/dirent_accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@

using namespace zim;

DirectDirentAccessor::DirectDirentAccessor(std::shared_ptr<DirentReader> direntReader, std::unique_ptr<const Reader> urlPtrReader, entry_index_t direntCount)
DirectDirentAccessor::DirectDirentAccessor(
std::shared_ptr<DirentReader> direntReader,
std::unique_ptr<const Reader> pathPtrReader,
entry_index_t direntCount)
: mp_direntReader(direntReader),
mp_urlPtrReader(std::move(urlPtrReader)),
mp_pathPtrReader(std::move(pathPtrReader)),
m_direntCount(direntCount),
m_direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),
m_bufferDirentZone(256)
Expand Down Expand Up @@ -60,7 +63,7 @@ offset_t DirectDirentAccessor::getOffset(entry_index_t idx) const
if (idx >= m_direntCount) {
throw std::out_of_range("entry index out of range");
}
offset_t offset(mp_urlPtrReader->read_uint<offset_type>(offset_t(sizeof(offset_type)*idx.v)));
offset_t offset(mp_pathPtrReader->read_uint<offset_type>(offset_t(sizeof(offset_type)*idx.v)));
return offset;
}

Expand Down
6 changes: 4 additions & 2 deletions src/dirent_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ class DirentReader;
class DirectDirentAccessor
{
public: // functions
DirectDirentAccessor(std::shared_ptr<DirentReader> direntReader, std::unique_ptr<const Reader> urlPtrReader, entry_index_t direntCount);
DirectDirentAccessor(std::shared_ptr<DirentReader> direntReader,
std::unique_ptr<const Reader> pathPtrReader,
entry_index_t direntCount);

offset_t getOffset(entry_index_t idx) const;
std::shared_ptr<const Dirent> getDirent(entry_index_t idx) const;
Expand All @@ -56,7 +58,7 @@ class DirectDirentAccessor

private: // data
std::shared_ptr<DirentReader> mp_direntReader;
std::unique_ptr<const Reader> mp_urlPtrReader;
std::unique_ptr<const Reader> mp_pathPtrReader;
entry_index_t m_direntCount;

mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>> m_direntCache;
Expand Down
4 changes: 2 additions & 2 deletions src/entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ std::string Entry::getTitle() const
std::string Entry::getPath() const
{
if (m_file->hasNewNamespaceScheme()) {
return m_dirent->getUrl();
return m_dirent->getPath();
} else {
return m_dirent->getLongUrl();
return m_dirent->getLongPath();
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/fileheader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ namespace zim
std::copy(getUuid().data, getUuid().data + sizeof(Uuid), header + 8);
toLittleEndian(getArticleCount(), header + 24);
toLittleEndian(getClusterCount(), header + 28);
toLittleEndian(getUrlPtrPos(), header + 32);
toLittleEndian(getPathPtrPos(), header + 32);
toLittleEndian(getTitleIdxPos(), header + 40);
toLittleEndian(getClusterPtrPos(), header + 48);
toLittleEndian(getMimeListPos(), header + 56);
Expand Down Expand Up @@ -100,7 +100,7 @@ namespace zim

setArticleCount(seqReader.read<uint32_t>());
setClusterCount(seqReader.read<uint32_t>());
setUrlPtrPos(seqReader.read<uint64_t>());
setPathPtrPos(seqReader.read<uint64_t>());
setTitleIdxPos(seqReader.read<uint64_t>());
setClusterPtrPos(seqReader.read<uint64_t>());
setMimeListPos(seqReader.read<uint64_t>());
Expand All @@ -120,8 +120,8 @@ namespace zim
throw ZimFileFormatError("mimelistPos must be 80.");
}

if (urlPtrPos < mimeListPos) {
throw ZimFileFormatError("urlPtrPos must be > mimelistPos.");
if (pathPtrPos < mimeListPos) {
throw ZimFileFormatError("pathPtrPos must be > mimelistPos.");
}
if (titleIdxPos < mimeListPos) {
throw ZimFileFormatError("titleIdxPos must be > mimelistPos.");
Expand Down
8 changes: 4 additions & 4 deletions src/fileheader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace zim
Uuid uuid;
entry_index_type articleCount;
offset_type titleIdxPos;
offset_type urlPtrPos;
offset_type pathPtrPos;
offset_type mimeListPos;
cluster_index_type clusterCount;
offset_type clusterPtrPos;
Expand All @@ -62,7 +62,7 @@ namespace zim
minorVersion(zimMinorVersion),
articleCount(0),
titleIdxPos(0),
urlPtrPos(0),
pathPtrPos(0),
clusterCount(0),
clusterPtrPos(0),
mainPage(std::numeric_limits<entry_index_type>::max()),
Expand Down Expand Up @@ -92,8 +92,8 @@ namespace zim
offset_type getTitleIdxPos() const { return titleIdxPos; }
void setTitleIdxPos(offset_type p) { titleIdxPos = p; }

offset_type getUrlPtrPos() const { return urlPtrPos; }
void setUrlPtrPos(offset_type p) { urlPtrPos = p; }
offset_type getPathPtrPos() const { return pathPtrPos; }
void setPathPtrPos(offset_type p) { pathPtrPos = p; }

offset_type getMimeListPos() const { return mimeListPos; }
void setMimeListPos(offset_type p) { mimeListPos = p; }
Expand Down
Loading

0 comments on commit 54a3e6c

Please sign in to comment.