From 54a3e6c46781eb9afd694c203df0cecd27ca6393 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 28 Mar 2024 17:53:51 +0100 Subject: [PATCH] Rename `*Url*` symbols to `*Path*`. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dirents' "url" fields are not really URLs. They are u8 arrays which serve as the main "key". The specification says that this u8 array stores a UTF-8 encoded string, and we use this key as a path. The public API already uses path semantics, so rename all internal symbols from url to path. Fix #868 --- include/zim/zim.h | 2 +- src/_dirent.h | 16 +++++------ src/archive.cpp | 2 +- src/dirent.cpp | 24 ++++++++-------- src/dirent_accessor.cpp | 9 ++++-- src/dirent_accessor.h | 6 ++-- src/entry.cpp | 4 +-- src/fileheader.cpp | 8 +++--- src/fileheader.h | 8 +++--- src/fileimpl.cpp | 60 ++++++++++++++++++++-------------------- src/fileimpl.h | 8 +++--- src/narrowdown.h | 2 +- src/template.h | 2 +- src/writer/_dirent.h | 7 ++--- src/writer/creator.cpp | 4 +-- src/writer/creatordata.h | 2 +- test/creator.cpp | 18 ++++++------ test/dirent.cpp | 14 +++++----- test/dirent_lookup.cpp | 10 +++---- test/find.cpp | 6 ++-- test/header.cpp | 6 ++-- test/iterator.cpp | 2 +- 22 files changed, 112 insertions(+), 108 deletions(-) diff --git a/include/zim/zim.h b/include/zim/zim.h index 4b27dc27a..91ae3471f 100644 --- a/include/zim/zim.h +++ b/include/zim/zim.h @@ -95,7 +95,7 @@ namespace zim CHECKSUM, /** - * Checks that offsets in UrlPtrList are valid. + * Checks that offsets in PathPtrList are valid. */ DIRENT_PTRS, diff --git a/src/_dirent.h b/src/_dirent.h index 91642049a..0fdfbc599 100644 --- a/src/_dirent.h +++ b/src/_dirent.h @@ -46,7 +46,7 @@ namespace zim char ns; std::string title; - std::string url; + std::string path; std::string parameter; public: @@ -79,15 +79,15 @@ namespace zim entry_index_t getRedirectIndex() const { return isRedirect() ? 
redirectIndex : entry_index_t(0); } char getNamespace() const { return ns; } - const std::string& getTitle() const { return title.empty() ? url : title; } - const std::string& getUrl() const { return url; } - std::string getLongUrl() const; + const std::string &getTitle() const { return title.empty() ? path : title; } + const std::string &getPath() const { return path; } + std::string getLongPath() const; const std::string& getParameter() const { return parameter; } size_t getDirentSize() const { - size_t ret = (isRedirect() ? 12 : 16) + url.size() + parameter.size() + 2; - if (title != url) + size_t ret = (isRedirect() ? 12 : 16) + path.size() + parameter.size() + 2; + if (title != path) ret += title.size(); return ret; } @@ -97,10 +97,10 @@ namespace zim title = title_; } - void setUrl(char ns_, const std::string& url_) + void setPath(char ns_, const std::string &path_) { ns = ns_; - url = url_; + path = path_; } void setParameter(const std::string& parameter_) diff --git a/src/archive.cpp b/src/archive.cpp index b10f1bb91..fcb43ed97 100644 --- a/src/archive.cpp +++ b/src/archive.cpp @@ -136,7 +136,7 @@ namespace zim auto end = m_impl->getNamespaceEndOffset('M'); for (auto idx=start; idx!=end; idx++) { auto dirent = m_impl->getDirent(idx); - ret.push_back(dirent->getUrl()); + ret.push_back(dirent->getPath()); } return ret; } diff --git a/src/dirent.cpp b/src/dirent.cpp index 3316d36d2..89d3466c3 100644 --- a/src/dirent.cpp +++ b/src/dirent.cpp @@ -76,21 +76,21 @@ namespace zim dirent.setItem(mimeType, cluster_index_t(clusterNumber), blob_index_t(blobNumber)); } - std::string url; + std::string path; std::string title; std::string parameter; - log_debug("read url, title and parameters"); + log_debug("read path, title and parameters"); - size_type url_size = strnlen( + size_type path_size = strnlen( reader.current(), reader.left().v - extraLen ); - if (url_size >= reader.left().v) { + if (path_size >= reader.left().v) { return false; } - url = 
std::string(reader.current(), url_size); - reader.skip(zsize_t(url_size+1)); + path = std::string(reader.current(), path_size); + reader.skip(zsize_t(path_size + 1)); size_type title_size = strnlen( reader.current(), @@ -106,7 +106,7 @@ namespace zim return false; } parameter = std::string(reader.current(), extraLen); - dirent.setUrl(ns, url); + dirent.setPath(ns, path); dirent.setTitle(title); dirent.setParameter(parameter); return true; @@ -120,12 +120,12 @@ namespace zim } // We don't know the size of the dirent because it depends of the size of - // the title, url and extra parameters. + // the title, path and extra parameters. // This is a pity but we have no choice. // We cannot take a buffer of the size of the file, it would be really // inefficient. Let's do try, catch and retry while chosing a smart value // for the buffer size. Most dirent will be "Article" entry (header's size - // == 16) without extra parameters. Let's hope that url + title size will + // == 16) without extra parameters. Let's hope that path + title size will // be < 256 and if not try again with a bigger size. 
size_t bufferSize(std::min(size_type(256), mp_zimReader->size().v-offset.v)); @@ -139,12 +139,12 @@ namespace zim } } - std::string Dirent::getLongUrl() const + std::string Dirent::getLongPath() const { - log_trace("Dirent::getLongUrl()"); + log_trace("Dirent::getLongPath()"); log_debug("namespace=" << getNamespace() << " title=" << getTitle()); - return std::string(1, getNamespace()) + '/' + getUrl(); + return std::string(1, getNamespace()) + '/' + getPath(); } } diff --git a/src/dirent_accessor.cpp b/src/dirent_accessor.cpp index 73a8f4220..c19c41ed0 100644 --- a/src/dirent_accessor.cpp +++ b/src/dirent_accessor.cpp @@ -29,9 +29,12 @@ using namespace zim; -DirectDirentAccessor::DirectDirentAccessor(std::shared_ptr direntReader, std::unique_ptr urlPtrReader, entry_index_t direntCount) +DirectDirentAccessor::DirectDirentAccessor( + std::shared_ptr direntReader, + std::unique_ptr pathPtrReader, + entry_index_t direntCount) : mp_direntReader(direntReader), - mp_urlPtrReader(std::move(urlPtrReader)), + mp_pathPtrReader(std::move(pathPtrReader)), m_direntCount(direntCount), m_direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)), m_bufferDirentZone(256) @@ -60,7 +63,7 @@ offset_t DirectDirentAccessor::getOffset(entry_index_t idx) const if (idx >= m_direntCount) { throw std::out_of_range("entry index out of range"); } - offset_t offset(mp_urlPtrReader->read_uint(offset_t(sizeof(offset_type)*idx.v))); + offset_t offset(mp_pathPtrReader->read_uint(offset_t(sizeof(offset_type)*idx.v))); return offset; } diff --git a/src/dirent_accessor.h b/src/dirent_accessor.h index b9a3b3719..c11116d3e 100644 --- a/src/dirent_accessor.h +++ b/src/dirent_accessor.h @@ -45,7 +45,9 @@ class DirentReader; class DirectDirentAccessor { public: // functions - DirectDirentAccessor(std::shared_ptr direntReader, std::unique_ptr urlPtrReader, entry_index_t direntCount); + DirectDirentAccessor(std::shared_ptr direntReader, + std::unique_ptr pathPtrReader, + entry_index_t direntCount); offset_t 
getOffset(entry_index_t idx) const; std::shared_ptr getDirent(entry_index_t idx) const; @@ -56,7 +58,7 @@ class DirectDirentAccessor private: // data std::shared_ptr mp_direntReader; - std::unique_ptr mp_urlPtrReader; + std::unique_ptr mp_pathPtrReader; entry_index_t m_direntCount; mutable lru_cache> m_direntCache; diff --git a/src/entry.cpp b/src/entry.cpp index dbe60ea4c..966e29ea3 100644 --- a/src/entry.cpp +++ b/src/entry.cpp @@ -43,9 +43,9 @@ std::string Entry::getTitle() const std::string Entry::getPath() const { if (m_file->hasNewNamespaceScheme()) { - return m_dirent->getUrl(); + return m_dirent->getPath(); } else { - return m_dirent->getLongUrl(); + return m_dirent->getLongPath(); } } diff --git a/src/fileheader.cpp b/src/fileheader.cpp index c719d9e25..0a7110444 100644 --- a/src/fileheader.cpp +++ b/src/fileheader.cpp @@ -53,7 +53,7 @@ namespace zim std::copy(getUuid().data, getUuid().data + sizeof(Uuid), header + 8); toLittleEndian(getArticleCount(), header + 24); toLittleEndian(getClusterCount(), header + 28); - toLittleEndian(getUrlPtrPos(), header + 32); + toLittleEndian(getPathPtrPos(), header + 32); toLittleEndian(getTitleIdxPos(), header + 40); toLittleEndian(getClusterPtrPos(), header + 48); toLittleEndian(getMimeListPos(), header + 56); @@ -100,7 +100,7 @@ namespace zim setArticleCount(seqReader.read()); setClusterCount(seqReader.read()); - setUrlPtrPos(seqReader.read()); + setPathPtrPos(seqReader.read()); setTitleIdxPos(seqReader.read()); setClusterPtrPos(seqReader.read()); setMimeListPos(seqReader.read()); @@ -120,8 +120,8 @@ namespace zim throw ZimFileFormatError("mimelistPos must be 80."); } - if (urlPtrPos < mimeListPos) { - throw ZimFileFormatError("urlPtrPos must be > mimelistPos."); + if (pathPtrPos < mimeListPos) { + throw ZimFileFormatError("pathPtrPos must be > mimelistPos."); } if (titleIdxPos < mimeListPos) { throw ZimFileFormatError("titleIdxPos must be > mimelistPos."); diff --git a/src/fileheader.h b/src/fileheader.h index 
468800e65..15561e3e4 100644 --- a/src/fileheader.h +++ b/src/fileheader.h @@ -48,7 +48,7 @@ namespace zim Uuid uuid; entry_index_type articleCount; offset_type titleIdxPos; - offset_type urlPtrPos; + offset_type pathPtrPos; offset_type mimeListPos; cluster_index_type clusterCount; offset_type clusterPtrPos; @@ -62,7 +62,7 @@ namespace zim minorVersion(zimMinorVersion), articleCount(0), titleIdxPos(0), - urlPtrPos(0), + pathPtrPos(0), clusterCount(0), clusterPtrPos(0), mainPage(std::numeric_limits::max()), @@ -92,8 +92,8 @@ namespace zim offset_type getTitleIdxPos() const { return titleIdxPos; } void setTitleIdxPos(offset_type p) { titleIdxPos = p; } - offset_type getUrlPtrPos() const { return urlPtrPos; } - void setUrlPtrPos(offset_type p) { urlPtrPos = p; } + offset_type getPathPtrPos() const { return pathPtrPos; } + void setPathPtrPos(offset_type p) { pathPtrPos = p; } offset_type getMimeListPos() const { return mimeListPos; } void setMimeListPos(offset_type p) { mimeListPos = p; } diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 229da4b64..8b6e2c242 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -207,14 +207,13 @@ class Grouping throw ZimFileFormatError("error reading zim-file header."); } - auto urlPtrReader = sectionSubReader(*zimReader, - "Dirent pointer table", - offset_t(header.getUrlPtrPos()), - zsize_t(sizeof(offset_type)*header.getArticleCount())); - - mp_urlDirentAccessor.reset( - new DirectDirentAccessor(direntReader, std::move(urlPtrReader), entry_index_t(header.getArticleCount()))); + auto pathPtrReader = sectionSubReader(*zimReader, + "Dirent pointer table", + offset_t(header.getPathPtrPos()), + zsize_t(sizeof(offset_type)*header.getArticleCount())); + mp_pathDirentAccessor.reset( + new DirectDirentAccessor(direntReader, std::move(pathPtrReader), entry_index_t(header.getArticleCount()))); clusterOffsetReader = sectionSubReader(*zimReader, "Cluster pointer table", @@ -243,7 +242,7 @@ class Grouping return nullptr; } - auto dirent = 
mp_urlDirentAccessor->getDirent(result.second); + auto dirent = mp_pathDirentAccessor->getDirent(result.second); auto cluster = getCluster(dirent->getClusterNumber()); if (cluster->isCompressed()) { // This is a ZimFileFormatError. @@ -263,7 +262,7 @@ class Grouping size); return std::unique_ptr( - new IndirectDirentAccessor(mp_urlDirentAccessor, std::move(titleIndexReader), title_index_t(size.v/sizeof(entry_index_type)))); + new IndirectDirentAccessor(mp_pathDirentAccessor, std::move(titleIndexReader), title_index_t(size.v/sizeof(entry_index_type)))); } FileImpl::DirentLookup& FileImpl::direntLookup() const @@ -278,7 +277,7 @@ class Grouping std::lock_guard lock(m_direntLookupCreationMutex); if ( !m_direntLookup ) { const auto cacheSize = envValue("ZIM_DIRENTLOOKUPCACHE", DIRENT_LOOKUP_CACHE_SIZE); - m_direntLookup.reset(new DirentLookup(mp_urlDirentAccessor.get(), cacheSize)); + m_direntLookup.reset(new DirentLookup(mp_pathDirentAccessor.get(), cacheSize)); } } return *m_direntLookup; @@ -306,13 +305,13 @@ class Grouping offset_type FileImpl::getMimeListEndUpperLimit() const { - offset_type result(header.getUrlPtrPos()); + offset_type result(header.getPathPtrPos()); result = std::min(result, header.getTitleIdxPos()); result = std::min(result, header.getClusterPtrPos()); if ( getCountArticles().v != 0 ) { // assuming that dirents are placed in the zim file in the same // order as the corresponding entries in the dirent pointer table - result = std::min(result, mp_urlDirentAccessor->getOffset(entry_index_t(0)).v); + result = std::min(result, mp_pathDirentAccessor->getOffset(entry_index_t(0)).v); // assuming that clusters are placed in the zim file in the same // order as the corresponding entries in the cluster pointer table @@ -325,11 +324,12 @@ class Grouping { // read mime types // libzim write zims files two ways : - // - The old way by putting the urlPtrPos just after the mimetype. - // - The new way by putting the urlPtrPos at the end of the zim files. 
- // In this case, the cluster data are always at 1024 bytes offset and we know that - // mimetype list is before this. - // 1024 seems to be a good maximum size for the mimetype list, even for the "old" way. + // - The old way by putting the pathPtrPos just after the mimetype. + // - The new way by putting the pathPtrPos at the end of the zim files. + // In this case, the cluster data are always at 1024 bytes offset and we + // know that mimetype list is before this. + // 1024 seems to be a good maximum size for the mimetype list, even for the + // "old" way. const auto endMimeList = getMimeListEndUpperLimit(); if ( endMimeList <= header.getMimeListPos() ) { throw(ZimFileFormatError("Bad ZIM archive")); @@ -363,17 +363,17 @@ class Grouping } } - FileImpl::FindxResult FileImpl::findx(char ns, const std::string& url) + FileImpl::FindxResult FileImpl::findx(char ns, const std::string& path) { - return direntLookup().find(ns, url); + return direntLookup().find(ns, path); } - FileImpl::FindxResult FileImpl::findx(const std::string& url) + FileImpl::FindxResult FileImpl::findx(const std::string& longPath) { char ns; std::string path; try { - std::tie(ns, path) = parseLongPath(url); + std::tie(ns, path) = parseLongPath(longPath); return findx(ns, path); } catch (...) 
{} return { false, entry_index_t(0) }; @@ -392,7 +392,7 @@ class Grouping std::shared_ptr FileImpl::getDirent(entry_index_t idx) { - return mp_urlDirentAccessor->getDirent(idx); + return mp_pathDirentAccessor->getDirent(idx); } std::shared_ptr FileImpl::getDirentByTitle(title_index_t idx) @@ -418,7 +418,7 @@ class Grouping for(auto i = startIdx; i < endIdx; i++) { // This is the offset of the dirent in the zimFile - auto indexOffset = mp_urlDirentAccessor->getOffset(entry_index_t(i)); + auto indexOffset = mp_pathDirentAccessor->getOffset(entry_index_t(i)); // Get the mimeType of the dirent (offset 0) to know the type of the dirent uint16_t mimeType = zimReader->read_uint(indexOffset); if (mimeType==Dirent::redirectMimeType || mimeType==Dirent::linktargetMimeType || mimeType == Dirent::deletedMimeType) { @@ -643,7 +643,7 @@ class Grouping const zsize_t direntMinSize(11); for ( entry_index_type i = 0; i < articleCount; ++i ) { - const auto offset = mp_urlDirentAccessor->getOffset(entry_index_t(i)); + const auto offset = mp_pathDirentAccessor->getOffset(entry_index_t(i)); if ( offset < validDirentRangeStart || offset + direntMinSize > validDirentRangeEnd ) { std::cerr << "Invalid dirent pointer" << std::endl; @@ -658,12 +658,12 @@ class Grouping std::shared_ptr prevDirent; for ( entry_index_type i = 0; i < articleCount; ++i ) { - const std::shared_ptr dirent = mp_urlDirentAccessor->getDirent(entry_index_t(i)); - if ( prevDirent && !(prevDirent->getLongUrl() < dirent->getLongUrl()) ) + const std::shared_ptr dirent = mp_pathDirentAccessor->getDirent(entry_index_t(i)); + if ( prevDirent && !(prevDirent->getLongPath() < dirent->getLongPath()) ) { std::cerr << "Dirent table is not properly sorted:\n" - << " #" << i-1 << ": " << prevDirent->getLongUrl() << "\n" - << " #" << i << ": " << dirent->getLongUrl() << std::endl; + << " #" << i-1 << ": " << prevDirent->getLongPath() << "\n" + << " #" << i << ": " << dirent->getLongPath() << std::endl; return false; } prevDirent = 
dirent; @@ -738,9 +738,9 @@ bool checkTitleListing(const IndirectDirentAccessor& accessor, entry_index_type const entry_index_type articleCount = getCountArticles().v; for ( entry_index_type i = 0; i < articleCount; ++i ) { - const auto dirent = mp_urlDirentAccessor->getDirent(entry_index_t(i)); + const auto dirent = mp_pathDirentAccessor->getDirent(entry_index_t(i)); if ( dirent->isArticle() && dirent->getMimeType() >= mimeTypes.size() ) { - std::cerr << "Entry " << dirent->getLongUrl() + std::cerr << "Entry " << dirent->getLongPath() << " has invalid MIME-type value " << dirent->getMimeType() << "." << std::endl; return false; diff --git a/src/fileimpl.h b/src/fileimpl.h index b07d2c7f1..d7f2678c3 100644 --- a/src/fileimpl.h +++ b/src/fileimpl.h @@ -50,7 +50,7 @@ namespace zim std::unique_ptr clusterOffsetReader; - std::shared_ptr mp_urlDirentAccessor; + std::shared_ptr mp_pathDirentAccessor; std::unique_ptr mp_titleDirentAccessor; typedef std::shared_ptr ClusterHandle; @@ -72,7 +72,7 @@ namespace zim typedef DirectDirentAccessor DirentAccessorType; typedef entry_index_t index_t; static const std::string& getDirentKey(const Dirent& d) { - return d.getUrl(); + return d.getPath(); } }; @@ -119,8 +119,8 @@ namespace zim entry_index_t getIndexByClusterOrder(entry_index_t idx) const; entry_index_t getCountArticles() const { return entry_index_t(header.getArticleCount()); } - FindxResult findx(char ns, const std::string& url); - FindxResult findx(const std::string& url); + FindxResult findx(char ns, const std::string &path); + FindxResult findx(const std::string &path); FindxTitleResult findxByTitle(char ns, const std::string& title); std::shared_ptr getCluster(cluster_index_t idx); diff --git a/src/narrowdown.h b/src/narrowdown.h index 7486cd1cb..3be55ac0f 100644 --- a/src/narrowdown.h +++ b/src/narrowdown.h @@ -104,7 +104,7 @@ class NarrowDown void add(const std::string& key, index_type i, const std::string& nextKey) { // It would be better to have `key >= nextKey`, 
but pretty old zim file were not enforce to - // have unique url, just that entries were sorted by url, but two entries could have the same url. + // have unique path, just that entries were sorted by path, but two entries could have the same path. // It is somehow a bug and have been fixed then, but we still have to be tolerent here and accept that // two concecutive keys can be equal. if (key > nextKey) { diff --git a/src/template.h b/src/template.h index 116be10b2..e163251c3 100644 --- a/src/template.h +++ b/src/template.h @@ -32,7 +32,7 @@ namespace zim public: virtual void onData(const std::string& data) = 0; virtual void onToken(const std::string& token) = 0; - virtual void onLink(char ns, const std::string& url) = 0; + virtual void onLink(char ns, const std::string &path) = 0; virtual ~Event() = default; }; diff --git a/src/writer/_dirent.h b/src/writer/_dirent.h index 2b75af4db..42a5fd23b 100644 --- a/src/writer/_dirent.h +++ b/src/writer/_dirent.h @@ -173,7 +173,7 @@ namespace zim Dirent(const std::string& path, const std::string& title, const Dirent& target); // Creator for "temporary" dirent, used to search for dirent in container. - // We use them in url ordered container so we only need to set the namespace and the path. + // We use them in path ordered container so we only need to set the namespace and the path. // Other value are irrelevant. 
Dirent(NS ns, const std::string& path) : Dirent(ns, path, "", 0) @@ -244,12 +244,11 @@ namespace zim void write(int out_fd) const; - friend bool compareUrl(const Dirent* d1, const Dirent* d2); + friend bool comparePath(const Dirent* d1, const Dirent* d2); friend inline bool compareTitle(const Dirent* d1, const Dirent* d2); } PACKED; - - inline bool compareUrl(const Dirent* d1, const Dirent* d2) + inline bool comparePath(const Dirent* d1, const Dirent* d2) { return d1->getNamespace() < d2->getNamespace() || (d1->getNamespace() == d2->getNamespace() && d1->getPath() < d2->getPath()); diff --git a/src/writer/creator.cpp b/src/writer/creator.cpp index e28d60b2c..093b6d170 100644 --- a/src/writer/creator.cpp +++ b/src/writer/creator.cpp @@ -360,8 +360,8 @@ namespace zim dirent->write(out_fd); } - TINFO(" write url prt list"); - header.setUrlPtrPos(lseek(out_fd, 0, SEEK_CUR)); + TINFO(" write path prt list"); + header.setPathPtrPos(lseek(out_fd, 0, SEEK_CUR)); for (auto& dirent: data->dirents) { char tmp_buff[sizeof(offset_type)]; diff --git a/src/writer/creatordata.h b/src/writer/creatordata.h index 7ff8cbcd3..6783e48e1 100644 --- a/src/writer/creatordata.h +++ b/src/writer/creatordata.h @@ -44,7 +44,7 @@ namespace zim { struct UrlCompare { bool operator() (const Dirent* d1, const Dirent* d2) const { - return compareUrl(d1, d2); + return comparePath(d1, d2); } }; diff --git a/test/creator.cpp b/test/creator.cpp index dba1a6150..6c0ce1e93 100644 --- a/test/creator.cpp +++ b/test/creator.cpp @@ -67,7 +67,7 @@ struct Optional { void test_article_dirent( std::shared_ptr dirent, Optional ns, - Optional url, + Optional path, Optional title, Optional mimetype, Optional clusterNumber, @@ -75,7 +75,7 @@ void test_article_dirent( { ASSERT_TRUE(dirent->isArticle()); ns.check(dirent->getNamespace()); - url.check(dirent->getUrl()); + path.check(dirent->getPath()); title.check(dirent->getTitle()); mimetype.check(dirent->getMimeType()); clusterNumber.check(dirent->getClusterNumber()); 
@@ -85,13 +85,13 @@ void test_article_dirent( void test_redirect_dirent( std::shared_ptr dirent, Optional ns, - Optional url, + Optional path, Optional title, Optional target) { ASSERT_TRUE(dirent->isRedirect()); ns.check(dirent->getNamespace()); - url.check(dirent->getUrl()); + path.check(dirent->getPath()); title.check(dirent->getTitle()); target.check(dirent->getRedirectIndex()); } @@ -125,8 +125,8 @@ TEST(ZimCreator, createEmptyZim) ASSERT_EQ(header.getArticleCount(), 2u); // counter + titleListIndexesv0 //Read the only one item existing. - auto urlPtrReader = reader->sub_reader(offset_t(header.getUrlPtrPos()), zsize_t(sizeof(offset_t)*header.getArticleCount())); - DirectDirentAccessor direntAccessor(std::make_shared(reader), std::move(urlPtrReader), entry_index_t(header.getArticleCount())); + auto pathPtrReader = reader->sub_reader(offset_t(header.getPathPtrPos()), zsize_t(sizeof(offset_t)*header.getArticleCount())); + DirectDirentAccessor direntAccessor(std::make_shared(reader), std::move(pathPtrReader), entry_index_t(header.getArticleCount())); std::shared_ptr dirent; dirent = direntAccessor.getDirent(entry_index_t(0)); @@ -184,7 +184,7 @@ TEST(ZimCreator, createZim) auto item = std::make_shared("foo", "Foo", "FooContent"); EXPECT_NO_THROW(creator.addItem(item)); EXPECT_THROW(creator.addItem(item), std::runtime_error); - // Be sure that title order is not the same that url order + // Be sure that title order is not the same that path order item = std::make_shared("foo2", "AFoo", "Foo2Content"); creator.addItem(item); creator.addAlias("foo_bis", "The same Foo", "foo2"); @@ -223,8 +223,8 @@ TEST(ZimCreator, createZim) ASSERT_EQ(header.getArticleCount(), nb_entry); // Read dirent - auto urlPtrReader = reader->sub_reader(offset_t(header.getUrlPtrPos()), zsize_t(sizeof(offset_t)*header.getArticleCount())); - DirectDirentAccessor direntAccessor(std::make_shared(reader), std::move(urlPtrReader), entry_index_t(header.getArticleCount())); + auto pathPtrReader = 
reader->sub_reader(offset_t(header.getPathPtrPos()), zsize_t(sizeof(offset_t)*header.getArticleCount())); + DirectDirentAccessor direntAccessor(std::make_shared(reader), std::move(pathPtrReader), entry_index_t(header.getArticleCount())); std::shared_ptr dirent; entry_index_type direntIdx = 0; diff --git a/test/dirent.cpp b/test/dirent.cpp index 47479956d..ad43986f5 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -84,13 +84,13 @@ TEST(DirentTest, size) TEST(DirentTest, set_get_data_dirent) { zim::Dirent dirent; - dirent.setUrl('C', "Bar"); + dirent.setPath('C', "Bar"); dirent.setItem(17, zim::cluster_index_t(45), zim::blob_index_t(1234)); dirent.setVersion(54346); ASSERT_TRUE(!dirent.isRedirect()); ASSERT_EQ(dirent.getNamespace(), 'C'); - ASSERT_EQ(dirent.getUrl(), "Bar"); + ASSERT_EQ(dirent.getPath(), "Bar"); ASSERT_EQ(dirent.getTitle(), "Bar"); ASSERT_EQ(dirent.getParameter(), ""); ASSERT_EQ(dirent.getBlobNumber().v, 1234U); @@ -98,7 +98,7 @@ TEST(DirentTest, set_get_data_dirent) dirent.setTitle("Foo"); ASSERT_EQ(dirent.getNamespace(), 'C'); - ASSERT_EQ(dirent.getUrl(), "Bar"); + ASSERT_EQ(dirent.getPath(), "Bar"); ASSERT_EQ(dirent.getTitle(), "Foo"); ASSERT_EQ(dirent.getParameter(), ""); } @@ -151,7 +151,7 @@ TEST(DirentTest, read_write_article_dirent_unicode) ASSERT_TRUE(!dirent2.isRedirect()); ASSERT_EQ(dirent2.getNamespace(), 'C'); - ASSERT_EQ(dirent2.getUrl(), "L\xc3\xbcliang"); + ASSERT_EQ(dirent2.getPath(), "L\xc3\xbcliang"); ASSERT_EQ(dirent2.getTitle(), "L\xc3\xbcliang"); ASSERT_EQ(dirent2.getParameter(), ""); ASSERT_EQ(dirent2.getClusterNumber().v, 45U); @@ -177,18 +177,18 @@ TEST(DirentTest, read_write_redirect_dirent) ASSERT_TRUE(dirent2.isRedirect()); ASSERT_EQ(dirent2.getNamespace(), 'C'); - ASSERT_EQ(dirent2.getUrl(), "Bar"); + ASSERT_EQ(dirent2.getPath(), "Bar"); ASSERT_EQ(dirent2.getTitle(), "Bar"); ASSERT_EQ(dirent2.getRedirectIndex().v, 321U); } TEST(DirentTest, dirent_size) { - // case url set, title empty, extralen empty + // case path 
set, title empty, extralen empty zim::writer::Dirent dirent(NS::C, "Bar", "", 17); ASSERT_EQ(dirent.getDirentSize(), writenDirentSize(dirent)); - // case url set, title set, extralen empty + // case path set, title set, extralen empty zim::writer::Dirent dirent2(NS::C, "Bar", "Foo", 17); ASSERT_EQ(dirent2.getDirentSize(), writenDirentSize(dirent2)); } diff --git a/test/dirent_lookup.cpp b/test/dirent_lookup.cpp index c0b461e72..1f2f2efc1 100644 --- a/test/dirent_lookup.cpp +++ b/test/dirent_lookup.cpp @@ -30,7 +30,7 @@ namespace { -const std::vector> articleurl = { +const std::vector> articlepath = { {'A', "aa"}, //0 {'A', "aaaa"}, //1 {'A', "aaaaaa"}, //2 @@ -51,17 +51,17 @@ struct GetDirentMock typedef GetDirentMock DirentAccessorType; typedef zim::entry_index_t index_t; static const std::string& getDirentKey(const zim::Dirent& d) { - return d.getUrl(); + return d.getPath(); } zim::entry_index_t getDirentCount() const { - return zim::entry_index_t(articleurl.size()); + return zim::entry_index_t(articlepath.size()); } std::shared_ptr getDirent(zim::entry_index_t idx) const { - auto info = articleurl.at(idx.v); + auto info = articlepath.at(idx.v); auto ret = std::make_shared(); - ret->setUrl(info.first, info.second); + ret->setPath(info.first, info.second); return ret; } }; diff --git a/test/find.cpp b/test/find.cpp index ea8fd99c8..6947ca355 100644 --- a/test/find.cpp +++ b/test/find.cpp @@ -54,9 +54,9 @@ TEST(FindTests, NotFoundByPath) for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) { zim::Archive archive (testfile.path); - auto range0 = archive.findByPath("unkwonUrl"); - auto range1 = archive.findByPath("U/unkwonUrl"); - auto range2 = archive.findByPath("A/unkwonUrl"); + auto range0 = archive.findByPath("unkwonPath"); + auto range1 = archive.findByPath("U/unkwonPath"); + auto range2 = archive.findByPath("A/unkwonPath"); auto range3 = archive.findByPath("X"); auto range4 = archive.findByPath("X/"); ASSERT_EQ(range0.begin(), 
range0.end()); diff --git a/test/header.cpp b/test/header.cpp index 489edcda5..20caf1903 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -49,7 +49,7 @@ TEST(HeaderTest, read_write_header) zim::Fileheader header; header.setUuid("123456789\0abcd\nf"); header.setArticleCount(4711); - header.setUrlPtrPos(12345); + header.setPathPtrPos(12345); header.setTitleIdxPos(23456); header.setClusterCount(14); header.setClusterPtrPos(45678); @@ -59,7 +59,7 @@ TEST(HeaderTest, read_write_header) ASSERT_EQ(header.getUuid(), "123456789\0abcd\nf"); ASSERT_EQ(header.getArticleCount(), 4711U); - ASSERT_EQ(header.getUrlPtrPos(), 12345U); + ASSERT_EQ(header.getPathPtrPos(), 12345U); ASSERT_EQ(header.getTitleIdxPos(), 23456U); ASSERT_EQ(header.getClusterCount(), 14U); ASSERT_EQ(header.getClusterPtrPos(), 45678U); @@ -73,7 +73,7 @@ TEST(HeaderTest, read_write_header) ASSERT_EQ(header2.getUuid(), "123456789\0abcd\nf"); ASSERT_EQ(header2.getArticleCount(), 4711U); - ASSERT_EQ(header2.getUrlPtrPos(), 12345U); + ASSERT_EQ(header2.getPathPtrPos(), 12345U); ASSERT_EQ(header2.getTitleIdxPos(), 23456U); ASSERT_EQ(header2.getClusterCount(), 14U); ASSERT_EQ(header2.getClusterPtrPos(), 45678U); diff --git a/test/iterator.cpp b/test/iterator.cpp index 82064ec59..46c7925e7 100644 --- a/test/iterator.cpp +++ b/test/iterator.cpp @@ -123,7 +123,7 @@ TEST(IteratorTests, beginByTitle) } -// ByUrl +// ByPath TEST(IteratorTests, beginByPath) { std::vector expected = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};