diff --git a/Catalog/DdlCommandExecutor.cpp b/Catalog/DdlCommandExecutor.cpp index c9b2a0605b..febe3092a9 100644 --- a/Catalog/DdlCommandExecutor.cpp +++ b/Catalog/DdlCommandExecutor.cpp @@ -1490,14 +1490,8 @@ ExecutionResult ShowDiskCacheUsageCommand::execute() { auto [td, td_with_lock] = get_table_descriptor_with_lock(*cat_ptr, table_name, false); - const auto mgr = dynamic_cast( - disk_cache->getGlobalFileMgr()->findFileMgr(cat_ptr->getDatabaseId(), - td->tableId)); - - // NOTE: This size does not include datawrapper metadata that is on disk. - // If a mgr does not exist it means a cache is not enabled/created for the given - // table. - auto table_cache_size = mgr ? mgr->getTotalFileSize() : 0; + auto table_cache_size = + disk_cache->getSpaceReservedByTable(cat_ptr->getDatabaseId(), td->tableId); // logical_values -> table data logical_values.emplace_back(RelLogicalValues::RowValues{}); diff --git a/DataMgr/AbstractBufferMgr.h b/DataMgr/AbstractBufferMgr.h index 9f1358c2af..f9b90f3d42 100644 --- a/DataMgr/AbstractBufferMgr.h +++ b/DataMgr/AbstractBufferMgr.h @@ -41,9 +41,10 @@ } \ } -DEFINE_ENUM_WITH_STRING_CONVERSIONS(MgrType, - (FILE_MGR)(CPU_MGR)(GPU_MGR)(GLOBAL_FILE_MGR)( - PERSISTENT_STORAGE_MGR)(FOREIGN_STORAGE_MGR)) +DEFINE_ENUM_WITH_STRING_CONVERSIONS( + MgrType, + (CACHING_FILE_MGR)(FILE_MGR)(CPU_MGR)(GPU_MGR)(GLOBAL_FILE_MGR)( + PERSISTENT_STORAGE_MGR)(FOREIGN_STORAGE_MGR)) namespace Data_Namespace { diff --git a/DataMgr/CMakeLists.txt b/DataMgr/CMakeLists.txt index 34990e6831..1d7c5857f4 100644 --- a/DataMgr/CMakeLists.txt +++ b/DataMgr/CMakeLists.txt @@ -8,6 +8,7 @@ set(datamgr_source_files DataMgr.cpp Encoder.cpp StringNoneEncoder.cpp + FileMgr/CachingFileMgr.cpp FileMgr/GlobalFileMgr.cpp FileMgr/FileMgr.cpp FileMgr/FileBuffer.cpp diff --git a/DataMgr/FileMgr/CachingFileMgr.cpp b/DataMgr/FileMgr/CachingFileMgr.cpp new file mode 100644 index 0000000000..c429b02239 --- /dev/null +++ b/DataMgr/FileMgr/CachingFileMgr.cpp @@ -0,0 +1,159 @@ +/* + * Copyright 2021 Omnisci, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CachingFileMgr.h + */ + +#include "DataMgr/FileMgr/CachingFileMgr.h" +#include +#include "Shared/File.h" + +constexpr char EPOCH_FILENAME[] = "epoch_metadata"; + +namespace File_Namespace { +namespace bf = boost::filesystem; + +CachingFileMgr::CachingFileMgr(const std::string& base_path, + const size_t num_reader_threads) { + fileMgrBasePath_ = base_path; + maxRollbackEpochs_ = 0; + defaultPageSize_ = DEFAULT_PAGE_SIZE; + nextFileId_ = 0; + init(num_reader_threads, -1); +} + +bool CachingFileMgr::coreInit() { + mapd_unique_lock write_lock(files_rw_mutex_); + bf::path path(fileMgrBasePath_); + if (bf::exists(path)) { + if (!bf::is_directory(path)) { + LOG(FATAL) << "Specified path '" << fileMgrBasePath_ + << "' for disk cache is not a directory."; + } + migrateToLatestFileMgrVersion(); + openAndReadEpochFile(EPOCH_FILENAME); + return true; + } + LOG(FATAL) << "Cache path: " << fileMgrBasePath_ << "does not exit."; + return false; +} + +void CachingFileMgr::clearForTable(int db_id, int tb_id) { + { + mapd_unique_lock write_lock(chunkIndexMutex_); + for (auto it = chunkIndex_.begin(); it != chunkIndex_.end();) { + auto& [key, buffer] = *it; + if (in_same_table(key, {db_id, tb_id})) { + buffer->freePages(); + delete buffer; + it = chunkIndex_.erase(it); + } else { + ++it; + } + } + auto dir_name = getFileMgrBasePath() + "/" + get_dir_name_for_table(db_id, tb_id); + if (bf::exists(dir_name)) { + bf::remove_all(dir_name); + } + } + checkpoint(); + // TODO(Misiu): Implement background file removal. + // Currently the renameForDelete idiom will only work in the mapd/ directory as the + // cleanup thread is targetted there. If we want it to work for arbitrary directories + // we will need to add a new dir to the thread, or start a second thread. + // File_Namespace::renameForDelete(get_dir_name_for_table(db_id, tb_id)); +} + +std::string CachingFileMgr::getOrAddTableDir(int db_id, int tb_id) { + mapd_unique_lock write_lock(files_rw_mutex_); + std::string table_dir = + getFileMgrBasePath() + "/" + get_dir_name_for_table(db_id, tb_id); + if (!bf::exists(table_dir)) { + bf::create_directory(table_dir); + } else { + if (!bf::is_directory(table_dir)) { + LOG(FATAL) << "Specified path '" << table_dir + << "' for cache table data is not a directory."; + } + } + return table_dir; +} + +void CachingFileMgr::closeRemovePhysical() { + mapd_unique_lock write_lock(files_rw_mutex_); + closePhysicalUnlocked(); + auto dir_name = getFileMgrBasePath(); + if (bf::exists(dir_name)) { + bf::remove_all(dir_name); + } + + // TODO(Misiu): Implement background file removal. + // Currently the renameForDelete idiom will only work in the mapd/ directory as the + // cleanup thread is targetted there. If we want it to work for arbitrary directories + // we will need to add a new dir to the thread, or start a second thread. + // File_Namespace::renameForDelete(getFileMgrBasePath()); +} + +uint64_t CachingFileMgr::getChunkSpaceReservedByTable(int db_id, int tb_id) { + mapd_shared_lock read_lock(chunkIndexMutex_); + uint64_t space_used = 0; + for (const auto& [key, buffer] : chunkIndex_) { + if (key[CHUNK_KEY_DB_IDX] == db_id && key[CHUNK_KEY_TABLE_IDX] == tb_id) { + space_used += buffer->reservedSize(); + } + } + return space_used; +} + +uint64_t CachingFileMgr::getMetadataSpaceReservedByTable(int db_id, int tb_id) { + mapd_shared_lock read_lock(chunkIndexMutex_); + uint64_t space_used = 0; + for (const auto& [key, buffer] : chunkIndex_) { + if (key[CHUNK_KEY_DB_IDX] == db_id && key[CHUNK_KEY_TABLE_IDX] == tb_id) { + space_used += (buffer->numMetadataPages() * METADATA_PAGE_SIZE); + } + } + return space_used; +} + +uint64_t CachingFileMgr::getWrapperSpaceReservedByTable(int db_id, int tb_id) { + mapd_shared_lock read_lock(files_rw_mutex_); + uint64_t space_used = 0; + std::string table_dir = + getFileMgrBasePath() + "/" + get_dir_name_for_table(db_id, tb_id); + if (bf::exists(table_dir)) { + for (const auto& file : bf::recursive_directory_iterator(table_dir)) { + if (bf::is_regular_file(file.path())) { + space_used += bf::file_size(file.path()); + } + } + } + return space_used; +} + +uint64_t CachingFileMgr::getSpaceReservedByTable(int db_id, int tb_id) { + auto chunkSpace = getChunkSpaceReservedByTable(db_id, tb_id); + auto metaSpace = getMetadataSpaceReservedByTable(db_id, tb_id); + auto wrapperSpace = getWrapperSpaceReservedByTable(db_id, tb_id); + return chunkSpace + metaSpace + wrapperSpace; +} + +std::string CachingFileMgr::describeSelf() { + return "cache"; +} + +} // namespace File_Namespace diff --git a/DataMgr/FileMgr/CachingFileMgr.h b/DataMgr/FileMgr/CachingFileMgr.h new file mode 100644 index 0000000000..c711d79c4e --- /dev/null +++ b/DataMgr/FileMgr/CachingFileMgr.h @@ -0,0 +1,108 @@ +/* + * Copyright 2021 Omnisci, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CachingFileMgr.h + * + * This file details an extension of the FileMgr that can contain pages from multiple + * tables (CachingFileMgr). + */ + +#pragma once + +#include "FileMgr.h" + +namespace File_Namespace { + +inline std::string get_dir_name_for_table(int db_id, int tb_id) { + std::stringstream file_name; + file_name << "table_" << db_id << "_" << tb_id << "/"; + return file_name.str(); +} + +/** + * @class CachingFileMgr + * @brief A FileMgr capable of limiting it's size and storing data from multiple tables + * in a shared directory. For any table that supports DiskCaching, the CachingFileMgr + * must contain either metadata for all table chunks, or for none (the cache is either has + * no knowledge of that table, or has complete knowledge of that table). Any data chunk + * within a table may or may not be contained within the cache. + */ +class CachingFileMgr : public FileMgr { + public: + CachingFileMgr(const std::string& base_path, const size_t num_reader_threads = 0); + ~CachingFileMgr() {} + /** + * @brief Determines file path, and if exists, runs file migration and opens and reads + * epoch file + * @return a boolean representing whether the directory path existed + */ + bool coreInit() override; + + // Simple getters. + inline MgrType getMgrType() override { return CACHING_FILE_MGR; }; + inline std::string getStringMgrType() override { return ToString(CACHING_FILE_MGR); } + inline size_t getDefaultPageSize() { return defaultPageSize_; } + + // TODO(Misiu): These are unimplemented for now, but will become necessary when we want + // to limit the size. + inline size_t getMaxSize() override { + UNREACHABLE() << "Unimplemented"; + return 0; + } + inline size_t getInUseSize() override { + UNREACHABLE() << "Unimplemented"; + return 0; + } + inline size_t getAllocated() override { + UNREACHABLE() << "Unimplemented"; + return 0; + } + inline bool isAllocationCapped() override { return false; } + + /** + * @brief Removes all data related to the given table (pages and subdirectories). + */ + void clearForTable(int db_id, int tb_id); + + /** + * @brief Returns (and optionally creates) a subdirectory for table-specific persistent + * data (e.g. serialized foreign data warppers). + */ + std::string getOrAddTableDir(int db_id, int tb_id); + + /** + * @brief Query to determine if the contained pages will have their database and table + * ids overriden by the filemgr key (FileMgr does this). + */ + inline bool hasFileMgrKey() const override { return false; } + /** + * @breif Closes files and removes the caching directory. + */ + void closeRemovePhysical() override; + + /** + * Set of functions to determine how much space is reserved in a table by type. + */ + uint64_t getChunkSpaceReservedByTable(int db_id, int tb_id); + uint64_t getMetadataSpaceReservedByTable(int db_id, int tb_id); + uint64_t getWrapperSpaceReservedByTable(int db_id, int tb_id); + uint64_t getSpaceReservedByTable(int db_id, int tb_id); + + std::string describeSelf() override; +}; + +} // namespace File_Namespace diff --git a/DataMgr/FileMgr/FileBuffer.cpp b/DataMgr/FileMgr/FileBuffer.cpp index fee12404a9..8cb46dac57 100644 --- a/DataMgr/FileMgr/FileBuffer.cpp +++ b/DataMgr/FileMgr/FileBuffer.cpp @@ -622,4 +622,12 @@ void FileBuffer::write(int8_t* src, CHECK(bytesLeft == 0); } +std::string FileBuffer::dump() const { + std::stringstream ss; + ss << "chunk_key = " << show_chunk(chunkKey_) << "\n"; + ss << "has_encoder = " << (hasEncoder() ? "true\n" : "false\n"); + ss << "size_ = " << size_ << "\n"; + return ss.str(); +} + } // namespace File_Namespace diff --git a/DataMgr/FileMgr/FileBuffer.h b/DataMgr/FileMgr/FileBuffer.h index d5a9300c1d..8a4c6a1576 100644 --- a/DataMgr/FileMgr/FileBuffer.h +++ b/DataMgr/FileMgr/FileBuffer.h @@ -38,7 +38,8 @@ using namespace Data_Namespace; namespace File_Namespace { -class FileMgr; // forward declaration +// forward declarations +class FileMgr; /** * @class FileBuffer @@ -148,6 +149,8 @@ class FileBuffer : public AbstractBuffer { inline size_t numMetadataPages() const { return metadataPages_.pageVersions.size(); }; + std::string dump() const; + private: // FileBuffer(const FileBuffer&); // private copy constructor // FileBuffer& operator=(const FileBuffer&); // private overloaded assignment operator diff --git a/DataMgr/FileMgr/FileInfo.cpp b/DataMgr/FileMgr/FileInfo.cpp index 71dd336f38..89d16fbea7 100644 --- a/DataMgr/FileMgr/FileInfo.cpp +++ b/DataMgr/FileMgr/FileInfo.cpp @@ -105,10 +105,14 @@ void FileInfo::openExistingFile(std::vector& headerVec, // We don't want to read headerSize in our header - so start // reading 4 bytes past it - // always derive dbid/tbid from FileMgr + // Derive dbid/tbid if from FileMgr ChunkKey chunkKey(&ints[1], &ints[1 + numHeaderElems - 2]); - chunkKey[0] = fileMgr->get_fileMgrKey().first; - chunkKey[1] = fileMgr->get_fileMgrKey().second; + if (fileMgr->hasFileMgrKey()) { + // A regular FileMgr is locked to one table, but a CachingFileMgr can contain + // chunks from different tables + chunkKey[0] = fileMgr->get_fileMgrKey().first; + chunkKey[1] = fileMgr->get_fileMgrKey().second; + } // recover page in case a crash failed deletion of this page if (!g_read_only) { if (ints[1] == DELETE_CONTINGENT || ints[1] == ROLLOFF_CONTINGENT) { diff --git a/DataMgr/FileMgr/FileMgr.cpp b/DataMgr/FileMgr/FileMgr.cpp index 80c0b3f458..8ab9609399 100644 --- a/DataMgr/FileMgr/FileMgr.cpp +++ b/DataMgr/FileMgr/FileMgr.cpp @@ -50,18 +50,6 @@ using namespace std; namespace File_Namespace { -bool headerCompare(const HeaderInfo& firstElem, const HeaderInfo& secondElem) { - // HeaderInfo.first is a pair of Chunk key with a vector containing - // pageId and version - if (firstElem.chunkKey != secondElem.chunkKey) { - return firstElem.chunkKey < secondElem.chunkKey; - } - if (firstElem.pageId != secondElem.pageId) { - return firstElem.pageId < secondElem.pageId; - } - return firstElem.versionEpoch < secondElem.versionEpoch; -} - FileMgr::FileMgr(const int32_t deviceId, GlobalFileMgr* gfm, const std::pair fileMgrKey, @@ -70,11 +58,11 @@ FileMgr::FileMgr(const int32_t deviceId, const int32_t epoch, const size_t defaultPageSize) : AbstractBufferMgr(deviceId) - , gfm_(gfm) - , fileMgrKey_(fileMgrKey) , maxRollbackEpochs_(maxRollbackEpochs) , defaultPageSize_(defaultPageSize) - , nextFileId_(0) { + , nextFileId_(0) + , gfm_(gfm) + , fileMgrKey_(fileMgrKey) { init(num_reader_threads, epoch); } @@ -85,11 +73,11 @@ FileMgr::FileMgr(const int32_t deviceId, const size_t defaultPageSize, const bool runCoreInit) : AbstractBufferMgr(deviceId) - , gfm_(gfm) - , fileMgrKey_(fileMgrKey) , maxRollbackEpochs_(-1) , defaultPageSize_(defaultPageSize) - , nextFileId_(0) { + , nextFileId_(0) + , gfm_(gfm) + , fileMgrKey_(fileMgrKey) { const std::string fileMgrDirPrefix("table"); const std::string FileMgrDirDelim("_"); fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim + @@ -104,12 +92,12 @@ FileMgr::FileMgr(const int32_t deviceId, FileMgr::FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath) : AbstractBufferMgr(0) - , gfm_(gfm) - , fileMgrKey_(0, 0) , maxRollbackEpochs_(-1) , fileMgrBasePath_(basePath) , defaultPageSize_(defaultPageSize) - , nextFileId_(0) { + , nextFileId_(0) + , gfm_(gfm) + , fileMgrKey_(0, 0) { init(basePath, -1); } @@ -118,6 +106,9 @@ FileMgr::FileMgr(const int epoch) : AbstractBufferMgr(-1) { epoch_.ceiling(epoch); } +// Used to initialize CachingFileMgr. +FileMgr::FileMgr() : AbstractBufferMgr(0) {} + FileMgr::~FileMgr() { // free memory used by FileInfo objects for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) { @@ -283,7 +274,7 @@ void FileMgr::init(const size_t num_reader_threads, const int32_t epochOverride) * and in order of increasing PageId * - Version Epoch */ auto& header_vec = open_files_result.header_infos; - std::sort(header_vec.begin(), header_vec.end(), headerCompare); + std::sort(header_vec.begin(), header_vec.end()); /* Goal of next section is to find sequences in the * sorted headerVec of the same ChunkId, which we @@ -339,8 +330,8 @@ void FileMgr::init(const size_t num_reader_threads, const int32_t epochOverride) /* define number of reader threads to be used */ size_t num_hardware_based_threads = - std::thread::hardware_concurrency(); // # of threads is based on # of cores on the - // host + std::thread::hardware_concurrency(); // # of threads is based on # of cores on + // the host if (num_reader_threads == 0) { // # of threads has not been defined by user num_reader_threads_ = num_hardware_based_threads; } else { @@ -489,7 +480,7 @@ void FileMgr::init(const std::string& dataPathToConvertFrom, * and in order of increasing PageId * - Version Epoch */ - std::sort(headerVec.begin(), headerVec.end(), headerCompare); + std::sort(headerVec.begin(), headerVec.end()); /* Goal of next section is to find sequences in the * sorted headerVec of the same ChunkId, which we @@ -568,9 +559,8 @@ void FileMgr::init(const std::string& dataPathToConvertFrom, isFullyInitted_ = true; } -void FileMgr::closeRemovePhysical() { - for (auto file_info_entry : files_) { - auto file_info = file_info_entry.second; +void FileMgr::closePhysicalUnlocked() { + for (auto& [idx, file_info] : files_) { if (file_info->f) { close(file_info->f); file_info->f = nullptr; @@ -586,7 +576,11 @@ void FileMgr::closeRemovePhysical() { close(epochFile_); epochFile_ = nullptr; } +} +void FileMgr::closeRemovePhysical() { + mapd_unique_lock write_lock(files_rw_mutex_); + closePhysicalUnlocked(); /* rename for later deletion the directory containing table related data */ File_Namespace::renameForDelete(getFileMgrBasePath()); } @@ -702,9 +696,14 @@ void FileMgr::rollOffOldData(const int32_t epochCeiling, const bool shouldCheckp } } +std::string FileMgr::describeSelf() { + stringstream ss; + ss << "table (" << fileMgrKey_.first << ", " << fileMgrKey_.second << ")"; + return ss.str(); +} + void FileMgr::checkpoint() { - VLOG(2) << "Checkpointing table (" << fileMgrKey_.first << ", " << fileMgrKey_.second - << " epoch: " << epoch(); + VLOG(2) << "Checkpointing " << describeSelf() << " epoch: " << epoch(); mapd_unique_lock chunkIndexWriteLock(chunkIndexMutex_); for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) { if (chunkIt->second->isDirty()) { @@ -1202,9 +1201,8 @@ void FileMgr::migrateToLatestFileMgrVersion() { void FileMgr::setEpoch(const int32_t newEpoch) { if (newEpoch < epoch_.floor()) { std::stringstream error_message; - error_message << "Cannot set epoch for table (" << fileMgrKey_.first << "," - << fileMgrKey_.second << ") lower than the minimum rollback epoch (" - << epoch_.floor() << ")."; + error_message << "Cannot set epoch for " << describeSelf() + << " lower than the minimum rollback epoch (" << epoch_.floor() << ")."; throw std::runtime_error(error_message.str()); } epoch_.ceiling(newEpoch); @@ -1220,20 +1218,6 @@ void FileMgr::removeTableRelatedDS(const int32_t db_id, const int32_t table_id) UNREACHABLE(); } -uint64_t FileMgr::getTotalFileSize() const { - uint64_t total_size = 0; - for (const auto& file_info_entry : files_) { - total_size += file_info_entry.second->size(); - } - if (epochFile_) { - total_size += fileSize(epochFile_); - } - if (DBMetaFile_) { - total_size += fileSize(DBMetaFile_); - } - return total_size; -} - /** * Resumes an interrupted file compaction process. This method would * normally only be called when re-initializing the file manager diff --git a/DataMgr/FileMgr/FileMgr.h b/DataMgr/FileMgr/FileMgr.h index c6585e98b1..b92e1c62b7 100644 --- a/DataMgr/FileMgr/FileMgr.h +++ b/DataMgr/FileMgr/FileMgr.h @@ -163,7 +163,7 @@ class FileMgr : public AbstractBufferMgr { // implements FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath); /// Destructor - ~FileMgr() override; + virtual ~FileMgr() override; StorageStats getStorageStats(); /// Creates a chunk with the specified key and page size. @@ -215,7 +215,6 @@ class FileMgr : public AbstractBufferMgr { // implements inline FileInfo* getFileInfoForFileId(const int32_t fileId) { return files_[fileId]; } - uint64_t getTotalFileSize() const; FileMetadata getMetadataForFile( const boost::filesystem::directory_iterator& fileIterator); @@ -228,7 +227,7 @@ class FileMgr : public AbstractBufferMgr { // implements * @return a boolean representing whether the directory path existed */ - bool coreInit(); + virtual bool coreInit(); void copyPage(Page& srcPage, FileMgr* destFileMgr, @@ -328,12 +327,13 @@ class FileMgr : public AbstractBufferMgr { // implements int32_t getDBVersion() const; bool getDBConvert() const; void createTopLevelMetadata(); // create metadata shared by all tables of all DBs - std::string getFileMgrBasePath() const { return fileMgrBasePath_; } - void closeRemovePhysical(); + inline std::string getFileMgrBasePath() const { return fileMgrBasePath_; } + virtual void closeRemovePhysical(); void removeTableRelatedDS(const int32_t db_id, const int32_t table_id) override; void free_page(std::pair&& page); + inline virtual bool hasFileMgrKey() const { return true; } const std::pair get_fileMgrKey() const { return fileMgrKey_; } @@ -365,12 +365,9 @@ class FileMgr : public AbstractBufferMgr { // implements static void setNumPagesPerMetadataFile(size_t num_pages); protected: - // For testing purposes only - FileMgr(const int epoch); + // Used to initialize CachingFileMgr. + FileMgr(); - private: - GlobalFileMgr* gfm_; /// Global FileMgr - std::pair fileMgrKey_; int32_t maxRollbackEpochs_; std::string fileMgrBasePath_; /// The OS file system path containing files related to /// this FileMgr @@ -466,6 +463,18 @@ class FileMgr : public AbstractBufferMgr { // implements void deleteEmptyFiles(); void resumeFileCompaction(const std::string& status_file_name); std::vector readPageMappingsFromStatusFile(); + + // For testing purposes only + FileMgr(const int epoch); + + // Used to describe the manager in logging and error messages. + virtual std::string describeSelf(); + + void closePhysicalUnlocked(); + + private: + GlobalFileMgr* gfm_; /// Global FileMgr + std::pair fileMgrKey_; }; } // namespace File_Namespace diff --git a/DataMgr/FileMgr/Page.h b/DataMgr/FileMgr/Page.h index 37e672273e..1cfcbe3004 100644 --- a/DataMgr/FileMgr/Page.h +++ b/DataMgr/FileMgr/Page.h @@ -151,6 +151,16 @@ struct HeaderInfo { const int32_t versionEpoch, const Page& page) : chunkKey(chunkKey), pageId(pageId), versionEpoch(versionEpoch), page(page) {} + + bool operator<(const HeaderInfo& other) { + if (chunkKey != other.chunkKey) { + return chunkKey < other.chunkKey; + } + if (pageId != other.pageId) { + return pageId < other.pageId; + } + return versionEpoch < other.versionEpoch; + } }; } // namespace File_Namespace diff --git a/DataMgr/ForeignStorage/CachingForeignStorageMgr.cpp b/DataMgr/ForeignStorage/CachingForeignStorageMgr.cpp index 250575717e..8217570a94 100644 --- a/DataMgr/ForeignStorage/CachingForeignStorageMgr.cpp +++ b/DataMgr/ForeignStorage/CachingForeignStorageMgr.cpp @@ -15,7 +15,6 @@ */ #include "CachingForeignStorageMgr.h" - #include "Catalog/ForeignTable.h" #include "CsvDataWrapper.h" #include "ForeignStorageException.h" @@ -26,7 +25,6 @@ namespace foreign_storage { namespace { constexpr int64_t MAX_REFRESH_TIME_IN_SECONDS = 60 * 60; -const std::string wrapper_file_name = "/wrapper_metadata.json"; } // namespace CachingForeignStorageMgr::CachingForeignStorageMgr(ForeignStorageCache* cache) @@ -106,16 +104,18 @@ void CachingForeignStorageMgr::fetchBuffer(const ChunkKey& chunk_key, void CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix( ChunkMetadataVector& chunk_metadata, const ChunkKey& keyPrefix) { + auto [db_id, tb_id] = get_table_prefix(keyPrefix); ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(chunk_metadata, keyPrefix); getDataWrapper(keyPrefix)->serializeDataWrapperInternals( - disk_cache_->getCacheDirectoryForTablePrefix(keyPrefix) + wrapper_file_name); + disk_cache_->getCacheDirectoryForTable(db_id, tb_id) + wrapper_file_name); } void CachingForeignStorageMgr::recoverDataWrapperFromDisk( const ChunkKey& table_key, const ChunkMetadataVector& chunk_metadata) { + auto [db_id, tb_id] = get_table_prefix(table_key); getDataWrapper(table_key)->restoreDataWrapperInternals( - disk_cache_->getCacheDirectoryForTablePrefix(table_key) + wrapper_file_name, + disk_cache_->getCacheDirectoryForTable(db_id, tb_id) + wrapper_file_name, chunk_metadata); } diff --git a/DataMgr/ForeignStorage/CsvDataWrapper.cpp b/DataMgr/ForeignStorage/CsvDataWrapper.cpp index e86a964f31..55df1e60c5 100644 --- a/DataMgr/ForeignStorage/CsvDataWrapper.cpp +++ b/DataMgr/ForeignStorage/CsvDataWrapper.cpp @@ -513,7 +513,6 @@ void cache_blocks(std::map& cached_chunks, } } cached_chunks[chunk_key].appendData(data_block, row_count, 0); - if (is_last_block) { // cache the chunks now so they are tracked by eviction algorithm std::vector key_to_cache{chunk_key}; @@ -821,7 +820,6 @@ void add_placeholder_metadata( empty_buffer.initEncoder(column->columnType); auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType); chunk_metadata->numElements = num_elements; - // signal to query engine populate, not set by default for arrays chunk_metadata->chunkStats.min.intval = std::numeric_limits::max(); chunk_metadata->chunkStats.max.intval = std::numeric_limits::lowest(); @@ -890,6 +888,7 @@ void CsvDataWrapper::populateChunkMetadata(ChunkMetadataVector& chunk_metadata_v columns_to_scan.insert(column->columnId); } } + // Track where scan started for appends int start_row = num_rows_; if (!csv_reader_->isScanFinished()) { diff --git a/DataMgr/ForeignStorage/ForeignStorageCache.cpp b/DataMgr/ForeignStorage/ForeignStorageCache.cpp index 9a65f8a980..048c608080 100644 --- a/DataMgr/ForeignStorage/ForeignStorageCache.cpp +++ b/DataMgr/ForeignStorage/ForeignStorageCache.cpp @@ -51,19 +51,18 @@ void set_metadata_for_buffer(AbstractBuffer* buffer, ChunkMetadata* meta) { } } // namespace -ForeignStorageCache::ForeignStorageCache(const DiskCacheConfig& config, - std::shared_ptr fsi) +ForeignStorageCache::ForeignStorageCache(const DiskCacheConfig& config) : num_chunks_added_(0), num_metadata_added_(0) { validatePath(config.path); - global_file_mgr_ = std::make_unique( - 0, fsi, config.path, config.num_reader_threads); + caching_file_mgr_ = std::make_unique( + config.path, config.num_reader_threads); } void ForeignStorageCache::deleteBufferIfExists(const ChunkKey& chunk_key) { write_lock meta_lock(metadata_mutex_); write_lock chunk_lock(chunks_mutex_); if (cached_metadata_.find(chunk_key) != cached_metadata_.end()) { - global_file_mgr_->deleteBuffer(chunk_key); + caching_file_mgr_->deleteBuffer(chunk_key); cached_chunks_.erase(chunk_key); cached_metadata_.erase(chunk_key); } @@ -76,8 +75,8 @@ void ForeignStorageCache::cacheChunk(const ChunkKey& chunk_key, AbstractBuffer* CHECK(!buffer->isDirty()); buffer->setUpdated(); num_chunks_added_++; - global_file_mgr_->putBuffer(chunk_key, buffer); - global_file_mgr_->checkpoint(); + caching_file_mgr_->putBuffer(chunk_key, buffer); + caching_file_mgr_->checkpoint(); cached_metadata_.emplace(chunk_key); cached_chunks_.emplace(chunk_key); CHECK(!buffer->isDirty()); @@ -95,11 +94,11 @@ void ForeignStorageCache::cacheTableChunks(const std::vector& chunk_ke for (const auto& chunk_key : chunk_keys) { CHECK_EQ(db_id, chunk_key[CHUNK_KEY_DB_IDX]); CHECK_EQ(table_id, chunk_key[CHUNK_KEY_TABLE_IDX]); - CHECK(global_file_mgr_->isBufferOnDevice(chunk_key)); + CHECK(caching_file_mgr_->isBufferOnDevice(chunk_key)); num_chunks_added_++; cached_chunks_.emplace(chunk_key); } - global_file_mgr_->checkpoint(db_id, table_id); + caching_file_mgr_->checkpoint(); } AbstractBuffer* ForeignStorageCache::getCachedChunkIfExists(const ChunkKey& chunk_key) { @@ -111,7 +110,7 @@ AbstractBuffer* ForeignStorageCache::getCachedChunkIfExists(const ChunkKey& chun return nullptr; } } - return global_file_mgr_->getBuffer(chunk_key); + return caching_file_mgr_->getBuffer(chunk_key); } bool ForeignStorageCache::isMetadataCached(const ChunkKey& chunk_key) const { @@ -124,14 +123,13 @@ bool ForeignStorageCache::recoverCacheForTable(ChunkMetadataVector& meta_vec, write_lock lock(chunks_mutex_); CHECK(meta_vec.size() == 0); CHECK(is_table_key(table_key)); - CHECK(dynamic_cast(global_file_mgr_->getFileMgr(table_key))); - global_file_mgr_->getChunkMetadataVecForKeyPrefix(meta_vec, table_key); + caching_file_mgr_->getChunkMetadataVecForKeyPrefix(meta_vec, table_key); for (auto& [chunk_key, metadata] : meta_vec) { cached_metadata_.emplace(chunk_key); // If there is no page count then the chunk was metadata only and should not be // cached. - if (const auto& buf = global_file_mgr_->getBuffer(chunk_key); buf->pageCount() > 0) { + if (const auto& buf = caching_file_mgr_->getBuffer(chunk_key); buf->pageCount() > 0) { cached_chunks_.emplace(chunk_key); } @@ -144,7 +142,7 @@ bool ForeignStorageCache::recoverCacheForTable(ChunkMetadataVector& meta_vec, chunk_key[CHUNK_KEY_FRAGMENT_IDX], 2}; - if (const auto& buf = global_file_mgr_->getBuffer(index_chunk_key); + if (const auto& buf = caching_file_mgr_->getBuffer(index_chunk_key); buf->pageCount() > 0) { cached_chunks_.emplace(index_chunk_key); } @@ -182,20 +180,20 @@ void ForeignStorageCache::cacheMetadataVec(const ChunkMetadataVector& metadata_v 2}; } bool chunk_in_cache = false; - if (!global_file_mgr_->isBufferOnDevice(chunk_key)) { - buf = global_file_mgr_->createBuffer(chunk_key); + if (!caching_file_mgr_->isBufferOnDevice(chunk_key)) { + buf = caching_file_mgr_->createBuffer(chunk_key); if (!index_chunk_key.empty()) { - CHECK(!global_file_mgr_->isBufferOnDevice(index_chunk_key)); - index_buffer = global_file_mgr_->createBuffer(index_chunk_key); + CHECK(!caching_file_mgr_->isBufferOnDevice(index_chunk_key)); + index_buffer = caching_file_mgr_->createBuffer(index_chunk_key); CHECK(index_buffer); } } else { - buf = global_file_mgr_->getBuffer(chunk_key); + buf = caching_file_mgr_->getBuffer(chunk_key); if (!index_chunk_key.empty()) { - CHECK(global_file_mgr_->isBufferOnDevice(index_chunk_key)); - index_buffer = global_file_mgr_->getBuffer(index_chunk_key); + CHECK(caching_file_mgr_->isBufferOnDevice(index_chunk_key)); + index_buffer = caching_file_mgr_->getBuffer(index_chunk_key); CHECK(index_buffer); } @@ -221,7 +219,7 @@ void ForeignStorageCache::cacheMetadataVec(const ChunkMetadataVector& metadata_v } num_metadata_added_++; } - global_file_mgr_->checkpoint(); + caching_file_mgr_->checkpoint(); } void ForeignStorageCache::getCachedMetadataVecForKeyPrefix( @@ -232,7 +230,7 @@ void ForeignStorageCache::getCachedMetadataVecForKeyPrefix( iterate_over_matching_prefix( [&metadata_vec, this](auto chunk) { std::shared_ptr buf_metadata = std::make_shared(); - global_file_mgr_->getBuffer(chunk)->getEncoder()->getMetadata(buf_metadata); + caching_file_mgr_->getBuffer(chunk)->getEncoder()->getMetadata(buf_metadata); metadata_vec.push_back(std::make_pair(chunk, buf_metadata)); }, cached_metadata_, @@ -275,7 +273,7 @@ void ForeignStorageCache::clearForTablePrefix(const ChunkKey& chunk_prefix) { meta_it = cached_metadata_.erase(meta_it); } } - global_file_mgr_->removeTableRelatedDS(chunk_prefix[0], chunk_prefix[1]); + caching_file_mgr_->clearForTable(chunk_prefix[0], chunk_prefix[1]); } void ForeignStorageCache::clear() { @@ -294,9 +292,12 @@ void ForeignStorageCache::clear() { meta_it = cached_metadata_.erase(meta_it); } } - for (const auto& table_key : table_keys) { - global_file_mgr_->removeTableRelatedDS(table_key[0], table_key[1]); - } + // FileMgrs do not clean up after themselves nicely, so we need to close all their disk + // resources and then re-create the CachingFileMgr to reset it. + caching_file_mgr_->closeRemovePhysical(); + boost::filesystem::create_directory(caching_file_mgr_->getFileMgrBasePath()); + caching_file_mgr_ = std::make_unique( + caching_file_mgr_->getFileMgrBasePath(), caching_file_mgr_->getNumReaderThreads()); } std::vector ForeignStorageCache::getCachedChunksForKeyPrefix( @@ -314,8 +315,8 @@ ChunkToBufferMap ForeignStorageCache::getChunkBuffersForCaching( read_lock lock(chunks_mutex_); for (const auto& chunk_key : chunk_keys) { CHECK(cached_chunks_.find(chunk_key) == cached_chunks_.end()); - CHECK(global_file_mgr_->isBufferOnDevice(chunk_key)); - chunk_buffer_map[chunk_key] = global_file_mgr_->getBuffer(chunk_key); + CHECK(caching_file_mgr_->isBufferOnDevice(chunk_key)); + chunk_buffer_map[chunk_key] = caching_file_mgr_->getBuffer(chunk_key); CHECK(dynamic_cast(chunk_buffer_map[chunk_key])); CHECK_EQ(chunk_buffer_map[chunk_key]->pageCount(), static_cast(0)); @@ -332,7 +333,7 @@ void ForeignStorageCache::eraseChunk(const ChunkKey& chunk_key) { return; } File_Namespace::FileBuffer* file_buffer = - static_cast(global_file_mgr_->getBuffer(chunk_key)); + static_cast(caching_file_mgr_->getBuffer(chunk_key)); file_buffer->freeChunkPages(); cached_chunks_.erase(chunk_key); } @@ -340,7 +341,7 @@ void ForeignStorageCache::eraseChunk(const ChunkKey& chunk_key) { std::set::iterator ForeignStorageCache::evictChunkByIterator( const std::set::iterator& chunk_it) { File_Namespace::FileBuffer* file_buffer = - static_cast(global_file_mgr_->getBuffer(*chunk_it)); + static_cast(caching_file_mgr_->getBuffer(*chunk_it)); file_buffer->freeChunkPages(); return cached_chunks_.erase(chunk_it); } @@ -381,15 +382,6 @@ void ForeignStorageCache::validatePath(const std::string& base_path) const { } } -std::string ForeignStorageCache::getCacheDirectoryForTablePrefix( - const ChunkKey& table_prefix) const { - CHECK(table_prefix.size() >= 2); - auto fileMgr = dynamic_cast( - getGlobalFileMgr()->getFileMgr(table_prefix)); - CHECK(fileMgr); - return fileMgr->getFileMgrBasePath(); -} - void ForeignStorageCache::cacheMetadataWithFragIdGreaterOrEqualTo( const ChunkMetadataVector& metadata_vec, const int frag_id) { @@ -407,11 +399,11 @@ AbstractBuffer* ForeignStorageCache::getChunkBufferForPrecaching( const ChunkKey& chunk_key, bool is_new_buffer) { if (!is_new_buffer) { - CHECK(getGlobalFileMgr()->isBufferOnDevice(chunk_key)); - return getGlobalFileMgr()->getBuffer(chunk_key); + CHECK(caching_file_mgr_->isBufferOnDevice(chunk_key)); + return caching_file_mgr_->getBuffer(chunk_key); } else { - CHECK(!getGlobalFileMgr()->isBufferOnDevice(chunk_key)); - return getGlobalFileMgr()->createBuffer(chunk_key); + CHECK(!caching_file_mgr_->isBufferOnDevice(chunk_key)); + return caching_file_mgr_->createBuffer(chunk_key); } } diff --git a/DataMgr/ForeignStorage/ForeignStorageCache.h b/DataMgr/ForeignStorage/ForeignStorageCache.h index 5075616489..9c78dbdff3 100644 --- a/DataMgr/ForeignStorage/ForeignStorageCache.h +++ b/DataMgr/ForeignStorage/ForeignStorageCache.h @@ -27,7 +27,7 @@ #include "../Shared/mapd_shared_mutex.h" #include "DataMgr/AbstractBufferMgr.h" -#include "DataMgr/FileMgr/GlobalFileMgr.h" +#include "DataMgr/FileMgr/CachingFileMgr.h" #include "ForeignDataWrapper.h" class CacheTooSmallException : public std::runtime_error { @@ -54,10 +54,11 @@ using namespace Data_Namespace; namespace foreign_storage { +const std::string wrapper_file_name = "wrapper_metadata.json"; + class ForeignStorageCache { public: - ForeignStorageCache(const DiskCacheConfig& config, - std::shared_ptr fsi); + ForeignStorageCache(const DiskCacheConfig& config); /** * Caches the chunks for the given chunk keys. Chunk buffers @@ -97,15 +98,22 @@ class ForeignStorageCache { std::string dumpCachedChunkEntries() const; std::string dumpCachedMetadataEntries() const; - inline File_Namespace::GlobalFileMgr* getGlobalFileMgr() const { - return global_file_mgr_.get(); + inline std::string getCacheDirectory() const { + return caching_file_mgr_->getFileMgrBasePath(); + } + + inline std::string getCacheDirectoryForTable(int db_id, int tb_id) const { + return caching_file_mgr_->getOrAddTableDir(db_id, tb_id); } - std::string getCacheDirectoryForTablePrefix(const ChunkKey&) const; void cacheMetadataWithFragIdGreaterOrEqualTo(const ChunkMetadataVector& metadata_vec, const int frag_id); void evictThenEraseChunk(const ChunkKey&); + inline uint64_t getSpaceReservedByTable(int db_id, int tb_id) const { + return caching_file_mgr_->getSpaceReservedByTable(db_id, tb_id); + } + private: // These methods are private and assume locks are already acquired when called. std::set::iterator eraseChunk(const std::set::iterator&); @@ -115,8 +123,8 @@ class ForeignStorageCache { void evictThenEraseChunkUnlocked(const ChunkKey&); void validatePath(const std::string&) const; - // Underlying storage is handled by a GlobalFileMgr unique to the cache. - std::unique_ptr global_file_mgr_; + // Underlying storage is handled by a CachingFileMgr unique to the cache. + std::unique_ptr caching_file_mgr_; // Keeps tracks of which Chunks/ChunkMetadata are cached. std::set cached_chunks_; diff --git a/DataMgr/PersistentStorageMgr/PersistentStorageMgr.cpp b/DataMgr/PersistentStorageMgr/PersistentStorageMgr.cpp index 1eb8194703..77395fe6b3 100644 --- a/DataMgr/PersistentStorageMgr/PersistentStorageMgr.cpp +++ b/DataMgr/PersistentStorageMgr/PersistentStorageMgr.cpp @@ -42,10 +42,10 @@ PersistentStorageMgr::PersistentStorageMgr(const std::string& data_dir, global_file_mgr_ = std::make_unique( 0, fsi_, data_dir, num_reader_threads); - disk_cache_ = disk_cache_config_.isEnabled() - ? std::make_unique( - disk_cache_config, fsi_) - : nullptr; + disk_cache_ = + disk_cache_config_.isEnabled() + ? std::make_unique(disk_cache_config) + : nullptr; foreign_storage_mgr_ = disk_cache_config_.isEnabledForFSI() ? std::make_unique(disk_cache_.get()) diff --git a/Shared/File.cpp b/Shared/File.cpp index 2317c29605..3689dd1c3d 100644 --- a/Shared/File.cpp +++ b/Shared/File.cpp @@ -60,7 +60,6 @@ FILE* create(const std::string& basePath, if (f == nullptr) { LOG(FATAL) << "Error trying to create file '" << path << "', the error was: " << std::strerror(errno); - ; } fseek(f, static_cast((pageSize * numPages) - 1), SEEK_SET); fputc(EOF, f); diff --git a/Shared/types.h b/Shared/types.h index f254bb000b..13ff428af0 100644 --- a/Shared/types.h +++ b/Shared/types.h @@ -54,6 +54,11 @@ inline ChunkKey get_table_key(const ChunkKey& key) { return ChunkKey{key[CHUNK_KEY_DB_IDX], key[CHUNK_KEY_TABLE_IDX]}; } +inline std::pair get_table_prefix(const ChunkKey& key) { + CHECK(has_table_prefix(key)); + return std::pair{key[CHUNK_KEY_DB_IDX], key[CHUNK_KEY_TABLE_IDX]}; +} + inline bool is_column_key(const ChunkKey& key) { return key.size() == 3; } @@ -70,6 +75,13 @@ inline bool is_varlen_index_key(const ChunkKey& key) { return key.size() == 5 && key[4] == 2; } +inline bool in_same_table(const ChunkKey& left_key, const ChunkKey& right_key) { + CHECK(has_table_prefix(left_key)); + CHECK(has_table_prefix(right_key)); + return (left_key[CHUNK_KEY_DB_IDX] == right_key[CHUNK_KEY_DB_IDX] && + left_key[CHUNK_KEY_TABLE_IDX] == right_key[CHUNK_KEY_TABLE_IDX]); +} + inline std::string show_chunk(const ChunkKey& key) { std::ostringstream tss; for (auto vecIt = key.begin(); vecIt != key.end(); ++vecIt) { diff --git a/Tests/ForeignStorageCacheTest.cpp b/Tests/ForeignStorageCacheTest.cpp index 2f6d2b449c..131a662717 100644 --- a/Tests/ForeignStorageCacheTest.cpp +++ b/Tests/ForeignStorageCacheTest.cpp @@ -49,7 +49,6 @@ static const ChunkKey table_prefix2 = {1, 2}; class ForeignStorageCacheUnitTest : public testing::Test { protected: - inline static File_Namespace::GlobalFileMgr* gfm_; inline static std::unique_ptr cache_; inline static std::string cache_path_; @@ -128,23 +127,20 @@ class ForeignStorageCacheUnitTest : public testing::Test { } static void reinitializeCache(std::unique_ptr& cache, - File_Namespace::GlobalFileMgr*& gfm, const DiskCacheConfig& config) { - cache = std::make_unique(config, fsi); - gfm = cache->getGlobalFileMgr(); + cache = std::make_unique(config); } static void SetUpTestSuite() { cache_path_ = "./tmp/mapd_data/test_foreign_data_cache"; boost::filesystem::remove_all(cache_path_); - reinitializeCache(cache_, gfm_, {cache_path_, DiskCacheLevel::fsi}); + reinitializeCache(cache_, {cache_path_, DiskCacheLevel::fsi}); } static void TearDownTestSuite() { boost::filesystem::remove_all(cache_path_); } void SetUp() override { cache_->clear(); - ASSERT_EQ(gfm_->getNumChunks(), 0U); ASSERT_EQ(cache_->getNumCachedChunks(), 0U); ASSERT_EQ(cache_->getNumCachedMetadata(), 0U); } @@ -286,9 +282,6 @@ TEST_F(ForeignStorageCacheUnitTest, ClearForTablePrefix) { cache_->clearForTablePrefix(table_prefix1); ASSERT_EQ(cache_->getNumCachedChunks(), 1U); ASSERT_EQ(cache_->getNumCachedMetadata(), 1U); - ASSERT_FALSE(gfm_->isBufferOnDevice(chunk_key1)); - ASSERT_FALSE(gfm_->isBufferOnDevice(chunk_key2)); - ASSERT_TRUE(gfm_->isBufferOnDevice(chunk_key_table2)); } TEST_F(ForeignStorageCacheUnitTest, Clear) { @@ -303,9 +296,6 @@ TEST_F(ForeignStorageCacheUnitTest, Clear) { cache_->clear(); ASSERT_EQ(cache_->getNumCachedChunks(), 0U); ASSERT_EQ(cache_->getNumCachedMetadata(), 0U); - ASSERT_FALSE(gfm_->isBufferOnDevice(chunk_key1)); - ASSERT_FALSE(gfm_->isBufferOnDevice(chunk_key2)); - ASSERT_FALSE(gfm_->isBufferOnDevice(chunk_key_table2)); } class CacheDiskStorageTest : public ForeignStorageCacheUnitTest { @@ -316,35 +306,28 @@ class CacheDiskStorageTest : public ForeignStorageCacheUnitTest { static void TearDownTestSuite() {} void SetUp() override { boost::filesystem::remove_all(cache_path_); - reinitializeCache(cache_, gfm_, {cache_path_, DiskCacheLevel::fsi}); + reinitializeCache(cache_, {cache_path_, DiskCacheLevel::fsi}); } void TearDown() override { boost::filesystem::remove_all(cache_path_); } }; -TEST_F(CacheDiskStorageTest, CachePath_CreateBaseDir) { - ASSERT_FALSE(boost::filesystem::exists(cache_path_ + "/table_1_1")); - ChunkWrapper chunk_wrapper1{kINT, {1, 2, 3, 4}}; - chunk_wrapper1.cacheMetadata(chunk_key1); - ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/table_1_1")); -} - TEST_F(CacheDiskStorageTest, CacheMetadata_VerifyMetadataFileCreated) { ChunkWrapper chunk_wrapper1{kINT, {1, 2, 3, 4}}; chunk_wrapper1.cacheMetadata(chunk_key1); - ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/table_1_1/0.4096.mapd")); + ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/0.4096.mapd")); } TEST_F(CacheDiskStorageTest, CacheChunk_VerifyChunkFileCreated) { ChunkWrapper chunk_wrapper1{kINT, {1, 2, 3, 4}}; chunk_wrapper1.cacheMetadataThenChunk(chunk_key1); - ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/table_1_1/1." + - to_string(gfm_->getDefaultPageSize()) + ".mapd")); + ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/1." + + to_string(DEFAULT_PAGE_SIZE) + ".mapd")); } TEST_F(CacheDiskStorageTest, RecoverCache_Metadata) { ChunkWrapper chunk_wrapper1{kINT, {1, 2, 3, 4}}; chunk_wrapper1.cacheMetadata(chunk_key1); - reinitializeCache(cache_, gfm_, {cache_path_, DiskCacheLevel::fsi}); + reinitializeCache(cache_, {cache_path_, DiskCacheLevel::fsi}); ASSERT_EQ(cache_->getNumCachedMetadata(), 0U); ChunkMetadataVector metadata_vec_cached{}; cache_->recoverCacheForTable(metadata_vec_cached, table_prefix1); @@ -358,7 +341,7 @@ TEST_F(CacheDiskStorageTest, RecoverCache_UpdatedMetadata) { chunk_wrapper1.cacheMetadata(chunk_key1); ChunkWrapper chunk_wrapper2{kINT, {5, 6}}; chunk_wrapper2.cacheMetadata(chunk_key1); - reinitializeCache(cache_, gfm_, {cache_path_, DiskCacheLevel::fsi}); + reinitializeCache(cache_, {cache_path_, DiskCacheLevel::fsi}); ChunkMetadataVector metadata_vec_cached{}; cache_->recoverCacheForTable(metadata_vec_cached, table_prefix1); ASSERT_EQ(cache_->getNumCachedMetadata(), 1U); @@ -368,7 +351,7 @@ TEST_F(CacheDiskStorageTest, RecoverCache_UpdatedMetadata) { TEST_F(CacheDiskStorageTest, RecoverCache_SingleChunk) { ChunkWrapper chunk_wrapper1{kINT, {1, 2, 3, 4}}; chunk_wrapper1.cacheMetadataThenChunk(chunk_key1); - reinitializeCache(cache_, gfm_, {cache_path_, DiskCacheLevel::fsi}); + reinitializeCache(cache_, {cache_path_, DiskCacheLevel::fsi}); ASSERT_EQ(cache_->getNumCachedChunks(), 0U); ChunkMetadataVector metadata_vec_cached{}; cache_->recoverCacheForTable(metadata_vec_cached, table_prefix1); @@ -388,12 +371,9 @@ TEST_F(ForeignStorageCacheFileTest, FileCreation) { cache_path_ = "./test_foreign_data_cache"; boost::filesystem::remove_all(cache_path_); { - ForeignStorageCache cache{{cache_path_, DiskCacheLevel::fsi}, fsi}; - auto* gfm = cache.getGlobalFileMgr(); + ForeignStorageCache cache{{cache_path_, DiskCacheLevel::fsi}}; ASSERT_TRUE(boost::filesystem::exists(cache_path_)); - ASSERT_FALSE(boost::filesystem::exists(cache_path_ + "/table_1_1")); ASSERT_EQ(cache.getCachedChunkIfExists(chunk_key1), nullptr); - ASSERT_FALSE(gfm->isBufferOnDevice(chunk_key1)); TestBuffer source_buffer{std::vector{1, 2, 3, 4}}; source_buffer.initEncoder(kINT); std::shared_ptr cached_meta = std::make_shared(); @@ -402,10 +382,9 @@ TEST_F(ForeignStorageCacheFileTest, FileCreation) { auto buffer_map = cache.getChunkBuffersForCaching({chunk_key1}); buffer_map[chunk_key1]->write(source_buffer.getMemoryPtr(), source_buffer.size()); cache.cacheTableChunks({chunk_key1}); - ASSERT_TRUE(gfm->isBufferOnDevice(chunk_key1)); - ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/table_1_1/0.4096.mapd")); - ASSERT_TRUE(boost::filesystem::exists( - cache_path_ + "/table_1_1/1." + to_string(gfm->getDefaultPageSize()) + ".mapd")); + ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/0.4096.mapd")); + ASSERT_TRUE(boost::filesystem::exists(cache_path_ + "/1." + + to_string(DEFAULT_PAGE_SIZE) + ".mapd")); } // Cache files should persist after cache is destroyed. ASSERT_TRUE(boost::filesystem::exists(cache_path_)); @@ -414,7 +393,7 @@ TEST_F(ForeignStorageCacheFileTest, FileCreation) { TEST_F(ForeignStorageCacheFileTest, CustomPath) { cache_path_ = "./test_foreign_data_cache"; PersistentStorageMgr psm(data_path, 0, {cache_path_, DiskCacheLevel::fsi}); - ASSERT_EQ(psm.getDiskCache()->getGlobalFileMgr()->getBasePath(), cache_path_ + "/"); + ASSERT_EQ(psm.getDiskCache()->getCacheDirectory(), cache_path_); } TEST_F(ForeignStorageCacheFileTest, InitializeSansCache) { @@ -437,7 +416,7 @@ TEST_F(ForeignStorageCacheFileTest, FileBlocksPath) { tmp_file << "1"; tmp_file.close(); try { - ForeignStorageCache cache{{cache_path_, DiskCacheLevel::fsi}, fsi}; + ForeignStorageCache cache{{cache_path_, DiskCacheLevel::fsi}}; FAIL() << "An exception should have been thrown for this testcase"; } catch (std::runtime_error& e) { ASSERT_EQ(e.what(), @@ -451,7 +430,7 @@ TEST_F(ForeignStorageCacheFileTest, ExistingDir) { cache_path_ = "./test_foreign_data_cache"; boost::filesystem::remove_all(cache_path_); boost::filesystem::create_directory(cache_path_); - ForeignStorageCache cache{{cache_path_, DiskCacheLevel::fsi}, fsi}; + ForeignStorageCache cache{{cache_path_, DiskCacheLevel::fsi}}; } int main(int argc, char** argv) { diff --git a/Tests/ForeignTableDmlTest.cpp b/Tests/ForeignTableDmlTest.cpp index 051b052b26..177f7c9c1c 100644 --- a/Tests/ForeignTableDmlTest.cpp +++ b/Tests/ForeignTableDmlTest.cpp @@ -123,7 +123,7 @@ class ForeignTableTest : public DBHandlerTestFixture { static ChunkKey getChunkKeyFromTable(const Catalog_Namespace::Catalog& cat, const std::string& table_name, const ChunkKey& key_suffix) { - const TableDescriptor* fd = cat.getMetadataForTable(table_name); + const TableDescriptor* fd = cat.getMetadataForTable(table_name, false); ChunkKey key{cat.getCurrentDB().dbId, fd->tableId}; for (auto i : key_suffix) { key.push_back(i); @@ -192,7 +192,7 @@ class SelectQueryTest : public ForeignTableTest { bool has_nulls, const std::string& table_name) { auto& cat = getCatalog(); - auto foreign_table = cat.getMetadataForTable(table_name); + auto foreign_table = cat.getMetadataForTable(table_name, false); auto column_descriptor = cat.getMetadataForColumn(foreign_table->tableId, column_id); auto chunk_metadata = std::make_unique(); chunk_metadata->sqlType = column_descriptor->columnType; @@ -224,7 +224,7 @@ class SelectQueryTest : public ForeignTableTest { expected_metadata, const std::string& table_name) const { auto& cat = getCatalog(); - auto foreign_table = cat.getMetadataForTable(table_name); + auto foreign_table = cat.getMetadataForTable(table_name, false); if (!foreign_table) { throw std::runtime_error("Could not find foreign table: " + table_name); } @@ -278,7 +278,7 @@ class CacheControllingSelectQueryTest : public SelectQueryTest, public ::testing::WithParamInterface { public: - inline static std::string cache_path_ = to_string(BASE_PATH) + "/omnisci_disk_cache/"; + inline static std::string cache_path_ = to_string(BASE_PATH) + "/omnisci_disk_cache"; DiskCacheLevel starting_cache_level_; protected: @@ -342,7 +342,7 @@ bool compare_json_files(const std::string& generated, class RecoverCacheQueryTest : public ForeignTableTest { public: - inline static std::string cache_path_ = to_string(BASE_PATH) + "/omnisci_disk_cache/"; + inline static std::string cache_path_ = to_string(BASE_PATH) + "/omnisci_disk_cache"; Catalog_Namespace::Catalog* cat_; PersistentStorageMgr* psm_; foreign_storage::ForeignStorageCache* cache_; @@ -359,7 +359,7 @@ class RecoverCacheQueryTest : public ForeignTableTest { } bool isTableDatawrapperRestored(const std::string& name) { - auto td = getCatalog().getMetadataForTable(name); + auto td = getCatalog().getMetadataForTable(name, false); ChunkKey table_key{getCatalog().getCurrentDB().dbId, td->tableId}; return getCatalog() .getDataMgr() @@ -369,26 +369,28 @@ class RecoverCacheQueryTest : public ForeignTableTest { } bool isTableDatawrapperDataOnDisk(const std::string& name) { - auto td = getCatalog().getMetadataForTable(name); - ChunkKey table_key{getCatalog().getCurrentDB().dbId, td->tableId}; + auto td = getCatalog().getMetadataForTable(name, false); + auto db_id = getCatalog().getCurrentDB().dbId; + ChunkKey table_key{db_id, td->tableId}; return bf::exists(getCatalog() .getDataMgr() .getPersistentStorageMgr() ->getDiskCache() - ->getCacheDirectoryForTablePrefix(table_key) + - "/wrapper_metadata.json"); + ->getCacheDirectoryForTable(db_id, td->tableId) + + "/" + foreign_storage::wrapper_file_name); } bool compareTableDatawrapperMetadataToFile(const std::string& name, const std::string& filepath) { - auto td = getCatalog().getMetadataForTable(name); - ChunkKey table_key{getCatalog().getCurrentDB().dbId, td->tableId}; + auto td = getCatalog().getMetadataForTable(name, false); + auto db_id = getCatalog().getCurrentDB().dbId; + ChunkKey table_key{db_id, td->tableId}; return compare_json_files(getCatalog() .getDataMgr() .getPersistentStorageMgr() ->getDiskCache() - ->getCacheDirectoryForTablePrefix(table_key) + - "/wrapper_metadata.json", + ->getCacheDirectoryForTable(db_id, td->tableId) + + foreign_storage::wrapper_file_name, filepath, getDataFilesPath()); } @@ -1535,7 +1537,7 @@ bool does_cache_contain_chunks(Catalog_Namespace::Catalog* cat, const std::string& table_name, const std::vector> subkeys) { // subkey is chunkey without db, table ids - auto td = cat->getMetadataForTable(table_name); + auto td = cat->getMetadataForTable(table_name, false); ChunkKey table_key{cat->getCurrentDB().dbId, td->tableId}; auto cache = cat->getDataMgr().getPersistentStorageMgr()->getDiskCache(); @@ -3337,7 +3339,6 @@ class ForeignStorageCacheQueryTest : public ForeignTableTest { inline static const std::string col_name3 = "col3"; inline static Catalog_Namespace::Catalog* cat; inline static ForeignStorageCache* cache; - inline static File_Namespace::GlobalFileMgr* gfm; inline static const TableDescriptor* td; inline static const ColumnDescriptor *cd1, *cd2, *cd3; inline static ChunkKey query_chunk_key1, query_chunk_key2, query_chunk_key3, @@ -3347,7 +3348,6 @@ class ForeignStorageCacheQueryTest : public ForeignTableTest { DBHandlerTestFixture::SetUpTestSuite(); cat = &getCatalog(); cache = cat->getDataMgr().getPersistentStorageMgr()->getDiskCache(); - gfm = cache->getGlobalFileMgr(); sqlDropForeignTable(); } @@ -3358,7 +3358,7 @@ class ForeignStorageCacheQueryTest : public ForeignTableTest { "(" + col_name1 + " TEXT, " + col_name2 + " INTEGER, " + col_name3 + " DOUBLE)", table_2_filename, "csv"); - td = cat->getMetadataForTable(default_table_name); + td = cat->getMetadataForTable(default_table_name, false); cd1 = cat->getMetadataForColumn(td->tableId, col_name1); cd2 = cat->getMetadataForColumn(td->tableId, col_name2); cd3 = cat->getMetadataForColumn(td->tableId, col_name3); @@ -3411,7 +3411,7 @@ class ForeignStorageCacheQueryTest : public ForeignTableTest { } }; -TEST_F(ForeignStorageCacheQueryTest, CreatePopulateMetadata) { +TEST_F(ForeignStorageCacheQueryTest, CreateDoesNotPopulateMetadata) { sqlDropForeignTable(); ASSERT_FALSE(cache->isMetadataCached(query_chunk_key1)); ASSERT_FALSE(cache->isMetadataCached(query_chunk_key2)); @@ -3419,11 +3419,11 @@ TEST_F(ForeignStorageCacheQueryTest, CreatePopulateMetadata) { ASSERT_FALSE(cache->hasCachedMetadataForKeyPrefix(query_chunk_key1)); ASSERT_FALSE(cache->hasCachedMetadataForKeyPrefix(query_table_prefix)); createTestTable(); - ASSERT_TRUE(cache->isMetadataCached(query_chunk_key1)); - ASSERT_TRUE(cache->isMetadataCached(query_chunk_key2)); - ASSERT_TRUE(cache->isMetadataCached(query_chunk_key3)); - ASSERT_TRUE(cache->hasCachedMetadataForKeyPrefix(query_chunk_key1)); - ASSERT_TRUE(cache->hasCachedMetadataForKeyPrefix(query_table_prefix)); + ASSERT_FALSE(cache->isMetadataCached(query_chunk_key1)); + ASSERT_FALSE(cache->isMetadataCached(query_chunk_key2)); + ASSERT_FALSE(cache->isMetadataCached(query_chunk_key3)); + ASSERT_FALSE(cache->hasCachedMetadataForKeyPrefix(query_chunk_key1)); + ASSERT_FALSE(cache->hasCachedMetadataForKeyPrefix(query_table_prefix)); } TEST_F(ForeignStorageCacheQueryTest, CacheEvictAfterDrop) { @@ -3476,7 +3476,7 @@ TEST_F(ForeignStorageCacheQueryTest, ArrayTypes) { sql(query); sql("SELECT COUNT(*) FROM " + default_table_name + ";"); - auto td = cat->getMetadataForTable(default_table_name); + auto td = cat->getMetadataForTable(default_table_name, false); ChunkMetadataVector metadata_vec{}; cache->getCachedMetadataVecForKeyPrefix(metadata_vec, {cat->getCurrentDB().dbId, td->tableId}); @@ -3532,8 +3532,7 @@ class CacheDefaultTest : public DBHandlerTestFixture {}; TEST_F(CacheDefaultTest, Path) { auto cat = &getCatalog(); auto cache = cat->getDataMgr().getPersistentStorageMgr()->getDiskCache(); - ASSERT_EQ(cache->getGlobalFileMgr()->getBasePath(), - to_string(BASE_PATH) + "/omnisci_disk_cache/"); + ASSERT_EQ(cache->getCacheDirectory(), to_string(BASE_PATH) + "/omnisci_disk_cache"); } TEST_F(RecoverCacheQueryTest, RecoverWithoutWrappers) { @@ -3543,7 +3542,7 @@ TEST_F(RecoverCacheQueryTest, RecoverWithoutWrappers) { "SERVER omnisci_local_csv WITH (file_path = '" + getDataFilesPath() + "/" + "example_1_dir_archives/');"; sql(query); - auto td = cat_->getMetadataForTable(default_table_name); + auto td = cat_->getMetadataForTable(default_table_name, false); ChunkKey key{cat_->getCurrentDB().dbId, td->tableId, 1, 0}; ChunkKey table_key{cat_->getCurrentDB().dbId, td->tableId}; @@ -3573,14 +3572,14 @@ TEST_F(RecoverCacheQueryTest, RecoverWithoutWrappers) { sqlDropForeignTable(); } -TEST_F(RecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemandVarLen) { +TEST_F(RecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemandVarLenCsv) { sqlDropForeignTable(); std::string query = "CREATE FOREIGN TABLE " + default_table_name + " (t TEXT, i BIGINT[]) "s + "SERVER omnisci_local_csv WITH (file_path = '" + getDataFilesPath() + "/" + "example_1_dir_archives/');"; sql(query); - auto td = cat_->getMetadataForTable(default_table_name); + auto td = cat_->getMetadataForTable(default_table_name, false); ChunkKey key{cat_->getCurrentDB().dbId, td->tableId, 1, 0}; ChunkKey table_key{cat_->getCurrentDB().dbId, td->tableId}; @@ -3610,6 +3609,43 @@ TEST_F(RecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemandVarLen) { sqlDropForeignTable(); } +TEST_F(RecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemandVarLenParquet) { + sqlDropForeignTable(); + std::string query = "CREATE FOREIGN TABLE " + default_table_name + + " (t TEXT, i INTEGER[]) "s + + "SERVER omnisci_local_parquet WITH (file_path = '" + + getDataFilesPath() + "/" + "example_1.parquet');"; + sql(query); + auto td = cat_->getMetadataForTable(default_table_name, false); + ChunkKey key{cat_->getCurrentDB().dbId, td->tableId, 1, 0}; + ChunkKey table_key{cat_->getCurrentDB().dbId, td->tableId}; + + sqlAndCompareResult("SELECT COUNT(*) FROM " + default_table_name + ";", {{i(3)}}); + ASSERT_FALSE(isTableDatawrapperRestored(default_table_name)); + + // Reset cache and clear memory representations. + resetStorageManagerAndClearTableMemory(table_key); + + // Cache should be empty until query prompts recovery from disk + ASSERT_EQ(cache_->getNumCachedMetadata(), 0U); + ASSERT_EQ(cache_->getNumCachedChunks(), 0U); + + ASSERT_TRUE(isTableDatawrapperDataOnDisk(default_table_name)); + + sqlAndCompareResult("SELECT * FROM " + default_table_name + " ORDER BY t;", + {{"a", array({i(1), i(1), i(1)})}, + {"aa", array({i(NULL_INT), i(2), i(2)})}, + {"aaa", array({i(3), i(NULL_INT), i(3)})}}); + + // 2 data + 1 index chunk + ASSERT_EQ(cache_->getNumCachedChunks(), 3U); + // Only 2 metadata + ASSERT_EQ(cache_->getNumCachedMetadata(), 2U); + + ASSERT_TRUE(isTableDatawrapperRestored(default_table_name)); + sqlDropForeignTable(); +} + // Check that csv datawrapper metadata is generated and restored correctly for CSV // Archives TEST_F(RecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemandFromCsvArchive) { @@ -3619,7 +3655,7 @@ TEST_F(RecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemandFromCsvArch "SERVER omnisci_local_csv WITH (file_path = '" + getDataFilesPath() + "/" + "example_1_multilevel.zip');"; sql(query); - auto td = cat_->getMetadataForTable(default_table_name); + auto td = cat_->getMetadataForTable(default_table_name, false); ChunkKey key{cat_->getCurrentDB().dbId, td->tableId, 1, 0}; ChunkKey table_key{cat_->getCurrentDB().dbId, td->tableId}; @@ -3659,7 +3695,7 @@ TEST_P(DataWrapperRecoverCacheQueryTest, RecoverThenPopulateDataWrappersOnDemand sqlDropForeignTable(); sqlCreateForeignTable("(col1 BIGINT)", "1", wrapper); - auto td = cat_->getMetadataForTable(default_table_name); + auto td = cat_->getMetadataForTable(default_table_name, false); ChunkKey key{cat_->getCurrentDB().dbId, td->tableId, 1, 0}; ChunkKey table_key{cat_->getCurrentDB().dbId, td->tableId}; @@ -3718,7 +3754,7 @@ TEST_P(DataWrapperRecoverCacheQueryTest, AppendData) { "', REFRESH_UPDATE_TYPE = 'APPEND');"; sql(query); - auto td = cat_->getMetadataForTable(default_table_name); + auto td = cat_->getMetadataForTable(default_table_name, false); ChunkKey key{cat_->getCurrentDB().dbId, td->tableId, 1, 0}; ChunkKey table_key{cat_->getCurrentDB().dbId, td->tableId}; diff --git a/Tests/PersistentStorageTest.cpp b/Tests/PersistentStorageTest.cpp index 14c8b92e20..fbbe038320 100644 --- a/Tests/PersistentStorageTest.cpp +++ b/Tests/PersistentStorageTest.cpp @@ -38,7 +38,7 @@ class PersistentStorageMgrTest : public testing::Test { TEST_F(PersistentStorageMgrTest, DiskCache_CustomPath) { PersistentStorageMgr psm(data_path, 0, {cache_path_, DiskCacheLevel::fsi}); - ASSERT_EQ(psm.getDiskCache()->getGlobalFileMgr()->getBasePath(), cache_path_ + "/"); + ASSERT_EQ(psm.getDiskCache()->getCacheDirectory(), cache_path_); } TEST_F(PersistentStorageMgrTest, DiskCache_InitializeWithoutCache) { @@ -48,7 +48,7 @@ TEST_F(PersistentStorageMgrTest, DiskCache_InitializeWithoutCache) { TEST_F(PersistentStorageMgrTest, MutableDiskCache_CustomPath) { MutableCachePersistentStorageMgr psm(data_path, 0, {cache_path_, DiskCacheLevel::all}); - ASSERT_EQ(psm.getDiskCache()->getGlobalFileMgr()->getBasePath(), cache_path_ + "/"); + ASSERT_EQ(psm.getDiskCache()->getCacheDirectory(), cache_path_); } int main(int argc, char** argv) { diff --git a/Tests/ShowCommandsDdlTest.cpp b/Tests/ShowCommandsDdlTest.cpp index bde7dff284..b5b288eea5 100644 --- a/Tests/ShowCommandsDdlTest.cpp +++ b/Tests/ShowCommandsDdlTest.cpp @@ -23,6 +23,7 @@ #include "DBHandlerTestHelpers.h" #include "Shared/File.h" #include "TestHelpers.h" +#include "boost/filesystem.hpp" #ifndef BASE_PATH #define BASE_PATH "./tmp" @@ -1001,11 +1002,10 @@ class ShowDiskCacheUsageTest : public ShowTest { public: static inline constexpr int64_t epoch_file_size{2 * sizeof(int64_t)}; static inline constexpr int64_t empty_mgr_size{0}; - static inline const int64_t meta_only_size{epoch_file_size + - DEFAULT_METADATA_FILE_SIZE}; - static inline const int64_t minimum_total_size{DEFAULT_DATA_FILE_SIZE + meta_only_size}; + static inline constexpr int64_t meta_only_size{METADATA_PAGE_SIZE}; + static inline constexpr int64_t minimum_total_size{DEFAULT_PAGE_SIZE + meta_only_size}; // TODO(Misiu): These can be made constexpr once c++20 is supported. - static inline std::string cache_path_ = to_string(BASE_PATH) + "/omnisci_disk_cache/"; + static inline std::string cache_path_ = to_string(BASE_PATH) + "/omnisci_disk_cache"; static inline std::string foreign_table1{"foreign_table1"}; static inline std::string foreign_table2{"foreign_table2"}; static inline std::string foreign_table3{"foreign_table3"}; @@ -1017,6 +1017,7 @@ class ShowDiskCacheUsageTest : public ShowTest { sql("DROP DATABASE IF EXISTS test_db;"); sql("CREATE DATABASE test_db;"); login("admin", "HyperInteractive", "test_db"); + getCatalog().getDataMgr().getPersistentStorageMgr()->getDiskCache()->clear(); } static void TearDownTestSuite() { @@ -1052,6 +1053,28 @@ class ShowDiskCacheUsageTest : public ShowTest { boost::filesystem::canonical("../../Tests/FsiDataFiles/0.parquet").string() + "');"); } + + uint64_t getWrapperSizeForTable(std::string& table_name) { + uint64_t space_used = 0; + auto& cat = getCatalog(); + auto td = cat.getMetadataForTable(table_name, false); + std::string table_dir = + cache_path_ + "/" + + File_Namespace::get_dir_name_for_table(cat.getDatabaseId(), td->tableId); + if (boost::filesystem::exists(table_dir)) { + for (const auto& file : + boost::filesystem::recursive_directory_iterator(table_dir)) { + if (boost::filesystem::is_regular_file(file.path())) { + space_used += boost::filesystem::file_size(file.path()); + } + } + } + return space_used; + } + + uint64_t getMinSizeForTable(std::string& table_name) { + return minimum_total_size + getWrapperSizeForTable(table_name); + } }; TEST_F(ShowDiskCacheUsageTest, SingleTable) { @@ -1064,7 +1087,8 @@ TEST_F(ShowDiskCacheUsageTest, SingleTableInUse) { sqlCreateBasicForeignTable(foreign_table1); sql("SELECT * FROM " + foreign_table1 + ";"); - sqlAndCompareResult("SHOW DISK CACHE USAGE;", {{foreign_table1, minimum_total_size}}); + sqlAndCompareResult("SHOW DISK CACHE USAGE;", + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}}); } TEST_F(ShowDiskCacheUsageTest, MultipleTables) { @@ -1076,8 +1100,8 @@ TEST_F(ShowDiskCacheUsageTest, MultipleTables) { sql("SELECT * FROM " + foreign_table2 + ";"); sqlAndCompareResult("SHOW DISK CACHE USAGE;", - {{foreign_table1, minimum_total_size}, - {foreign_table2, minimum_total_size}, + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}, + {foreign_table2, i(getMinSizeForTable(foreign_table2))}, {foreign_table3, empty_mgr_size}}); } @@ -1101,7 +1125,8 @@ TEST_F(ShowDiskCacheUsageTest, MultipleTablesFiltered) { sqlAndCompareResult( "SHOW DISK CACHE USAGE " + foreign_table1 + ", " + foreign_table3 + ";", - {{foreign_table1, minimum_total_size}, {foreign_table3, empty_mgr_size}}); + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}, + {foreign_table3, empty_mgr_size}}); } TEST_F(ShowDiskCacheUsageTest, SingleTableDropped) { @@ -1128,7 +1153,8 @@ TEST_F(ShowDiskCacheUsageTest, SingleTableRefreshed) { sql("SELECT * FROM " + foreign_table1 + ";"); sql("REFRESH FOREIGN TABLES " + foreign_table1 + ";"); - sqlAndCompareResult("SHOW DISK CACHE USAGE;", {{foreign_table1, minimum_total_size}}); + sqlAndCompareResult("SHOW DISK CACHE USAGE;", + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}}); } TEST_F(ShowDiskCacheUsageTest, SingleTableMetadataOnly) { @@ -1136,7 +1162,9 @@ TEST_F(ShowDiskCacheUsageTest, SingleTableMetadataOnly) { sql("SELECT COUNT(*) FROM " + foreign_table1 + ";"); - sqlAndCompareResult("SHOW DISK CACHE USAGE;", {{foreign_table1, meta_only_size}}); + sqlAndCompareResult( + "SHOW DISK CACHE USAGE;", + {{foreign_table1, i(meta_only_size + getWrapperSizeForTable(foreign_table1))}}); } TEST_F(ShowDiskCacheUsageTest, ForeignAndNormalTable) { @@ -1146,8 +1174,23 @@ TEST_F(ShowDiskCacheUsageTest, ForeignAndNormalTable) { sql("SELECT * FROM " + foreign_table1 + ";"); sql("SELECT * FROM " + table1 + ";"); + sqlAndCompareResult( + "SHOW DISK CACHE USAGE;", + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}, {table1, i(0)}}); +} + +TEST_F(ShowDiskCacheUsageTest, MultipleChunks) { + sql("CREATE FOREIGN TABLE " + foreign_table1 + + " (t TEXT, i INTEGER[]) SERVER omnisci_local_parquet WITH " + "(file_path = '" + + boost::filesystem::canonical("../../Tests/FsiDataFiles/example_1.parquet") + .string() + + "');"); + sql("SELECT * FROM " + foreign_table1 + ";"); sqlAndCompareResult("SHOW DISK CACHE USAGE;", - {{foreign_table1, minimum_total_size}, {table1, i(0)}}); + {{foreign_table1, + i(getMinSizeForTable(foreign_table1) + + (2 * (METADATA_PAGE_SIZE + DEFAULT_PAGE_SIZE)))}}); } class ShowDiskCacheUsageForNormalTableTest : public ShowDiskCacheUsageTest { @@ -1178,7 +1221,8 @@ TEST_F(ShowDiskCacheUsageForNormalTableTest, NormalTableEmptyUninitialized) { sql("SELECT * FROM " + foreign_table1 + ";"); sqlAndCompareResult("SHOW DISK CACHE USAGE;", - {{foreign_table1, minimum_total_size}, {table1, empty_mgr_size}}); + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}, + {table1, empty_mgr_size}}); } // If a table is initialized, but empty (it has a fileMgr, but no content), it will have @@ -1192,7 +1236,8 @@ TEST_F(ShowDiskCacheUsageForNormalTableTest, NormalTableEmptyInitialized) { sql("SELECT * FROM " + table1 + ";"); sqlAndCompareResult("SHOW DISK CACHE USAGE;", - {{foreign_table1, minimum_total_size}, {table1, epoch_file_size}}); + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}, + {table1, empty_mgr_size}}); } TEST_F(ShowDiskCacheUsageForNormalTableTest, NormalTableMinimum) { @@ -1202,9 +1247,9 @@ TEST_F(ShowDiskCacheUsageForNormalTableTest, NormalTableMinimum) { sql("SELECT * FROM " + foreign_table1 + ";"); sql("INSERT INTO " + table1 + " VALUES('1');"); - sqlAndCompareResult( - "SHOW DISK CACHE USAGE;", - {{foreign_table1, minimum_total_size}, {table1, minimum_total_size}}); + sqlAndCompareResult("SHOW DISK CACHE USAGE;", + {{foreign_table1, i(getMinSizeForTable(foreign_table1))}, + {table1, i(minimum_total_size * 2)}}); } class ShowTableDetailsTest : public ShowTest,