From 7aa2a7bf5afab452b084713582f3f7944a3c121a Mon Sep 17 00:00:00 2001 From: "Chen, Qi" Date: Wed, 17 May 2023 14:16:58 +0100 Subject: [PATCH] Store the product's version number in C++ (#110) So to embed WriterMetadata in symbol list keys for future use --- .github/workflows/tag.yml | 4 +- cpp/arcticdb/CMakeLists.txt | 2 - cpp/arcticdb/python/arctic_version.cpp | 14 ----- cpp/arcticdb/python/arctic_version.hpp | 14 ----- cpp/arcticdb/python/python_module.cpp | 7 +-- cpp/arcticdb/util/global_lifetimes.cpp | 1 + cpp/arcticdb/util/version_number.hpp | 17 ++++++ cpp/arcticdb/util/writer_info.hpp | 44 +++++++++++++++ cpp/arcticdb/version/python_bindings.cpp | 10 +++- cpp/arcticdb/version/symbol_list.cpp | 66 ++++++++++++++++------ cpp/arcticdb/version/symbol_list.hpp | 22 +++++--- cpp/arcticdb/version/version_store_api.hpp | 5 ++ cpp/proto/arcticc/pb2/descriptors.proto | 23 ++++++++ python/arcticdb/__init__.py | 3 +- setup.cfg | 1 - setup.py | 11 +++- version_io.py | 51 +++++++++++++++++ 17 files changed, 225 insertions(+), 70 deletions(-) delete mode 100644 cpp/arcticdb/python/arctic_version.cpp delete mode 100644 cpp/arcticdb/python/arctic_version.hpp create mode 100644 cpp/arcticdb/util/version_number.hpp create mode 100644 cpp/arcticdb/util/writer_info.hpp create mode 100644 version_io.py diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml index 7c042f983b6..3d45498e374 100644 --- a/.github/workflows/tag.yml +++ b/.github/workflows/tag.yml @@ -25,8 +25,7 @@ jobs: env: BASH_FUNC_tag_and_push%%: |- () { - sed -ri '/^\[metadata]/, /^\[/ s/^version[[:blank:]]*=.*/version = '$2/ setup.cfg - git add setup.cfg README.md + python3 version_io.py $2 git status git diff --cached --exit-code || git commit -m "$1 v$2" set -x @@ -50,6 +49,7 @@ jobs: git switch -C tagging ${{github.ref}} # Remove the build status badge from a version tag sed -i 's###' README.md || true + git add README.md tag_and_push "Tagging" ${{inputs.version}} refs/tags/v${{inputs.version}} 
${{inputs.overwrite && '-f' || ''}} - name: Bump ${{github.ref_name}} to the next version diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index 17a4a490e75..d39acf076c4 100755 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -573,8 +573,6 @@ endif () # This configures: linking to Python::Module + pybind, output pre-/suf-fix, -fvisibility=hidden, strip release build pybind11_add_module(arcticdb_ext MODULE python/python_module.cpp - python/arctic_version.cpp - python/arctic_version.hpp python/reader.hpp python/adapt_read_dataframe.hpp ) diff --git a/cpp/arcticdb/python/arctic_version.cpp b/cpp/arcticdb/python/arctic_version.cpp deleted file mode 100644 index 5f2fe291e4c..00000000000 --- a/cpp/arcticdb/python/arctic_version.cpp +++ /dev/null @@ -1,14 +0,0 @@ -/* Copyright 2023 Man Group Operations Limited - * - * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. - */ - -#include - -namespace arcticdb { -std::string get_arcticdb_version_string() { - return std::string("Arctic Native v0.999"); -} -} \ No newline at end of file diff --git a/cpp/arcticdb/python/arctic_version.hpp b/cpp/arcticdb/python/arctic_version.hpp deleted file mode 100644 index 9feee17b56b..00000000000 --- a/cpp/arcticdb/python/arctic_version.hpp +++ /dev/null @@ -1,14 +0,0 @@ -/* Copyright 2023 Man Group Operations Limited - * - * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
- */ - -#pragma once - -#include - -namespace arcticdb { - std::string get_arcticdb_version_string(); -} \ No newline at end of file diff --git a/cpp/arcticdb/python/python_module.cpp b/cpp/arcticdb/python/python_module.cpp index 240b293f555..2eb889f989c 100644 --- a/cpp/arcticdb/python/python_module.cpp +++ b/cpp/arcticdb/python/python_module.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -290,10 +291,6 @@ PYBIND11_MODULE(arcticdb_ext, m) { register_termination_handler(); -#ifdef VERSION_INFO - m.attr("__version__") = VERSION_INFO; -#else - m.attr("__version__") = "dev"; -#endif + m.attr("__version__") = ARCTICDB_VERSION_STR; } diff --git a/cpp/arcticdb/util/global_lifetimes.cpp b/cpp/arcticdb/util/global_lifetimes.cpp index e4969af84b9..56b09ad6a51 100644 --- a/cpp/arcticdb/util/global_lifetimes.cpp +++ b/cpp/arcticdb/util/global_lifetimes.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #if defined(_MSC_VER) && defined(_DEBUG) #include diff --git a/cpp/arcticdb/util/version_number.hpp b/cpp/arcticdb/util/version_number.hpp new file mode 100644 index 00000000000..2ad4a9d5319 --- /dev/null +++ b/cpp/arcticdb/util/version_number.hpp @@ -0,0 +1,17 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file LICENSE.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ +#ifndef ARCTICDB_MAJOR + +// The lines below until "END" are updated mechanically: +#define ARCTICDB_MAJOR 1 +#define ARCTICDB_MINOR 2 +#define ARCTICDB_PATCH 0 +#define ARCTICDB_VERSION_STR "1.2.0-rc1" +// END <- DO NOT MODIFY + +#define ARCTICDB_VERSION_INT ((ARCTICDB_MAJOR * 1000U + ARCTICDB_MINOR) * 1000U + ARCTICDB_PATCH) +#endif diff --git a/cpp/arcticdb/util/writer_info.hpp b/cpp/arcticdb/util/writer_info.hpp new file mode 100644 index 00000000000..faf84b4a442 --- /dev/null +++ b/cpp/arcticdb/util/writer_info.hpp @@ -0,0 +1,44 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file LICENSE.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#pragma once + +#include +#include "version_number.hpp" + +namespace arcticdb::util { + +inline void set_static_writer_info_fields(proto::descriptors::WriterInfo& proto) { + using Proto = proto::descriptors::WriterInfo; + +#if defined(__linux__) + proto.set_platform(Proto::LINUX); +#elif defined(_WIN32) + proto.set_platform(Proto::WINDOWS); +#elif defined(__APPLE__) + proto.set_platform(Proto::APPLE); +#else +#warning "Please add your OS to the protobuf" + proto.set_platform(Proto::UNKNOWN_PLATFORM); +#endif + +#if defined(__clang_major__) + proto.set_cc(Proto::CLANG); + proto.set_cc_version((__clang_major__ * 1000 + __clang_minor__) * 1000 + __clang_patchlevel__); +#elif defined(__GNUC__) + proto.set_cc(Proto::GCC); + proto.set_cc_version((__GNUC__ * 1000 + __GNUC_MINOR__) * 1000 + __GNUC_PATCHLEVEL__); +#elif defined(_MSC_VER) + proto.set_cc(Proto::MSVC); + proto.set_cc_version(_MSC_FULL_VER); +#else +#warning "Please add your compiler to the protobuf" + proto.set_cc(Proto::UNKNOWN_CC); +#endif + + proto.set_version(ARCTICDB_VERSION_INT); +} +} \ No newline at end of file diff --git 
a/cpp/arcticdb/version/python_bindings.cpp b/cpp/arcticdb/version/python_bindings.cpp index e67825b919d..749df9edc60 100644 --- a/cpp/arcticdb/version/python_bindings.cpp +++ b/cpp/arcticdb/version/python_bindings.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace arcticdb::version_store { @@ -579,9 +580,14 @@ void register_bindings(py::module &m, py::exception& .def("latest_timestamp", &PythonVersionStore::latest_timestamp, "Returns latest timestamp of a symbol") + .def("get_latest_symbol_list_compaction_descriptor", + [](PythonVersionStore& v) { + auto maybe_descriptor = v.get_latest_symbol_list_compaction_descriptor(); + return maybe_descriptor ? python_util::pb_to_python(*maybe_descriptor) : py::none(); + }) ; - m.def("get_version_string", &get_arcticdb_version_string); + m.def("get_version_string", []() {return ARCTICDB_VERSION_STR; }, "Deprecated. Use arcticdb.__version__"); m.def("read_runtime_config", [](const py::object object) { auto config = RuntimeConfig{}; diff --git a/cpp/arcticdb/version/symbol_list.cpp b/cpp/arcticdb/version/symbol_list.cpp index 540b9333657..47cf614dc56 100644 --- a/cpp/arcticdb/version/symbol_list.cpp +++ b/cpp/arcticdb/version/symbol_list.cpp @@ -6,6 +6,7 @@ */ #include +#include namespace arcticdb { @@ -17,6 +18,7 @@ struct SymbolList::LoadResult { mutable KeyVector symbol_list_keys; /** The last CompactionId key in symbol_list_keys, if any. 
*/ std::optional maybe_last_compaction; + SymbolList::OptDescriptor maybe_last_compaction_descriptor; mutable CollectionType symbols; arcticdb::timestamp timestamp; @@ -36,7 +38,8 @@ SymbolList::LoadResult SymbolList::attempt_load(const std::shared_ptr& st if (load_result.maybe_last_compaction) { load_result.timestamp = load_result.symbol_list_keys.rbegin()->creation_ts(); // Per step 5 load_result.symbols = load_from_symbol_list_keys(store, - {*load_result.maybe_last_compaction, load_result.symbol_list_keys.cend()}); + {*load_result.maybe_last_compaction, load_result.symbol_list_keys.cend()}, + load_result.maybe_last_compaction_descriptor); } else { load_result.timestamp = store->current_timestamp(); load_result.symbols = load_from_version_keys(store); @@ -61,7 +64,8 @@ SymbolList::CollectionType SymbolList::load(const std::shared_ptr& store, SYMBOL_LIST_RUNTIME_LOG("Checking whether we still need to compact under lock"); if (can_update_symbol_list(store, load_result.maybe_last_compaction)) { // Step 5 - auto written = write_symbols(store, load_result.symbols, compaction_id, load_result.timestamp).get(); + auto written = write_symbols(store, load_result.symbols, compaction_id, load_result.timestamp, + load_result.maybe_last_compaction_descriptor).get(); delete_keys(store, load_result.detach_symbol_list_keys(), std::get(written)).get(); } } else { @@ -136,11 +140,11 @@ bool SymbolList::can_update_symbol_list(const std::shared_ptr& store, return symbols; } - folly::Future SymbolList::write_symbols( - const std::shared_ptr& store, - const CollectionType& symbols, - const StreamId& stream_id, - timestamp creation_ts) { + folly::Future SymbolList::write_symbols(const std::shared_ptr& store, + const CollectionType& symbols, + const StreamId& stream_id, + timestamp creation_ts, + const OptDescriptor& existing_descriptor) { SYMBOL_LIST_RUNTIME_LOG("Writing {} symbols to symbol list cache", symbols.size()); SegmentInMemory list_segment{symbol_stream_descriptor(stream_id)}; 
@@ -159,25 +163,34 @@ bool SymbolList::can_update_symbol_list(const std::shared_ptr& store, ); list_segment.end_row(); } - if(symbols.empty()) { - google::protobuf::Any any = {}; - arcticdb::proto::descriptors::SymbolListDescriptor metadata; + + arcticdb::proto::descriptors::SymbolListDescriptor metadata; + if (existing_descriptor) { + metadata.CopyFrom(*existing_descriptor); + if (ARCTICDB_VERSION_INT < metadata.min_writer_version()) { + metadata.set_min_writer_version(ARCTICDB_VERSION_INT); + } + } else { metadata.set_enabled(true); - any.PackFrom(metadata); - list_segment.set_metadata(std::move(any)); + util::set_static_writer_info_fields(*metadata.mutable_initial_writer()); + metadata.set_min_writer_version(ARCTICDB_VERSION_INT); } + google::protobuf::Any any = {}; + any.PackFrom(metadata); + list_segment.set_metadata(std::move(any)); + return store->write(KeyType::SYMBOL_LIST, 0, stream_id, creation_ts, 0, 0, std::move(list_segment)); } - SymbolList::CollectionType SymbolList::load_from_symbol_list_keys( - const std::shared_ptr& store, - const folly::Range& keys) { + SymbolList::CollectionType SymbolList::load_from_symbol_list_keys(const std::shared_ptr& store, + const folly::Range& keys, + OptDescriptor& descriptor_out) { SYMBOL_LIST_RUNTIME_LOG("Loading symbols from symbol list keys"); bool read_compaction = false; CollectionType symbols{}; for(const auto& key : keys) { if(key.id() == compaction_id) { - read_list_from_storage(store, key, symbols); + read_list_from_storage(store, key, symbols, descriptor_out); read_compaction = true; } else { @@ -200,10 +213,17 @@ bool SymbolList::can_update_symbol_list(const std::shared_ptr& store, return symbols; } - void SymbolList::read_list_from_storage(const std::shared_ptr& store, const AtomKey& key, - CollectionType& symbols) { + void SymbolList::read_list_from_storage(const std::shared_ptr& store, + const AtomKey& key, + CollectionType& symbols, + OptDescriptor& descriptor_out) { ARCTICDB_DEBUG(log::version(), 
"Reading list from storage with key {}", key); auto key_seg = store->read(key).get().second; + if (key_seg.metadata()) { + descriptor_out.emplace(); + key_seg.metadata()->UnpackTo(&descriptor_out.value()); + } + auto field_desc = key_seg.descriptor().field_at(0); missing_data::check(field_desc.has_value(), "Expected at least one column in symbol list with key {}", key);
@@ -268,4 +288,23 @@ bool SymbolList::can_update_symbol_list(const std::shared_ptr& store, } } 
+/**
+ * For diagnostics: returns the SymbolListDescriptor stored in the metadata of the latest compaction key, or an empty
+ * optional when there is no compaction key, the key carries no metadata, or the metadata is of a different type.
+ */
+SymbolList::OptDescriptor SymbolList::get_latest_compaction_descriptor(const std::shared_ptr& store) {
+    auto result = attempt_load(store);
+    if (!result.maybe_last_compaction) {
+        return {};
+    }
+
+    auto any = store->read_metadata(*result.maybe_last_compaction.value()).get().second;
+    // Compaction keys written before this change only carry metadata when the symbol list was empty, so the
+    // metadata may legitimately be absent; dereferencing it unconditionally would be undefined behaviour.
+    proto::descriptors::SymbolListDescriptor metadata;
+    if (!any || !any->UnpackTo(&metadata)) {
+        return {};
+    }
+    return metadata;
+}
 } //namespace arcticdb
diff --git a/cpp/arcticdb/version/symbol_list.hpp b/cpp/arcticdb/version/symbol_list.hpp index 014e50b5662..1a0c107c816 100644 --- a/cpp/arcticdb/version/symbol_list.hpp +++ b/cpp/arcticdb/version/symbol_list.hpp @@ -85,6 +85,10 @@ class SymbolList { delete_all_keys_of_type(KeyType::SYMBOL_LIST, store, true); } + using OptDescriptor = std::optional; + // For diagnostics. 
+ OptDescriptor get_latest_compaction_descriptor(const std::shared_ptr& store); + private: struct LoadResult; @@ -106,18 +110,20 @@ class SymbolList { [[nodiscard]] CollectionType load_from_version_keys(const std::shared_ptr& store); - [[nodiscard]] folly::Future write_symbols( - const std::shared_ptr& store, + [[nodiscard]] folly::Future write_symbols(const std::shared_ptr& store, const CollectionType& symbols, const StreamId& stream_id, - timestamp creation_ts); + timestamp creation_ts, + const OptDescriptor& existing_descriptor); - [[nodiscard]] CollectionType load_from_symbol_list_keys( - const std::shared_ptr& store, - const folly::Range& keys); + [[nodiscard]] SymbolList::CollectionType load_from_symbol_list_keys(const std::shared_ptr& store, + const folly::Range& keys, + OptDescriptor& descriptor_out); - void read_list_from_storage(const std::shared_ptr& store, const AtomKey& key, - CollectionType& symbols); + void read_list_from_storage(const std::shared_ptr& store, + const AtomKey& key, + CollectionType& symbols, + OptDescriptor& descriptor_out); [[nodiscard]] KeyVector get_all_symbol_list_keys(const std::shared_ptr& store) const; diff --git a/cpp/arcticdb/version/version_store_api.hpp b/cpp/arcticdb/version/version_store_api.hpp index 76d27e2c4d2..2ef7820de13 100644 --- a/cpp/arcticdb/version/version_store_api.hpp +++ b/cpp/arcticdb/version/version_store_api.hpp @@ -314,6 +314,11 @@ class PythonVersionStore : public LocalVersionedEngine { std::vector get_version_history(const StreamId& stream_id); + // For diagnostics. 
+ auto get_latest_symbol_list_compaction_descriptor() { + return symbol_list_ptr()->get_latest_compaction_descriptor(store()); + } + private: std::vector batch_write_index_keys_to_version_map( diff --git a/cpp/proto/arcticc/pb2/descriptors.proto b/cpp/proto/arcticc/pb2/descriptors.proto index 9bd91093eae..681231f9be6 100644 --- a/cpp/proto/arcticc/pb2/descriptors.proto +++ b/cpp/proto/arcticc/pb2/descriptors.proto @@ -239,6 +239,27 @@ message UserDefinedMetadata { } } +message WriterInfo { + enum Platform { + UNKNOWN_PLATFORM = 0; + LINUX = 1; + WINDOWS = 2; + APPLE = 3; + } + Platform platform = 1; + + enum Compiler { + UNKNOWN_CC = 0; + GCC = 1; + CLANG = 2; + MSVC = 3; + } + Compiler cc = 2; + uint32 cc_version = 3; // Same encoding as version except _MSC_FULL_VER is used as is + + uint32 version = 4; // ArcticDB SemVer major * 1e6 + minor * 1e3 + patch +} + enum KeyType { STREAM_GROUP = 0; @@ -301,4 +322,6 @@ message TimeSeriesDescriptor message SymbolListDescriptor { bool enabled = 1; + WriterInfo initial_writer = 2; + uint32 min_writer_version = 3; } diff --git a/python/arcticdb/__init__.py b/python/arcticdb/__init__.py index db8344a9580..6d75a804d55 100644 --- a/python/arcticdb/__init__.py +++ b/python/arcticdb/__init__.py @@ -1,6 +1,5 @@ import arcticdb_ext as _ext import os as _os -import sys as _sys from arcticdb.arctic import Arctic from arcticdb.options import LibraryOptions @@ -10,4 +9,4 @@ from arcticdb.tools import set_config_from_env_vars set_config_from_env_vars(_os.environ) - +__version__ = _ext.__version__ diff --git a/setup.cfg b/setup.cfg index cfdd97f18c2..abf6422e799 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,5 @@ [metadata] name = arcticdb -version = 1.0.1 description = ArcticDB DataFrame Database author = Man Alpha Technology author_email = arcticdb@man.com diff --git a/setup.py b/setup.py index d85e32c9607..e10682da28c 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,6 @@ import platform import shutil import re -from pathlib import 
Path from tempfile import mkdtemp from setuptools import setup, Command, find_namespace_packages from setuptools import Extension, find_packages @@ -14,14 +13,21 @@ from setuptools.command.develop import develop from wheel.bdist_wheel import bdist_wheel +_saved = list(sys.path) +sys.path += [os.getcwd()] # Pip parses `package_dir={"": "python"}` below +from version_io import resolve_version + +sys.path = _saved + # experimental flag to indicate that we want # the dependencies from a conda -ARCTICDB_USING_CONDA = os.environ.get("ARCTICDB_USING_CONDA", "0") +ARCTICDB_USING_CONDA = os.environ.get("ARCTICDB_USING_CONDA", "0") ARCTICDB_USING_CONDA = ARCTICDB_USING_CONDA != "0" print(f"ARCTICDB_USING_CONDA={ARCTICDB_USING_CONDA}") + def _log_and_run(*cmd, **kwargs): print("Running " + " ".join(cmd)) subprocess.check_call(cmd, **kwargs) @@ -163,6 +169,7 @@ def readme(): if __name__ == "__main__": setup( + version=resolve_version(), ext_modules=[CMakeExtension("arcticdb_ext")], package_dir={"": "python"}, packages=find_packages(where="python", exclude=["tests", "tests.*"])
diff --git a/version_io.py b/version_io.py
new file mode 100644
index 00000000000..260335eabae
--- /dev/null
+++ b/version_io.py
@@ -0,0 +1,72 @@
+"""
+Read and update the version number stored in cpp/arcticdb/util/version_number.hpp.
+
+Version workflow:
+tag.yml -> update_version() -> _FILE -> Pybind arcticdb_ext.__version__ -> arcticdb.__version__
+                                     ↘ resolve_version() -> setup.py -> wheel
+"""
+import re
+
+_FILE = "cpp/arcticdb/util/version_number.hpp"
+_VERSION_STR_PATTERN = re.compile(r'#define ARCTICDB_VERSION_STR "([^"]+)"')
+# Non-greedy: stop at the first "// END" so nothing after the mechanically updated section is swallowed.
+_VERSION_BLOCK_PATTERN = re.compile(r"\n#define ARCTICDB_MAJOR.*?// END", re.DOTALL)
+
+
+def resolve_version():
+    """Return the string that _FILE defines as ARCTICDB_VERSION_STR.
+
+    Raises RuntimeError when no line of _FILE starts with that #define.
+    """
+    with open(_FILE, encoding="utf-8") as f:
+        for line in f:
+            match = _VERSION_STR_PATTERN.match(line)
+            if match:
+                return match.group(1)
+
+    raise RuntimeError(_FILE + " is malformed")
+
+
+def update_version(new_ver_str: str):
+    """Rewrite the ARCTICDB_MAJOR/MINOR/PATCH/VERSION_STR defines in _FILE to new_ver_str.
+
+    new_ver_str is parsed with semver, so it must be a valid SemVer string.
+    Raises RuntimeError when _FILE lacks the "#define ARCTICDB_MAJOR ... // END" section.
+    """
+    # Imported here so that merely importing this module (as setup.py does) does not need semver.
+    import semver
+
+    new_ver = semver.Version.parse(new_ver_str)
+
+    with open(_FILE, "r+", encoding="utf-8") as f:
+        text = f.read()
+        parts = _VERSION_BLOCK_PATTERN.split(text, maxsplit=1)
+        # A real exception rather than an assert: asserts are stripped under `python -O`.
+        if len(parts) != 2:
+            raise RuntimeError(_FILE + " is malformed:\n" + text)
+        out = f"""{parts[0]}
+#define ARCTICDB_MAJOR {new_ver.major}
+#define ARCTICDB_MINOR {new_ver.minor}
+#define ARCTICDB_PATCH {new_ver.patch}
+#define ARCTICDB_VERSION_STR "{new_ver}"
+// END{parts[1]}"""
+        f.seek(0)
+        f.write(out)
+        f.truncate()
+
+
+if __name__ == "__main__":
+    import subprocess
+    import sys
+
+    if len(sys.argv) != 2:
+        sys.exit("Usage: python3 version_io.py NEW_VERSION")
+
+    ver = sys.argv[1]
+    update_version(ver)
+    # Read the file back to make sure the header now carries exactly the requested version.
+    written = resolve_version()
+    if written != ver:
+        raise RuntimeError(f"{_FILE} contains version {written!r} after update, expected {ver!r}")
+
+    subprocess.check_call(["git", "add", _FILE])