diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 63d8766c70..1bdc311874 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,5 +1,8 @@
# .git-blame-ignore-revs
+# Build time improvements
+9d2c8ef41589a4e8635b972911d25c0125dc3728
+
# Moving implementation from column.hpp to column.cpp
# Reordering/removing some inclusions
801cf4b6f0f9ec0a997311fbdc14639537d3bbb6
diff --git a/.github/workflows/analysis_workflow.yml b/.github/workflows/analysis_workflow.yml
index f554b641d8..41f260be4d 100644
--- a/.github/workflows/analysis_workflow.yml
+++ b/.github/workflows/analysis_workflow.yml
@@ -75,7 +75,7 @@ jobs:
- name: Install ArcticDB[Testing]
shell: bash -el {0}
run: |
- pip install arcticdb[Testing]
+ pip install arcticdb[Testing] "protobuf<5"
- name: Publish results to Github Pages
shell: bash -el {0}
diff --git a/.github/workflows/benchmark_commits.yml b/.github/workflows/benchmark_commits.yml
index 0c260d2f71..8984a49051 100644
--- a/.github/workflows/benchmark_commits.yml
+++ b/.github/workflows/benchmark_commits.yml
@@ -13,6 +13,7 @@ jobs:
job_type: start
benchmark_commit:
+ timeout-minutes: 1200
needs: [start_ec2_runner]
if: |
always() &&
@@ -92,4 +93,4 @@ jobs:
with:
job_type: stop
label: ${{ needs.start_ec2_runner.outputs.label }}
- ec2-instance-id: ${{ needs.start_ec2_runner.outputs.ec2-instance-id }}
\ No newline at end of file
+ ec2-instance-id: ${{ needs.start_ec2_runner.outputs.ec2-instance-id }}
diff --git a/.github/workflows/persistent_storage.yml b/.github/workflows/persistent_storage.yml
index 242e5a8ce1..9b8cbe743d 100644
--- a/.github/workflows/persistent_storage.yml
+++ b/.github/workflows/persistent_storage.yml
@@ -53,7 +53,7 @@ jobs:
- name: Install latest release
if: inputs.arcticdb_version == 'latest'
run: |
- pip install pytest arcticdb
+ pip install pytest arcticdb "protobuf<5"
# Currently, the oldest "supported" release is 3.0.0
# We use this version to test forwards/barwards compatibility
@@ -61,7 +61,7 @@ jobs:
# Change this value, if we need to support a newer one in the future
- name: Install oldest supported release
run: |
- pip install pytest arcticdb=="3.0.0"
+ pip install pytest arcticdb=="3.0.0" "protobuf<5"
- name: Set persistent storage variables
uses: ./.github/actions/set_persistent_storage_env_vars
@@ -96,7 +96,7 @@ jobs:
- name: Install latest release
run: |
- pip install pytest arcticdb
+ pip install pytest arcticdb "protobuf<5"
- name: Set persistent storage variables
uses: ./.github/actions/set_persistent_storage_env_vars
diff --git a/README.md b/README.md
index 27387b527c..47c0ef9cb5 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,3 @@
-
-
-
-
-![Alt text](static/happybdayarcticdb.png)
-
- |
-
-
-# 🚀 ArcticDB Hits 1,000 GitHub Stars on Our First Open Source Anniversary! 🚀
-
-Exciting news as we celebrate two milestones: ArcticDB's first year in the open-source community
-and reaching 1,000 GitHub stars! ⭐
-
-A huge thank you to all the contributors, supporters, and community members whose involvement has been
-pivotal to our success! 🙌
-
- |
-
-
-
diff --git a/cpp/arcticdb/codec/codec-inl.hpp b/cpp/arcticdb/codec/codec-inl.hpp
index cb170cf6a8..4adcfc6f76 100644
--- a/cpp/arcticdb/codec/codec-inl.hpp
+++ b/cpp/arcticdb/codec/codec-inl.hpp
@@ -91,6 +91,7 @@ std::size_t decode_ndarray(
util::check(type_desc_tag.data_type() == DataType::EMPTYVAL,
"NDArray of type {} should not be of size 0!",
datatype_to_str(type_desc_tag.data_type()));
+ read_bytes = encoding_sizes::data_compressed_size(field);
return;
}
diff --git a/cpp/arcticdb/column_store/chunked_buffer.hpp b/cpp/arcticdb/column_store/chunked_buffer.hpp
index 2cc35f6251..4daa91b1ca 100644
--- a/cpp/arcticdb/column_store/chunked_buffer.hpp
+++ b/cpp/arcticdb/column_store/chunked_buffer.hpp
@@ -271,7 +271,11 @@ class ChunkedBufferImpl {
}
[[nodiscard]] const uint8_t* data() const {
- util::check(blocks_.size() == 1, "Taking a pointer to the beginning of a non-contiguous buffer");
+ if (blocks_.empty()) {
+ return nullptr;
+ }
+ internal::check<ErrorCode::E_ASSERTION_FAILURE>(blocks_.size() == 1,
+ "Taking a pointer to the beginning of a non-contiguous buffer");
blocks_[0]->magic_.check();
return blocks_[0]->data();
}
diff --git a/cpp/arcticdb/column_store/column_data.hpp b/cpp/arcticdb/column_store/column_data.hpp
index bba7fa81c5..5a1955e2a7 100644
--- a/cpp/arcticdb/column_store/column_data.hpp
+++ b/cpp/arcticdb/column_store/column_data.hpp
@@ -213,7 +213,7 @@ struct ColumnData {
}
// Used to construct [c]end iterators
- explicit ColumnDataIterator(ColumnData* parent, typename TDT::DataTypeTag::raw_type* end_ptr):
+ explicit ColumnDataIterator(ColumnData* parent, RawType* end_ptr):
parent_(parent) {
data_.ptr_ = end_ptr;
}
@@ -304,7 +304,7 @@ struct ColumnData {
if(!data_->blocks().empty()) {
auto block = data_->blocks().at(num_blocks() - 1);
auto typed_block_data = next_typed_block(block);
- end_ptr = typed_block_data.data() + typed_block_data.row_count();
+ end_ptr = const_cast<RawType*>(typed_block_data.data() + typed_block_data.row_count());
}
return ColumnDataIterator(this, end_ptr);
}
diff --git a/cpp/arcticdb/column_store/memory_segment.hpp b/cpp/arcticdb/column_store/memory_segment.hpp
index 561feaee0f..73e6d587c7 100644
--- a/cpp/arcticdb/column_store/memory_segment.hpp
+++ b/cpp/arcticdb/column_store/memory_segment.hpp
@@ -385,10 +385,10 @@ class SegmentInMemory {
impl_->set_string_pool(string_pool);
}
- SegmentInMemory filter(const util::BitSet& filter_bitset,
+ SegmentInMemory filter(util::BitSet&& filter_bitset,
bool filter_down_stringpool=false,
bool validate=false) const{
- return SegmentInMemory(impl_->filter(filter_bitset, filter_down_stringpool, validate));
+ return SegmentInMemory(impl_->filter(std::move(filter_bitset), filter_down_stringpool, validate));
}
/// @see SegmentInMemoryImpl::truncate
diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp
index 0bb0fbe763..8f8acf55df 100644
--- a/cpp/arcticdb/column_store/memory_segment_impl.cpp
+++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp
@@ -168,9 +168,10 @@ void SegmentInMemoryImpl::drop_column(std::string_view name) {
column_map_->erase(name);
}
-std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::filter(const util::BitSet& filter_bitset,
+std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::filter(util::BitSet&& filter_bitset,
bool filter_down_stringpool,
bool validate) const {
+ filter_bitset.resize(row_count());
bool is_input_sparse = is_sparse();
auto num_values = filter_bitset.count();
if(num_values == 0)
@@ -210,18 +211,19 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::filter(const util::Bit
} else {
bitset_including_sparse.resize((*column)->row_count());
}
- if (bitset_including_sparse.count() == 0) {
- // No values are set in the sparse column, skip it
- return;
- }
output_col_idx = output->add_column(field(column.index), bitset_including_sparse.count(), true);
final_bitset = &bitset_including_sparse;
} else {
final_bitset = &filter_bitset;
}
auto& output_col = output->column(position_t(output_col_idx));
- if (sparse_map)
+ if (sparse_map) {
output_col.opt_sparse_map() = std::make_optional<util::BitSet>();
+ if (final_bitset->count() == 0) {
+ // No values are set in the sparse column, no more work to do
+ return;
+ }
+ }
auto output_ptr = reinterpret_cast<RawType*>(output_col.ptr());
auto input_data = (*column)->data();
@@ -585,7 +587,7 @@ std::vector<std::shared_ptr<SegmentInMemoryImpl>> SegmentInMemoryImpl::split(siz
util::BitSetSizeType end = std::min(start + rows, total_rows);
// set_range is close interval on [left, right]
bitset.set_range(start, end - 1, true);
- output.emplace_back(filter(bitset));
+ output.emplace_back(filter(std::move(bitset)));
}
return output;
}
diff --git a/cpp/arcticdb/column_store/memory_segment_impl.hpp b/cpp/arcticdb/column_store/memory_segment_impl.hpp
index b09b726d56..9747be880b 100644
--- a/cpp/arcticdb/column_store/memory_segment_impl.hpp
+++ b/cpp/arcticdb/column_store/memory_segment_impl.hpp
@@ -758,7 +758,7 @@ class SegmentInMemoryImpl {
std::shared_ptr<SegmentInMemoryImpl> get_output_segment(size_t num_values, bool pre_allocate=true) const;
- std::shared_ptr<SegmentInMemoryImpl> filter(const util::BitSet& filter_bitset,
+ std::shared_ptr<SegmentInMemoryImpl> filter(util::BitSet&& filter_bitset,
bool filter_down_stringpool=false,
bool validate=false) const;
diff --git a/cpp/arcticdb/column_store/test/test_memory_segment.cpp b/cpp/arcticdb/column_store/test/test_memory_segment.cpp
index 3416242e75..d49e02168c 100644
--- a/cpp/arcticdb/column_store/test/test_memory_segment.cpp
+++ b/cpp/arcticdb/column_store/test/test_memory_segment.cpp
@@ -481,7 +481,7 @@ TEST(MemSegment, Filter) {
filter_bitset.set_bit(retained_row);
}
- auto filtered_seg = seg.filter(filter_bitset);
+ auto filtered_seg = seg.filter(std::move(filter_bitset));
for (auto&& [idx, row]: folly::enumerate(filtered_seg)) {
ASSERT_EQ(static_cast(retained_rows[idx]), row.scalar_at(0));
diff --git a/cpp/arcticdb/entity/metrics.cpp b/cpp/arcticdb/entity/metrics.cpp
index 6f92314fc2..1480e251a5 100644
--- a/cpp/arcticdb/entity/metrics.cpp
+++ b/cpp/arcticdb/entity/metrics.cpp
@@ -27,45 +27,39 @@ namespace arcticdb {
std::shared_ptr<PrometheusInstance> PrometheusInstance::instance_;
std::once_flag PrometheusInstance::init_flag_;
- std::shared_ptr<PrometheusConfigInstance> PrometheusConfigInstance::instance(){
- std::call_once(PrometheusConfigInstance::init_flag_, &PrometheusConfigInstance::init);
- return PrometheusConfigInstance::instance_;
+ PrometheusInstance::PrometheusInstance() : configured_(false) {
+ arcticdb::log::version().debug("PrometheusInstance created");
}
- std::shared_ptr<PrometheusConfigInstance> PrometheusConfigInstance::instance_;
- std::once_flag PrometheusConfigInstance::init_flag_;
-
- PrometheusInstance::PrometheusInstance() {
-
- auto cfg = PrometheusConfigInstance::instance()->config;
-
- if (cfg.prometheus_model() == PrometheusConfigInstance::Proto::PUSH) {
- // PUSH MODE
- if (cfg.instance().empty() || cfg.host().empty() || cfg.port().empty() || cfg.job_name().empty()) {
- util::raise_rte( "Invalid Push PrometheusConfig {}", arcticdb::util::format(cfg));
- }
+ void PrometheusInstance::configure(const MetricsConfig& config, const bool reconfigure) {
+ if (configured_ && !reconfigure) {
+ arcticdb::log::version().warn("Prometheus already configured");
+ return;
+ }
+
+ cfg_ = config;
+ if (cfg_.model_ == MetricsConfig::Model::PUSH) {
// IMP: This is the GROUPING_KEY - every push overwrites the previous grouping key
auto labels = prometheus::Gateway::GetInstanceLabel(getHostName());
- mongo_instance_ = cfg.instance();
+ mongo_instance_ = cfg_.instance;
labels.try_emplace(MONGO_INSTANCE_LABEL, mongo_instance_);
- labels.try_emplace(PROMETHEUS_ENV_LABEL, cfg.prometheus_env());
- gateway_= std::make_shared<prometheus::Gateway>(cfg.host(), cfg.port(), cfg.job_name(), labels);
+ labels.try_emplace(PROMETHEUS_ENV_LABEL, cfg_.prometheus_env);
+ gateway_= std::make_shared<prometheus::Gateway>(cfg_.host, cfg_.port, cfg_.job_name, labels);
registry_ = std::make_shared<prometheus::Registry>();
gateway_->RegisterCollectable(registry_);
- arcticdb::log::version().info("Prometheus Push created with settings {}", arcticdb::util::format(cfg));
+ arcticdb::log::version().info("Prometheus Push created with settings {}", cfg_);
- } else if (cfg.prometheus_model() == PrometheusConfigInstance::Proto::WEB) {
-
- // WEB SERVER MODE
- if (cfg.port().empty()) {
- util::raise_rte( "PrometheusConfig web mode port not set {}", arcticdb::util::format(cfg));
- }
+ } else if (cfg_.model_ == MetricsConfig::Model::PULL) {
// create an http server ie "http://hostname:"+port()+"/metrics"
- std::string hostname = getHostName();
- std::string endpoint = hostname + ":" + cfg.port();
+ std::string endpoint = cfg_.host + ":" + cfg_.port;
+
+ if (exposer_.use_count() > 0) {
+ exposer_->RemoveCollectable(registry_, "/metrics");
+ exposer_.reset();
+ }
// default to 2 threads
exposer_ = std::make_shared<prometheus::Exposer>(endpoint, 2);
@@ -79,8 +73,10 @@ namespace arcticdb {
arcticdb::log::version().info("Prometheus endpoint created on {}/metrics", endpoint);
}
else {
- arcticdb::log::version().info("Prometheus not configured {}", arcticdb::util::format(cfg));
+ arcticdb::log::version().info("Prometheus not configured {}", cfg_);
}
+
+ configured_ = true;
}
// new mechanism, labels at runtime
diff --git a/cpp/arcticdb/entity/metrics.hpp b/cpp/arcticdb/entity/metrics.hpp
index a370f75032..b3366048b0 100644
--- a/cpp/arcticdb/entity/metrics.hpp
+++ b/cpp/arcticdb/entity/metrics.hpp
@@ -26,6 +26,7 @@
#include