From 3732304330d99039eb749a2f86496b8138d163cb Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 19 Apr 2024 16:22:30 +0300 Subject: [PATCH] Resolve review comments --- cpp/arcticdb/python/python_to_tensor_frame.cpp | 2 +- cpp/arcticdb/version/schema_checks.hpp | 18 ++++++++---------- python/arcticdb/version_store/_store.py | 4 +--- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/cpp/arcticdb/python/python_to_tensor_frame.cpp b/cpp/arcticdb/python/python_to_tensor_frame.cpp index c7f2a0381d..fce30d3020 100644 --- a/cpp/arcticdb/python/python_to_tensor_frame.cpp +++ b/cpp/arcticdb/python/python_to_tensor_frame.cpp @@ -116,7 +116,7 @@ NativeTensor obj_to_tensor(PyObject *ptr, bool empty_types) { const auto arr = pybind11::detail::array_proxy(ptr); const auto descr = pybind11::detail::array_descriptor_proxy(arr->descr); auto ndim = arr->nd; - ssize_t size = ndim == 1 ? arr->dimensions[0] : arr->dimensions[0] * arr->dimensions[1]; + const ssize_t size = ndim == 1 ? arr->dimensions[0] : arr->dimensions[0] * arr->dimensions[1]; // In Pandas < 2, empty series dtype is `"float"`, but as of Pandas 2.0, empty series dtype is `"object"` // The Normalizer in Python cast empty `"float"` series to `"object"` so `EMPTY` is used here. // See: https://github.com/man-group/ArcticDB/pull/1049 diff --git a/cpp/arcticdb/version/schema_checks.hpp b/cpp/arcticdb/version/schema_checks.hpp index 62032cbd0d..9b81d54058 100644 --- a/cpp/arcticdb/version/schema_checks.hpp +++ b/cpp/arcticdb/version/schema_checks.hpp @@ -67,7 +67,7 @@ inline void check_normalization_index_match( ); } else { // (old_idx_kind == IndexDescriptor::TIMESTAMP && new_idx_kind == IndexDescriptor::ROWCOUNT) is left to preserve - // pre-empty index behavior with pandas 2, see test_empty_writes.cpp::test_append_empty_series. Empty pd.Series + // pre-empty index behavior with pandas 2, see test_empty_writes.py::test_append_empty_series. Empty pd.Series // have Rowrange index, but due to: https://github.com/man-group/ArcticDB/blob/bd1776291fe402d8b18af9fea865324ebd7705f1/python/arcticdb/version_store/_normalization.py#L545 // it gets converted to DatetimeIndex (all empty indexes except categorical and multiindex are converted to datetime index // in pandas 2 if empty index type is disabled), however we still want to be able to append pd.Series to empty pd.Series. @@ -87,25 +87,23 @@ inline void check_normalization_index_match( inline bool columns_match( const StreamDescriptor& df_in_store_descriptor, - const StreamDescriptor& new_df_descriptor, - bool empty_types + const StreamDescriptor& new_df_descriptor ) { - const int right_fields_offset = (empty_types && df_in_store_descriptor.index().type() == IndexDescriptor::EMPTY) - ? new_df_descriptor.index().field_count() - : 0; + const int index_field_size = + df_in_store_descriptor.index().type() == IndexDescriptor::EMPTY ? new_df_descriptor.index().field_count() : 0; // The empty index is compatible with all other index types. Differences in the index fields in this case is // allowed. The index fields are always the first in the list. - if (df_in_store_descriptor.fields().size() + right_fields_offset != new_df_descriptor.fields().size()) { + if (df_in_store_descriptor.fields().size() + index_field_size != new_df_descriptor.fields().size()) { return false; } // In case the left index is empty index we want to skip name/type checking of the index fields which are always // the first fields. for (auto i = 0; i < int(df_in_store_descriptor.fields().size()); ++i) { - if (df_in_store_descriptor.fields(i).name() != new_df_descriptor.fields(i + right_fields_offset).name()) + if (df_in_store_descriptor.fields(i).name() != new_df_descriptor.fields(i + index_field_size).name()) return false; const TypeDescriptor& left_type = df_in_store_descriptor.fields(i).type(); - const TypeDescriptor& right_type = new_df_descriptor.fields(i + right_fields_offset).type(); + const TypeDescriptor& right_type = new_df_descriptor.fields(i + index_field_size).type(); if (!trivially_compatible_types(left_type, right_type) && !(is_empty_type(left_type.data_type()) || is_empty_type(right_type.data_type()))) @@ -128,7 +126,7 @@ inline void fix_descriptor_mismatch_or_throw( fix_normalization_or_throw(operation == APPEND, existing_isr, new_frame); - if (!columns_match(old_sd, new_frame.desc, empty_types)) { + if (!columns_match(old_sd, new_frame.desc)) { throw StreamDescriptorMismatch( "The columns (names and types) in the argument are not identical to that of the existing version", StreamDescriptor{old_sd}, diff --git a/python/arcticdb/version_store/_store.py b/python/arcticdb/version_store/_store.py index f26b987f36..c0fc92adcf 100644 --- a/python/arcticdb/version_store/_store.py +++ b/python/arcticdb/version_store/_store.py @@ -332,9 +332,7 @@ def _try_normalize( dynamic_schema = self.resolve_defaults( "dynamic_schema", self._lib_cfg.lib_desc.version.write_options, False, **kwargs ) - empty_types = self.resolve_defaults( - "empty_types", self._lib_cfg.lib_desc.version.write_options, False, **kwargs - ) + empty_types = self.resolve_defaults("empty_types", self._lib_cfg.lib_desc.version.write_options, False) try: udm = normalize_metadata(metadata) if metadata is not None else None opt_custom = self._custom_normalizer.normalize(dataframe)