Skip to content

Commit

Permalink
Resolve review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Vasil Pashov committed Apr 19, 2024
1 parent 87ef604 commit 3732304
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 14 deletions.
2 changes: 1 addition & 1 deletion cpp/arcticdb/python/python_to_tensor_frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ NativeTensor obj_to_tensor(PyObject *ptr, bool empty_types) {
const auto arr = pybind11::detail::array_proxy(ptr);
const auto descr = pybind11::detail::array_descriptor_proxy(arr->descr);
auto ndim = arr->nd;
ssize_t size = ndim == 1 ? arr->dimensions[0] : arr->dimensions[0] * arr->dimensions[1];
const ssize_t size = ndim == 1 ? arr->dimensions[0] : arr->dimensions[0] * arr->dimensions[1];
// In Pandas < 2, empty series dtype is `"float"`, but as of Pandas 2.0, empty series dtype is `"object"`
// The Normalizer in Python cast empty `"float"` series to `"object"` so `EMPTY` is used here.
// See: https://github.com/man-group/ArcticDB/pull/1049
Expand Down
18 changes: 8 additions & 10 deletions cpp/arcticdb/version/schema_checks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ inline void check_normalization_index_match(
);
} else {
// (old_idx_kind == IndexDescriptor::TIMESTAMP && new_idx_kind == IndexDescriptor::ROWCOUNT) is left to preserve
// pre-empty index behavior with pandas 2, see test_empty_writes.cpp::test_append_empty_series. Empty pd.Series
// pre-empty index behavior with pandas 2, see test_empty_writes.py::test_append_empty_series. Empty pd.Series
// have Rowrange index, but due to: https://github.com/man-group/ArcticDB/blob/bd1776291fe402d8b18af9fea865324ebd7705f1/python/arcticdb/version_store/_normalization.py#L545
// it gets converted to DatetimeIndex (all empty indexes except categorical and multiindex are converted to datetime index
// in pandas 2 if empty index type is disabled), however we still want to be able to append pd.Series to empty pd.Series.
Expand All @@ -87,25 +87,23 @@ inline void check_normalization_index_match(

inline bool columns_match(
const StreamDescriptor& df_in_store_descriptor,
const StreamDescriptor& new_df_descriptor,
bool empty_types
const StreamDescriptor& new_df_descriptor
) {
const int right_fields_offset = (empty_types && df_in_store_descriptor.index().type() == IndexDescriptor::EMPTY)
? new_df_descriptor.index().field_count()
: 0;
const int index_field_size =
df_in_store_descriptor.index().type() == IndexDescriptor::EMPTY ? new_df_descriptor.index().field_count() : 0;
// The empty index is compatible with all other index types. Differences in the index fields in this case is
// allowed. The index fields are always the first in the list.
if (df_in_store_descriptor.fields().size() + right_fields_offset != new_df_descriptor.fields().size()) {
if (df_in_store_descriptor.fields().size() + index_field_size != new_df_descriptor.fields().size()) {
return false;
}
// In case the left index is empty index we want to skip name/type checking of the index fields which are always
// the first fields.
for (auto i = 0; i < int(df_in_store_descriptor.fields().size()); ++i) {
if (df_in_store_descriptor.fields(i).name() != new_df_descriptor.fields(i + right_fields_offset).name())
if (df_in_store_descriptor.fields(i).name() != new_df_descriptor.fields(i + index_field_size).name())
return false;

const TypeDescriptor& left_type = df_in_store_descriptor.fields(i).type();
const TypeDescriptor& right_type = new_df_descriptor.fields(i + right_fields_offset).type();
const TypeDescriptor& right_type = new_df_descriptor.fields(i + index_field_size).type();

if (!trivially_compatible_types(left_type, right_type) &&
!(is_empty_type(left_type.data_type()) || is_empty_type(right_type.data_type())))
Expand All @@ -128,7 +126,7 @@ inline void fix_descriptor_mismatch_or_throw(

fix_normalization_or_throw(operation == APPEND, existing_isr, new_frame);

if (!columns_match(old_sd, new_frame.desc, empty_types)) {
if (!columns_match(old_sd, new_frame.desc)) {
throw StreamDescriptorMismatch(
"The columns (names and types) in the argument are not identical to that of the existing version",
StreamDescriptor{old_sd},
Expand Down
4 changes: 1 addition & 3 deletions python/arcticdb/version_store/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,7 @@ def _try_normalize(
dynamic_schema = self.resolve_defaults(
"dynamic_schema", self._lib_cfg.lib_desc.version.write_options, False, **kwargs
)
empty_types = self.resolve_defaults(
"empty_types", self._lib_cfg.lib_desc.version.write_options, False, **kwargs
)
empty_types = self.resolve_defaults("empty_types", self._lib_cfg.lib_desc.version.write_options, False)
try:
udm = normalize_metadata(metadata) if metadata is not None else None
opt_custom = self._custom_normalizer.normalize(dataframe)
Expand Down

0 comments on commit 3732304

Please sign in to comment.