From cdb0dff0fa0df306cf994d3f892ea359f69a545f Mon Sep 17 00:00:00 2001 From: Alex Brant Date: Tue, 30 Mar 2021 15:32:38 -0700 Subject: [PATCH] Set has_nulls when constructing default metadata --- DataMgr/ForeignStorage/CsvDataWrapper.cpp | 11 ++-------- DataMgr/ForeignStorage/CsvShared.cpp | 24 ++++++++++++++++++++++ DataMgr/ForeignStorage/CsvShared.h | 4 ++++ Tests/ForeignTableDmlTest.cpp | 15 ++++++++++++++ Tests/FsiDataFiles/null_str.csv | 5 +++++ Tests/FsiDataFiles/null_str.parquet | Bin 0 -> 2015 bytes 6 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 Tests/FsiDataFiles/null_str.csv create mode 100644 Tests/FsiDataFiles/null_str.parquet diff --git a/DataMgr/ForeignStorage/CsvDataWrapper.cpp b/DataMgr/ForeignStorage/CsvDataWrapper.cpp index 55df1e60c5..48c5f0f21c 100644 --- a/DataMgr/ForeignStorage/CsvDataWrapper.cpp +++ b/DataMgr/ForeignStorage/CsvDataWrapper.cpp @@ -146,6 +146,7 @@ void CsvDataWrapper::updateMetadata( entry.second.getBuffer()->getEncoder()->getMetadata(column->columnType); cached_metadata->chunkStats.max = chunk_metadata->chunkStats.max; cached_metadata->chunkStats.min = chunk_metadata->chunkStats.min; + cached_metadata->chunkStats.has_nulls = chunk_metadata->chunkStats.has_nulls; cached_metadata->numBytes = entry.second.getBuffer()->size(); fragmenter->updateColumnChunkMetadata(column, fragment_id, cached_metadata); } @@ -815,16 +816,8 @@ void add_placeholder_metadata( ? total_num_rows % foreign_table->maxFragRows : foreign_table->maxFragRows; - ForeignStorageBuffer empty_buffer; - // Use default encoder metadata as in parquet wrapper - empty_buffer.initEncoder(column->columnType); - auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType); - chunk_metadata->numElements = num_elements; - chunk_metadata->chunkStats.min.intval = std::numeric_limits::max(); - chunk_metadata->chunkStats.max.intval = std::numeric_limits::lowest(); - chunk_key[CHUNK_KEY_FRAGMENT_IDX] = fragment_id; - chunk_metadata_map[chunk_key] = chunk_metadata; + chunk_metadata_map[chunk_key] = Csv::get_placeholder_metadata(column, num_elements); } } diff --git a/DataMgr/ForeignStorage/CsvShared.cpp b/DataMgr/ForeignStorage/CsvShared.cpp index 8eed108bc2..84ce645b12 100644 --- a/DataMgr/ForeignStorage/CsvShared.cpp +++ b/DataMgr/ForeignStorage/CsvShared.cpp @@ -231,6 +231,30 @@ Chunk_NS::Chunk make_chunk_for_column( retval.initEncoder(); return retval; } + +std::shared_ptr get_placeholder_metadata(const ColumnDescriptor* column, + size_t num_elements) { + ForeignStorageBuffer empty_buffer; + // Use default encoder metadata as in parquet wrapper + empty_buffer.initEncoder(column->columnType); + auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType); + chunk_metadata->numElements = num_elements; + + if (!column->columnType.is_varlen_indeed()) { + chunk_metadata->numBytes = column->columnType.get_size() * num_elements; + } + // min/max not set by default for arrays, so get from elem type encoder + if (column->columnType.is_array()) { + ForeignStorageBuffer scalar_buffer; + scalar_buffer.initEncoder(column->columnType.get_elem_type()); + auto scalar_metadata = + scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type()); + chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min; + chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max; + } + chunk_metadata->chunkStats.has_nulls = true; + return chunk_metadata; +} } // namespace Csv } // namespace foreign_storage diff --git a/DataMgr/ForeignStorage/CsvShared.h b/DataMgr/ForeignStorage/CsvShared.h index b343e4f2cc..84bce380e4 100644 --- a/DataMgr/ForeignStorage/CsvShared.h +++ b/DataMgr/ForeignStorage/CsvShared.h @@ -92,5 +92,9 @@ Chunk_NS::Chunk make_chunk_for_column( std::map>& chunk_metadata_map, const std::map& buffers); +// Construct default metadata for given column descriptor with num_elements +std::shared_ptr get_placeholder_metadata(const ColumnDescriptor* column, + size_t num_elements); + } // namespace Csv } // namespace foreign_storage diff --git a/Tests/ForeignTableDmlTest.cpp b/Tests/ForeignTableDmlTest.cpp index da451d10c4..f00553a748 100644 --- a/Tests/ForeignTableDmlTest.cpp +++ b/Tests/ForeignTableDmlTest.cpp @@ -1153,6 +1153,21 @@ TEST_P(CacheControllingSelectQueryTest, CSV_CustomDelimiters) { // clang-format on } +TEST_P(DataWrapperSelectQueryTest, AggregateAndGroupByNull) { + const auto& query = + getCreateForeignTableQuery("(t TEXT, i INT)", "null_str", GetParam()); + sql(query); + TQueryResult result; + sql(result, "select t, count( * ) from test_foreign_table group by 1 order by 1 asc;"); + // clang-format off + assertResultSetEqual({{"a", i(1)}, + {"b", i(1)}, + {"c", i(1)}, + {Null, i(1)}}, + result); + // clang-format on +} + class CSVFileTypeTests : public SelectQueryTest, public ::testing::WithParamInterface> {}; diff --git a/Tests/FsiDataFiles/null_str.csv b/Tests/FsiDataFiles/null_str.csv new file mode 100644 index 0000000000..fd8c21149d --- /dev/null +++ b/Tests/FsiDataFiles/null_str.csv @@ -0,0 +1,5 @@ +string +a,1 +b,1 +,1 +c,1 \ No newline at end of file diff --git a/Tests/FsiDataFiles/null_str.parquet b/Tests/FsiDataFiles/null_str.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e13a7befd85f3e94c281857eea72b23a37e53c55 GIT binary patch literal 2015 zcmb_dOK;mo5MI)>;08ib1SUZNItWBV_@JnwEU8tH9+rwlQ?+DUZuBY&fuv}O4!L6W zG;J8jF(`td$DI3f`ZLl)Z#nc-^we2Wa%CrJV59`JyYu?KnVlhZgc}Oa@!Pz302_B7 zp?hC42qA+mCXe|0ypX0AKE}tAMYO}1B4dEiGDpRZEc{DP8Nw{t17JB%eV);046!>~wx%e}v3BFyDqIr*=R z*$#{~pkvWB*2@Eqdf57<@H9-xCH)XB4;qSuLptwcJJ zzEgicu_wZ77#tvkeH$U*zSBj^pLRLID@y*BgMFI?axvx`q;E{b-`Pva8r#NnG?pHv zE~U9Wo;V`{XDOReikYmGPD!p~j*W3Zm40B1$D@UG1?mW*cOxX1&*W%`L;Ci4&lsqAlo4Gv5*r}EC=XEE) z(4ECV_s;zesSYjjEVNnsE}X0Leo!qPl(A!Jm04G-T(o!J!3gT*d=O!{;M2^vbM?8o z-<+DHMtRNl3B|vuvxDli+ina2Gut`Qi@H`&Ol>jI%T>>4H7DIlY1U~eu6{Ne=%k*t zJircgYX6|V-%=KKvy`>lY7_@C{U@Dlm>W@trq>u+%9@=tXGW_&?`lt-Q^@g7IZ^EL zt$fyW@8(i+46k{%j9K=oe%jx67hPI^x$2wb_@d@iOE>PzE%}hQYirF5dKWE4DTO0| zRcJJHnJ%>p7`kuZuve~9Sq+B<=4tf|@RKme3I{XN?nRSGM@xfR=%hg$46cT;Mg2qY zYmnoobjawC!m}Ffl}ji?97OIm0;5$RaQqgD@Zyetn%UEbmhY#QpOci5DP)v~Cv|*u WvVZt}A`Rbf`U(GI6QLvcYySs`Zrnrw literal 0 HcmV?d00001