Skip to content

Commit

Permalink
Set has_nulls when constructing default metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-brant authored and andrewseidl committed Apr 6, 2021
1 parent 914e5c1 commit cdb0dff
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 9 deletions.
11 changes: 2 additions & 9 deletions DataMgr/ForeignStorage/CsvDataWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ void CsvDataWrapper::updateMetadata(
entry.second.getBuffer()->getEncoder()->getMetadata(column->columnType);
cached_metadata->chunkStats.max = chunk_metadata->chunkStats.max;
cached_metadata->chunkStats.min = chunk_metadata->chunkStats.min;
cached_metadata->chunkStats.has_nulls = chunk_metadata->chunkStats.has_nulls;
cached_metadata->numBytes = entry.second.getBuffer()->size();
fragmenter->updateColumnChunkMetadata(column, fragment_id, cached_metadata);
}
Expand Down Expand Up @@ -815,16 +816,8 @@ void add_placeholder_metadata(
? total_num_rows % foreign_table->maxFragRows
: foreign_table->maxFragRows;

ForeignStorageBuffer empty_buffer;
// Use default encoder metadata as in parquet wrapper
empty_buffer.initEncoder(column->columnType);
auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
chunk_metadata->numElements = num_elements;
chunk_metadata->chunkStats.min.intval = std::numeric_limits<int32_t>::max();
chunk_metadata->chunkStats.max.intval = std::numeric_limits<int32_t>::lowest();

chunk_key[CHUNK_KEY_FRAGMENT_IDX] = fragment_id;
chunk_metadata_map[chunk_key] = chunk_metadata;
chunk_metadata_map[chunk_key] = Csv::get_placeholder_metadata(column, num_elements);
}
}

Expand Down
24 changes: 24 additions & 0 deletions DataMgr/ForeignStorage/CsvShared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,30 @@ Chunk_NS::Chunk make_chunk_for_column(
retval.initEncoder();
return retval;
}

std::shared_ptr<ChunkMetadata> get_placeholder_metadata(const ColumnDescriptor* column,
size_t num_elements) {
ForeignStorageBuffer empty_buffer;
// Use default encoder metadata as in parquet wrapper
empty_buffer.initEncoder(column->columnType);
auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(column->columnType);
chunk_metadata->numElements = num_elements;

if (!column->columnType.is_varlen_indeed()) {
chunk_metadata->numBytes = column->columnType.get_size() * num_elements;
}
// min/max not set by default for arrays, so get from elem type encoder
if (column->columnType.is_array()) {
ForeignStorageBuffer scalar_buffer;
scalar_buffer.initEncoder(column->columnType.get_elem_type());
auto scalar_metadata =
scalar_buffer.getEncoder()->getMetadata(column->columnType.get_elem_type());
chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
}
chunk_metadata->chunkStats.has_nulls = true;
return chunk_metadata;
}
} // namespace Csv

} // namespace foreign_storage
4 changes: 4 additions & 0 deletions DataMgr/ForeignStorage/CsvShared.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,9 @@ Chunk_NS::Chunk make_chunk_for_column(
std::map<ChunkKey, std::shared_ptr<ChunkMetadata>>& chunk_metadata_map,
const std::map<ChunkKey, AbstractBuffer*>& buffers);

// Construct default metadata for given column descriptor with num_elements
std::shared_ptr<ChunkMetadata> get_placeholder_metadata(const ColumnDescriptor* column,
size_t num_elements);

} // namespace Csv
} // namespace foreign_storage
15 changes: 15 additions & 0 deletions Tests/ForeignTableDmlTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,21 @@ TEST_P(CacheControllingSelectQueryTest, CSV_CustomDelimiters) {
// clang-format on
}

TEST_P(DataWrapperSelectQueryTest, AggregateAndGroupByNull) {
const auto& query =
getCreateForeignTableQuery("(t TEXT, i INT)", "null_str", GetParam());
sql(query);
TQueryResult result;
sql(result, "select t, count( * ) from test_foreign_table group by 1 order by 1 asc;");
// clang-format off
assertResultSetEqual({{"a", i(1)},
{"b", i(1)},
{"c", i(1)},
{Null, i(1)}},
result);
// clang-format on
}

class CSVFileTypeTests
: public SelectQueryTest,
public ::testing::WithParamInterface<std::pair<std::string, std::string>> {};
Expand Down
5 changes: 5 additions & 0 deletions Tests/FsiDataFiles/null_str.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
string
a,1
b,1
,1
c,1
Binary file added Tests/FsiDataFiles/null_str.parquet
Binary file not shown.

0 comments on commit cdb0dff

Please sign in to comment.