Skip to content

Commit

Permalink
GH-45151: [C++][Parquet] Fix Null-dereference READ in parquet::arrow:…
Browse files Browse the repository at this point in the history
…:ListToSchemaField (#45152)

### Rationale for this change

Fix Null-dereference READ in parquet::arrow::ListToSchemaField

### What changes are included in this PR?

Add a rule check before parquet::arrow::ListToSchemaField

### Are these changes tested?

Yes

### Are there any user-facing changes?

Bugfix

* GitHub Issue: #45151

Lead-authored-by: mwish <[email protected]>
Co-authored-by: mwish <[email protected]>
Co-authored-by: Gang Wu <[email protected]>
Signed-off-by: mwish <[email protected]>
  • Loading branch information
mapleFU and wgtmac authored Jan 3, 2025
1 parent ffcc117 commit 48d5151
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
21 changes: 21 additions & 0 deletions cpp/src/parquet/arrow/arrow_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,27 @@ TEST_F(TestConvertParquetSchema, IllegalParquetNestedSchema) {
Invalid, testing::HasSubstr("LIST-annotated groups must not be repeated."),
ConvertSchema(parquet_fields));
}
// List<List<>>: outer list is two-level encoding, inner list is empty.
//
// optional group my_list (LIST) {
// repeated group array (LIST) {
// repeated group list {
// }
// }
// }
{
auto list = GroupNode::Make("list", Repetition::REPEATED, {});
auto array =
GroupNode::Make("array", Repetition::REPEATED, {list}, ConvertedType::LIST);
std::vector<NodePtr> parquet_fields;
parquet_fields.push_back(
GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, ConvertedType::LIST));

EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid,
testing::HasSubstr("LIST-annotated groups must have at least one child."),
ConvertSchema(parquet_fields));
}
}

Status ArrowSchemaToParquetMetadata(std::shared_ptr<::arrow::Schema>& arrow_schema,
Expand Down
11 changes: 8 additions & 3 deletions cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -676,9 +676,14 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
return Status::Invalid("Group with one repeated child must be LIST-annotated.");
}
// LIST-annotated group with three-level encoding cannot be repeated.
if (repeated_field->is_group() &&
!static_cast<const GroupNode&>(*repeated_field).field(0)->is_repeated()) {
return Status::Invalid("LIST-annotated groups must not be repeated.");
if (repeated_field->is_group()) {
auto& repeated_group_field = static_cast<const GroupNode&>(*repeated_field);
if (repeated_group_field.field_count() == 0) {
return Status::Invalid("LIST-annotated groups must have at least one child.");
}
if (!repeated_group_field.field(0)->is_repeated()) {
return Status::Invalid("LIST-annotated groups must not be repeated.");
}
}
RETURN_NOT_OK(
NodeToSchemaField(*repeated_field, current_levels, ctx, out, child_field));
Expand Down

0 comments on commit 48d5151

Please sign in to comment.