Skip to content

Commit

Permalink
[CsvIO] implement CsvIOParseHelpers::validate(CSVFormat, Schema) (#31869)
Browse files Browse the repository at this point in the history

* [CsvIO] implement CsvIOParseHelpers::validate(CSVFormat, Schema)

* [CsvIO] update names for CsvIOParseHelpers validation methods to improve clarity.
  • Loading branch information
francisohara24 authored Jul 13, 2024
1 parent ff2731b commit ba27c36
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.math.BigDecimal;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings;
Expand All @@ -35,7 +36,7 @@ final class CsvIOParseHelpers {
* "Reading CSV Files" section of the {@link CsvIO} documentation for information regarding which
* {@link CSVFormat} parameters are checked during validation.
*/
static void validate(CSVFormat format) {
static void validateCsvFormat(CSVFormat format) {
String[] header =
checkArgumentNotNull(format.getHeader(), "Illegal %s: header is required", CSVFormat.class);

Expand Down Expand Up @@ -66,8 +67,20 @@ static void validate(CSVFormat format) {
* Validate the {@link CSVFormat} in relation to the {@link Schema} for CSV record parsing
* requirements.
*/
// TODO(https://github.com/apache/beam/issues/31716): implement method.
static void validate(CSVFormat format, Schema schema) {}
static void validateCsvFormatWithSchema(CSVFormat format, Schema schema) {
  // Report a missing header as an IllegalArgumentException, using the same message as
  // validateCsvFormat, instead of letting Arrays.asList fail with a bare NullPointerException.
  String[] headerNames = format.getHeader();
  checkArgument(headerNames != null, "Illegal %s: header is required", CSVFormat.class);
  List<String> header = Arrays.asList(headerNames);

  for (Schema.Field field : schema.getFields()) {
    // Nullable fields may be absent from the header; only required fields are enforced.
    if (field.getType().getNullable()) {
      continue;
    }
    String fieldName = field.getName();
    checkArgument(
        header.contains(fieldName),
        "Illegal %s: required %s field '%s' not found in header",
        CSVFormat.class,
        Schema.class.getTypeName(),
        fieldName);
  }
}

/**
* Build a {@link List} of {@link Schema.Field}s corresponding to the expected position of each
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,19 @@
@RunWith(JUnit4.class)
public class CsvIOParseHelpersTest {

/** Tests for {@link CsvIOParseHelpers#validate(CSVFormat)}. */
/** Tests for {@link CsvIOParseHelpers#validateCsvFormat(CSVFormat)}. */
@Test
public void givenCSVFormatWithHeader_validates() {
CSVFormat format = csvFormatWithHeader();
CsvIOParseHelpers.validate(format);
CsvIOParseHelpers.validateCsvFormat(format);
}

@Test
public void givenCSVFormatWithNullHeader_throwsException() {
CSVFormat format = csvFormat();
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals("Illegal class org.apache.commons.csv.CSVFormat: header is required", gotMessage);
}
Expand All @@ -54,7 +55,8 @@ public void givenCSVFormatWithNullHeader_throwsException() {
public void givenCSVFormatWithEmptyHeader_throwsException() {
CSVFormat format = csvFormat().withHeader();
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals(
"Illegal class org.apache.commons.csv.CSVFormat: header cannot be empty", gotMessage);
Expand All @@ -64,7 +66,8 @@ public void givenCSVFormatWithEmptyHeader_throwsException() {
public void givenCSVFormatWithHeaderContainingEmptyString_throwsException() {
CSVFormat format = csvFormat().withHeader("", "bar");
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals(
"Illegal class org.apache.commons.csv.CSVFormat: column name is required", gotMessage);
Expand All @@ -74,7 +77,8 @@ public void givenCSVFormatWithHeaderContainingEmptyString_throwsException() {
public void givenCSVFormatWithHeaderContainingNull_throwsException() {
CSVFormat format = csvFormat().withHeader(null, "bar");
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals(
"Illegal class org.apache.commons.csv.CSVFormat: column name is required", gotMessage);
Expand All @@ -84,7 +88,8 @@ public void givenCSVFormatWithHeaderContainingNull_throwsException() {
public void givenCSVFormatThatAllowsMissingColumnNames_throwsException() {
CSVFormat format = csvFormatWithHeader().withAllowMissingColumnNames(true);
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals(
"Illegal class org.apache.commons.csv.CSVFormat: cannot allow missing column names",
Expand All @@ -95,7 +100,8 @@ public void givenCSVFormatThatAllowsMissingColumnNames_throwsException() {
public void givenCSVFormatThatIgnoresHeaderCase_throwsException() {
CSVFormat format = csvFormatWithHeader().withIgnoreHeaderCase(true);
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals(
"Illegal class org.apache.commons.csv.CSVFormat: cannot ignore header case", gotMessage);
Expand All @@ -105,14 +111,48 @@ public void givenCSVFormatThatIgnoresHeaderCase_throwsException() {
public void givenCSVFormatThatAllowsDuplicateHeaderNames_throwsException() {
CSVFormat format = csvFormatWithHeader().withAllowDuplicateHeaderNames(true);
String gotMessage =
assertThrows(IllegalArgumentException.class, () -> CsvIOParseHelpers.validate(format))
assertThrows(
IllegalArgumentException.class, () -> CsvIOParseHelpers.validateCsvFormat(format))
.getMessage();
assertEquals(
"Illegal class org.apache.commons.csv.CSVFormat: cannot allow duplicate header names",
gotMessage);
}

/** End of tests for {@link CsvIOParseHelpers#validate(CSVFormat)}. */
/** End of tests for {@link CsvIOParseHelpers#validateCsvFormat(CSVFormat)}. */
//////////////////////////////////////////////////////////////////////////////////////////////

/** Tests for {@link CsvIOParseHelpers#validateCsvFormatWithSchema(CSVFormat, Schema)}. */
@Test
public void givenNullableSchemaFieldNotPresentInHeader_validates() {
  // "baz" is nullable and missing from the header, so validation is expected to pass silently.
  Schema schemaWithNullableBaz =
      Schema.of(
          Schema.Field.of("foo", Schema.FieldType.STRING),
          Schema.Field.of("bar", Schema.FieldType.STRING),
          Schema.Field.nullable("baz", Schema.FieldType.STRING));
  CSVFormat fooBarFormat = csvFormat().withHeader("foo", "bar");

  CsvIOParseHelpers.validateCsvFormatWithSchema(fooBarFormat, schemaWithNullableBaz);
}

@Test
public void givenRequiredSchemaFieldNotPresentInHeader_throwsException() {
  // "baz" is a required (non-nullable) field but the header only declares "foo" and "bar".
  Schema schemaWithRequiredBaz =
      Schema.of(
          Schema.Field.of("foo", Schema.FieldType.STRING),
          Schema.Field.of("bar", Schema.FieldType.STRING),
          Schema.Field.of("baz", Schema.FieldType.STRING));
  CSVFormat fooBarFormat = csvFormat().withHeader("foo", "bar");

  IllegalArgumentException thrown =
      assertThrows(
          IllegalArgumentException.class,
          () -> CsvIOParseHelpers.validateCsvFormatWithSchema(fooBarFormat, schemaWithRequiredBaz));

  assertEquals(
      "Illegal class org.apache.commons.csv.CSVFormat: required org.apache.beam.sdk.schemas.Schema field 'baz' not found in header",
      thrown.getMessage());
}

/** End of tests for {@link CsvIOParseHelpers#validateCsvFormatWithSchema(CSVFormat, Schema)}. */
//////////////////////////////////////////////////////////////////////////////////////////////

/** Tests for {@link CsvIOParseHelpers#parseCell(String, Schema.Field)}. */
Expand Down

0 comments on commit ba27c36

Please sign in to comment.