From 50e11463dfd8b376058d21095aac847a0fe87d38 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Fri, 16 Apr 2021 19:23:48 +0100 Subject: [PATCH 01/45] refactor(Conditionally required field checks): Initial work to define and test conditionally require --- .gitignore | 2 + .../conveyal/gtfs/error/NewGTFSErrorType.java | 1 + .../gtfs/loader/ConditionallyRequired.java | 20 ++++ .../loader/ConditionallyRequiredCheck.java | 12 ++ .../conveyal/gtfs/loader/EntityPopulator.java | 11 ++ .../java/com/conveyal/gtfs/loader/Field.java | 1 + .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 105 ++++++++++++++++++ .../java/com/conveyal/gtfs/loader/Table.java | 35 ++++-- .../loader/ConditionallyRequiredTest.java | 77 +++++++++++++ .../agency.txt | 2 + .../calendar.txt | 2 + .../calendar_attributes.txt | 2 + .../calendar_dates.txt | 2 + .../directions.txt | 5 + .../fare_attributes.txt | 2 + .../fare_rules.txt | 3 + .../feed_info.txt | 2 + .../realtime_routes.txt | 3 + .../routes.txt | 2 + .../stop_times.txt | 26 +++++ .../stops.txt | 28 +++++ .../trips.txt | 7 ++ 22 files changed, 340 insertions(+), 10 deletions(-) create mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java create mode 100644 src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_attributes.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_dates.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/directions.txt create mode 100644 
src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/feed_info.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/realtime_routes.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt diff --git a/.gitignore b/.gitignore index 928ad1b18..2cfdf9567 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ *.iml .idea/ target/ + +GTFSGraphQLTest/ \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index dd8f4be99..8e648577f 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -11,6 +11,7 @@ public enum NewGTFSErrorType { BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), + CONDITIONALLY_REQUIRED(Priority.HIGH, "Field is conditionally required."), CURRENCY_UNKNOWN(Priority.MEDIUM, "The currency code was not recognized."), DATE_FORMAT(Priority.MEDIUM, "Date format should be YYYYMMDD."), DATE_NO_SERVICE(Priority.MEDIUM, "No 
service_ids were active on a date within the range of dates with defined service."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java new file mode 100644 index 000000000..72b0c47bf --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java @@ -0,0 +1,20 @@ +package com.conveyal.gtfs.loader; + +/** + * These are the values that are checked inline with {@link ConditionallyRequiredCheck} to determine if the required + * conditions have been met. + */ +public class ConditionallyRequired { + /** The type of check to be carried out */ + public final ConditionallyRequiredCheck check; + /** The minimum column value if a range check is being performed. */ + public double minValue; + /** The maximum column value if a range check is being performed. */ + public double maxValue; + + ConditionallyRequired(ConditionallyRequiredCheck check, double minValue, double maxValue) { + this.check = check; + this.minValue = minValue; + this.maxValue = maxValue; + } +} diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java new file mode 100644 index 000000000..6baa1b236 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java @@ -0,0 +1,12 @@ +package com.conveyal.gtfs.loader; + +/** + * These are the conditionally required checks to be carried out inline with the values provided in + * {@link ConditionallyRequired}. 
+ */ +public enum ConditionallyRequiredCheck { + LOCATION_TYPE_STOP_NAME_CHECK, + LOCATION_TYPE_STOP_LAT_CHECK, + LOCATION_TYPE_STOP_LON_CHECK, + LOCATION_TYPE_PARENT_STATION_CHECK, +} diff --git a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java index 762176262..91c40ad24 100644 --- a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java +++ b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java @@ -5,6 +5,7 @@ import com.conveyal.gtfs.model.CalendarDate; import com.conveyal.gtfs.model.Entity; import com.conveyal.gtfs.model.FareAttribute; +import com.conveyal.gtfs.model.FareRule; import com.conveyal.gtfs.model.Frequency; import com.conveyal.gtfs.model.PatternStop; import com.conveyal.gtfs.model.Route; @@ -117,6 +118,16 @@ public interface EntityPopulator { return fareAttribute; }; + EntityPopulator FARE_RULE = (result, columnForName) -> { + FareRule fareRule = new FareRule(); + fareRule.fare_id = getStringIfPresent(result, "fare_id", columnForName); + fareRule.route_id = getStringIfPresent(result, "route_id", columnForName); + fareRule.origin_id = getStringIfPresent(result, "origin_id", columnForName); + fareRule.destination_id = getStringIfPresent (result, "destination_id", columnForName); + fareRule.contains_id = getStringIfPresent (result, "contains_id", columnForName); + return fareRule; + }; + EntityPopulator FREQUENCY = (result, columnForName) -> { Frequency frequency = new Frequency(); frequency.trip_id = getStringIfPresent(result, "trip_id", columnForName); diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index 2440c4624..dd82abc9f 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -7,6 +7,7 @@ import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.SQLType; +import java.util.HashSet; import java.util.Set; /** 
diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index 37036657d..d611938fe 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -3,6 +3,8 @@ import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.error.SQLErrorStorage; +import com.conveyal.gtfs.model.FareRule; +import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.storage.StorageException; import com.csvreader.CsvReader; import com.google.common.hash.HashCode; @@ -278,6 +280,9 @@ private TableLoadResult load (Table table) { try { tableLoadResult.rowCount = loadInternal(table); tableLoadResult.fileSize = getTableSize(table); + if (table.conditionallyRequiredFields.size() > 0) { + conditionallyRequiredChecks(table); + } LOG.info(String.format("loaded in %d %s records", tableLoadResult.rowCount, table.name)); } catch (Exception ex) { LOG.error("Fatal error loading table", ex); @@ -571,4 +576,104 @@ public static String sanitize (String string, SQLErrorStorage errorStorage) { } return clean; } + + /** + * Perform all conditionally required checks on the fields within the provided table. + */ + private void conditionallyRequiredChecks(Table table) { + if (table.name.equals(Table.STOPS.name)) { + final TableReader stopTableReader = new JDBCTableReader(table, dataSource, tablePrefix, EntityPopulator.STOP); + Iterable stops = stopTableReader.getAllOrdered(); + // Iterate through each stop and check each conditionally required field in turn. + for (Stop stop : stops) { + for (ConditionallyRequired condition : table.conditionallyRequiredFields) { + switch(condition.check) { + case LOCATION_TYPE_STOP_NAME_CHECK: + //FIXME: location_type defaults to 0 if not provided. This needs to be INT_MISSING or similar + // so these tests don't give false positives. 
+ if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && + stop.stop_name == null) { + errorStorage.storeError( + NewGTFSError.forFeed( + CONDITIONALLY_REQUIRED, + String.format("stops.txt, stop_name is required for id %s.", stop.stop_id) + ) + ); + } + break; + case LOCATION_TYPE_STOP_LAT_CHECK: + if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && + stop.stop_lat == Double.MIN_VALUE) { + errorStorage.storeError( + NewGTFSError.forFeed( + CONDITIONALLY_REQUIRED, + String.format("stops.txt, stop_lat is required for id %s.", stop.stop_id))); + } + break; + case LOCATION_TYPE_STOP_LON_CHECK: + if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && + stop.stop_lon == Double.MIN_VALUE) { + errorStorage.storeError( + NewGTFSError.forFeed( + CONDITIONALLY_REQUIRED, + String.format("stops.txt, stop_long is required for id %s.", stop.stop_id) + ) + ); + } + break; + case LOCATION_TYPE_PARENT_STATION_CHECK: + if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && + stop.parent_station == null) { + errorStorage.storeError( + NewGTFSError.forFeed( + CONDITIONALLY_REQUIRED, + String.format("stops.txt, parent_station is required for id %s.", stop.stop_id) + ) + ); + } + break; + } + } + } + + // Because the fare rule table is produced before the stops table, the conditionally required checks have to + // be done in reverse. Instead of the fare rule table checking the zone id in the stops table, the stops table + // is responsible for iterating over the fare rule table to confirm required zone id references are available. + final TableReader fareRulesTableReader = + new JDBCTableReader(Table.FARE_RULES, dataSource, tablePrefix, EntityPopulator.FARE_RULE); + + // Get all zone ids referenced by the fare rule table. 
+ Set zoneIds = new HashSet<>(); + for (FareRule rule : fareRulesTableReader.getAllOrdered()) { + if (rule.origin_id != null) { + zoneIds.add(rule.origin_id); + } else if (rule.destination_id != null) { + zoneIds.add(rule.destination_id); + } else if (rule.contains_id != null) { + zoneIds.add(rule.contains_id); + } + } + + // Make sure all zone id references are available, if not store an error. + if (zoneIds.size() > 0) { + for (String zoneId : zoneIds) { + boolean match = false; + for (Stop stop : stops) { + if (zoneId.equals(stop.zone_id)) { + match = true; + break; + } + } + if (!match) { + errorStorage.storeError( + NewGTFSError.forFeed( + CONDITIONALLY_REQUIRED, + String.format("stops.txt, zone_id %s is required by fare_rules.txt.", zoneId) + ) + ); + } + } + } + } + } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 6ea3c2bcf..824dc9280 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -46,9 +46,11 @@ import java.util.zip.ZipFile; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; -import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; -import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; +import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_PARENT_STATION_CHECK; +import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_STOP_LAT_CHECK; +import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_STOP_LON_CHECK; +import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_STOP_NAME_CHECK; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -89,6 +91,8 @@ 
public class Table { * */ private boolean compoundKey; + public Set conditionallyRequiredFields = new HashSet<>(); + public Table (String name, Class entityClass, Requirement required, Field... fields) { // TODO: verify table name is OK for use in constructing dynamic SQL queries this.name = name; @@ -228,18 +232,24 @@ public Table (String name, Class entityClass, Requirement requ public static final Table STOPS = new Table("stops", Stop.class, REQUIRED, new StringField("stop_id", REQUIRED), new StringField("stop_code", OPTIONAL), - new StringField("stop_name", REQUIRED), + new StringField("stop_name", OPTIONAL), new StringField("stop_desc", OPTIONAL), - new DoubleField("stop_lat", REQUIRED, -80, 80, 6), - new DoubleField("stop_lon", REQUIRED, -180, 180, 6), - new StringField("zone_id", OPTIONAL), + new DoubleField("stop_lat", OPTIONAL, -80, 80, 6), + new DoubleField("stop_lon", OPTIONAL, -180, 180, 6), + new StringField("zone_id", OPTIONAL), new URLField("stop_url", OPTIONAL), new ShortField("location_type", OPTIONAL, 2), - // FIXME: Need self-reference check during referential integrity check - new StringField("parent_station", OPTIONAL), //.isReferenceToSelf() + new StringField("parent_station", REQUIRED), new StringField("stop_timezone", OPTIONAL), - new ShortField("wheelchair_boarding", OPTIONAL, 2) - ).restrictDelete().addPrimaryKey(); + new ShortField("wheelchair_boarding", OPTIONAL, 2), + new StringField("platform_code", OPTIONAL) + ) + .restrictDelete() + .addPrimaryKey() + .addConditionallyRequired(LOCATION_TYPE_STOP_NAME_CHECK, 0,2) + .addConditionallyRequired(LOCATION_TYPE_STOP_LAT_CHECK, 0,2) + .addConditionallyRequired(LOCATION_TYPE_STOP_LON_CHECK, 0,2) + .addConditionallyRequired(LOCATION_TYPE_PARENT_STATION_CHECK, 2,4); public static final Table PATTERN_STOP = new Table("pattern_stops", PatternStop.class, OPTIONAL, new StringField("pattern_id", REQUIRED).isReferenceTo(PATTERNS), @@ -996,4 +1006,9 @@ public int getKeyFieldIndex(Field[] fields) { 
String keyField = getKeyFieldName(); return Field.getFieldIndex(fields, keyField); } + + public Table addConditionallyRequired(ConditionallyRequiredCheck check, double minValue, double maxValue) { + this.conditionallyRequiredFields.add(new ConditionallyRequired(check, minValue, maxValue)); + return this; + } } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java new file mode 100644 index 000000000..9734a054e --- /dev/null +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -0,0 +1,77 @@ +package com.conveyal.gtfs.loader; + +import com.conveyal.gtfs.TestUtils; +import com.conveyal.gtfs.error.NewGTFSErrorType; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import javax.sql.DataSource; + +import java.io.IOException; + +import static com.conveyal.gtfs.GTFS.load; +import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; + +public class ConditionallyRequiredTest { + private static String testDBName; + private static DataSource testDataSource; + private static String testNamespace; + + @BeforeAll + public static void setUpClass() throws IOException { + // Create a new database + testDBName = TestUtils.generateNewDB(); + String dbConnectionUrl = String.format("jdbc:postgresql://localhost/%s", testDBName); + testDataSource = TestUtils.createTestDataSource(dbConnectionUrl); + // load feed into db + String zipFileName = TestUtils.zipFolderFiles("real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks", true); + FeedLoadResult feedLoadResult = load(zipFileName, testDataSource); + testNamespace = feedLoadResult.uniqueIdentifier; + } + + @AfterAll + public static void tearDownClass() { + TestUtils.dropDB(testDBName); + } + + @Test + public void 
stopTableMissingConditionallyRequiredStopName() { + checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, stop_name is required for id 4957."); + } + + @Test + public void stopTableMissingConditionallyRequiredParentStation() { + checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, parent_station is required for id 691."); + } + + @Test + public void stopTableMissingConditionallyRequiredStopLat() { + checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, stop_lat is required for id 691."); + } + + @Test + public void stopTableMissingConditionallyRequiredStopLon() { + checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, stop_long is required for id 692."); + } + + @Test + public void stopTableMissingConditionallyRequiredZoneId() { + checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, zone_id 1 is required by fare_rules.txt."); + } + + /** + * Check that the test feed has exactly one error for the given type and badValue. + */ + private void checkFeedHasError(NewGTFSErrorType type, String badValue) { + assertThatSqlCountQueryYieldsExpectedCount( + testDataSource, + String.format("select count(*) from %s.errors where error_type = '%s' and bad_value = '%s'", + testNamespace, + type, + badValue), + 1); + } + +} diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt new file mode 100644 index 000000000..b758beb62 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email +VTA,VTA,https://www.vta.org,America/Los_Angeles,EN,408-321-2300,https://www.vta.org/go/fares,customer.service@vta.org diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar.txt 
b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar.txt new file mode 100644 index 000000000..6077757f1 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar.txt @@ -0,0 +1,2 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +1,1,1,1,1,1,0,0,20210208,20210611 \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_attributes.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_attributes.txt new file mode 100644 index 000000000..a59162bca --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_attributes.txt @@ -0,0 +1,2 @@ +service_id,service_description +1,Weekday \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_dates.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_dates.txt new file mode 100644 index 000000000..bf2f0150e --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/calendar_dates.txt @@ -0,0 +1,2 @@ +service_id,date,exception_type +1,20210531,2 diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/directions.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/directions.txt new file mode 100644 index 000000000..666d39239 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/directions.txt @@ -0,0 +1,5 @@ +route_id,direction_id,direction +21,0,East +21,1,West +22,0,East +22,1,West \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt 
b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt new file mode 100644 index 000000000..4f13201f0 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt @@ -0,0 +1,2 @@ +fare_id,price,currency_type,payment_method,transfers,transfer_duration +1,2.50000000,USD,0,0, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt new file mode 100644 index 000000000..6597f36bc --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt @@ -0,0 +1,3 @@ +fare_id,route_id,origin_id,destination_id,contains_id +1,21,1,, +1,22,,, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/feed_info.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/feed_info.txt new file mode 100644 index 000000000..c59ff6d3e --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/feed_info.txt @@ -0,0 +1,2 @@ +feed_publisher_name,feed_publisher_url,feed_lang,feed_start_date,feed_end_date,feed_version,feed_contact_email,feed_contact_url +Santa Clara Valley Transportation Authority,https://www.vta.org,EN,20210208,20210613,2021-02-16_15:11,customer.service@vta.org,https://www.vta.org/about/contact diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/realtime_routes.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/realtime_routes.txt new file mode 100644 index 000000000..27b0949c5 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/realtime_routes.txt @@ -0,0 +1,3 @@ 
+route_id,realtime_enabled,realtime_routename,realtime_routecode +21,1,, +22,1,, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt new file mode 100644 index 000000000..c7f99008f --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt @@ -0,0 +1,2 @@ +route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_sort_order,ext_route_type +21,VTA,21,Stanford Shopping Center - Santa Clara Transit Center,,3,https://www.vta.org/go/routes/21,29588c,FFFFFF,21,704 \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt new file mode 100644 index 000000000..8a875cb93 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt @@ -0,0 +1,26 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint +1,05:43:00,05:43:00,4957,1,,0,0,,1 +1,05:44:00,05:44:00,691,2,,0,0,0.58999997,0 +1,05:45:00,05:45:00,692,3,,0,0,0.96569997,0 +1,05:46:00,05:46:00,1266,4,,0,0,1.14470005,0 +1,05:47:00,05:47:00,1267,5,,0,0,1.92729998,0 +1,05:48:00,05:48:00,1268,6,,0,0,2.28160000,0 +1,05:49:00,05:49:00,1542,7,,0,0,2.72530007,0 +1,05:50:00,05:50:00,1543,8,,0,0,3.23429990,0 +1,05:51:00,05:51:00,1544,9,,0,0,3.59170008,1 +2,05:52:00,05:52:00,1545,1,,0,0,3.88010001,0 +2,,,1546,2,,0,0,4.32210016,0 +2,,,1547,3,,0,0,4.82560015,0 +2,05:55:00,05:55:00,1548,4,,0,0,5.09070015,0 +3,05:55:00,05:55:00,1550,1,,0,0,5.59749985,0 +3,05:56:00,05:56:00,1562,2,,0,0,7.09219980,1 +4,05:55:00,05:55:00,1550,1,,0,0,5.59749985,0 
+4,,,1558,2,,0,0,8.34879971,0 +4,,,1559,3,,0,0,8.68850040,0 +4,05:56:00,05:56:00,1562,4,,0,0,5.59749985,0 +5,00:00:00,00:00:00,4957,1,,0,0,,1 +5,01:00:00,01:00:00,1558,2,,0,0,8.34879971,0 +5,23:59:00,23:59:00,1562,3,,0,0,9.39290047,0 +6,00:00:00,00:00:00,4957,1,,0,0,,1 +6,,,1558,2,,0,0,8.34879971,0 +6,23:59:00,23:59:00,1562,3,,0,0,9.39290047,0 \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt new file mode 100644 index 000000000..024711c02 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt @@ -0,0 +1,28 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,wheelchair_boarding,platform_code,sign_dest +4957,64957,,Southbound,37.40048600,-122.10892700,,,0,1,1,, +691,60691,San Antonio & El Camino,Northbound,,-122.11319800,,,0,,1,, +692,60692,San Antonio & Miller,Northbound,37.40462900,,,,0,,1,, +1266,61266,San Antonio & California,Northbound,37.40607000,-122.11050500,,,0,,1,, +1267,61267,San Antonio & Nita,Northbound,37.41186000,-122.10592500,,,0,,1,, +1268,61268,San Antonio & Nita,Northbound,37.41461600,-122.10392500,,,0,,1,, +1542,61542,Middlefield & Montrose,Northbound,37.41752400,-122.10575900,,,0,,1,, +1543,61543,Middlefield & Charleston,Westbound,37.42026600,-122.11023800,,,0,,1,, +1544,61544,Middlefield & Mayview,Westbound,37.42291600,-122.11254400,,,0,,1,, +1545,61545,Middlefield & Meadow,Westbound,37.42452600,-122.11510500,,,0,,1,, +1546,61546,Middlefield & Ames,Westbound,37.42700600,-122.11903200,,,0,,1,, +1547,61547,Middlefield & Layne,Westbound,37.42981800,-122.12349600,,,0,,1,, +1548,61548,Middlefield & Matadero,Westbound,37.43129900,-122.12585600,,,0,,1,, +1550,61550,Middlefield & Moreno,Westbound,37.43412800,-122.13037200,,,0,,1,, +1551,61551,Middlefield & 
California,Westbound,37.43745600,-122.13566600,,,0,,1,, +1552,61552,Middlefield & Seale,Westbound,37.43992600,-122.13953900,,,0,,2,, +1553,61553,Middlefield & Embarcadero,Westbound,37.44248900,-122.14365200,,,0,,1,, +1554,61554,Middlefield & Melville,Westbound,37.44458500,-122.14696700,,,0,,1,, +1555,61555,Middlefield & Kingsley,Westbound,37.44538300,-122.14822500,,,0,,1,, +1556,61556,Middlefield & Addison,Westbound,37.44694800,-122.15071600,,,0,,1,, +1557,61557,Middlefield & Channing,Westbound,37.44775900,-122.15199200,,,0,,1,, +1558,61558,Homer & Webster,Southbound,37.44699700,-122.15448100,,,0,,1,, +1559,61559,Waverly & Homer,Westbound,37.44492200,-122.15685900,,,0,,1,, +1560,61560,Hamilton & Waverly,Southbound,37.44612400,-122.15941900,,,0,,1,, +1561,61561,Hamilton & Ramona,Southbound,37.44471500,-122.16085800,,,0,,1,, +1562,61562,Hamilton & High,Southbound,37.44318800,-122.16232000,,,0,,1,, +1563,61563,Palo Alto Transit Center (Bay 4),,37.44414700,-122.16663100,,,0,PS_PATC,1,4, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt new file mode 100644 index 000000000..0e7afa8b6 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt @@ -0,0 +1,7 @@ +route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed +21,1,1,PALO ALTO TRANSIT CTR 1,,1,2145,101395,0,0 +21,1,2,PALO ALTO TRANSIT CTR 2,,1,2145,101395,0,0 +22,1,3,PALO ALTO TRANSIT CTR 3,,1,2145,101395,0,0 +23,1,4,PALO ALTO TRANSIT CTR 4,,1,2145,101395,0,0 +23,1,5,PALO ALTO TRANSIT CTR 5,,1,2145,101395,0,0 +23,1,6,PALO ALTO TRANSIT CTR 6,,1,2145,101395,0,0 From 19ca48e04dbedfc349ff57370e583d5176505cb0 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Tue, 20 Apr 2021 16:55:30 +0100 Subject: [PATCH 02/45] refactor(Check con req 
fields on data load): Now checking the conditionally required fields as each --- .../gtfs/loader/ConditionallyRequired.java | 20 -- .../loader/ConditionallyRequiredCheck.java | 12 -- .../loader/ConditionallyRequiredField.java | 36 ++++ .../ConditionallyRequiredFieldCheck.java | 10 + .../ConditionallyRequiredForeignRefCheck.java | 5 + .../java/com/conveyal/gtfs/loader/Field.java | 18 +- .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 184 ++++++++---------- .../gtfs/loader/ReferenceTracker.java | 95 +++++++++ .../java/com/conveyal/gtfs/loader/Table.java | 67 +++++-- .../loader/ConditionallyRequiredTest.java | 33 +++- 10 files changed, 314 insertions(+), 166 deletions(-) delete mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java delete mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java deleted file mode 100644 index 72b0c47bf..000000000 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequired.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.conveyal.gtfs.loader; - -/** - * These are the values that are checked inline with {@link ConditionallyRequiredCheck} to determine if the required - * conditions have been met. - */ -public class ConditionallyRequired { - /** The type of check to be carried out */ - public final ConditionallyRequiredCheck check; - /** The minimum column value if a range check is being performed. */ - public double minValue; - /** The maximum column value if a range check is being performed. 
*/ - public double maxValue; - - ConditionallyRequired(ConditionallyRequiredCheck check, double minValue, double maxValue) { - this.check = check; - this.minValue = minValue; - this.maxValue = maxValue; - } -} diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java deleted file mode 100644 index 6baa1b236..000000000 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredCheck.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.conveyal.gtfs.loader; - -/** - * These are the conditionally required checks to be carried out inline with the values provided in - * {@link ConditionallyRequired}. - */ -public enum ConditionallyRequiredCheck { - LOCATION_TYPE_STOP_NAME_CHECK, - LOCATION_TYPE_STOP_LAT_CHECK, - LOCATION_TYPE_STOP_LON_CHECK, - LOCATION_TYPE_PARENT_STATION_CHECK, -} diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java new file mode 100644 index 000000000..a25dc4ed8 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java @@ -0,0 +1,36 @@ +package com.conveyal.gtfs.loader; + +/** + * These are the values that are checked inline with {@link ConditionallyRequiredFieldCheck} to determine if the required + * conditions have been met. + */ +public class ConditionallyRequiredField { + /** The type of check to be performed on the reference field. */ + public final ConditionallyRequiredFieldCheck referenceCheck; + /** The type of check to be performed on the conditional field. */ + public ConditionallyRequiredFieldCheck conditionalCheck; + /** The minimum reference field value if a range check is being performed. */ + public double minReferenceValue; + /** The maximum reference field value if a range check is being performed. */ + public double maxReferenceValue; + /** The name of the reference field. 
*/ + String referenceFieldName; + /** The name of the conditional field. */ + String conditionalFieldName; + + ConditionallyRequiredField ( + String referenceFieldName, + ConditionallyRequiredFieldCheck referenceCheck, + String conditionalFieldName, + ConditionallyRequiredFieldCheck conditionalCheck, + double minReferenceValue, + double maxReferenceValue + ) { + this.referenceFieldName = referenceFieldName; + this.referenceCheck = referenceCheck; + this.conditionalFieldName = conditionalFieldName; + this.conditionalCheck = conditionalCheck; + this.minReferenceValue = minReferenceValue; + this.maxReferenceValue = maxReferenceValue; + } +} diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java new file mode 100644 index 000000000..94ec62bdf --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java @@ -0,0 +1,10 @@ +package com.conveyal.gtfs.loader; + +/** + * These are the conditionally required checks to be carried out inline with the values provided in + * {@link ConditionallyRequiredField}. + */ +public enum ConditionallyRequiredFieldCheck { + FIELD_NOT_EMPTY, + FIELD_IN_RANGE +} \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java new file mode 100644 index 000000000..b4b16de05 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java @@ -0,0 +1,5 @@ +package com.conveyal.gtfs.loader; + +public enum ConditionallyRequiredForeignRefCheck { + STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK // Confirm that all zone_id references in fare rules are available in stops. 
+} diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index dd82abc9f..806482b17 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -49,6 +49,7 @@ public abstract class Field { public Table referenceTable = null; private boolean shouldBeIndexed; private boolean emptyValuePermitted; + private boolean isConditionallyRequired; public Field(String name, Requirement requirement) { this.name = name; @@ -181,10 +182,25 @@ public boolean isEmptyValuePermitted() { /** * Get the expression used to select this column from the database based on the prefix. The csvOutput parameter is - * needed in overriden method implementations that have special ways of outputting certain fields. The prefix + * needed in overridden method implementations that have special ways of outputting certain fields. The prefix * parameter is assumed to be either null or a string in the format: `schema.` */ public String getColumnExpression(String prefix, boolean csvOutput) { return prefix != null ? String.format("%s%s", prefix, name) : name; } + + /** + * Flag this field as conditionally required. + */ + public Field addConditionallyRequired() { + this.isConditionallyRequired = true; + return this; + } + + /** + * Indicates that this field is conditionally required. 
+ */ + public boolean isConditionallyRequired() { + return isConditionallyRequired; + } } diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index d611938fe..9bc2defd3 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -24,6 +24,7 @@ import java.util.zip.ZipFile; import static com.conveyal.gtfs.error.NewGTFSErrorType.*; +import static com.conveyal.gtfs.loader.ConditionallyRequiredForeignRefCheck.STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK; import static com.conveyal.gtfs.model.Entity.human; import static com.conveyal.gtfs.util.Util.randomIdString; @@ -94,7 +95,9 @@ public JdbcGtfsLoader(String gtfsFilePath, DataSource dataSource) { this.dataSource = dataSource; } - /** Get SQL string for creating the feed registry table (AKA, the "feeds" table). */ + /** + * Get SQL string for creating the feed registry table (AKA, the "feeds" table). + */ public static String getCreateFeedRegistrySQL() { return "create table if not exists feeds (namespace varchar primary key, md5 varchar, " + "sha1 varchar, feed_id varchar, feed_version varchar, filename varchar, loaded_date timestamp, " + @@ -112,7 +115,7 @@ public static String getCreateFeedRegistrySQL() { // SHA1 took 1072 msec, 9fb356af4be2750f20955203787ec6f95d32ef22 // There appears to be no advantage to loading tables in parallel, as the whole loading process is I/O bound. - public FeedLoadResult loadTables () { + public FeedLoadResult loadTables() { // This result object will be returned to the caller to summarize the feed and report any critical errors. 
FeedLoadResult result = new FeedLoadResult(); @@ -137,7 +140,7 @@ public FeedLoadResult loadTables () { this.tablePrefix = randomIdString(); result.filename = gtfsFilePath; result.uniqueIdentifier = tablePrefix; - + // The order of the following four lines should not be changed because the schema needs to be in place // before the error storage can be constructed, which in turn needs to exist in case any errors are // encountered during the loading process. @@ -182,15 +185,15 @@ public FeedLoadResult loadTables () { } return result; } - + /** * Creates a schema/namespace in the database WITHOUT committing the changes. * This does *not* setup any other tables or enter the schema name in a registry (@see #registerFeed). - * + * * @param connection Connection to the database to create the schema on. * @param schemaName Name of the schema (i.e. table prefix). Should not include the dot suffix. */ - static void createSchema (Connection connection, String schemaName) { + static void createSchema(Connection connection, String schemaName) { try { Statement statement = connection.createStatement(); // FIXME do the following only on databases that support schemas. @@ -207,13 +210,13 @@ static void createSchema (Connection connection, String schemaName) { * Add a line to the list of loaded feeds showing that this feed has been loaded. * We used to inspect feed_info here so we could make our table prefix based on feed ID and version. * Now we just load feed_info like any other table. - * // Create a row in the table of loaded feeds for this feed + * // Create a row in the table of loaded feeds for this feed * Really this is not just making the table prefix - it's loading the feed_info and should also calculate hashes. * * Originally we were flattening all feed_info files into one root-level table, but that forces us to drop any * custom fields in feed_info. 
*/ - private void registerFeed (File gtfsFile) { + private void registerFeed(File gtfsFile) { // FIXME is this extra CSV reader used anymore? Check comment below. // First, inspect feed_info.txt to extract the ID and version. @@ -245,7 +248,7 @@ private void registerFeed (File gtfsFile) { // current_timestamp seems to be the only standard way to get the current time across all common databases. // Record total load processing time? PreparedStatement insertStatement = connection.prepareStatement( - "insert into feeds values (?, ?, ?, ?, ?, ?, current_timestamp, null, false)"); + "insert into feeds values (?, ?, ?, ?, ?, ?, current_timestamp, null, false)"); insertStatement.setString(1, tablePrefix); insertStatement.setString(2, md5Hex); insertStatement.setString(3, shaHex); @@ -273,15 +276,15 @@ static void createFeedRegistryIfNotExists(Connection connection) throws SQLExcep /** * This wraps the main internal table loader method to catch exceptions and figure out how many errors happened. */ - private TableLoadResult load (Table table) { + private TableLoadResult load(Table table) { // This object will be returned to the caller to summarize the contents of the table and any errors. TableLoadResult tableLoadResult = new TableLoadResult(); int initialErrorCount = errorStorage.getErrorCount(); try { tableLoadResult.rowCount = loadInternal(table); tableLoadResult.fileSize = getTableSize(table); - if (table.conditionallyRequiredFields.size() > 0) { - conditionallyRequiredChecks(table); + if (table.conditionallyRequiredForeignRefChecks.size() > 0) { + conditionallyRequiredForeignRefChecks(table); } LOG.info(String.format("loaded in %d %s records", tableLoadResult.rowCount, table.name)); } catch (Exception ex) { @@ -316,9 +319,10 @@ private int getTableSize(Table table) { /** * This function will throw any exception that occurs. Those exceptions will be handled by the outer load method. + * * @return number of rows that were loaded. 
*/ - private int loadInternal (Table table) throws Exception { + private int loadInternal(Table table) throws Exception { CsvReader csvReader = table.getCsvReader(zip, errorStorage); if (csvReader == null) { LOG.info(String.format("file %s.txt not found in gtfs zipfile", table.name)); @@ -394,6 +398,7 @@ private int loadInternal (Table table) throws Exception { // Maintain a separate columnIndex from for loop because some fields may be null and not included in the set // of fields for this table. int columnIndex = 0; + HashMap fieldLineData = new HashMap<>(); for (int f = 0; f < fields.length; f++) { Field field = fields[f]; // If the field is null, it represents a duplicate header or ID field and must be skipped to maintain @@ -408,10 +413,10 @@ private int loadInternal (Table table) throws Exception { // error. if ( table.name.equals("calendar_dates") && - "service_id".equals(field.name) && - "1".equals(csvReader.get(Field.getFieldIndex(fields, "exception_type"))) + "service_id".equals(field.name) && + "1".equals(csvReader.get(Field.getFieldIndex(fields, "exception_type"))) - ){ + ) { for (NewGTFSError error : errors) { if (NewGTFSErrorType.REFERENTIAL_INTEGRITY.equals(error.errorType)) { // Do not record bad service_id reference errors for calendar date entries that add service @@ -432,9 +437,18 @@ private int loadInternal (Table table) throws Exception { } // Add value for entry into table setValueForField(table, columnIndex, lineNumber, field, string, postgresText, transformedStrings); + if (field.isConditionallyRequired()) { + // Hold the field line data for use in checking conditionally required fields. + fieldLineData.put(field.name, new ReferenceTracker.LineData(keyValue, lineNumber, string)); + } // Increment column index. 
columnIndex += 1; } + if (fieldLineData.size() > 0) { + errorStorage.storeErrors( + referenceTracker.checkConditionallyRequiredFields(table, fieldLineData) + ); + } if (postgresText) { // Print a new line in the standard postgres text format: // https://www.postgresql.org/docs/9.1/static/sql-copy.html#AEN64380 @@ -486,7 +500,7 @@ public static void copyFromFile(Connection connection, File file, String targetT InputStream stream = new BufferedInputStream(new FileInputStream(file.getAbsolutePath())); // Our connection pool wraps the Connection objects, so we need to unwrap the Postgres connection interface. CopyManager copyManager = new CopyManager(connection.unwrap(BaseConnection.class)); - copyManager.copyIn(copySql, stream, 1024*1024); + copyManager.copyIn(copySql, stream, 1024 * 1024); stream.close(); // It is also possible to load from local file if this code is running on the database server. // statement.execute(String.format("copy %s from '%s'", table.name, tempTextFile.getAbsolutePath())); @@ -521,7 +535,7 @@ public void setValueForField(Table table, int fieldIndex, int lineNumber, Field ValidateFieldResult result = field.validateAndConvert(string); // If the result is null, use the null-setting method. if (result.clean == null) setFieldToNull(postgresText, transformedStrings, fieldIndex, field); - // Otherwise, set the cleaned field according to its index. + // Otherwise, set the cleaned field according to its index. else transformedStrings[fieldIndex + 1] = result.clean; errors = result.errors; } else { @@ -549,7 +563,7 @@ public void setValueForField(Table table, int fieldIndex, int lineNumber, Field */ private void setFieldToNull(boolean postgresText, String[] transformedStrings, int fieldIndex, Field field) { if (postgresText) transformedStrings[fieldIndex + 1] = POSTGRES_NULL_TEXT; - // Adjust parameter index by two: indexes are one-based and the first one is the CSV line number. 
+ // Adjust parameter index by two: indexes are one-based and the first one is the CSV line number. else try { // LOG.info("setting {} index to null", fieldIndex + 2); field.setNull(insertStatement, fieldIndex + 2); @@ -568,7 +582,7 @@ private void setFieldToNull(boolean postgresText, String[] transformedStrings, i * * TODO add a test including SQL injection text (quote and semicolon) */ - public static String sanitize (String string, SQLErrorStorage errorStorage) { + public static String sanitize(String string, SQLErrorStorage errorStorage) { String clean = string.replaceAll("[^\\p{Alnum}_]", ""); if (!clean.equals(string)) { LOG.warn("SQL identifier '{}' was sanitized to '{}'", string, clean); @@ -578,99 +592,55 @@ public static String sanitize (String string, SQLErrorStorage errorStorage) { } /** - * Perform all conditionally required checks on the fields within the provided table. + * Perform all conditionally required foreign reference checks on the fields within the provided table. */ - private void conditionallyRequiredChecks(Table table) { + private void conditionallyRequiredForeignRefChecks(Table table) { + final TableReader stopTableReader = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP); + Iterable stops = stopTableReader.getAllOrdered(); + + final TableReader fareRulesTableReader = new JDBCTableReader(Table.FARE_RULES, dataSource, tablePrefix, EntityPopulator.FARE_RULE); + Iterable fareRules = fareRulesTableReader.getAllOrdered(); + if (table.name.equals(Table.STOPS.name)) { - final TableReader stopTableReader = new JDBCTableReader(table, dataSource, tablePrefix, EntityPopulator.STOP); - Iterable stops = stopTableReader.getAllOrdered(); - // Iterate through each stop and check each conditionally required field in turn. 
- for (Stop stop : stops) { - for (ConditionallyRequired condition : table.conditionallyRequiredFields) { - switch(condition.check) { - case LOCATION_TYPE_STOP_NAME_CHECK: - //FIXME: location_type defaults to 0 if not provided. This needs to be INT_MISSING or similar - // so these tests don't give false positives. - if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && - stop.stop_name == null) { - errorStorage.storeError( - NewGTFSError.forFeed( - CONDITIONALLY_REQUIRED, - String.format("stops.txt, stop_name is required for id %s.", stop.stop_id) - ) - ); - } - break; - case LOCATION_TYPE_STOP_LAT_CHECK: - if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && - stop.stop_lat == Double.MIN_VALUE) { - errorStorage.storeError( - NewGTFSError.forFeed( - CONDITIONALLY_REQUIRED, - String.format("stops.txt, stop_lat is required for id %s.", stop.stop_id))); - } - break; - case LOCATION_TYPE_STOP_LON_CHECK: - if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && - stop.stop_lon == Double.MIN_VALUE) { - errorStorage.storeError( - NewGTFSError.forFeed( - CONDITIONALLY_REQUIRED, - String.format("stops.txt, stop_long is required for id %s.", stop.stop_id) - ) - ); - } - break; - case LOCATION_TYPE_PARENT_STATION_CHECK: - if ((stop.location_type >= condition.minValue || stop.location_type <= condition.maxValue) && - stop.parent_station == null) { - errorStorage.storeError( - NewGTFSError.forFeed( - CONDITIONALLY_REQUIRED, - String.format("stops.txt, parent_station is required for id %s.", stop.stop_id) - ) - ); - } - break; + for (ConditionallyRequiredForeignRefCheck check : table.conditionallyRequiredForeignRefChecks) { + // As the fare rule table is produced before the stops table, the conditionally required checks have to + // be done in reverse. 
Instead of the fare rule table checking the zone id in the stops table, the stops table + // is responsible for iterating over the fare rules table to confirm required zone id references are available. + if (check == STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK) { + // Get a unique list of all zone ids referenced by the fare rule table. + Set zoneIds = new HashSet<>(); + for (FareRule rule : fareRules) { + if (rule.origin_id != null) { + zoneIds.add(rule.origin_id); + } else if (rule.destination_id != null) { + zoneIds.add(rule.destination_id); + } else if (rule.contains_id != null) { + zoneIds.add(rule.contains_id); + } } - } - } - // Because the fare rule table is produced before the stops table, the conditionally required checks have to - // be done in reverse. Instead of the fare rule table checking the zone id in the stops table, the stops table - // is responsible for iterating over the fare rule table to confirm required zone id references are available. - final TableReader fareRulesTableReader = - new JDBCTableReader(Table.FARE_RULES, dataSource, tablePrefix, EntityPopulator.FARE_RULE); - - // Get all zone ids referenced by the fare rule table. - Set zoneIds = new HashSet<>(); - for (FareRule rule : fareRulesTableReader.getAllOrdered()) { - if (rule.origin_id != null) { - zoneIds.add(rule.origin_id); - } else if (rule.destination_id != null) { - zoneIds.add(rule.destination_id); - } else if (rule.contains_id != null) { - zoneIds.add(rule.contains_id); - } - } + // No zone_id references used in fare rules. + if (zoneIds.isEmpty()) { + continue; + } - // Make sure all zone id references are available, if not store an error. - if (zoneIds.size() > 0) { - for (String zoneId : zoneIds) { - boolean match = false; - for (Stop stop : stops) { - if (zoneId.equals(stop.zone_id)) { - match = true; - break; + // Make sure all zone id references are available, if not flag an error. 
+ for (String zoneId : zoneIds) { + boolean match = false; + for (Stop stop : stops) { + if (zoneId.equals(stop.zone_id)) { + match = true; + break; + } + } + if (!match) { + errorStorage.storeError( + NewGTFSError.forFeed( + CONDITIONALLY_REQUIRED, + String.format("zone_id %s is required by fare_rules within stops.", zoneId) + ) + ); } - } - if (!match) { - errorStorage.storeError( - NewGTFSError.forFeed( - CONDITIONALLY_REQUIRED, - String.format("stops.txt, zone_id %s is required by fare_rules.txt.", zoneId) - ) - ); } } } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 7a01e25dd..a3f2b8959 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -1,13 +1,18 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; +import org.apache.commons.lang3.math.NumberUtils; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Set; +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; +import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; +import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; /** * This class is used while loading GTFS to track the unique keys that are encountered in a GTFS @@ -136,4 +141,94 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN } return errors; } + + /** + * Work through each conditionally required check assigned to a table. 
+ */ + public Set checkConditionallyRequiredFields(Table table, HashMap fieldLineData) { + Set errors = new HashSet<>(); + Set fieldsToCheck = table.conditionallyRequiredFields; + for (ConditionallyRequiredField check : fieldsToCheck) { + LineData refFieldLineData = fieldLineData.get(check.referenceFieldName); + if (check.referenceCheck == FIELD_IN_RANGE && + !referenceFieldInRange(refFieldLineData.fieldValue, check.minReferenceValue, check.maxReferenceValue) + ) { + // reference field not within range, move to the next check. + continue; + } + + LineData conFieldLineData = fieldLineData.get(check.conditionalFieldName); + if (check.conditionalCheck == FIELD_NOT_EMPTY && + isEmpty(conFieldLineData.fieldValue) + ) { + NewGTFSError conReqError = NewGTFSError + .forLine(table, + conFieldLineData.lineNumber, + CONDITIONALLY_REQUIRED, + String.format("%s is conditionally required.", check.conditionalFieldName)) + .setEntityId(conFieldLineData.keyValue); + errors.add(conReqError); + } + } + return errors; + } + + /** + * Check if the provided reference field value is within the min and max values. If the field value can not be converted + * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. + */ + private boolean referenceFieldInRange(String referenceFieldValue, double min, double max) { + try { + int fieldValue = Integer.parseInt(referenceFieldValue); + return fieldValue >= min || fieldValue <= max; + } catch (NumberFormatException e) { + return false; + } + } + + /** + * Checks if the provided field value is empty. If the value can be converted to either a double or int and these + * match the minimum value it is assumed these are empty. 
+ */ + private boolean isEmpty(String str) { + // Text values + if (str == null || str.isEmpty()) { + return true; + } + + // Number values + if (NumberUtils.isParsable(str)) { + try { + double dValue = Double.parseDouble(str); + if (dValue == Double.MIN_VALUE) { + return true; + } + int iValue = Integer.parseInt(str); + if (iValue == Integer.MIN_VALUE) { + return true; + } + } catch (NumberFormatException e) { + return false; + } + } + return false; + } + + /** + * Holds line data that will be used in relation to a conditionally required field. + */ + public static class LineData { + /** The key associated with this line of data. */ + public final String keyValue; + /** The line number. */ + public final int lineNumber; + /** The string representation of the field value. */ + public final String fieldValue; + + public LineData(String keyValue, int lineNumber, String fieldValue) { + this.keyValue = keyValue; + this.lineNumber = lineNumber; + this.fieldValue = fieldValue; + } + } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 824dc9280..ec1efe94d 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -47,10 +47,9 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; -import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_PARENT_STATION_CHECK; -import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_STOP_LAT_CHECK; -import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_STOP_LON_CHECK; -import static com.conveyal.gtfs.loader.ConditionallyRequiredCheck.LOCATION_TYPE_STOP_NAME_CHECK; +import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; +import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; 
+import static com.conveyal.gtfs.loader.ConditionallyRequiredForeignRefCheck.STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -91,7 +90,8 @@ public class Table { * */ private boolean compoundKey; - public Set conditionallyRequiredFields = new HashSet<>(); + public Set conditionallyRequiredFields = new HashSet<>(); + public Set conditionallyRequiredForeignRefChecks = new HashSet<>(); public Table (String name, Class entityClass, Requirement required, Field... fields) { // TODO: verify table name is OK for use in constructing dynamic SQL queries @@ -232,24 +232,25 @@ public Table (String name, Class entityClass, Requirement requ public static final Table STOPS = new Table("stops", Stop.class, REQUIRED, new StringField("stop_id", REQUIRED), new StringField("stop_code", OPTIONAL), - new StringField("stop_name", OPTIONAL), + new StringField("stop_name", OPTIONAL).addConditionallyRequired(), new StringField("stop_desc", OPTIONAL), - new DoubleField("stop_lat", OPTIONAL, -80, 80, 6), - new DoubleField("stop_lon", OPTIONAL, -180, 180, 6), + new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).addConditionallyRequired(), + new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).addConditionallyRequired(), new StringField("zone_id", OPTIONAL), new URLField("stop_url", OPTIONAL), - new ShortField("location_type", OPTIONAL, 2), - new StringField("parent_station", REQUIRED), + new ShortField("location_type", OPTIONAL, 2).addConditionallyRequired(), + new StringField("parent_station", REQUIRED).addConditionallyRequired(), new StringField("stop_timezone", OPTIONAL), new ShortField("wheelchair_boarding", OPTIONAL, 2), - new StringField("platform_code", OPTIONAL) + new StringField("platform_code", OPTIONAL).addConditionallyRequired() ) .restrictDelete() .addPrimaryKey() - 
.addConditionallyRequired(LOCATION_TYPE_STOP_NAME_CHECK, 0,2) - .addConditionallyRequired(LOCATION_TYPE_STOP_LAT_CHECK, 0,2) - .addConditionallyRequired(LOCATION_TYPE_STOP_LON_CHECK, 0,2) - .addConditionallyRequired(LOCATION_TYPE_PARENT_STATION_CHECK, 2,4); + .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_name", FIELD_NOT_EMPTY,0, 2) + .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_lat", FIELD_NOT_EMPTY,0, 2) + .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_lon", FIELD_NOT_EMPTY,0, 2) + .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"parent_station", FIELD_NOT_EMPTY,2, 4) + .addConditionallyRequiredForeignRefCheck(STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK); public static final Table PATTERN_STOP = new Table("pattern_stops", PatternStop.class, OPTIONAL, new StringField("pattern_id", REQUIRED).isReferenceTo(PATTERNS), @@ -1007,8 +1008,40 @@ public int getKeyFieldIndex(Field[] fields) { return Field.getFieldIndex(fields, keyField); } - public Table addConditionallyRequired(ConditionallyRequiredCheck check, double minValue, double maxValue) { - this.conditionallyRequiredFields.add(new ConditionallyRequired(check, minValue, maxValue)); + /** + * Adds a conditionally required field check to a table. + * @param referenceField The value of this field will determine if the conditional field is required. + * @param referenceCheck The type of check to be carried out on the reference field. + * @param conditionalField The field that maybe required if the reference checks are true. + * @param conditionalCheck The type of check to be carried out on the conditional field. + * @param minValue The minimum reference field value needed for conditionally required. + * @param maxValue The maximum reference field value needed for conditionally required. 
+ */ + public Table addConditionalRequiredCheck ( + String referenceField, + ConditionallyRequiredFieldCheck referenceCheck, + String conditionalField, + ConditionallyRequiredFieldCheck conditionalCheck, + double minValue, + double maxValue + ) { + this.conditionallyRequiredFields.add( + new ConditionallyRequiredField( + referenceField, + referenceCheck, + conditionalField, + conditionalCheck, + minValue, + maxValue) + ); + return this; + } + + /** + * Adds a conditionally required foreign reference check to a table. + */ + public Table addConditionallyRequiredForeignRefCheck(ConditionallyRequiredForeignRefCheck check) { + this.conditionallyRequiredForeignRefChecks.add(check); return this; } } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 9734a054e..11c77d726 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -38,40 +38,55 @@ public static void tearDownClass() { @Test public void stopTableMissingConditionallyRequiredStopName() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, stop_name is required for id 4957."); + checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","2", "4957","stop_name is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredParentStation() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, parent_station is required for id 691."); + checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","3", "691","parent_station is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredStopLat() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, stop_lat is required for id 691."); + checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","3", "691","stop_lat is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredStopLon() { - 
checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, stop_long is required for id 692."); + checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredZoneId() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "stops.txt, zone_id 1 is required by fare_rules.txt."); + checkFeedHasError(CONDITIONALLY_REQUIRED, "zone_id 1 is required by fare_rules within stops."); } /** - * Check that the test feed has exactly one error for the given type and badValue. + * Check that the test feed has exactly one error for the provided values. */ - private void checkFeedHasError(NewGTFSErrorType type, String badValue) { + private void checkFeedHasError(NewGTFSErrorType errorType, String entityType, String lineNumber, String entityId, String badValue) { assertThatSqlCountQueryYieldsExpectedCount( testDataSource, - String.format("select count(*) from %s.errors where error_type = '%s' and bad_value = '%s'", + String.format("select count(*) from %s.errors where error_type = '%s' and entity_type = '%s' and line_number = '%s' and entity_id = '%s' and bad_value = '%s'", testNamespace, - type, + errorType, + entityType, + lineNumber, + entityId, badValue), 1); } + /** + * Check that the test feed has exactly one error for the given error type and badValue. 
+ */ + private void checkFeedHasError(NewGTFSErrorType errorType, String badValue) { + assertThatSqlCountQueryYieldsExpectedCount( + testDataSource, + String.format("select count(*) from %s.errors where error_type = '%s' and bad_value = '%s'", + testNamespace, + errorType, + badValue), + 1); + } } From 45e65895de4be7c81958890a2ea7f6d85f0b5936 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Tue, 20 Apr 2021 17:25:02 +0100 Subject: [PATCH 03/45] refactor(ReferenceTracker.java): Addressed NPE --- src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index a3f2b8959..9e4b4412d 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -151,6 +151,7 @@ public Set checkConditionallyRequiredFields(Table table, HashMap checkConditionallyRequiredFields(Table table, HashMap Date: Wed, 21 Apr 2021 10:33:28 +0100 Subject: [PATCH 04/45] refactor(Bug fixes and refactor): Moved conditional checks in to reference tracker --- .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 66 +-------- .../gtfs/loader/ReferenceTracker.java | 138 +++++++++++++++--- .../loader/ConditionallyRequiredTest.java | 2 +- .../stops.txt | 2 +- 4 files changed, 125 insertions(+), 83 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index 9bc2defd3..09de1fe53 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -20,6 +20,8 @@ import java.io.*; import java.sql.*; import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; @@ -284,7 +286,9 @@ private TableLoadResult load(Table 
table) { tableLoadResult.rowCount = loadInternal(table); tableLoadResult.fileSize = getTableSize(table); if (table.conditionallyRequiredForeignRefChecks.size() > 0) { - conditionallyRequiredForeignRefChecks(table); + errorStorage.storeErrors( + referenceTracker.conditionallyRequiredForeignRefChecks(table,dataSource, tablePrefix) + ); } LOG.info(String.format("loaded in %d %s records", tableLoadResult.rowCount, table.name)); } catch (Exception ex) { @@ -398,7 +402,7 @@ private int loadInternal(Table table) throws Exception { // Maintain a separate columnIndex from for loop because some fields may be null and not included in the set // of fields for this table. int columnIndex = 0; - HashMap fieldLineData = new HashMap<>(); + List fieldLineData = new ArrayList<>(); for (int f = 0; f < fields.length; f++) { Field field = fields[f]; // If the field is null, it represents a duplicate header or ID field and must be skipped to maintain @@ -439,7 +443,7 @@ private int loadInternal(Table table) throws Exception { setValueForField(table, columnIndex, lineNumber, field, string, postgresText, transformedStrings); if (field.isConditionallyRequired()) { // Hold the field line data for use in checking conditionally required fields. - fieldLineData.put(field.name, new ReferenceTracker.LineData(keyValue, lineNumber, string)); + fieldLineData.add(new ReferenceTracker.LineData(table, field, keyValue, lineNumber, string)); } // Increment column index. columnIndex += 1; @@ -590,60 +594,4 @@ public static String sanitize(String string, SQLErrorStorage errorStorage) { } return clean; } - - /** - * Perform all conditionally required foreign reference checks on the fields within the provided table. 
- */ - private void conditionallyRequiredForeignRefChecks(Table table) { - final TableReader stopTableReader = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP); - Iterable stops = stopTableReader.getAllOrdered(); - - final TableReader fareRulesTableReader = new JDBCTableReader(Table.FARE_RULES, dataSource, tablePrefix, EntityPopulator.FARE_RULE); - Iterable fareRules = fareRulesTableReader.getAllOrdered(); - - if (table.name.equals(Table.STOPS.name)) { - for (ConditionallyRequiredForeignRefCheck check : table.conditionallyRequiredForeignRefChecks) { - // As the fare rule table is produced before the stops table, the conditionally required checks have to - // be done in reverse. Instead of the fare rule table checking the zone id in the stops table, the stops table - // is responsible for iterating over the fare rules table to confirm required zone id references are available. - if (check == STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK) { - // Get a unique list of all zone ids referenced by the fare rule table. - Set zoneIds = new HashSet<>(); - for (FareRule rule : fareRules) { - if (rule.origin_id != null) { - zoneIds.add(rule.origin_id); - } else if (rule.destination_id != null) { - zoneIds.add(rule.destination_id); - } else if (rule.contains_id != null) { - zoneIds.add(rule.contains_id); - } - } - - // No zone_id references used in fare rules. - if (zoneIds.isEmpty()) { - continue; - } - - // Make sure all zone id references are available, if not flag an error. 
- for (String zoneId : zoneIds) { - boolean match = false; - for (Stop stop : stops) { - if (zoneId.equals(stop.zone_id)) { - match = true; - break; - } - } - if (!match) { - errorStorage.storeError( - NewGTFSError.forFeed( - CONDITIONALLY_REQUIRED, - String.format("zone_id %s is required by fare_rules within stops.", zoneId) - ) - ); - } - } - } - } - } - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 9e4b4412d..303a64d7a 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -1,11 +1,16 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.model.FareRule; +import com.conveyal.gtfs.model.Stop; import org.apache.commons.lang3.math.NumberUtils; +import javax.sql.DataSource; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.List; +import java.util.Optional; import java.util.Set; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; @@ -13,6 +18,7 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionallyRequiredForeignRefCheck.STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK; /** * This class is used while loading GTFS to track the unique keys that are encountered in a GTFS @@ -143,38 +149,116 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN } /** - * Work through each conditionally required check assigned to a table. + * Perform all conditionally required foreign reference checks on the fields within the provided table. 
*/ - public Set checkConditionallyRequiredFields(Table table, HashMap fieldLineData) { + public Set conditionallyRequiredForeignRefChecks(Table table, DataSource dataSource, String tablePrefix) { + Set errors = new HashSet<>(); + final TableReader stopTableReader = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP); + Iterable stops = stopTableReader.getAllOrdered(); + + final TableReader fareRulesTableReader = new JDBCTableReader(Table.FARE_RULES, dataSource, tablePrefix, EntityPopulator.FARE_RULE); + Iterable fareRules = fareRulesTableReader.getAllOrdered(); + + if (table.name.equals(Table.STOPS.name)) { + for (ConditionallyRequiredForeignRefCheck check : table.conditionallyRequiredForeignRefChecks) { + // As the fare rule table is produced before the stops table, the conditionally required checks have to + // be done in reverse. Instead of the fare rule table checking the zone id in the stops table, the stops table + // is responsible for iterating over the fare rules table to confirm required zone id references are available. + if (check == STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK) { + // Get a unique list of all zone ids referenced by the fare rule table. + Set zoneIds = new HashSet<>(); + for (FareRule rule : fareRules) { + if (rule.origin_id != null) { + zoneIds.add(rule.origin_id); + } else if (rule.destination_id != null) { + zoneIds.add(rule.destination_id); + } else if (rule.contains_id != null) { + zoneIds.add(rule.contains_id); + } + } + + // No zone_id references used in fare rules. + if (zoneIds.isEmpty()) { + continue; + } + + // Make sure all zone id references are available, if not flag an error. 
+ for (String zoneId : zoneIds) { + boolean match = false; + for (Stop stop : stops) { + if (zoneId.equals(stop.zone_id)) { + match = true; + break; + } + } + if (!match) { + errors.add ( + NewGTFSError.forFeed ( + CONDITIONALLY_REQUIRED, + String.format("zone_id %s is required by fare_rules within stops.", zoneId) + ) + ); + } + } + } + } + } + return errors; + } + + + /** + * Work through each conditionally required check assigned to a table. First check the reference field to confirm + * if it meets the conditions whereby the conditional field is required. If the conditional field is required confirm + * that a value has been provided, if not, log a an error. + */ + public Set checkConditionallyRequiredFields(Table table, List fieldLineData) { Set errors = new HashSet<>(); Set fieldsToCheck = table.conditionallyRequiredFields; for (ConditionallyRequiredField check : fieldsToCheck) { - LineData refFieldLineData = fieldLineData.get(check.referenceFieldName); - if (check.referenceCheck == FIELD_IN_RANGE && + LineData refFieldLineData = getFieldLineData(table.name, check.referenceFieldName, fieldLineData); + if ( + check.referenceCheck == FIELD_IN_RANGE && refFieldLineData != null && - !referenceFieldInRange(refFieldLineData.fieldValue, check.minReferenceValue, check.maxReferenceValue) + referenceFieldInRange(refFieldLineData.fieldValue, check.minReferenceValue, check.maxReferenceValue) ) { - // reference field not within range, move to the next check. - continue; + // reference field within range, perform check on conditional field. 
+ LineData conFieldLineData = getFieldLineData(table.name, check.conditionalFieldName, fieldLineData); + if ( + check.conditionalCheck == FIELD_NOT_EMPTY && + conFieldLineData != null && + isEmpty(conFieldLineData.fieldValue) + ) { + errors.add( + NewGTFSError + .forLine( + table, + conFieldLineData.lineNumber, + CONDITIONALLY_REQUIRED, + String.format("%s is conditionally required.", check.conditionalFieldName) + ).setEntityId(conFieldLineData.keyValue) + ); + } } - LineData conFieldLineData = fieldLineData.get(check.conditionalFieldName); - if (check.conditionalCheck == FIELD_NOT_EMPTY && - conFieldLineData != null && - isEmpty(conFieldLineData.fieldValue) - ) { - NewGTFSError conReqError = NewGTFSError - .forLine(table, - conFieldLineData.lineNumber, - CONDITIONALLY_REQUIRED, - String.format("%s is conditionally required.", check.conditionalFieldName)) - .setEntityId(conFieldLineData.keyValue); - errors.add(conReqError); - } } return errors; } + /** + * Return the line data that matches the table and field provided. + */ + private LineData getFieldLineData(String tableName, String fieldName, List fieldLineData) { + Optional match = fieldLineData + .stream() + .filter( + lineData -> lineData.table.name.equals(tableName) && + lineData.field.name.equals(fieldName) + ) + .findFirst(); + return match.orElse(null); + } + /** * Check if the provided reference field value is within the min and max values. If the field value can not be converted * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. 
@@ -182,7 +266,7 @@ public Set checkConditionallyRequiredFields(Table table, HashMap= min || fieldValue <= max; + return fieldValue >= min && fieldValue <= max; } catch (NumberFormatException e) { return false; } @@ -209,6 +293,10 @@ private boolean isEmpty(String str) { if (iValue == Integer.MIN_VALUE) { return true; } + int sValue = Short.parseShort(str); + if (sValue == Short.MIN_VALUE) { + return true; + } } catch (NumberFormatException e) { return false; } @@ -220,6 +308,10 @@ private boolean isEmpty(String str) { * Holds line data that will be used in relation to a conditionally required field. */ public static class LineData { + /** The table associated with this line of data. */ + public final Table table; + /** The field associated with this line of data. */ + public final Field field; /** The key associated with this line of data. */ public final String keyValue; /** The line number. */ @@ -227,7 +319,9 @@ public static class LineData { /** The string representation of the field value. 
*/ public final String fieldValue; - public LineData(String keyValue, int lineNumber, String fieldValue) { + public LineData(Table table, Field field, String keyValue, int lineNumber, String fieldValue) { + this.table = table; + this.field = field; this.keyValue = keyValue; this.lineNumber = lineNumber; this.fieldValue = fieldValue; diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 11c77d726..9780f6798 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -43,7 +43,7 @@ public void stopTableMissingConditionallyRequiredStopName() { @Test public void stopTableMissingConditionallyRequiredParentStation() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","3", "691","parent_station is conditionally required."); + checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required."); } @Test diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt index 024711c02..f8eeac273 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt @@ -2,7 +2,7 @@ stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,locatio 4957,64957,,Southbound,37.40048600,-122.10892700,,,0,1,1,, 691,60691,San Antonio & El Camino,Northbound,,-122.11319800,,,0,,1,, 692,60692,San Antonio & Miller,Northbound,37.40462900,,,,0,,1,, -1266,61266,San Antonio & California,Northbound,37.40607000,-122.11050500,,,0,,1,, +1266,61266,San Antonio & California,Northbound,37.40607000,-122.11050500,,,3,,1,, 1267,61267,San Antonio & 
Nita,Northbound,37.41186000,-122.10592500,,,0,,1,, 1268,61268,San Antonio & Nita,Northbound,37.41461600,-122.10392500,,,0,,1,, 1542,61542,Middlefield & Montrose,Northbound,37.41752400,-122.10575900,,,0,,1,, From d2899ad2d8c16ec896632777327eeb2579b9bb56 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Wed, 21 Apr 2021 13:09:18 +0100 Subject: [PATCH 05/45] refactor(Updates to allow unit tests to pass): Removed platform_code from stops, for now, to minimiz --- .gitignore | 2 +- src/main/java/com/conveyal/gtfs/loader/Field.java | 2 +- src/main/java/com/conveyal/gtfs/loader/Table.java | 13 ++++++------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 2cfdf9567..dc693fe79 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ .idea/ target/ -GTFSGraphQLTest/ \ No newline at end of file +lambda$*.json \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index 806482b17..d9d22ee5e 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -192,7 +192,7 @@ public String getColumnExpression(String prefix, boolean csvOutput) { /** * Flag this field as conditionally required. 
*/ - public Field addConditionallyRequired() { + public Field conditionallyRequired() { this.isConditionallyRequired = true; return this; } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index ec1efe94d..c9b7c7864 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -232,17 +232,16 @@ public Table (String name, Class entityClass, Requirement requ public static final Table STOPS = new Table("stops", Stop.class, REQUIRED, new StringField("stop_id", REQUIRED), new StringField("stop_code", OPTIONAL), - new StringField("stop_name", OPTIONAL).addConditionallyRequired(), + new StringField("stop_name", OPTIONAL).conditionallyRequired(), new StringField("stop_desc", OPTIONAL), - new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).addConditionallyRequired(), - new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).addConditionallyRequired(), + new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).conditionallyRequired(), + new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).conditionallyRequired(), new StringField("zone_id", OPTIONAL), new URLField("stop_url", OPTIONAL), - new ShortField("location_type", OPTIONAL, 2).addConditionallyRequired(), - new StringField("parent_station", REQUIRED).addConditionallyRequired(), + new ShortField("location_type", OPTIONAL, 2).conditionallyRequired(), + new StringField("parent_station", OPTIONAL).conditionallyRequired(), new StringField("stop_timezone", OPTIONAL), - new ShortField("wheelchair_boarding", OPTIONAL, 2), - new StringField("platform_code", OPTIONAL).addConditionallyRequired() + new ShortField("wheelchair_boarding", OPTIONAL, 2) ) .restrictDelete() .addPrimaryKey() From 481353b6d46a1cfd0286d9fc2c1820f1ded84ba7 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Wed, 21 Apr 2021 10:34:37 -0400 Subject: [PATCH 06/45] refactor: re-order stops/fare_rules for ref check --- 
.../conveyal/gtfs/loader/JdbcGtfsLoader.java | 7 +-- .../gtfs/loader/ReferenceTracker.java | 58 ------------------- .../java/com/conveyal/gtfs/loader/Table.java | 52 ++++++++--------- .../loader/ConditionallyRequiredTest.java | 15 ++--- 4 files changed, 35 insertions(+), 97 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index 09de1fe53..b467a7f4b 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -162,10 +162,10 @@ public FeedLoadResult loadTables() { result.calendarDates = load(Table.CALENDAR_DATES); result.routes = load(Table.ROUTES); result.fareAttributes = load(Table.FARE_ATTRIBUTES); - result.fareRules = load(Table.FARE_RULES); result.feedInfo = load(Table.FEED_INFO); result.shapes = load(Table.SHAPES); result.stops = load(Table.STOPS); + result.fareRules = load(Table.FARE_RULES); result.transfers = load(Table.TRANSFERS); result.trips = load(Table.TRIPS); // refs routes result.frequencies = load(Table.FREQUENCIES); // refs trips @@ -285,11 +285,6 @@ private TableLoadResult load(Table table) { try { tableLoadResult.rowCount = loadInternal(table); tableLoadResult.fileSize = getTableSize(table); - if (table.conditionallyRequiredForeignRefChecks.size() > 0) { - errorStorage.storeErrors( - referenceTracker.conditionallyRequiredForeignRefChecks(table,dataSource, tablePrefix) - ); - } LOG.info(String.format("loaded in %d %s records", tableLoadResult.rowCount, table.name)); } catch (Exception ex) { LOG.error("Fatal error loading table", ex); diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 303a64d7a..44ba41da3 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -148,64 +148,6 @@ public Set 
checkReferencesAndUniqueness(String keyValue, int lineN return errors; } - /** - * Perform all conditionally required foreign reference checks on the fields within the provided table. - */ - public Set conditionallyRequiredForeignRefChecks(Table table, DataSource dataSource, String tablePrefix) { - Set errors = new HashSet<>(); - final TableReader stopTableReader = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP); - Iterable stops = stopTableReader.getAllOrdered(); - - final TableReader fareRulesTableReader = new JDBCTableReader(Table.FARE_RULES, dataSource, tablePrefix, EntityPopulator.FARE_RULE); - Iterable fareRules = fareRulesTableReader.getAllOrdered(); - - if (table.name.equals(Table.STOPS.name)) { - for (ConditionallyRequiredForeignRefCheck check : table.conditionallyRequiredForeignRefChecks) { - // As the fare rule table is produced before the stops table, the conditionally required checks have to - // be done in reverse. Instead of the fare rule table checking the zone id in the stops table, the stops table - // is responsible for iterating over the fare rules table to confirm required zone id references are available. - if (check == STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK) { - // Get a unique list of all zone ids referenced by the fare rule table. - Set zoneIds = new HashSet<>(); - for (FareRule rule : fareRules) { - if (rule.origin_id != null) { - zoneIds.add(rule.origin_id); - } else if (rule.destination_id != null) { - zoneIds.add(rule.destination_id); - } else if (rule.contains_id != null) { - zoneIds.add(rule.contains_id); - } - } - - // No zone_id references used in fare rules. - if (zoneIds.isEmpty()) { - continue; - } - - // Make sure all zone id references are available, if not flag an error. 
- for (String zoneId : zoneIds) { - boolean match = false; - for (Stop stop : stops) { - if (zoneId.equals(stop.zone_id)) { - match = true; - break; - } - } - if (!match) { - errors.add ( - NewGTFSError.forFeed ( - CONDITIONALLY_REQUIRED, - String.format("zone_id %s is required by fare_rules within stops.", zoneId) - ) - ); - } - } - } - } - } - return errors; - } - /** * Work through each conditionally required check assigned to a table. First check the reference field to confirm diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index c9b7c7864..bf1525410 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -195,16 +195,6 @@ public Table (String name, Class entityClass, Requirement requ new ShortField("status", EDITOR, 2) ).addPrimaryKey(); - public static final Table FARE_RULES = new Table("fare_rules", FareRule.class, OPTIONAL, - new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), - new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), - // FIXME: referential integrity check for zone_id for below three fields? 
- new StringField("origin_id", OPTIONAL), - new StringField("destination_id", OPTIONAL), - new StringField("contains_id", OPTIONAL)) - .withParentTable(FARE_ATTRIBUTES) - .addPrimaryKey().keyFieldIsNotUnique(); - public static final Table SHAPES = new Table("shapes", ShapePoint.class, OPTIONAL, new StringField("shape_id", REQUIRED), new IntegerField("shape_pt_sequence", REQUIRED), @@ -251,6 +241,16 @@ public Table (String name, Class entityClass, Requirement requ .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"parent_station", FIELD_NOT_EMPTY,2, 4) .addConditionallyRequiredForeignRefCheck(STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK); + public static final Table FARE_RULES = new Table("fare_rules", FareRule.class, OPTIONAL, + new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), + new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), + // FIXME: referential integrity check for zone_id for below three fields? + new StringField("origin_id", OPTIONAL).isReferenceTo(STOPS), + new StringField("destination_id", OPTIONAL).isReferenceTo(STOPS), + new StringField("contains_id", OPTIONAL).isReferenceTo(STOPS)) + .withParentTable(FARE_ATTRIBUTES) + .addPrimaryKey().keyFieldIsNotUnique(); + public static final Table PATTERN_STOP = new Table("pattern_stops", PatternStop.class, OPTIONAL, new StringField("pattern_id", REQUIRED).isReferenceTo(PATTERNS), new IntegerField("stop_sequence", REQUIRED, 0, Integer.MAX_VALUE), @@ -325,22 +325,22 @@ public Table (String name, Class entityClass, Requirement requ /** List of tables in order needed for checking referential integrity during load stage. 
*/ public static final Table[] tablesInOrder = { - AGENCY, - CALENDAR, - SCHEDULE_EXCEPTIONS, - CALENDAR_DATES, - FARE_ATTRIBUTES, - FEED_INFO, - ROUTES, - FARE_RULES, - PATTERNS, - SHAPES, - STOPS, - PATTERN_STOP, - TRANSFERS, - TRIPS, - STOP_TIMES, - FREQUENCIES + AGENCY, + CALENDAR, + SCHEDULE_EXCEPTIONS, + CALENDAR_DATES, + FARE_ATTRIBUTES, + FEED_INFO, + ROUTES, + PATTERNS, + SHAPES, + STOPS, + FARE_RULES, + PATTERN_STOP, + TRANSFERS, + TRIPS, + STOP_TIMES, + FREQUENCIES }; /** diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 9780f6798..debfdf8d5 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -13,6 +13,7 @@ import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; public class ConditionallyRequiredTest { private static String testDBName; @@ -38,33 +39,33 @@ public static void tearDownClass() { @Test public void stopTableMissingConditionallyRequiredStopName() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","2", "4957","stop_name is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","2", "4957","stop_name is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredParentStation() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredStopLat() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","3", "691","stop_lat is conditionally 
required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","3", "691","stop_lat is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredStopLon() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required."); } @Test public void stopTableMissingConditionallyRequiredZoneId() { - checkFeedHasError(CONDITIONALLY_REQUIRED, "zone_id 1 is required by fare_rules within stops."); + checkFeedHasOneError(REFERENTIAL_INTEGRITY, "stop_id:1"); } /** * Check that the test feed has exactly one error for the provided values. */ - private void checkFeedHasError(NewGTFSErrorType errorType, String entityType, String lineNumber, String entityId, String badValue) { + private void checkFeedHasOneError(NewGTFSErrorType errorType, String entityType, String lineNumber, String entityId, String badValue) { assertThatSqlCountQueryYieldsExpectedCount( testDataSource, String.format("select count(*) from %s.errors where error_type = '%s' and entity_type = '%s' and line_number = '%s' and entity_id = '%s' and bad_value = '%s'", @@ -80,7 +81,7 @@ private void checkFeedHasError(NewGTFSErrorType errorType, String entityType, St /** * Check that the test feed has exactly one error for the given error type and badValue. 
*/ - private void checkFeedHasError(NewGTFSErrorType errorType, String badValue) { + private void checkFeedHasOneError(NewGTFSErrorType errorType, String badValue) { assertThatSqlCountQueryYieldsExpectedCount( testDataSource, String.format("select count(*) from %s.errors where error_type = '%s' and bad_value = '%s'", From effb4b97be764dac92d135d804ffaeed462496c2 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Wed, 21 Apr 2021 10:44:10 -0400 Subject: [PATCH 07/45] refactor: remove ConditionallyRequiredForeignRefCheck.java --- .../gtfs/loader/ConditionallyRequiredForeignRefCheck.java | 5 ----- src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java | 5 ----- src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java | 1 - src/main/java/com/conveyal/gtfs/loader/Table.java | 4 +--- 4 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java deleted file mode 100644 index b4b16de05..000000000 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredForeignRefCheck.java +++ /dev/null @@ -1,5 +0,0 @@ -package com.conveyal.gtfs.loader; - -public enum ConditionallyRequiredForeignRefCheck { - STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK // Confirm that all zone_id references in fare rules are available in stops. 
-} diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index b467a7f4b..bf5507dcb 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -3,8 +3,6 @@ import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.error.SQLErrorStorage; -import com.conveyal.gtfs.model.FareRule; -import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.storage.StorageException; import com.csvreader.CsvReader; import com.google.common.hash.HashCode; @@ -20,13 +18,10 @@ import java.io.*; import java.sql.*; import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import static com.conveyal.gtfs.error.NewGTFSErrorType.*; -import static com.conveyal.gtfs.loader.ConditionallyRequiredForeignRefCheck.STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK; import static com.conveyal.gtfs.model.Entity.human; import static com.conveyal.gtfs.util.Util.randomIdString; diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 44ba41da3..60cce521f 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -18,7 +18,6 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.ConditionallyRequiredForeignRefCheck.STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK; /** * This class is used while loading GTFS to track the unique keys that are encountered in a GTFS diff --git 
a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index bf1525410..1dfb67e4f 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -49,7 +49,6 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.ConditionallyRequiredForeignRefCheck.STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -238,8 +237,7 @@ public Table (String name, Class entityClass, Requirement requ .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_name", FIELD_NOT_EMPTY,0, 2) .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_lat", FIELD_NOT_EMPTY,0, 2) .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_lon", FIELD_NOT_EMPTY,0, 2) - .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"parent_station", FIELD_NOT_EMPTY,2, 4) - .addConditionallyRequiredForeignRefCheck(STOPS_ZONE_ID_FARE_RULES_FOREIGN_REF_CHECK); + .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"parent_station", FIELD_NOT_EMPTY,2, 4); public static final Table FARE_RULES = new Table("fare_rules", FareRule.class, OPTIONAL, new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), From e0881a8a0c2c0b9bb166d0cae796ec8bc09b8ab8 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Wed, 21 Apr 2021 10:46:03 -0400 Subject: [PATCH 08/45] refactor: finish removing ConditionallyRequiredForeignRefCheck --- src/main/java/com/conveyal/gtfs/loader/Table.java | 9 --------- 1 file changed, 9 deletions(-) diff --git 
a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 1dfb67e4f..ea8ce992e 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -90,7 +90,6 @@ public class Table { private boolean compoundKey; public Set conditionallyRequiredFields = new HashSet<>(); - public Set conditionallyRequiredForeignRefChecks = new HashSet<>(); public Table (String name, Class entityClass, Requirement required, Field... fields) { // TODO: verify table name is OK for use in constructing dynamic SQL queries @@ -1033,12 +1032,4 @@ public Table addConditionalRequiredCheck ( ); return this; } - - /** - * Adds a conditionally required foreign reference check to a table. - */ - public Table addConditionallyRequiredForeignRefCheck(ConditionallyRequiredForeignRefCheck check) { - this.conditionallyRequiredForeignRefChecks.add(check); - return this; - } } From 8620c49f06b7c831327c6eb383cdb72ea11c9e9a Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Wed, 21 Apr 2021 14:58:32 -0400 Subject: [PATCH 09/45] refactor: modify conditional check structure --- .../conveyal/gtfs/error/NewGTFSErrorType.java | 2 +- ...ldCheck.java => ConditionalCheckType.java} | 4 +- .../gtfs/loader/ConditionalRequirement.java | 32 ++++++ .../loader/ConditionallyRequiredField.java | 36 ------ .../java/com/conveyal/gtfs/loader/Field.java | 5 +- .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 13 +-- .../gtfs/loader/ReferenceTracker.java | 108 +++++++----------- .../java/com/conveyal/gtfs/loader/Table.java | 68 +++++------ .../loader/ConditionallyRequiredTest.java | 8 +- 9 files changed, 114 insertions(+), 162 deletions(-) rename src/main/java/com/conveyal/gtfs/loader/{ConditionallyRequiredFieldCheck.java => ConditionalCheckType.java} (68%) create mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java delete mode 100644 
src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 8e648577f..949a35723 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -11,7 +11,7 @@ public enum NewGTFSErrorType { BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), - CONDITIONALLY_REQUIRED(Priority.HIGH, "Field is conditionally required."), + CONDITIONALLY_REQUIRED(Priority.HIGH, "A conditionally required field was missing ini a particular row."), CURRENCY_UNKNOWN(Priority.MEDIUM, "The currency code was not recognized."), DATE_FORMAT(Priority.MEDIUM, "Date format should be YYYYMMDD."), DATE_NO_SERVICE(Priority.MEDIUM, "No service_ids were active on a date within the range of dates with defined service."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java similarity index 68% rename from src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java rename to src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java index 94ec62bdf..d78a8a505 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredFieldCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java @@ -2,9 +2,9 @@ /** * These are the conditionally required checks to be carried out inline with the values provided in - * {@link ConditionallyRequiredField}. + * {@link ConditionalRequirement}. 
*/ -public enum ConditionallyRequiredFieldCheck { +public enum ConditionalCheckType { FIELD_NOT_EMPTY, FIELD_IN_RANGE } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java new file mode 100644 index 000000000..29c20c622 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -0,0 +1,32 @@ +package com.conveyal.gtfs.loader; + +/** + * These are the values that are checked inline with {@link ConditionalCheckType} to determine if the required + * conditions have been met. + */ +public class ConditionalRequirement { + /** The type of check to be performed on the reference field. */ + public final ConditionalCheckType referenceCheck; + /** The minimum reference field value if a range check is being performed. */ + public int minReferenceValue; + /** The maximum reference field value if a range check is being performed. */ + public int maxReferenceValue; + /** The type of check to be performed on the conditional field. */ + public ConditionalCheckType conditionalCheck; + /** The name of the conditional field. 
*/ + public String conditionalFieldName; + + public ConditionalRequirement( + int minReferenceValue, + int maxReferenceValue, + String conditionalFieldName, + ConditionalCheckType conditionalCheck + + ) { + this.referenceCheck = ConditionalCheckType.FIELD_IN_RANGE; + this.minReferenceValue = minReferenceValue; + this.maxReferenceValue = maxReferenceValue; + this.conditionalFieldName = conditionalFieldName; + this.conditionalCheck = conditionalCheck; + } +} diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java b/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java deleted file mode 100644 index a25dc4ed8..000000000 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionallyRequiredField.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.conveyal.gtfs.loader; - -/** - * These are the values that are checked inline with {@link ConditionallyRequiredFieldCheck} to determine if the required - * conditions have been met. - */ -public class ConditionallyRequiredField { - /** The type of check to be performed on the reference field. */ - public final ConditionallyRequiredFieldCheck referenceCheck; - /** The type of check to be performed on the conditional field. */ - public ConditionallyRequiredFieldCheck conditionalCheck; - /** The minimum reference field value if a range check is being performed. */ - public double minReferenceValue; - /** The maximum reference field value if a range check is being performed. */ - public double maxReferenceValue; - /** The name of the reference field. */ - String referenceFieldName; - /** The name of the conditional field. 
*/ - String conditionalFieldName; - - ConditionallyRequiredField ( - String referenceFieldName, - ConditionallyRequiredFieldCheck referenceCheck, - String conditionalFieldName, - ConditionallyRequiredFieldCheck conditionalCheck, - double minReferenceValue, - double maxReferenceValue - ) { - this.referenceFieldName = referenceFieldName; - this.referenceCheck = referenceCheck; - this.conditionalFieldName = conditionalFieldName; - this.conditionalCheck = conditionalCheck; - this.minReferenceValue = minReferenceValue; - this.maxReferenceValue = maxReferenceValue; - } -} diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index d9d22ee5e..0e9d54fb5 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -7,7 +7,6 @@ import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.SQLType; -import java.util.HashSet; import java.util.Set; /** @@ -50,6 +49,7 @@ public abstract class Field { private boolean shouldBeIndexed; private boolean emptyValuePermitted; private boolean isConditionallyRequired; + public ConditionalRequirement[] conditions; public Field(String name, Requirement requirement) { this.name = name; @@ -192,8 +192,9 @@ public String getColumnExpression(String prefix, boolean csvOutput) { /** * Flag this field as conditionally required. 
*/ - public Field conditionallyRequired() { + public Field requireConditions(ConditionalRequirement...conditions) { this.isConditionallyRequired = true; + this.conditions = conditions; return this; } diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index bf5507dcb..e0e701938 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -67,7 +67,7 @@ public class JdbcGtfsLoader { public static final long INSERT_BATCH_SIZE = 500; // Represents null in Postgres text format - private static final String POSTGRES_NULL_TEXT = "\\N"; + public static final String POSTGRES_NULL_TEXT = "\\N"; private static final Logger LOG = LoggerFactory.getLogger(JdbcGtfsLoader.class); private String gtfsFilePath; @@ -366,7 +366,7 @@ private int loadInternal(Table table) throws Exception { // When outputting text, accumulate transformed strings to allow skipping rows when errors are encountered. // One extra position in the array for the CSV line number. String[] transformedStrings = new String[cleanFields.length + 1]; - + boolean tableHasConditions = table.hasConditionalRequirements(); // Iterate over each record and prepare the record for storage in the table either through batch insert // statements or postgres text copy operation. while (csvReader.readRecord()) { @@ -392,7 +392,6 @@ private int loadInternal(Table table) throws Exception { // Maintain a separate columnIndex from for loop because some fields may be null and not included in the set // of fields for this table. 
int columnIndex = 0; - List fieldLineData = new ArrayList<>(); for (int f = 0; f < fields.length; f++) { Field field = fields[f]; // If the field is null, it represents a duplicate header or ID field and must be skipped to maintain @@ -431,16 +430,12 @@ private int loadInternal(Table table) throws Exception { } // Add value for entry into table setValueForField(table, columnIndex, lineNumber, field, string, postgresText, transformedStrings); - if (field.isConditionallyRequired()) { - // Hold the field line data for use in checking conditionally required fields. - fieldLineData.add(new ReferenceTracker.LineData(table, field, keyValue, lineNumber, string)); - } // Increment column index. columnIndex += 1; } - if (fieldLineData.size() > 0) { + if (tableHasConditions) { errorStorage.storeErrors( - referenceTracker.checkConditionallyRequiredFields(table, fieldLineData) + referenceTracker.checkConditionallyRequiredFields(table, fields, transformedStrings, lineNumber) ); } if (postgresText) { diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 60cce521f..d2c108d9c 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -1,23 +1,21 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; -import com.conveyal.gtfs.model.FareRule; -import com.conveyal.gtfs.model.Stop; import org.apache.commons.lang3.math.NumberUtils; -import javax.sql.DataSource; +import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; -import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; 
-import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; -import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; /** * This class is used while loading GTFS to track the unique keys that are encountered in a GTFS @@ -153,58 +151,58 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN * if it meets the conditions whereby the conditional field is required. If the conditional field is required confirm * that a value has been provided, if not, log a an error. */ - public Set checkConditionallyRequiredFields(Table table, List fieldLineData) { + public Set checkConditionallyRequiredFields(Table table, Field[] fields, String[] rowData, int lineNumber) { Set errors = new HashSet<>(); - Set fieldsToCheck = table.conditionallyRequiredFields; - for (ConditionallyRequiredField check : fieldsToCheck) { - LineData refFieldLineData = getFieldLineData(table.name, check.referenceFieldName, fieldLineData); - if ( - check.referenceCheck == FIELD_IN_RANGE && - refFieldLineData != null && - referenceFieldInRange(refFieldLineData.fieldValue, check.minReferenceValue, check.maxReferenceValue) - ) { - // reference field within range, perform check on conditional field. 
- LineData conFieldLineData = getFieldLineData(table.name, check.conditionalFieldName, fieldLineData); - if ( - check.conditionalCheck == FIELD_NOT_EMPTY && - conFieldLineData != null && - isEmpty(conFieldLineData.fieldValue) - ) { + Map fieldsToCheck = table.getConditionalRequirements(); + for (Map.Entry entry : fieldsToCheck.entrySet()) { + Field referenceField = entry.getKey(); + ConditionalRequirement[] conditionalRequirements = entry.getValue(); + for (ConditionalRequirement check : conditionalRequirements) { + int refFieldIndex = Field.getFieldIndex(fields, referenceField.name); + String refFieldData = getValueForRow(rowData, refFieldIndex); + boolean referenceValueMeetsRangeCondition = check.referenceCheck == FIELD_IN_RANGE && + !POSTGRES_NULL_TEXT.equals(refFieldData) && + // TODO use pre-existing method in ShortField? + isValueInRange(refFieldData, check.minReferenceValue, check.maxReferenceValue); + // If ref value does not meet the range condition, there is no need to check the conditional value for + // (e.g.) an empty value. 
+ if (!referenceValueMeetsRangeCondition) return errors; + int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); + String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); + boolean conditionallyRequiredValueIsEmpty = check.conditionalCheck == FIELD_NOT_EMPTY && + POSTGRES_NULL_TEXT.equals(conditionalFieldData); + if (conditionallyRequiredValueIsEmpty) { + String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); + String message = String.format( + "%s is conditionally required when %s value is between %d and %d.", + check.conditionalFieldName, + referenceField.name, + check.minReferenceValue, + check.maxReferenceValue + ); errors.add( - NewGTFSError - .forLine( - table, - conFieldLineData.lineNumber, - CONDITIONALLY_REQUIRED, - String.format("%s is conditionally required.", check.conditionalFieldName) - ).setEntityId(conFieldLineData.keyValue) + NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) ); } } - } return errors; } /** - * Return the line data that matches the table and field provided. + * Get value for a particular column index from a set of row data. Note: the row data here has one extra value at + * the beginning of the array that represents the line number (hence the +1). This is because the data is formatted + * for batch insertion into a postgres table. */ - private LineData getFieldLineData(String tableName, String fieldName, List fieldLineData) { - Optional match = fieldLineData - .stream() - .filter( - lineData -> lineData.table.name.equals(tableName) && - lineData.field.name.equals(fieldName) - ) - .findFirst(); - return match.orElse(null); + private String getValueForRow(String[] rowData, int columnIndex) { + return rowData[columnIndex + 1]; } /** - * Check if the provided reference field value is within the min and max values. If the field value can not be converted + * Check if the provided value is within the min and max values. 
If the field value can not be converted * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. */ - private boolean referenceFieldInRange(String referenceFieldValue, double min, double max) { + private boolean isValueInRange(String referenceFieldValue, int min, int max) { try { int fieldValue = Integer.parseInt(referenceFieldValue); return fieldValue >= min && fieldValue <= max; @@ -244,28 +242,4 @@ private boolean isEmpty(String str) { } return false; } - - /** - * Holds line data that will be used in relation to a conditionally required field. - */ - public static class LineData { - /** The table associated with this line of data. */ - public final Table table; - /** The field associated with this line of data. */ - public final Field field; - /** The key associated with this line of data. */ - public final String keyValue; - /** The line number. */ - public final int lineNumber; - /** The string representation of the field value. 
*/ - public final String fieldValue; - - public LineData(Table table, Field field, String keyValue, int lineNumber, String fieldValue) { - this.table = table; - this.field = field; - this.keyValue = keyValue; - this.lineNumber = lineNumber; - this.fieldValue = fieldValue; - } - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index ea8ce992e..1e150ed05 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -38,8 +38,10 @@ import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import java.util.zip.ZipEntry; @@ -47,8 +49,8 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; -import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_IN_RANGE; -import static com.conveyal.gtfs.loader.ConditionallyRequiredFieldCheck.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -89,8 +91,6 @@ public class Table { * */ private boolean compoundKey; - public Set conditionallyRequiredFields = new HashSet<>(); - public Table (String name, Class entityClass, Requirement required, Field... 
fields) { // TODO: verify table name is OK for use in constructing dynamic SQL queries this.name = name; @@ -220,23 +220,24 @@ public Table (String name, Class entityClass, Requirement requ public static final Table STOPS = new Table("stops", Stop.class, REQUIRED, new StringField("stop_id", REQUIRED), new StringField("stop_code", OPTIONAL), - new StringField("stop_name", OPTIONAL).conditionallyRequired(), + new StringField("stop_name", OPTIONAL).requireConditions(), new StringField("stop_desc", OPTIONAL), - new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).conditionallyRequired(), - new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).conditionallyRequired(), + new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).requireConditions(), + new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).requireConditions(), new StringField("zone_id", OPTIONAL), new URLField("stop_url", OPTIONAL), - new ShortField("location_type", OPTIONAL, 2).conditionallyRequired(), - new StringField("parent_station", OPTIONAL).conditionallyRequired(), + new ShortField("location_type", OPTIONAL, 4).requireConditions( + new ConditionalRequirement( 0, 2, "stop_name", FIELD_NOT_EMPTY), + new ConditionalRequirement( 0, 2, "stop_lat", FIELD_NOT_EMPTY), + new ConditionalRequirement( 0, 2, "stop_lon", FIELD_NOT_EMPTY), + new ConditionalRequirement( 2, 4, "parent_station", FIELD_NOT_EMPTY) + ), + new StringField("parent_station", OPTIONAL).requireConditions(), new StringField("stop_timezone", OPTIONAL), new ShortField("wheelchair_boarding", OPTIONAL, 2) ) .restrictDelete() - .addPrimaryKey() - .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_name", FIELD_NOT_EMPTY,0, 2) - .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_lat", FIELD_NOT_EMPTY,0, 2) - .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"stop_lon", FIELD_NOT_EMPTY,0, 2) - .addConditionalRequiredCheck("location_type", FIELD_IN_RANGE,"parent_station", FIELD_NOT_EMPTY,2, 4); + .addPrimaryKey(); public 
static final Table FARE_RULES = new Table("fare_rules", FareRule.class, OPTIONAL, new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), @@ -1004,32 +1005,17 @@ public int getKeyFieldIndex(Field[] fields) { return Field.getFieldIndex(fields, keyField); } - /** - * Adds a conditionally required field check to a table. - * @param referenceField The value of this field will determine if the conditional field is required. - * @param referenceCheck The type of check to be carried out on the reference field. - * @param conditionalField The field that maybe required if the reference checks are true. - * @param conditionalCheck The type of check to be carried out on the conditional field. - * @param minValue The minimum reference field value needed for conditionally required. - * @param maxValue The maximum reference field value needed for conditionally required. - */ - public Table addConditionalRequiredCheck ( - String referenceField, - ConditionallyRequiredFieldCheck referenceCheck, - String conditionalField, - ConditionallyRequiredFieldCheck conditionalCheck, - double minValue, - double maxValue - ) { - this.conditionallyRequiredFields.add( - new ConditionallyRequiredField( - referenceField, - referenceCheck, - conditionalField, - conditionalCheck, - minValue, - maxValue) - ); - return this; + public boolean hasConditionalRequirements() { + return !getConditionalRequirements().isEmpty(); + } + + public Map getConditionalRequirements() { + Map fieldsWithConditions = new HashMap<>(); + for (Field field : fields) { + if (field.isConditionallyRequired()) { + fieldsWithConditions.put(field, field.conditions); + } + } + return fieldsWithConditions; } } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index debfdf8d5..9c41e76af 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ 
b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -39,22 +39,22 @@ public static void tearDownClass() { @Test public void stopTableMissingConditionallyRequiredStopName() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","2", "4957","stop_name is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","2", "4957","stop_name is conditionally required when location_type value is between 0 and 2."); } @Test public void stopTableMissingConditionallyRequiredParentStation() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required when location_type value is between 0 and 4."); } @Test public void stopTableMissingConditionallyRequiredStopLat() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","3", "691","stop_lat is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","3", "691","stop_lat is conditionally required when location_type value is between 0 and 2."); } @Test public void stopTableMissingConditionallyRequiredStopLon() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required when location_type value is between 0 and 2."); } @Test From a8b8e627cc5e5aea25e9f5ff9372d7bc7ceeab95 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Fri, 23 Apr 2021 12:07:59 +0100 Subject: [PATCH 10/45] refactor(ReferenceTracker.java): Updated logic to carry out all conditional checks --- .../java/com/conveyal/gtfs/error/NewGTFSErrorType.java | 2 +- .../java/com/conveyal/gtfs/loader/ReferenceTracker.java | 8 ++++---- .../conveyal/gtfs/loader/ConditionallyRequiredTest.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 949a35723..1da0aaff4 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -11,7 +11,7 @@ public enum NewGTFSErrorType { BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), - CONDITIONALLY_REQUIRED(Priority.HIGH, "A conditionally required field was missing ini a particular row."), + CONDITIONALLY_REQUIRED(Priority.HIGH, "A conditionally required field was missing in a particular row."), CURRENCY_UNKNOWN(Priority.MEDIUM, "The currency code was not recognized."), DATE_FORMAT(Priority.MEDIUM, "Date format should be YYYYMMDD."), DATE_NO_SERVICE(Priority.MEDIUM, "No service_ids were active on a date within the range of dates with defined service."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index d2c108d9c..2ab88bdac 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -149,7 +149,7 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN /** * Work through each conditionally required check assigned to a table. First check the reference field to confirm * if it meets the conditions whereby the conditional field is required. If the conditional field is required confirm - * that a value has been provided, if not, log a an error. + * that a value has been provided, if not, log an error. 
*/ public Set checkConditionallyRequiredFields(Table table, Field[] fields, String[] rowData, int lineNumber) { Set errors = new HashSet<>(); @@ -164,9 +164,9 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f !POSTGRES_NULL_TEXT.equals(refFieldData) && // TODO use pre-existing method in ShortField? isValueInRange(refFieldData, check.minReferenceValue, check.maxReferenceValue); - // If ref value does not meet the range condition, there is no need to check the conditional value for - // (e.g.) an empty value. - if (!referenceValueMeetsRangeCondition) return errors; + // If ref value does not meet the range condition, there is no need to check this conditional value for + // (e.g.) an empty value. Continue to the next check. + if (!referenceValueMeetsRangeCondition) continue; int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); boolean conditionallyRequiredValueIsEmpty = check.conditionalCheck == FIELD_NOT_EMPTY && diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 9c41e76af..97017875a 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -44,7 +44,7 @@ public void stopTableMissingConditionallyRequiredStopName() { @Test public void stopTableMissingConditionallyRequiredParentStation() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required when location_type value is between 0 and 4."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required when location_type value is between 2 and 4."); } @Test From 3f921e3a750b127877b20f09ce3089cfbb7195ee Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Fri, 23 Apr 2021 12:57:49 -0400 
Subject: [PATCH 11/45] refactor: add (broken) WIP changes for more conditional checks --- .../java/com/conveyal/gtfs/error/NewGTFSErrorType.java | 1 + .../java/com/conveyal/gtfs/loader/ReferenceTracker.java | 7 +++++++ .../com/conveyal/gtfs/validator/NewTripTimesValidator.java | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 1da0aaff4..80ee7c5e0 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -8,6 +8,7 @@ */ public enum NewGTFSErrorType { // Standard errors. + AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 2ab88bdac..6d683a3fb 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -160,6 +160,13 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f for (ConditionalRequirement check : conditionalRequirements) { int refFieldIndex = Field.getFieldIndex(fields, referenceField.name); String refFieldData = getValueForRow(rowData, refFieldIndex); + if (check.referenceCheck == ROW_COUNT_GREATER_THAN_ONE) { + if (table.name.equals("agency") && lineNumber == 1) { + // don't do check + } else if (transitIds.contains("agency_id:*") > 1 && POSTGRES_NULL_TEXT.equals(refFieldData)) { + // ERROR. 
+ } + } boolean referenceValueMeetsRangeCondition = check.referenceCheck == FIELD_IN_RANGE && !POSTGRES_NULL_TEXT.equals(refFieldData) && // TODO use pre-existing method in ShortField? diff --git a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java index cf395e674..72a969748 100644 --- a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java @@ -135,6 +135,8 @@ private void processTrip (List stopTimes) { // This error should already have been caught TODO verify. return; } + boolean hasContinuousBehavior = false; + // Our code should only call this method with non-null stopTimes. if (stopTimes.size() < 2) { registerError(trip, TRIP_TOO_FEW_STOP_TIMES); @@ -146,6 +148,9 @@ private void processTrip (List stopTimes) { List stops = new ArrayList<>(); for (Iterator it = stopTimes.iterator(); it.hasNext(); ) { StopTime stopTime = it.next(); + if (stopTime.continuous == 0,2,3) { + hasContinuousBehavior = true; + } Stop stop = stopById.get(stopTime.stop_id); if (stop == null) { // All bad references should have been recorded at import, we can just remove them from the trips. From a57509132e826880504b860897013599ea67bd4d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Apr 2021 16:14:03 +0000 Subject: [PATCH 12/45] chore(deps): bump commons-io from 2.4 to 2.7 Bumps commons-io from 2.4 to 2.7. 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bb9da873b..6b4cfdd1f 100644 --- a/pom.xml +++ b/pom.xml @@ -292,7 +292,7 @@ commons-io commons-io - 2.4 + 2.7 From 0b2698accec013858017191d40ffb7ed9bdde6bc Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Mon, 26 Apr 2021 19:29:17 +0100 Subject: [PATCH 13/45] refactor(Added additional conditional checks): New checks in ReferenceTracker and NewTripTimesValidator --- .../gtfs/loader/ConditionalCheckType.java | 3 +- .../gtfs/loader/ConditionalRequirement.java | 16 +++- .../conveyal/gtfs/loader/EntityPopulator.java | 26 +++--- .../gtfs/loader/ReferenceTracker.java | 81 ++++++++++++------- .../java/com/conveyal/gtfs/loader/Table.java | 20 +++-- .../java/com/conveyal/gtfs/model/Route.java | 10 +++ .../com/conveyal/gtfs/model/StopTime.java | 10 ++- .../gtfs/validator/NewTripTimesValidator.java | 27 ++++++- .../loader/ConditionallyRequiredTest.java | 42 ++++++++++ .../agency.txt | 5 +- .../fare_attributes.txt | 4 +- .../routes.txt | 4 +- .../stop_times.txt | 52 ++++++------ .../trips.txt | 2 +- 14 files changed, 220 insertions(+), 82 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java index d78a8a505..19028c092 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java @@ -6,5 +6,6 @@ */ public enum ConditionalCheckType { FIELD_NOT_EMPTY, - FIELD_IN_RANGE + FIELD_IN_RANGE, + ROW_COUNT_GREATER_THAN_ONE } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 29c20c622..4a0970f90 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ 
-6,7 +6,7 @@ */ public class ConditionalRequirement { /** The type of check to be performed on the reference field. */ - public final ConditionalCheckType referenceCheck; + public ConditionalCheckType referenceCheck; /** The minimum reference field value if a range check is being performed. */ public int minReferenceValue; /** The maximum reference field value if a range check is being performed. */ @@ -29,4 +29,18 @@ public ConditionalRequirement( this.conditionalFieldName = conditionalFieldName; this.conditionalCheck = conditionalCheck; } + + public ConditionalRequirement(String conditionalFieldName) { + this.referenceCheck = ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; + this.conditionalFieldName = conditionalFieldName; + } + + public ConditionalRequirement( + String conditionalFieldName, + ConditionalCheckType conditionalCheck + ) { + this.referenceCheck = ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; + this.conditionalFieldName = conditionalFieldName; + this.conditionalCheck = conditionalCheck; + } } diff --git a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java index 91c40ad24..aa45a75bc 100644 --- a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java +++ b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java @@ -150,17 +150,19 @@ public interface EntityPopulator { }; EntityPopulator ROUTE = (result, columnForName) -> { - Route route = new Route(); - route.route_id = getStringIfPresent(result, "route_id", columnForName); - route.agency_id = getStringIfPresent(result, "agency_id", columnForName); - route.route_short_name = getStringIfPresent(result, "route_short_name", columnForName); - route.route_long_name = getStringIfPresent(result, "route_long_name", columnForName); - route.route_desc = getStringIfPresent(result, "route_desc", columnForName); - route.route_type = getIntIfPresent (result, "route_type", columnForName); - route.route_color = 
getStringIfPresent(result, "route_color", columnForName); - route.route_text_color = getStringIfPresent(result, "route_text_color", columnForName); - route.route_url = getUrlIfPresent (result, "route_url", columnForName); - route.route_branding_url = getUrlIfPresent (result, "route_branding_url", columnForName); + Route route = new Route(); + route.route_id = getStringIfPresent(result, "route_id", columnForName); + route.agency_id = getStringIfPresent(result, "agency_id", columnForName); + route.route_short_name = getStringIfPresent(result, "route_short_name", columnForName); + route.route_long_name = getStringIfPresent(result, "route_long_name", columnForName); + route.route_desc = getStringIfPresent(result, "route_desc", columnForName); + route.route_type = getIntIfPresent (result, "route_type", columnForName); + route.route_color = getStringIfPresent(result, "route_color", columnForName); + route.route_text_color = getStringIfPresent(result, "route_text_color", columnForName); + route.route_url = getUrlIfPresent (result, "route_url", columnForName); + route.route_branding_url = getUrlIfPresent (result, "route_branding_url", columnForName); + route.continuous_pickup = getIntIfPresent (result, "continuous_pickup", columnForName); + route.continuous_drop_off = getIntIfPresent (result, "continuous_drop_off", columnForName); return route; }; @@ -216,6 +218,8 @@ public interface EntityPopulator { stopTime.stop_headsign = getStringIfPresent(result, "stop_headsign", columnForName); stopTime.pickup_type = getIntIfPresent (result, "pickup_type", columnForName); stopTime.drop_off_type = getIntIfPresent (result, "drop_off_type", columnForName); + stopTime.continuous_pickup = getIntIfPresent (result, "continuous_pickup", columnForName); + stopTime.continuous_drop_off = getIntIfPresent (result, "continuous_drop_off", columnForName); stopTime.timepoint = getIntIfPresent (result, "timepoint", columnForName); stopTime.shape_dist_traveled = getDoubleIfPresent(result, 
"shape_dist_traveled", columnForName); return stopTime; diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 6d683a3fb..25aa6aa4e 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -15,6 +15,7 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; /** @@ -161,35 +162,59 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f int refFieldIndex = Field.getFieldIndex(fields, referenceField.name); String refFieldData = getValueForRow(rowData, refFieldIndex); if (check.referenceCheck == ROW_COUNT_GREATER_THAN_ONE) { - if (table.name.equals("agency") && lineNumber == 1) { - // don't do check - } else if (transitIds.contains("agency_id:*") > 1 && POSTGRES_NULL_TEXT.equals(refFieldData)) { - // ERROR. 
+ int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); + String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); + if (table.name.equals("agency") && + lineNumber > 2 && + transitIds.stream().filter(transitId -> transitId.contains("agency_id")).count() != lineNumber - 1 + ) { + String message = String.format( + "%s is conditionally required when there is more than one agency.", + check.conditionalFieldName + ); + errors.add( + NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message) + ); + } else if ((table.name.equals("routes") + || table.name.equals("fare_attributes")) && + transitIds.stream().filter(transitId -> transitId.contains("agency_id")).count() > 1 && + check.conditionalCheck == FIELD_NOT_EMPTY && + POSTGRES_NULL_TEXT.equals(conditionalFieldData)) { + // FIXME: This doesn't work if only one agency_id is defined in the agency table. + String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); + String message = String.format( + "%s is conditionally required when there is more than one agency.", + check.conditionalFieldName + ); + errors.add( + NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) + ); + } + } else if (check.referenceCheck == FIELD_IN_RANGE) { + boolean referenceValueMeetsRangeCondition = + !POSTGRES_NULL_TEXT.equals(refFieldData) && + // TODO use pre-existing method in ShortField? + isValueInRange(refFieldData, check.minReferenceValue, check.maxReferenceValue); + // If ref value does not meet the range condition, there is no need to check this conditional value for + // (e.g.) an empty value. Continue to the next check. 
+ if (!referenceValueMeetsRangeCondition) continue; + int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); + String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); + boolean conditionallyRequiredValueIsEmpty = check.conditionalCheck == FIELD_NOT_EMPTY && + POSTGRES_NULL_TEXT.equals(conditionalFieldData); + if (conditionallyRequiredValueIsEmpty) { + String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); + String message = String.format( + "%s is conditionally required when %s value is between %d and %d.", + check.conditionalFieldName, + referenceField.name, + check.minReferenceValue, + check.maxReferenceValue + ); + errors.add( + NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) + ); } - } - boolean referenceValueMeetsRangeCondition = check.referenceCheck == FIELD_IN_RANGE && - !POSTGRES_NULL_TEXT.equals(refFieldData) && - // TODO use pre-existing method in ShortField? - isValueInRange(refFieldData, check.minReferenceValue, check.maxReferenceValue); - // If ref value does not meet the range condition, there is no need to check this conditional value for - // (e.g.) an empty value. Continue to the next check. 
- if (!referenceValueMeetsRangeCondition) continue; - int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); - String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); - boolean conditionallyRequiredValueIsEmpty = check.conditionalCheck == FIELD_NOT_EMPTY && - POSTGRES_NULL_TEXT.equals(conditionalFieldData); - if (conditionallyRequiredValueIsEmpty) { - String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); - String message = String.format( - "%s is conditionally required when %s value is between %d and %d.", - check.conditionalFieldName, - referenceField.name, - check.minReferenceValue, - check.maxReferenceValue - ); - errors.add( - NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) - ); } } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 1e150ed05..596cf45c6 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -49,8 +49,8 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -100,7 +100,9 @@ public Table (String name, Class entityClass, Requirement requ } public static final Table AGENCY = new Table("agency", Agency.class, REQUIRED, - new StringField("agency_id", OPTIONAL), // FIXME? 
only required if there are more than one + new StringField("agency_id", OPTIONAL).requireConditions( + new ConditionalRequirement("agency_id") + ), new StringField("agency_name", REQUIRED), new URLField("agency_url", REQUIRED), new StringField("agency_timezone", REQUIRED), // FIXME new field type for time zones? @@ -149,7 +151,9 @@ public Table (String name, Class entityClass, Requirement requ new CurrencyField("currency_type", REQUIRED), new ShortField("payment_method", REQUIRED, 1), new ShortField("transfers", REQUIRED, 2).permitEmptyValue(), - new StringField("agency_id", OPTIONAL), // FIXME? only required if there are more than one + new StringField("agency_id", OPTIONAL).requireConditions( + new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY) + ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -173,7 +177,9 @@ public Table (String name, Class entityClass, Requirement requ public static final Table ROUTES = new Table("routes", Route.class, REQUIRED, new StringField("route_id", REQUIRED), - new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY), + new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( + new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY) + ), new StringField("route_short_name", OPTIONAL), // one of short or long must be provided new StringField("route_long_name", OPTIONAL), new StringField("route_desc", OPTIONAL), @@ -190,7 +196,9 @@ public Table (String name, Class entityClass, Requirement requ new ShortField("wheelchair_accessible", EDITOR, 2).permitEmptyValue(), new IntegerField("route_sort_order", OPTIONAL, 0, Integer.MAX_VALUE), // Status values are In progress (0), Pending approval (1), and Approved (2). 
- new ShortField("status", EDITOR, 2) + new ShortField("status", EDITOR, 2), + new ShortField("continuous_pickup", OPTIONAL, 3), + new ShortField("continuous_drop_off", OPTIONAL, 3) ).addPrimaryKey(); public static final Table SHAPES = new Table("shapes", ShapePoint.class, OPTIONAL, @@ -303,6 +311,8 @@ public Table (String name, Class entityClass, Requirement requ new StringField("stop_headsign", OPTIONAL), new ShortField("pickup_type", OPTIONAL, 3), new ShortField("drop_off_type", OPTIONAL, 3), + new ShortField("continuous_pickup", OPTIONAL, 3), + new ShortField("continuous_drop_off", OPTIONAL, 3), new DoubleField("shape_dist_traveled", OPTIONAL, 0, Double.POSITIVE_INFINITY, 2), new ShortField("timepoint", OPTIONAL, 1), new IntegerField("fare_units_traveled", EXTENSION) // OpenOV NL extension diff --git a/src/main/java/com/conveyal/gtfs/model/Route.java b/src/main/java/com/conveyal/gtfs/model/Route.java index c5eb05fbb..3c669e8e8 100644 --- a/src/main/java/com/conveyal/gtfs/model/Route.java +++ b/src/main/java/com/conveyal/gtfs/model/Route.java @@ -34,6 +34,8 @@ public class Route extends Entity { // implements Entity.Factory public String route_text_color; public URL route_branding_url; public String feed_id; + public int continuous_pickup = INT_MISSING; + public int continuous_drop_off = INT_MISSING; @Override public String getId () { @@ -64,6 +66,8 @@ public void setStatementParameters(PreparedStatement statement, boolean setDefau // route_sort_order setIntParameter(statement, oneBasedIndex++, route_sort_order); setIntParameter(statement, oneBasedIndex++, 0); + setIntParameter(statement, oneBasedIndex++, continuous_pickup); + setIntParameter(statement, oneBasedIndex++, continuous_drop_off); } public static class Loader extends Entity.Loader { @@ -104,6 +108,8 @@ public void loadOneRow() throws IOException { r.route_color = getStringField("route_color", false); r.route_text_color = getStringField("route_text_color", false); r.route_branding_url = 
getUrlField("route_branding_url", false); + r.continuous_pickup = getIntField("continuous_pickup", true, 0, 3); + r.continuous_drop_off = getIntField("continuous_drop_off", true, 0, 3); r.feed = feed; r.feed_id = feed.feedId; // Attempting to put a null key or value will cause an NPE in BTreeMap @@ -130,6 +136,8 @@ public void writeHeaders() throws IOException { writeStringField("route_text_color"); writeStringField("route_branding_url"); writeStringField("route_sort_order"); + writeStringField("continuous_pickup"); + writeStringField("continuous_drop_off"); endRecord(); } @@ -146,6 +154,8 @@ public void writeOneRow(Route r) throws IOException { writeStringField(r.route_text_color); writeUrlField(r.route_branding_url); writeIntField(r.route_sort_order); + writeIntField(r.continuous_pickup); + writeIntField(r.continuous_drop_off); endRecord(); } diff --git a/src/main/java/com/conveyal/gtfs/model/StopTime.java b/src/main/java/com/conveyal/gtfs/model/StopTime.java index d69078e34..3bf355b37 100644 --- a/src/main/java/com/conveyal/gtfs/model/StopTime.java +++ b/src/main/java/com/conveyal/gtfs/model/StopTime.java @@ -26,6 +26,8 @@ public class StopTime extends Entity implements Cloneable, Serializable { public String stop_headsign; public int pickup_type; public int drop_off_type; + public int continuous_pickup = INT_MISSING; + public int continuous_drop_off = INT_MISSING; public double shape_dist_traveled = DOUBLE_MISSING; public int timepoint = INT_MISSING; @@ -55,6 +57,8 @@ public void setStatementParameters(PreparedStatement statement, boolean setDefau statement.setString(oneBasedIndex++, stop_headsign); setIntParameter(statement, oneBasedIndex++, pickup_type); setIntParameter(statement, oneBasedIndex++, drop_off_type); + setIntParameter(statement, oneBasedIndex++, continuous_pickup); + setIntParameter(statement, oneBasedIndex++, continuous_drop_off); statement.setDouble(oneBasedIndex++, shape_dist_traveled); setIntParameter(statement, oneBasedIndex++, timepoint); }
@@ -84,6 +88,8 @@ public void loadOneRow() throws IOException { st.stop_headsign = getStringField("stop_headsign", false); st.pickup_type = getIntField("pickup_type", false, 0, 3); // TODO add ranges as parameters st.drop_off_type = getIntField("drop_off_type", false, 0, 3); + st.continuous_pickup = getIntField("continuous_pickup", true, 0, 3); + st.continuous_drop_off = getIntField("continuous_drop_off", true, 0, 3); st.shape_dist_traveled = getDoubleField("shape_dist_traveled", false, 0D, Double.MAX_VALUE); // FIXME using both 0 and NaN for "missing", define DOUBLE_MISSING st.timepoint = getIntField("timepoint", false, 0, 1, INT_MISSING); st.feed = null; // this could circular-serialize the whole feed @@ -107,7 +113,7 @@ public Writer (GTFSFeed feed) { @Override protected void writeHeaders() throws IOException { writer.writeRecord(new String[] {"trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence", "stop_headsign", - "pickup_type", "drop_off_type", "shape_dist_traveled", "timepoint"}); + "pickup_type", "drop_off_type", "continuous_pickup", "continuous_drop_off", "shape_dist_traveled", "timepoint"}); } @Override @@ -120,6 +126,8 @@ protected void writeOneRow(StopTime st) throws IOException { writeStringField(st.stop_headsign); writeIntField(st.pickup_type); writeIntField(st.drop_off_type); + writeIntField(st.continuous_pickup); + writeIntField(st.continuous_drop_off); writeDoubleField(st.shape_dist_traveled); writeIntField(st.timepoint); endRecord(); diff --git a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java index 72a969748..a56e84db5 100644 --- a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java @@ -114,6 +114,8 @@ private boolean fixInitialFinal (StopTime stopTime) { registerError(stopTime, MISSING_ARRIVAL_OR_DEPARTURE); fixMissingTimes(stopTime); if
(missingEitherTime(stopTime)) { + //TODO: Is this even needed? Already covered by MISSING_ARRIVAL_OR_DEPARTURE. + registerError(stopTime, CONDITIONALLY_REQUIRED, "First and last stop times are conditionally required to have both an arrival and departure time."); return true; } } @@ -135,20 +137,27 @@ private void processTrip (List stopTimes) { // This error should already have been caught TODO verify. return; } - boolean hasContinuousBehavior = false; // Our code should only call this method with non-null stopTimes. if (stopTimes.size() < 2) { registerError(trip, TRIP_TOO_FEW_STOP_TIMES); return; } + boolean hasContinuousBehavior = false; // Make a parallel list of stops based on the stop_times for this trip. // We will remove any stop_times for stops that don't exist in the feed. // We could ask the SQL server to do the join between stop_times and stops, but we want to check references. List stops = new ArrayList<>(); for (Iterator it = stopTimes.iterator(); it.hasNext(); ) { StopTime stopTime = it.next(); - if (stopTime.continuous == 0,2,3) { + if ( + stopTime.continuous_drop_off == 0 || + stopTime.continuous_drop_off == 2 || + stopTime.continuous_drop_off == 3 || + stopTime.continuous_pickup == 0 || + stopTime.continuous_pickup == 2 || + stopTime.continuous_pickup == 3 + ) { hasContinuousBehavior = true; } Stop stop = stopById.get(stopTime.stop_id); @@ -171,6 +180,19 @@ private void processTrip (List stopTimes) { // All bad references should have been recorded at import and null trip check is handled above, we can just // ignore nulls. 
Route route = routeById.get(trip.route_id); + if (route != null && + (route.continuous_drop_off == 0 || + route.continuous_drop_off == 2 || + route.continuous_drop_off == 3 || + route.continuous_pickup == 0 || + route.continuous_pickup == 2 || + route.continuous_pickup == 3) + ) { + hasContinuousBehavior = true; + } + if (hasContinuousBehavior && trip.shape_id == null) { + registerError(trip, CONDITIONALLY_REQUIRED, "shape_id is conditionally required when a trip has continuous behavior defined."); + } // Pass these same cleaned lists of stop_times and stops into each trip validator in turn. for (TripValidator tripValidator : tripValidators) tripValidator.validateTrip(trip, route, stopTimes, stops); } @@ -185,5 +207,4 @@ public void complete (ValidationResult validationResult) { LOG.info("{} finished", tripValidator.getClass().getSimpleName()); } } - } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 97017875a..d0902d7ec 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -11,6 +11,7 @@ import java.io.IOException; import static com.conveyal.gtfs.GTFS.load; +import static com.conveyal.gtfs.GTFS.validate; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; @@ -30,6 +31,7 @@ public static void setUpClass() throws IOException { String zipFileName = TestUtils.zipFolderFiles("real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks", true); FeedLoadResult feedLoadResult = load(zipFileName, testDataSource); testNamespace = feedLoadResult.uniqueIdentifier; + validate(testNamespace, testDataSource); } @AfterAll @@ -62,6 +64,31 @@ public void 
stopTableMissingConditionallyRequiredZoneId() { checkFeedHasOneError(REFERENTIAL_INTEGRITY, "stop_id:1"); } + @Test + public void agencyTableMissingConditionallyRequiredAgencyId() { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Agency","3", "agency_id is conditionally required when there is more than one agency."); + } + + @Test + public void tripTableMissingConditionallyRequiredShapeId() { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Trip","2", "1","shape_id is conditionally required when a trip has continuous behavior defined."); + } + + @Test + public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "StopTime","10", "1","First and last stop times are conditionally required to have both an arrival and departure time."); + } + + @Test + public void routeTableMissingConditionallyRequiredAgencyId() { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Route","2", "21","agency_id is conditionally required when there is more than one agency."); + } + + @Test + public void fareAttributeTableMissingConditionallyRequiredAgencyId() { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareAttribute","2", "1","agency_id is conditionally required when there is more than one agency."); + } + /** * Check that the test feed has exactly one error for the provided values. */ @@ -78,6 +105,21 @@ private void checkFeedHasOneError(NewGTFSErrorType errorType, String entityType, 1); } + /** + * Check that the test feed has exactly one error for the provided values. 
+ */ + private void checkFeedHasOneError(NewGTFSErrorType errorType, String entityType, String lineNumber, String badValue) { + assertThatSqlCountQueryYieldsExpectedCount( + testDataSource, + String.format("select count(*) from %s.errors where error_type = '%s' and entity_type = '%s' and line_number = '%s' and bad_value = '%s'", + testNamespace, + errorType, + entityType, + lineNumber, + badValue), + 1); + } + /** * Check that the test feed has exactly one error for the given error type and badValue. */ diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt index b758beb62..71811c1f9 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/agency.txt @@ -1,2 +1,5 @@ agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email -VTA,VTA,https://www.vta.org,America/Los_Angeles,EN,408-321-2300,https://www.vta.org/go/fares,customer.service@vta.org +,VTA,https://www.vta.org,America/Los_Angeles,EN,408-321-2300,https://www.vta.org/go/fares,customer.service@vta.org +VTA2,VTA2,https://www.vta.org,America/Los_Angeles,EN,408-321-2300,https://www.vta.org/go/fares,customer.service@vta.org +VTA3,VTA3,https://www.vta.org,America/Los_Angeles,EN,408-321-2300,https://www.vta.org/go/fares,customer.service@vta.org + diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt index 4f13201f0..5ccbf0e80 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_attributes.txt @@ 
-1,2 +1,2 @@ -fare_id,price,currency_type,payment_method,transfers,transfer_duration -1,2.50000000,USD,0,0, \ No newline at end of file +fare_id,price,currency_type,payment_method,transfers,agency_id,transfer_duration +1,2.50000000,USD,0,,, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt index c7f99008f..250682bcd 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/routes.txt @@ -1,2 +1,2 @@ -route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_sort_order,ext_route_type -21,VTA,21,Stanford Shopping Center - Santa Clara Transit Center,,3,https://www.vta.org/go/routes/21,29588c,FFFFFF,21,704 \ No newline at end of file +route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_sort_order,ext_route_type,continuous_pickup,continuous_drop_off +21,,21,Stanford Shopping Center - Santa Clara Transit Center,,3,https://www.vta.org/go/routes/21,29588c,FFFFFF,21,704,, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt index 8a875cb93..84e00fde9 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stop_times.txt @@ -1,26 +1,26 @@ -trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint -1,05:43:00,05:43:00,4957,1,,0,0,,1 
-1,05:44:00,05:44:00,691,2,,0,0,0.58999997,0 -1,05:45:00,05:45:00,692,3,,0,0,0.96569997,0 -1,05:46:00,05:46:00,1266,4,,0,0,1.14470005,0 -1,05:47:00,05:47:00,1267,5,,0,0,1.92729998,0 -1,05:48:00,05:48:00,1268,6,,0,0,2.28160000,0 -1,05:49:00,05:49:00,1542,7,,0,0,2.72530007,0 -1,05:50:00,05:50:00,1543,8,,0,0,3.23429990,0 -1,05:51:00,05:51:00,1544,9,,0,0,3.59170008,1 -2,05:52:00,05:52:00,1545,1,,0,0,3.88010001,0 -2,,,1546,2,,0,0,4.32210016,0 -2,,,1547,3,,0,0,4.82560015,0 -2,05:55:00,05:55:00,1548,4,,0,0,5.09070015,0 -3,05:55:00,05:55:00,1550,1,,0,0,5.59749985,0 -3,05:56:00,05:56:00,1562,2,,0,0,7.09219980,1 -4,05:55:00,05:55:00,1550,1,,0,0,5.59749985,0 -4,,,1558,2,,0,0,8.34879971,0 -4,,,1559,3,,0,0,8.68850040,0 -4,05:56:00,05:56:00,1562,4,,0,0,5.59749985,0 -5,00:00:00,00:00:00,4957,1,,0,0,,1 -5,01:00:00,01:00:00,1558,2,,0,0,8.34879971,0 -5,23:59:00,23:59:00,1562,3,,0,0,9.39290047,0 -6,00:00:00,00:00:00,4957,1,,0,0,,1 -6,,,1558,2,,0,0,8.34879971,0 -6,23:59:00,23:59:00,1562,3,,0,0,9.39290047,0 \ No newline at end of file +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint +1,05:43:00,05:43:00,4957,1,,0,0,0,,,1 +1,05:44:00,05:44:00,691,2,,0,0,0,,0.58999997,0 +1,05:45:00,05:45:00,692,3,,0,0,0,,0.96569997,0 +1,05:46:00,05:46:00,1266,4,,0,0,0,,1.14470005,0 +1,05:47:00,05:47:00,1267,5,,0,0,0,,1.92729998,0 +1,05:48:00,05:48:00,1268,6,,0,0,0,,2.28160000,0 +1,05:49:00,05:49:00,1542,7,,0,0,0,,2.72530007,0 +1,05:50:00,05:50:00,1543,8,,0,0,0,,3.23429990,0 +1,,,1544,9,,0,0,0,,3.59170008,1 +2,05:52:00,05:52:00,1545,1,,0,0,,,3.88010001,0 +2,,,1546,2,,0,0,,,4.32210016,0 +2,,,1547,3,,0,0,,,4.82560015,0 +2,05:55:00,05:55:00,1548,4,,0,0,,,5.09070015,0 +3,05:55:00,05:55:00,1550,1,,0,0,,,5.59749985,0 +3,05:56:00,05:56:00,1562,2,,0,0,,,7.09219980,1 +4,05:55:00,05:55:00,1550,1,,0,0,,,5.59749985,0 +4,,,1558,2,,0,0,,,8.34879971,0 +4,,,1559,3,,0,0,,,8.68850040,0 
+4,05:56:00,05:56:00,1562,4,,0,0,,,5.59749985,0 +5,00:00:00,00:00:00,4957,1,,0,0,,,,1 +5,01:00:00,01:00:00,1558,2,,0,0,,,8.34879971,0 +5,23:59:00,23:59:00,1562,3,,0,0,,,9.39290047,0 +6,00:00:00,00:00:00,4957,1,,0,0,,,,1 +6,,,1558,2,,0,0,,,8.34879971,0 +6,23:59:00,23:59:00,1562,3,,0,0,,,9.39290047,0 \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt index 0e7afa8b6..eac861720 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/trips.txt @@ -1,5 +1,5 @@ route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed -21,1,1,PALO ALTO TRANSIT CTR 1,,1,2145,101395,0,0 +21,1,1,PALO ALTO TRANSIT CTR 1,,1,2145,,0,0 21,1,2,PALO ALTO TRANSIT CTR 2,,1,2145,101395,0,0 22,1,3,PALO ALTO TRANSIT CTR 3,,1,2145,101395,0,0 23,1,4,PALO ALTO TRANSIT CTR 4,,1,2145,101395,0,0 From 88219fab4cfcc7585623d63b2fcd3f8a018292ef Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Wed, 28 Apr 2021 15:30:36 +0100 Subject: [PATCH 14/45] refactor(Added foreign field reference check): Update to check foreign field referential integrity f --- .../gtfs/loader/ConditionalCheckType.java | 1 + .../gtfs/loader/ConditionalRequirement.java | 17 +++-- .../java/com/conveyal/gtfs/loader/Field.java | 10 +++ .../gtfs/loader/ReferenceTracker.java | 63 ++++++++----------- .../java/com/conveyal/gtfs/loader/Table.java | 36 ++++++----- .../java/com/conveyal/gtfs/dto/RouteDTO.java | 2 + .../com/conveyal/gtfs/dto/StopTimeDTO.java | 2 + .../loader/ConditionallyRequiredTest.java | 19 +----- .../fare_rules.txt | 2 +- .../stops.txt | 2 +- 10 files changed, 80 insertions(+), 74 deletions(-) diff --git 
a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java index 19028c092..a26eb742f 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java @@ -7,5 +7,6 @@ public enum ConditionalCheckType { FIELD_NOT_EMPTY, FIELD_IN_RANGE, + FOREIGN_FIELD_VALUE_MATCH, ROW_COUNT_GREATER_THAN_ONE } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 4a0970f90..52cdb3387 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -20,27 +20,32 @@ public ConditionalRequirement( int minReferenceValue, int maxReferenceValue, String conditionalFieldName, - ConditionalCheckType conditionalCheck + ConditionalCheckType conditionalCheck, + ConditionalCheckType referenceCheck ) { - this.referenceCheck = ConditionalCheckType.FIELD_IN_RANGE; this.minReferenceValue = minReferenceValue; this.maxReferenceValue = maxReferenceValue; this.conditionalFieldName = conditionalFieldName; this.conditionalCheck = conditionalCheck; + this.referenceCheck = referenceCheck; } - public ConditionalRequirement(String conditionalFieldName) { - this.referenceCheck = ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; + public ConditionalRequirement( + String conditionalFieldName, + ConditionalCheckType referenceCheck + ) { + this.referenceCheck = referenceCheck; this.conditionalFieldName = conditionalFieldName; } public ConditionalRequirement( String conditionalFieldName, - ConditionalCheckType conditionalCheck + ConditionalCheckType conditionalCheck, + ConditionalCheckType referenceCheck ) { - this.referenceCheck = ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; this.conditionalFieldName = conditionalFieldName; 
this.conditionalCheck = conditionalCheck; + this.referenceCheck = referenceCheck; } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index 0e9d54fb5..fe9113cc4 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -49,6 +49,7 @@ public abstract class Field { private boolean shouldBeIndexed; private boolean emptyValuePermitted; private boolean isConditionallyRequired; + private boolean isForeignFieldReference; public ConditionalRequirement[] conditions; public Field(String name, Requirement requirement) { @@ -204,4 +205,13 @@ public Field requireConditions(ConditionalRequirement...conditions) { public boolean isConditionallyRequired() { return isConditionallyRequired; } + + public Field foreignFieldReference() { + isForeignFieldReference = true; + return this; + } + + public boolean isForeignFieldReference() { + return isForeignFieldReference; + } } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 25aa6aa4e..2a605b5bc 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -1,13 +1,10 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; -import org.apache.commons.lang3.math.NumberUtils; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Map; -import java.util.Optional; import java.util.Set; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; @@ -15,6 +12,7 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static 
com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_FIELD_VALUE_MATCH; import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; @@ -30,6 +28,7 @@ */ public class ReferenceTracker { public final Set transitIds = new HashSet<>(); + public final Set foreignFieldIds = new HashSet<>(); public final Set transitIdsWithSequence = new HashSet<>(); /** @@ -71,6 +70,11 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN : !table.hasUniqueKeyField ? null : keyField; String transitId = String.join(":", keyField, keyValue); + // Field value is required for referential integrity checks as part of conditionally required checks. + if (!"".equals(value) && field.isForeignFieldReference()) { + foreignFieldIds.add(String.join(":", field.name, value)); + } + // If the field is optional and there is no value present, skip check. if (!field.isRequired() && "".equals(value)) return Collections.emptySet(); @@ -180,7 +184,8 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f transitIds.stream().filter(transitId -> transitId.contains("agency_id")).count() > 1 && check.conditionalCheck == FIELD_NOT_EMPTY && POSTGRES_NULL_TEXT.equals(conditionalFieldData)) { - // FIXME: This doesn't work if only one agency_id is defined in the agency table. + // FIXME: This doesn't work if only one agency_id is defined in the agency table. e.g. 2 rows of + // data, but the first doesn't define an agency_id. 
String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); String message = String.format( "%s is conditionally required when there is more than one agency.", @@ -215,6 +220,24 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) ); } + } else if (check.referenceCheck == FOREIGN_FIELD_VALUE_MATCH) { + String foreignFieldReference = String.join(":", check.conditionalFieldName, refFieldData); + if (table.name.equals("fare_rules") && + !POSTGRES_NULL_TEXT.equals(refFieldData) && + foreignFieldIds.stream().noneMatch(id -> id.contains(foreignFieldReference)) + ) { + String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); + String message = String.format( + "%s %s is conditionally required in stops when referenced by %s in %s.", + check.conditionalFieldName, + refFieldData, + referenceField.name, + table.name + ); + errors.add( + NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) + ); + } } } } @@ -242,36 +265,4 @@ private boolean isValueInRange(String referenceFieldValue, int min, int max) { return false; } } - - /** - * Checks if the provided field value is empty. If the value can be converted to either a double or int and these - * match the minimum value it is assumed these are empty. 
- */ - private boolean isEmpty(String str) { - // Text values - if (str == null || str.isEmpty()) { - return true; - } - - // Number values - if (NumberUtils.isParsable(str)) { - try { - double dValue = Double.parseDouble(str); - if (dValue == Double.MIN_VALUE) { - return true; - } - int iValue = Integer.parseInt(str); - if (iValue == Integer.MIN_VALUE) { - return true; - } - int sValue = Short.parseShort(str); - if (sValue == Short.MIN_VALUE) { - return true; - } - } catch (NumberFormatException e) { - return false; - } - } - return false; - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 596cf45c6..cac4b1e5b 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -49,7 +49,9 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_FIELD_VALUE_MATCH; import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; @@ -101,7 +103,7 @@ public Table (String name, Class entityClass, Requirement requ public static final Table AGENCY = new Table("agency", Agency.class, REQUIRED, new StringField("agency_id", OPTIONAL).requireConditions( - new ConditionalRequirement("agency_id") + new ConditionalRequirement("agency_id", ROW_COUNT_GREATER_THAN_ONE) ), new StringField("agency_name", REQUIRED), new URLField("agency_url", REQUIRED), @@ -152,7 +154,7 @@ public Table (String name, Class entityClass, Requirement requ new ShortField("payment_method", REQUIRED, 1), new 
ShortField("transfers", REQUIRED, 2).permitEmptyValue(), new StringField("agency_id", OPTIONAL).requireConditions( - new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY) + new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -178,7 +180,7 @@ public Table (String name, Class entityClass, Requirement requ public static final Table ROUTES = new Table("routes", Route.class, REQUIRED, new StringField("route_id", REQUIRED), new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( - new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY) + new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) ), new StringField("route_short_name", OPTIONAL), // one of short or long must be provided new StringField("route_long_name", OPTIONAL), @@ -232,13 +234,13 @@ public Table (String name, Class entityClass, Requirement requ new StringField("stop_desc", OPTIONAL), new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).requireConditions(), new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).requireConditions(), - new StringField("zone_id", OPTIONAL), + new StringField("zone_id", OPTIONAL).foreignFieldReference(), new URLField("stop_url", OPTIONAL), new ShortField("location_type", OPTIONAL, 4).requireConditions( - new ConditionalRequirement( 0, 2, "stop_name", FIELD_NOT_EMPTY), - new ConditionalRequirement( 0, 2, "stop_lat", FIELD_NOT_EMPTY), - new ConditionalRequirement( 0, 2, "stop_lon", FIELD_NOT_EMPTY), - new ConditionalRequirement( 2, 4, "parent_station", FIELD_NOT_EMPTY) + new ConditionalRequirement( 0, 2, "stop_name", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new ConditionalRequirement( 0, 2, "stop_lat", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new ConditionalRequirement( 0, 2, "stop_lon", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new ConditionalRequirement( 2, 4, "parent_station", FIELD_NOT_EMPTY, FIELD_IN_RANGE) ), new 
StringField("parent_station", OPTIONAL).requireConditions(), new StringField("stop_timezone", OPTIONAL), @@ -250,12 +252,18 @@ public Table (String name, Class entityClass, Requirement requ public static final Table FARE_RULES = new Table("fare_rules", FareRule.class, OPTIONAL, new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), - // FIXME: referential integrity check for zone_id for below three fields? - new StringField("origin_id", OPTIONAL).isReferenceTo(STOPS), - new StringField("destination_id", OPTIONAL).isReferenceTo(STOPS), - new StringField("contains_id", OPTIONAL).isReferenceTo(STOPS)) - .withParentTable(FARE_ATTRIBUTES) - .addPrimaryKey().keyFieldIsNotUnique(); + new StringField("origin_id", OPTIONAL).requireConditions( + new ConditionalRequirement( "zone_id", FOREIGN_FIELD_VALUE_MATCH) + ), + new StringField("destination_id", OPTIONAL).requireConditions( + new ConditionalRequirement( "zone_id", FOREIGN_FIELD_VALUE_MATCH) + ), + new StringField("contains_id", OPTIONAL).requireConditions( + new ConditionalRequirement( "zone_id", FOREIGN_FIELD_VALUE_MATCH) + ) + ) + .withParentTable(FARE_ATTRIBUTES) + .addPrimaryKey().keyFieldIsNotUnique(); public static final Table PATTERN_STOP = new Table("pattern_stops", PatternStop.class, OPTIONAL, new StringField("pattern_id", REQUIRED).isReferenceTo(PATTERNS), diff --git a/src/test/java/com/conveyal/gtfs/dto/RouteDTO.java b/src/test/java/com/conveyal/gtfs/dto/RouteDTO.java index 0ea408213..ec15aca45 100644 --- a/src/test/java/com/conveyal/gtfs/dto/RouteDTO.java +++ b/src/test/java/com/conveyal/gtfs/dto/RouteDTO.java @@ -24,4 +24,6 @@ public class RouteDTO { /** This field is incorrectly set to String in order to test how empty string literals are persisted to the database. 
*/ public String route_sort_order; public Integer status; + public int continuous_pickup; + public int continuous_drop_off; } diff --git a/src/test/java/com/conveyal/gtfs/dto/StopTimeDTO.java b/src/test/java/com/conveyal/gtfs/dto/StopTimeDTO.java index 9e385e3ca..7941c68ee 100644 --- a/src/test/java/com/conveyal/gtfs/dto/StopTimeDTO.java +++ b/src/test/java/com/conveyal/gtfs/dto/StopTimeDTO.java @@ -11,6 +11,8 @@ public class StopTimeDTO { public Integer drop_off_type; public Integer pickup_type; public Double shape_dist_traveled; + public int continuous_pickup; + public int continuous_drop_off; /** * Empty constructor for deserialization diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index d0902d7ec..4375920bf 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -7,14 +7,12 @@ import org.junit.jupiter.api.Test; import javax.sql.DataSource; - import java.io.IOException; import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.GTFS.validate; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; -import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; public class ConditionallyRequiredTest { private static String testDBName; @@ -61,7 +59,9 @@ public void stopTableMissingConditionallyRequiredStopLon() { @Test public void stopTableMissingConditionallyRequiredZoneId() { - checkFeedHasOneError(REFERENTIAL_INTEGRITY, "stop_id:1"); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareRule","3", "1", "zone_id 4 is conditionally required in stops when referenced by contains_id in fare_rules."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareRule","3", "1", "zone_id 3 is conditionally required in stops when 
referenced by destination_id in fare_rules."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareRule","3", "1", "zone_id 2 is conditionally required in stops when referenced by origin_id in fare_rules."); } @Test @@ -119,17 +119,4 @@ private void checkFeedHasOneError(NewGTFSErrorType errorType, String entityType, badValue), 1); } - - /** - * Check that the test feed has exactly one error for the given error type and badValue. - */ - private void checkFeedHasOneError(NewGTFSErrorType errorType, String badValue) { - assertThatSqlCountQueryYieldsExpectedCount( - testDataSource, - String.format("select count(*) from %s.errors where error_type = '%s' and bad_value = '%s'", - testNamespace, - errorType, - badValue), - 1); - } } diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt index 6597f36bc..5591a375d 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/fare_rules.txt @@ -1,3 +1,3 @@ fare_id,route_id,origin_id,destination_id,contains_id 1,21,1,, -1,22,,, \ No newline at end of file +1,22,2,3,4 \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt index f8eeac273..ecbf5ee72 100644 --- a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/stops.txt @@ -1,5 +1,5 @@ stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,wheelchair_boarding,platform_code,sign_dest -4957,64957,,Southbound,37.40048600,-122.10892700,,,0,1,1,, 
+4957,64957,,Southbound,37.40048600,-122.10892700,1,,0,1,1,, 691,60691,San Antonio & El Camino,Northbound,,-122.11319800,,,0,,1,, 692,60692,San Antonio & Miller,Northbound,37.40462900,,,,0,,1,, 1266,61266,San Antonio & California,Northbound,37.40607000,-122.11050500,,,3,,1,, From 507d3f1f6ae7e553cf9afd9219df06e40d098964 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Wed, 28 Apr 2021 15:54:39 +0100 Subject: [PATCH 15/45] refactor(GTFSTest.java): Fixed bug related to dropping DB schema --- src/test/java/com/conveyal/gtfs/GTFSTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/com/conveyal/gtfs/GTFSTest.java b/src/test/java/com/conveyal/gtfs/GTFSTest.java index 87a2a60d1..2932783bc 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSTest.java @@ -560,8 +560,8 @@ private boolean runIntegrationTestOnZipFile( ErrorExpectation[] errorExpectations, FeedValidatorCreator... customValidators ) { - String newDBName = TestUtils.generateNewDB(); - String dbConnectionUrl = String.join("/", JDBC_URL, newDBName); + String testDBName = TestUtils.generateNewDB(); + String dbConnectionUrl = String.join("/", JDBC_URL, testDBName); DataSource dataSource = TestUtils.createTestDataSource(dbConnectionUrl); String namespace; From ed4e6102cefe5e99c372f175f96cb7a3ea7debad Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Wed, 28 Apr 2021 16:41:43 +0100 Subject: [PATCH 16/45] refactor(ReferenceTracker.java): Update to log the correct GTFS error type. 
--- src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java | 3 ++- .../com/conveyal/gtfs/loader/ConditionallyRequiredTest.java | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 2a605b5bc..a38b1f3ef 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.Set; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; @@ -192,7 +193,7 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f check.conditionalFieldName ); errors.add( - NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) + NewGTFSError.forLine(table, lineNumber, AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, message).setEntityId(entityId) ); } } else if (check.referenceCheck == FIELD_IN_RANGE) { diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 4375920bf..bd16e8c8f 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -12,6 +12,7 @@ import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.GTFS.validate; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; import static 
com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; public class ConditionallyRequiredTest { @@ -81,12 +82,12 @@ public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Route","2", "21","agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "Route","2", "21","agency_id is conditionally required when there is more than one agency."); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareAttribute","2", "1","agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "FareAttribute","2", "1","agency_id is conditionally required when there is more than one agency."); } /** From 6fa3316257f4b49b835ffa6a09e162e6dd028968 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Tue, 4 May 2021 13:22:42 +0100 Subject: [PATCH 17/45] refactor(Addressed PR feedback): Applied suggested updates. 
--- pom.xml | 7 + .../gtfs/loader/ConditionalCheckType.java | 15 ++ .../gtfs/loader/ConditionalRequirement.java | 194 +++++++++++++++++- .../conveyal/gtfs/loader/EntityPopulator.java | 11 - .../java/com/conveyal/gtfs/loader/Field.java | 19 +- .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 26 ++- .../gtfs/loader/ReferenceTracker.java | 164 ++++++--------- .../java/com/conveyal/gtfs/loader/Table.java | 98 +++++---- .../gtfs/validator/NewTripTimesValidator.java | 53 +++-- .../loader/ConditionallyRequiredTest.java | 22 +- 10 files changed, 409 insertions(+), 200 deletions(-) diff --git a/pom.xml b/pom.xml index bb9da873b..bd15efaa8 100644 --- a/pom.xml +++ b/pom.xml @@ -234,6 +234,13 @@ 5.7.0 test + + + org.junit.jupiter + junit-jupiter-params + 5.6.2 + test + com.beust diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java index a26eb742f..8ec57e371 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java @@ -5,8 +5,23 @@ * {@link ConditionalRequirement}. */ public enum ConditionalCheckType { + /** + * The conditionally required field value must not be empty. This is used in conjunction with + * {@link ConditionalCheckType#FIELD_IN_RANGE}. E.g. if the reference field is within a specified range, the + * conditionally required field must not be empty. + */ FIELD_NOT_EMPTY, + /** + * The reference field value must be within a specified range. + */ FIELD_IN_RANGE, + /** + * The reference field value must be available in order to match the conditionally required field value. + */ FOREIGN_FIELD_VALUE_MATCH, + /** + * If the reference table row count is greater than one, the conditionally required field values must not be empty. + * This is used in conjunction with {@link ConditionalCheckType#FIELD_NOT_EMPTY}. 
+ */ ROW_COUNT_GREATER_THAN_ONE } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 52cdb3387..428054820 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -1,17 +1,29 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; + +import java.util.HashSet; +import java.util.Set; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; + /** * These are the values that are checked inline with {@link ConditionalCheckType} to determine if the required * conditions have been met. */ public class ConditionalRequirement { - /** The type of check to be performed on the reference field. */ + /** The type of check to be performed on a reference field. A reference field value is used to determine whether or + * not a conditional field is required. */ public ConditionalCheckType referenceCheck; /** The minimum reference field value if a range check is being performed. */ public int minReferenceValue; /** The maximum reference field value if a range check is being performed. */ public int maxReferenceValue; - /** The type of check to be performed on the conditional field. */ + /** The type of check to be performed on a conditional field. A conditional field is one that may require a value + * if the reference and conditional checks met certain conditions. */ public ConditionalCheckType conditionalCheck; /** The name of the conditional field. 
*/ public String conditionalFieldName; @@ -35,8 +47,7 @@ public ConditionalRequirement( String conditionalFieldName, ConditionalCheckType referenceCheck ) { - this.referenceCheck = referenceCheck; - this.conditionalFieldName = conditionalFieldName; + this(0,0, conditionalFieldName, null, referenceCheck); } public ConditionalRequirement( @@ -44,8 +55,177 @@ public ConditionalRequirement( ConditionalCheckType conditionalCheck, ConditionalCheckType referenceCheck ) { - this.conditionalFieldName = conditionalFieldName; - this.conditionalCheck = conditionalCheck; - this.referenceCheck = referenceCheck; + this(0,0, conditionalFieldName, conditionalCheck, referenceCheck); + } + + /** + * Flag an error if the number of rows in the agency table is greater than one and the agency_id has not been defined + * for each row. + */ + public static Set checkRowCountGreaterThanOne( + Table table, + int lineNumber, + Set transitIds, + ConditionalRequirement check, + String conditionalFieldValue, + String entityId + ) { + Set errors = new HashSet<>(); + if (table.name.equals("agency") && + lineNumber > 2 && + transitIds + .stream() + .filter(transitId -> transitId.contains("agency_id")) + .count() != lineNumber - 1 + ) { + // The check on the agency table is carried out whilst the agency table is being loaded so it + // is possible to compare the number of transitIds added against the number of rows loaded to + // accurately determine missing agency_id values. 
+ String message = String.format( + "%s is conditionally required when there is more than one agency.", + check.conditionalFieldName + ); + errors.add( + NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message) + ); + } else if ( + ( + table.name.equals("routes") || + table.name.equals("fare_attributes") + ) && + transitIds + .stream() + .filter(transitId -> transitId.contains("agency_id")) + .count() > 1 && + check.conditionalCheck == FIELD_NOT_EMPTY && + POSTGRES_NULL_TEXT.equals(conditionalFieldValue) + ) { + // By this point the agency table has already been loaded, therefore, if the number of agency_id + // transitIds is greater than one it is assumed more than one agency has been provided. + // FIXME: This doesn't work if only one agency_id is defined in the agency table. e.g. 2 rows of + // data, but the first doesn't define an agency_id. + String message = String.format( + "%s is conditionally required when there is more than one agency.", + check.conditionalFieldName + ); + errors.add( + NewGTFSError.forLine( + table, + lineNumber, + AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, + message).setEntityId(entityId) + ); + } + return errors; } + + /** + * If the reference field value is within a defined range and the conditional field value has not be defined, flag + * an error. + */ + public static Set checkFieldInRange( + Table table, + int lineNumber, + Field referenceField, + ConditionalRequirement check, + String referenceFieldValue, + String conditionalFieldValue, + String entityId + ) { + Set errors = new HashSet<>(); + + boolean referenceValueMeetsRangeCondition = + !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && + // TODO use pre-existing method in ShortField? + isValueInRange(referenceFieldValue, check.minReferenceValue, check.maxReferenceValue); + + if (!referenceValueMeetsRangeCondition) { + // If ref value does not meet the range condition, there is no need to check this conditional + // value for (e.g.) 
an empty value. Continue to the next check. + return errors; + } + boolean conditionallyRequiredValueIsEmpty = + check.conditionalCheck == FIELD_NOT_EMPTY && + POSTGRES_NULL_TEXT.equals(conditionalFieldValue); + + if (conditionallyRequiredValueIsEmpty) { + // Reference value in range and conditionally required field is empty. + String message = String.format( + "%s is conditionally required when %s value is between %d and %d.", + check.conditionalFieldName, + referenceField.name, + check.minReferenceValue, + check.maxReferenceValue + ); + errors.add( + NewGTFSError.forLine( + table, + lineNumber, + CONDITIONALLY_REQUIRED, + message).setEntityId(entityId) + ); + } + return errors; + } + + /** + * Check that an expected foreign field value matches a conditional field value. Selected foreign field values are + * added to {@link ReferenceTracker#foreignFieldIds} as part of the load process and are used here to check + * conditional fields which have a dependency on them. + */ + public static Set checkForeignFieldValueMatch( + Table table, + int lineNumber, + Field referenceField, + ConditionalRequirement check, + String referenceFieldValue, + Set foreignFieldIds, + String entityId + ) { + Set errors = new HashSet<>(); + // Expected reference in foreign field id list. + String foreignFieldReference = + String.join( + ":", + check.conditionalFieldName, + referenceFieldValue + ); + if (table.name.equals("fare_rules") && + !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && + foreignFieldIds + .stream() + .noneMatch(id -> id.contains(foreignFieldReference)) + ) { + // The foreign key reference required by fields in fare rules is not available in stops. 
+ String message = String.format( + "%s %s is conditionally required in stops when referenced by %s in %s.", + check.conditionalFieldName, + referenceFieldValue, + referenceField.name, + table.name + ); + errors.add( + NewGTFSError.forLine( + table, + lineNumber, + CONDITIONALLY_REQUIRED, + message).setEntityId(entityId) + ); + } + return errors; + } + + /** + * Check if the provided value is within the min and max values. If the field value can not be converted + * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. + */ + private static boolean isValueInRange(String referenceFieldValue, int min, int max) { + try { + int fieldValue = Integer.parseInt(referenceFieldValue); + return fieldValue >= min && fieldValue <= max; + } catch (NumberFormatException e) { + return false; + } + } + } diff --git a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java index aa45a75bc..8de1a1cfa 100644 --- a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java +++ b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java @@ -5,7 +5,6 @@ import com.conveyal.gtfs.model.CalendarDate; import com.conveyal.gtfs.model.Entity; import com.conveyal.gtfs.model.FareAttribute; -import com.conveyal.gtfs.model.FareRule; import com.conveyal.gtfs.model.Frequency; import com.conveyal.gtfs.model.PatternStop; import com.conveyal.gtfs.model.Route; @@ -118,16 +117,6 @@ public interface EntityPopulator { return fareAttribute; }; - EntityPopulator FARE_RULE = (result, columnForName) -> { - FareRule fareRule = new FareRule(); - fareRule.fare_id = getStringIfPresent(result, "fare_id", columnForName); - fareRule.route_id = getStringIfPresent(result, "route_id", columnForName); - fareRule.origin_id = getStringIfPresent(result, "origin_id", columnForName); - fareRule.destination_id = getStringIfPresent (result, "destination_id", columnForName); - fareRule.contains_id = 
getStringIfPresent (result, "contains_id", columnForName); - return fareRule; - }; - EntityPopulator FREQUENCY = (result, columnForName) -> { Frequency frequency = new Frequency(); frequency.trip_id = getStringIfPresent(result, "trip_id", columnForName); diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index fe9113cc4..898efaa98 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -49,7 +49,7 @@ public abstract class Field { private boolean shouldBeIndexed; private boolean emptyValuePermitted; private boolean isConditionallyRequired; - private boolean isForeignFieldReference; + private boolean isForeign; public ConditionalRequirement[] conditions; public Field(String name, Requirement requirement) { @@ -191,7 +191,7 @@ public String getColumnExpression(String prefix, boolean csvOutput) { } /** - * Flag this field as conditionally required. + * Flag this field as conditionally required. If needed an optional list of conditions can be provided. */ public Field requireConditions(ConditionalRequirement...conditions) { this.isConditionallyRequired = true; @@ -206,12 +206,19 @@ public boolean isConditionallyRequired() { return isConditionallyRequired; } - public Field foreignFieldReference() { - isForeignFieldReference = true; + /** + * Flag this field as a foreign reference. If flagged the field value is added to + * {@link ReferenceTracker#foreignFieldIds} to be used as a look-up for reference matches. + */ + public Field foreign() { + isForeign = true; return this; } - public boolean isForeignFieldReference() { - return isForeignFieldReference; + /** + * Indicates that this field is required as a foreign reference. 
+ */ + public boolean isForeign() { + return isForeign; } } diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index e0e701938..e077f23d3 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -523,9 +523,12 @@ public void setValueForField(Table table, int fieldIndex, int lineNumber, Field if (postgresText) { ValidateFieldResult result = field.validateAndConvert(string); // If the result is null, use the null-setting method. - if (result.clean == null) setFieldToNull(postgresText, transformedStrings, fieldIndex, field); + if (result.clean == null) { + setFieldToNull(postgresText, transformedStrings, fieldIndex, field); + } else { // Otherwise, set the cleaned field according to its index. - else transformedStrings[fieldIndex + 1] = result.clean; + transformedStrings[fieldIndex + 1] = result.clean; + } errors = result.errors; } else { errors = field.setParameter(insertStatement, fieldIndex + 2, string); @@ -551,15 +554,18 @@ public void setValueForField(Table table, int fieldIndex, int lineNumber, Field * Sets field to null in statement or string array depending on whether postgres is being used. */ private void setFieldToNull(boolean postgresText, String[] transformedStrings, int fieldIndex, Field field) { - if (postgresText) transformedStrings[fieldIndex + 1] = POSTGRES_NULL_TEXT; + if (postgresText) { + transformedStrings[fieldIndex + 1] = POSTGRES_NULL_TEXT; + } else { // Adjust parameter index by two: indexes are one-based and the first one is the CSV line number. - else try { - // LOG.info("setting {} index to null", fieldIndex + 2); - field.setNull(insertStatement, fieldIndex + 2); - } catch (SQLException e) { - e.printStackTrace(); - // FIXME: store error here? 
It appears that an exception should only be thrown if the type value is invalid, - // the connection is closed, or the index is out of bounds. So storing an error may be unnecessary. + try { + // LOG.info("setting {} index to null", fieldIndex + 2); + field.setNull(insertStatement, fieldIndex + 2); + } catch (SQLException e) { + e.printStackTrace(); + // FIXME: store error here? It appears that an exception should only be thrown if the type value is invalid, + // the connection is closed, or the index is out of bounds. So storing an error may be unnecessary. + } } } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index a38b1f3ef..1e16ea2bd 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -7,15 +7,8 @@ import java.util.Map; import java.util.Set; -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; -import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_FIELD_VALUE_MATCH; -import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; -import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; /** * This class is used while loading GTFS to track the unique keys that are encountered in a GTFS @@ -72,7 +65,7 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN String transitId = String.join(":", keyField, keyValue); // Field value is required for referential integrity checks as part of 
conditionally required checks. - if (!"".equals(value) && field.isForeignFieldReference()) { + if (!"".equals(value) && field.isForeign()) { foreignFieldIds.add(String.join(":", field.name, value)); } @@ -153,92 +146,82 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN /** - * Work through each conditionally required check assigned to a table. First check the reference field to confirm - * if it meets the conditions whereby the conditional field is required. If the conditional field is required confirm - * that a value has been provided, if not, log an error. + * Work through each conditionally required check assigned to fields within a table. First check the reference field + * to confirm if it meets the conditions whereby the conditional field is required. If the conditional field is + * required confirm that a value has been provided, if not, log an error. */ - public Set checkConditionallyRequiredFields(Table table, Field[] fields, String[] rowData, int lineNumber) { + public Set checkConditionallyRequiredFields( + Table table, + Field[] fields, + String[] rowData, + int lineNumber + ) { Set errors = new HashSet<>(); Map fieldsToCheck = table.getConditionalRequirements(); + + // Work through each field that has been assigned a conditional requirement. for (Map.Entry entry : fieldsToCheck.entrySet()) { Field referenceField = entry.getKey(); + // Extract reference field value from the row currently being processed. + String referenceFieldValue = + getValueForRow( + rowData, + Field.getFieldIndex(fields, referenceField.name) + ); + String entityId = + getValueForRow( + rowData, + table.getKeyFieldIndex(fields) + ); ConditionalRequirement[] conditionalRequirements = entry.getValue(); + + // Work through each field's conditional requirements. 
for (ConditionalRequirement check : conditionalRequirements) { - int refFieldIndex = Field.getFieldIndex(fields, referenceField.name); - String refFieldData = getValueForRow(rowData, refFieldIndex); - if (check.referenceCheck == ROW_COUNT_GREATER_THAN_ONE) { - int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); - String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); - if (table.name.equals("agency") && - lineNumber > 2 && - transitIds.stream().filter(transitId -> transitId.contains("agency_id")).count() != lineNumber - 1 - ) { - String message = String.format( - "%s is conditionally required when there is more than one agency.", - check.conditionalFieldName - ); - errors.add( - NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message) - ); - } else if ((table.name.equals("routes") - || table.name.equals("fare_attributes")) && - transitIds.stream().filter(transitId -> transitId.contains("agency_id")).count() > 1 && - check.conditionalCheck == FIELD_NOT_EMPTY && - POSTGRES_NULL_TEXT.equals(conditionalFieldData)) { - // FIXME: This doesn't work if only one agency_id is defined in the agency table. e.g. 2 rows of - // data, but the first doesn't define an agency_id. - String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); - String message = String.format( - "%s is conditionally required when there is more than one agency.", - check.conditionalFieldName - ); - errors.add( - NewGTFSError.forLine(table, lineNumber, AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, message).setEntityId(entityId) - ); - } - } else if (check.referenceCheck == FIELD_IN_RANGE) { - boolean referenceValueMeetsRangeCondition = - !POSTGRES_NULL_TEXT.equals(refFieldData) && - // TODO use pre-existing method in ShortField? 
- isValueInRange(refFieldData, check.minReferenceValue, check.maxReferenceValue); - // If ref value does not meet the range condition, there is no need to check this conditional value for - // (e.g.) an empty value. Continue to the next check. - if (!referenceValueMeetsRangeCondition) continue; - int conditionalFieldIndex = Field.getFieldIndex(fields, check.conditionalFieldName); - String conditionalFieldData = getValueForRow(rowData, conditionalFieldIndex); - boolean conditionallyRequiredValueIsEmpty = check.conditionalCheck == FIELD_NOT_EMPTY && - POSTGRES_NULL_TEXT.equals(conditionalFieldData); - if (conditionallyRequiredValueIsEmpty) { - String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); - String message = String.format( - "%s is conditionally required when %s value is between %d and %d.", - check.conditionalFieldName, - referenceField.name, - check.minReferenceValue, - check.maxReferenceValue + // Extract conditional field value from the row currently being processed. 
+ String conditionalFieldValue = + getValueForRow( + rowData, + Field.getFieldIndex(fields, check.conditionalFieldName) + ); + switch(check.referenceCheck) { + case ROW_COUNT_GREATER_THAN_ONE: + errors.addAll( + ConditionalRequirement.checkRowCountGreaterThanOne( + table, + lineNumber, + foreignFieldIds, + check, + conditionalFieldValue, + entityId + ) ); - errors.add( - NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) + break; + case FIELD_IN_RANGE: + errors.addAll( + ConditionalRequirement.checkFieldInRange( + table, + lineNumber, + referenceField, + check, + referenceFieldValue, + conditionalFieldValue, + entityId + ) ); - } - } else if (check.referenceCheck == FOREIGN_FIELD_VALUE_MATCH) { - String foreignFieldReference = String.join(":", check.conditionalFieldName, refFieldData); - if (table.name.equals("fare_rules") && - !POSTGRES_NULL_TEXT.equals(refFieldData) && - foreignFieldIds.stream().noneMatch(id -> id.contains(foreignFieldReference)) - ) { - String entityId = getValueForRow(rowData, table.getKeyFieldIndex(fields)); - String message = String.format( - "%s %s is conditionally required in stops when referenced by %s in %s.", - check.conditionalFieldName, - refFieldData, - referenceField.name, - table.name + break; + case FOREIGN_FIELD_VALUE_MATCH: + errors.addAll( + ConditionalRequirement.checkForeignFieldValueMatch( + table, + lineNumber, + referenceField, + check, + referenceFieldValue, + foreignFieldIds, + entityId + ) ); - errors.add( - NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message).setEntityId(entityId) - ); - } + break; } } } @@ -253,17 +236,4 @@ public Set checkConditionallyRequiredFields(Table table, Field[] f private String getValueForRow(String[] rowData, int columnIndex) { return rowData[columnIndex + 1]; } - - /** - * Check if the provided value is within the min and max values. 
If the field value can not be converted - * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. - */ - private boolean isValueInRange(String referenceFieldValue, int min, int max) { - try { - int fieldValue = Integer.parseInt(referenceFieldValue); - return fieldValue >= min && fieldValue <= max; - } catch (NumberFormatException e) { - return false; - } - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index cac4b1e5b..197fd79e1 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -103,16 +103,18 @@ public Table (String name, Class entityClass, Requirement requ public static final Table AGENCY = new Table("agency", Agency.class, REQUIRED, new StringField("agency_id", OPTIONAL).requireConditions( + // If there is more than one agency, the agency_id must be provided + // https://developers.google.com/transit/gtfs/reference#agencytxt new ConditionalRequirement("agency_id", ROW_COUNT_GREATER_THAN_ONE) - ), - new StringField("agency_name", REQUIRED), - new URLField("agency_url", REQUIRED), - new StringField("agency_timezone", REQUIRED), // FIXME new field type for time zones? + ).foreign(), + new StringField("agency_name", REQUIRED), + new URLField("agency_url", REQUIRED), + new StringField("agency_timezone", REQUIRED), // FIXME new field type for time zones? new StringField("agency_lang", OPTIONAL), // FIXME new field type for languages? - new StringField("agency_phone", OPTIONAL), - new URLField("agency_branding_url", OPTIONAL), - new URLField("agency_fare_url", OPTIONAL), - new StringField("agency_email", OPTIONAL) // FIXME new field type for emails? + new StringField("agency_phone", OPTIONAL), + new URLField("agency_branding_url", OPTIONAL), + new URLField("agency_fare_url", OPTIONAL), + new StringField("agency_email", OPTIONAL) // FIXME new field type for emails? 
).restrictDelete().addPrimaryKey(); // The GTFS spec says this table is required, but in practice it is not required if calendar_dates is present. @@ -154,7 +156,9 @@ public Table (String name, Class entityClass, Requirement requ new ShortField("payment_method", REQUIRED, 1), new ShortField("transfers", REQUIRED, 2).permitEmptyValue(), new StringField("agency_id", OPTIONAL).requireConditions( - new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) + // If there is more than one agency, this agency_id is required. + // https://developers.google.com/transit/gtfs/reference#fare_attributestxt + new ConditionalRequirement("agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -180,27 +184,29 @@ public Table (String name, Class entityClass, Requirement requ public static final Table ROUTES = new Table("routes", Route.class, REQUIRED, new StringField("route_id", REQUIRED), new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( - new ConditionalRequirement( "agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) + // If there is more than one agency, this agency_id is required. 
+ // https://developers.google.com/transit/gtfs/reference#routestxt + new ConditionalRequirement("agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) ), - new StringField("route_short_name", OPTIONAL), // one of short or long must be provided - new StringField("route_long_name", OPTIONAL), - new StringField("route_desc", OPTIONAL), + new StringField("route_short_name", OPTIONAL), // one of short or long must be provided + new StringField("route_long_name", OPTIONAL), + new StringField("route_desc", OPTIONAL), // Max route type according to the GTFS spec is 7; however, there is a GTFS proposal that could see this // max value grow to around 1800: https://groups.google.com/forum/#!msg/gtfs-changes/keT5rTPS7Y0/71uMz2l6ke0J new IntegerField("route_type", REQUIRED, 1800), - new URLField("route_url", OPTIONAL), - new URLField("route_branding_url", OPTIONAL), - new ColorField("route_color", OPTIONAL), // really this is an int in hex notation - new ColorField("route_text_color", OPTIONAL), + new URLField("route_url", OPTIONAL), + new URLField("route_branding_url", OPTIONAL), + new ColorField("route_color", OPTIONAL), // really this is an int in hex notation + new ColorField("route_text_color", OPTIONAL), // Editor fields below. new ShortField("publicly_visible", EDITOR, 1), // wheelchair_accessible is an exemplar field applied to all trips on a route. new ShortField("wheelchair_accessible", EDITOR, 2).permitEmptyValue(), new IntegerField("route_sort_order", OPTIONAL, 0, Integer.MAX_VALUE), // Status values are In progress (0), Pending approval (1), and Approved (2). 
- new ShortField("status", EDITOR, 2), - new ShortField("continuous_pickup", OPTIONAL, 3), - new ShortField("continuous_drop_off", OPTIONAL, 3) + new ShortField("status", EDITOR, 2), + new ShortField("continuous_pickup", OPTIONAL,3), + new ShortField("continuous_drop_off", OPTIONAL,3) ).addPrimaryKey(); public static final Table SHAPES = new Table("shapes", ShapePoint.class, OPTIONAL, @@ -228,22 +234,24 @@ public Table (String name, Class entityClass, Requirement requ ).addPrimaryKey(); public static final Table STOPS = new Table("stops", Stop.class, REQUIRED, - new StringField("stop_id", REQUIRED), - new StringField("stop_code", OPTIONAL), - new StringField("stop_name", OPTIONAL).requireConditions(), - new StringField("stop_desc", OPTIONAL), + new StringField("stop_id", REQUIRED), + new StringField("stop_code", OPTIONAL), + new StringField("stop_name", OPTIONAL).requireConditions(), + new StringField("stop_desc", OPTIONAL), new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).requireConditions(), new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).requireConditions(), - new StringField("zone_id", OPTIONAL).foreignFieldReference(), - new URLField("stop_url", OPTIONAL), + new StringField("zone_id", OPTIONAL).foreign(), + new URLField("stop_url", OPTIONAL), new ShortField("location_type", OPTIONAL, 4).requireConditions( - new ConditionalRequirement( 0, 2, "stop_name", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new ConditionalRequirement( 0, 2, "stop_lat", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new ConditionalRequirement( 0, 2, "stop_lon", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new ConditionalRequirement( 2, 4, "parent_station", FIELD_NOT_EMPTY, FIELD_IN_RANGE) + // If the location type is defined and within range, the conditional fields are required. 
+ // https://developers.google.com/transit/gtfs/reference#stopstxt + new ConditionalRequirement(0, 2, "stop_name", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new ConditionalRequirement(0, 2, "stop_lat", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new ConditionalRequirement(0, 2, "stop_lon", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new ConditionalRequirement(2, 4, "parent_station", FIELD_NOT_EMPTY, FIELD_IN_RANGE) ), - new StringField("parent_station", OPTIONAL).requireConditions(), - new StringField("stop_timezone", OPTIONAL), + new StringField("parent_station", OPTIONAL).requireConditions(), + new StringField("stop_timezone", OPTIONAL), new ShortField("wheelchair_boarding", OPTIONAL, 2) ) .restrictDelete() @@ -253,13 +261,19 @@ public Table (String name, Class entityClass, Requirement requ new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), new StringField("origin_id", OPTIONAL).requireConditions( - new ConditionalRequirement( "zone_id", FOREIGN_FIELD_VALUE_MATCH) + // If the origin id is defined, the matching zone_id must be defined in stops. + // https://developers.google.com/transit/gtfs/reference#fare_rulestxt + new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) ), new StringField("destination_id", OPTIONAL).requireConditions( - new ConditionalRequirement( "zone_id", FOREIGN_FIELD_VALUE_MATCH) + // If the destination id is defined, the matching zone_id must be defined in stops. + // https://developers.google.com/transit/gtfs/reference#fare_rulestxt + new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) ), new StringField("contains_id", OPTIONAL).requireConditions( - new ConditionalRequirement( "zone_id", FOREIGN_FIELD_VALUE_MATCH) + // If the contains id is defined, the matching zone_id must be defined in stops. 
+ // https://developers.google.com/transit/gtfs/reference#fare_rulestxt + new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) ) ) .withParentTable(FARE_ATTRIBUTES) @@ -290,16 +304,16 @@ public Table (String name, Class entityClass, Requirement requ .hasCompoundKey(); public static final Table TRIPS = new Table("trips", Trip.class, REQUIRED, - new StringField("trip_id", REQUIRED), - new StringField("route_id", REQUIRED).isReferenceTo(ROUTES).indexThisColumn(), + new StringField("trip_id", REQUIRED), + new StringField("route_id", REQUIRED).isReferenceTo(ROUTES).indexThisColumn(), // FIXME: Should this also optionally reference CALENDAR_DATES? // FIXME: Do we need an index on service_id - new StringField("service_id", REQUIRED).isReferenceTo(CALENDAR), - new StringField("trip_headsign", OPTIONAL), - new StringField("trip_short_name", OPTIONAL), + new StringField("service_id", REQUIRED).isReferenceTo(CALENDAR), + new StringField("trip_headsign", OPTIONAL), + new StringField("trip_short_name", OPTIONAL), new ShortField("direction_id", OPTIONAL, 1), - new StringField("block_id", OPTIONAL), - new StringField("shape_id", OPTIONAL).isReferenceTo(SHAPES), + new StringField("block_id", OPTIONAL), + new StringField("shape_id", OPTIONAL).isReferenceTo(SHAPES), new ShortField("wheelchair_accessible", OPTIONAL, 2), new ShortField("bikes_allowed", OPTIONAL, 2), // Editor-specific fields below. 
diff --git a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java index a56e84db5..3d80a5a6b 100644 --- a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java @@ -4,18 +4,21 @@ import com.conveyal.gtfs.loader.Feed; import com.conveyal.gtfs.model.Entity; import com.conveyal.gtfs.model.Route; -import com.conveyal.gtfs.model.ShapePoint; import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.model.StopTime; import com.conveyal.gtfs.model.Trip; -import com.google.common.collect.ListMultimap; -import com.google.common.collect.MultimapBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; -import static com.conveyal.gtfs.error.NewGTFSErrorType.*; +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.error.NewGTFSErrorType.MISSING_ARRIVAL_OR_DEPARTURE; +import static com.conveyal.gtfs.error.NewGTFSErrorType.TRIP_TOO_FEW_STOP_TIMES; /** * Check that the travel times between adjacent stops in trips are reasonable. @@ -150,14 +153,7 @@ private void processTrip (List stopTimes) { List stops = new ArrayList<>(); for (Iterator it = stopTimes.iterator(); it.hasNext(); ) { StopTime stopTime = it.next(); - if ( - stopTime.continuous_drop_off == 0 || - stopTime.continuous_drop_off == 2 || - stopTime.continuous_drop_off == 3 || - stopTime.continuous_pickup == 0 || - stopTime.continuous_pickup == 2 || - stopTime.continuous_pickup == 3 - ) { + if (hasContinuousBehavior(stopTime.continuous_drop_off, stopTime.continuous_pickup)) { hasContinuousBehavior = true; } Stop stop = stopById.get(stopTime.stop_id); @@ -181,17 +177,16 @@ private void processTrip (List stopTimes) { // ignore nulls. 
Route route = routeById.get(trip.route_id); if (route != null && - (route.continuous_drop_off == 0 || - route.continuous_drop_off == 2 || - route.continuous_drop_off == 3 || - route.continuous_pickup == 0 || - route.continuous_pickup == 2 || - route.continuous_pickup == 3) - ) { + hasContinuousBehavior(route.continuous_drop_off, route.continuous_pickup)) { hasContinuousBehavior = true; } - if (hasContinuousBehavior && trip.shape_id == null) { - registerError(trip, CONDITIONALLY_REQUIRED, "shape_id is conditionally required when a trip has continuous behavior defined."); + + if (trip.shape_id == null && hasContinuousBehavior) { + registerError( + trip, + CONDITIONALLY_REQUIRED, + "shape_id is conditionally required when a trip has continuous behavior defined." + ); } // Pass these same cleaned lists of stop_times and stops into each trip validator in turn. for (TripValidator tripValidator : tripValidators) tripValidator.validateTrip(trip, route, stopTimes, stops); @@ -207,4 +202,18 @@ public void complete (ValidationResult validationResult) { LOG.info("{} finished", tripValidator.getClass().getSimpleName()); } } + + /** + * Determine if a trip has continuous behaviour by checking the values that have been defined for continuous drop + * off and pickup. 
+ */ + private boolean hasContinuousBehavior(int continuousDropOff, int continuousPickup) { + return + continuousDropOff == 0 || + continuousDropOff == 2 || + continuousDropOff == 3 || + continuousPickup == 0 || + continuousPickup == 2 || + continuousPickup == 3; + } } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index bd16e8c8f..91d813494 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -5,9 +5,13 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import javax.sql.DataSource; import java.io.IOException; +import java.util.stream.Stream; import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.GTFS.validate; @@ -58,13 +62,21 @@ public void stopTableMissingConditionallyRequiredStopLon() { checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required when location_type value is between 0 and 2."); } - @Test - public void stopTableMissingConditionallyRequiredZoneId() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareRule","3", "1", "zone_id 4 is conditionally required in stops when referenced by contains_id in fare_rules."); - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareRule","3", "1", "zone_id 3 is conditionally required in stops when referenced by destination_id in fare_rules."); - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "FareRule","3", "1", "zone_id 2 is conditionally required in stops when referenced by origin_id in fare_rules."); + @ParameterizedTest + @MethodSource("createZoneIdDependencies") + public void 
stopTableMissingConditionallyRequiredZoneId(String entityType, String lineNumber, String entityId, String badValue) { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, entityType, lineNumber, entityId, badValue); + } + + private static Stream createZoneIdDependencies() { + return Stream.of( + Arguments.of("FareRule", "3", "1", "zone_id 4 is conditionally required in stops when referenced by contains_id in fare_rules."), + Arguments.of("FareRule", "3", "1", "zone_id 3 is conditionally required in stops when referenced by destination_id in fare_rules."), + Arguments.of("FareRule", "3", "1", "zone_id 2 is conditionally required in stops when referenced by origin_id in fare_rules.") + ); } + @Test public void agencyTableMissingConditionallyRequiredAgencyId() { checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Agency","3", "agency_id is conditionally required when there is more than one agency."); From 400aed0f8b0e5d34c233332683820f07423e65d9 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Fri, 7 May 2021 12:14:24 -0400 Subject: [PATCH 18/45] refactor: update conditional requirements checks --- .../com/conveyal/gtfs/error/NewGTFSError.java | 9 + .../conveyal/gtfs/error/NewGTFSErrorType.java | 2 +- .../gtfs/loader/ConditionalCheckType.java | 12 +- .../gtfs/loader/ConditionalRequirement.java | 209 +++++++++--------- .../java/com/conveyal/gtfs/loader/Field.java | 38 ++-- .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 4 +- .../com/conveyal/gtfs/loader/LineContext.java | 43 ++++ .../gtfs/loader/ReferenceTracker.java | 86 ++----- .../java/com/conveyal/gtfs/loader/Table.java | 26 +-- .../loader/ConditionallyRequiredTest.java | 6 +- 10 files changed, 224 insertions(+), 211 deletions(-) create mode 100644 src/main/java/com/conveyal/gtfs/loader/LineContext.java diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java index e350fe671..c945b397c 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java +++ 
b/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java @@ -1,5 +1,6 @@ package com.conveyal.gtfs.error; +import com.conveyal.gtfs.loader.LineContext; import com.conveyal.gtfs.loader.Table; import com.conveyal.gtfs.model.Entity; import org.slf4j.Logger; @@ -82,6 +83,14 @@ public static NewGTFSError forLine (Table table, int lineNumber, NewGTFSErrorTyp return error; } + // Factory Builder for cases where an entity has not yet been constructed, but we know the line number. + public static NewGTFSError forLine (LineContext lineContext, NewGTFSErrorType errorType, String badValue) { + NewGTFSError error = new NewGTFSError(lineContext.table.getEntityClass(), errorType); + error.lineNumber = lineContext.lineNumber; + error.badValue = badValue; + return error; + } + // Factory Builder for cases where the entity has already been decoded and an error is discovered during validation public static NewGTFSError forEntity(Entity entity, NewGTFSErrorType errorType) { NewGTFSError error = new NewGTFSError(entity.getClass(), errorType); diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 80ee7c5e0..49f44fa3f 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -8,7 +8,7 @@ */ public enum NewGTFSErrorType { // Standard errors. 
- AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java index 8ec57e371..1960d9441 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java @@ -8,7 +8,7 @@ public enum ConditionalCheckType { /** * The conditionally required field value must not be empty. This is used in conjunction with * {@link ConditionalCheckType#FIELD_IN_RANGE}. E.g. if the reference field is within a specified range, the - * conditionally required field must not be empty. + * dependent field must not be empty. */ FIELD_NOT_EMPTY, /** @@ -16,12 +16,12 @@ public enum ConditionalCheckType { */ FIELD_IN_RANGE, /** - * The reference field value must be available in order to match the conditionally required field value. + * This checks that the foreign reference exists in the dependent field (e.g., stops#zone_id). */ - FOREIGN_FIELD_VALUE_MATCH, + FOREIGN_REF_EXISTS, /** - * If the reference table row count is greater than one, the conditionally required field values must not be empty. - * This is used in conjunction with {@link ConditionalCheckType#FIELD_NOT_EMPTY}. + * Check that the reference table has multiple records. 
This is sometimes used in conjunction with + * {@link ConditionalCheckType#FIELD_NOT_EMPTY} (e.g., to check that multiple agencies exist). */ - ROW_COUNT_GREATER_THAN_ONE + HAS_MULTIPLE_ROWS } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 428054820..498150817 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -1,139 +1,148 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; +import com.google.common.collect.TreeMultimap; import java.util.HashSet; +import java.util.NavigableSet; import java.util.Set; -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.HAS_MULTIPLE_ROWS; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; /** - * These are the values that are checked inline with {@link ConditionalCheckType} to determine if the required - * conditions have been met. + * These are the requirements that are checked inline with {@link ConditionalCheckType} to determine if the required + * conditions set forth for certain fields in the GTFS spec have been met. These requirements are applied directly to + * their "reference fields" with the help of {@link Field#requireConditions}. */ public class ConditionalRequirement { - /** The type of check to be performed on a reference field. 
A reference field value is used to determine whether or - * not a conditional field is required. */ - public ConditionalCheckType referenceCheck; + private static final int FIRST_ROW = 2; + private static final int SECOND_ROW = 3; + /** The type of check to be performed on a reference field. A reference field value is used to determine which check + * (e.g., {@link #checkHasMultipleRows}) should be applied to the field. */ + public ConditionalCheckType referenceFieldCheck; /** The minimum reference field value if a range check is being performed. */ public int minReferenceValue; /** The maximum reference field value if a range check is being performed. */ public int maxReferenceValue; - /** The type of check to be performed on a conditional field. A conditional field is one that may require a value - * if the reference and conditional checks met certain conditions. */ - public ConditionalCheckType conditionalCheck; - /** The name of the conditional field. */ - public String conditionalFieldName; + /** The type of check to be performed on the dependent field. 
*/ + public ConditionalCheckType dependentFieldCheck; + /** The name of the dependent field, which is a field that requires a specific value if the reference and + * (in some cases) dependent field checks meet certain conditions.*/ + public String dependentFieldName; public ConditionalRequirement( int minReferenceValue, int maxReferenceValue, - String conditionalFieldName, - ConditionalCheckType conditionalCheck, - ConditionalCheckType referenceCheck + String dependentFieldName, + ConditionalCheckType dependentFieldCheck, + ConditionalCheckType referenceFieldCheck ) { this.minReferenceValue = minReferenceValue; this.maxReferenceValue = maxReferenceValue; - this.conditionalFieldName = conditionalFieldName; - this.conditionalCheck = conditionalCheck; - this.referenceCheck = referenceCheck; + this.dependentFieldName = dependentFieldName; + this.dependentFieldCheck = dependentFieldCheck; + this.referenceFieldCheck = referenceFieldCheck; } public ConditionalRequirement( - String conditionalFieldName, - ConditionalCheckType referenceCheck + String dependentFieldName, + ConditionalCheckType referenceFieldCheck ) { - this(0,0, conditionalFieldName, null, referenceCheck); + this(0,0, dependentFieldName, null, referenceFieldCheck); } public ConditionalRequirement( - String conditionalFieldName, - ConditionalCheckType conditionalCheck, - ConditionalCheckType referenceCheck + String dependentFieldName, + ConditionalCheckType dependentFieldCheck, + ConditionalCheckType referenceFieldCheck ) { - this(0,0, conditionalFieldName, conditionalCheck, referenceCheck); + this(0,0, dependentFieldName, dependentFieldCheck, referenceFieldCheck); } /** - * Flag an error if the number of rows in the agency table is greater than one and the agency_id has not been defined - * for each row. + * Flag an error if there are multiple rows (designed for agency.txt) and the agency_id is missing for any rows. 
*/ - public static Set checkRowCountGreaterThanOne( - Table table, - int lineNumber, - Set transitIds, - ConditionalRequirement check, - String conditionalFieldValue, - String entityId + public static Set checkHasMultipleRows( + LineContext lineContext, + TreeMultimap uniqueValuesForFields, + ConditionalRequirement check ) { + String dependentFieldValue = lineContext.getValueForRow(check.dependentFieldName); Set errors = new HashSet<>(); - if (table.name.equals("agency") && - lineNumber > 2 && - transitIds - .stream() - .filter(transitId -> transitId.contains("agency_id")) - .count() != lineNumber - 1 - ) { + NavigableSet agencyIdValues = uniqueValuesForFields.get(check.dependentFieldName); + // Do some awkward checks to determine if the first or second row (or another) is missing the agency_id. + boolean firstOrSecondMissingId = lineContext.lineNumber == SECOND_ROW && agencyIdValues.contains(""); + boolean currentRowMissingId = POSTGRES_NULL_TEXT.equals(dependentFieldValue); + boolean secondRowMissingId = firstOrSecondMissingId && currentRowMissingId; + if (firstOrSecondMissingId || (lineContext.lineNumber > SECOND_ROW && currentRowMissingId)) { // The check on the agency table is carried out whilst the agency table is being loaded so it // is possible to compare the number of transitIds added against the number of rows loaded to // accurately determine missing agency_id values. 
- String message = String.format( - "%s is conditionally required when there is more than one agency.", - check.conditionalFieldName - ); - errors.add( - NewGTFSError.forLine(table, lineNumber, CONDITIONALLY_REQUIRED, message) - ); - } else if ( - ( - table.name.equals("routes") || - table.name.equals("fare_attributes") - ) && - transitIds - .stream() - .filter(transitId -> transitId.contains("agency_id")) - .count() > 1 && - check.conditionalCheck == FIELD_NOT_EMPTY && - POSTGRES_NULL_TEXT.equals(conditionalFieldValue) - ) { - // By this point the agency table has already been loaded, therefore, if the number of agency_id - // transitIds is greater than one it is assumed more than one agency has been provided. - // FIXME: This doesn't work if only one agency_id is defined in the agency table. e.g. 2 rows of - // data, but the first doesn't define an agency_id. - String message = String.format( - "%s is conditionally required when there is more than one agency.", - check.conditionalFieldName - ); + int lineNumber = secondRowMissingId + ? SECOND_ROW + : firstOrSecondMissingId + ? FIRST_ROW + : lineContext.lineNumber; errors.add( NewGTFSError.forLine( - table, + lineContext.table, lineNumber, - AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, - message).setEntityId(entityId) + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + check.dependentFieldName + ) ); } return errors; } + /** + * Checks that the reference field is not empty when the dependent field/table has multiple rows. This is + * principally designed for checking that routes#agency_id is filled when multiple agencies exist. 
+ */ + public static Set checkFieldEmpty( + LineContext lineContext, + Field referenceField, + TreeMultimap uniqueValuesForFields, + ConditionalRequirement check + ) { + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + Set errors = new HashSet<>(); + int dependentFieldCount = uniqueValuesForFields.get(check.dependentFieldName).size(); + if (check.dependentFieldCheck == HAS_MULTIPLE_ROWS && dependentFieldCount > 1) { + // If there are multiple entries for the dependent field (including empty strings to account for any + // potentially missing values), the reference field must not be empty. + boolean referenceFieldIsEmpty = POSTGRES_NULL_TEXT.equals(referenceFieldValue); + if (referenceFieldIsEmpty) { + errors.add( + NewGTFSError.forLine( + lineContext, + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + null + ).setEntityId(lineContext.getEntityId()) + ); + } + } + return errors; + } + /** * If the reference field value is within a defined range and the conditional field value has not be defined, flag * an error. */ public static Set checkFieldInRange( - Table table, - int lineNumber, + LineContext lineContext, Field referenceField, - ConditionalRequirement check, - String referenceFieldValue, - String conditionalFieldValue, - String entityId + ConditionalRequirement check ) { Set errors = new HashSet<>(); - + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + String conditionalFieldValue = lineContext.getValueForRow(check.dependentFieldName); boolean referenceValueMeetsRangeCondition = !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && // TODO use pre-existing method in ShortField? 
@@ -145,24 +154,23 @@ public static Set checkFieldInRange( return errors; } boolean conditionallyRequiredValueIsEmpty = - check.conditionalCheck == FIELD_NOT_EMPTY && + check.dependentFieldCheck == FIELD_NOT_EMPTY && POSTGRES_NULL_TEXT.equals(conditionalFieldValue); if (conditionallyRequiredValueIsEmpty) { // Reference value in range and conditionally required field is empty. String message = String.format( "%s is conditionally required when %s value is between %d and %d.", - check.conditionalFieldName, + check.dependentFieldName, referenceField.name, check.minReferenceValue, check.maxReferenceValue ); errors.add( NewGTFSError.forLine( - table, - lineNumber, + lineContext, CONDITIONALLY_REQUIRED, - message).setEntityId(entityId) + message).setEntityId(lineContext.getEntityId()) ); } return errors; @@ -170,46 +178,35 @@ public static Set checkFieldInRange( /** * Check that an expected foreign field value matches a conditional field value. Selected foreign field values are - * added to {@link ReferenceTracker#foreignFieldIds} as part of the load process and are used here to check + * added to {@link ReferenceTracker#uniqueValuesForFields} as part of the load process and are used here to check * conditional fields which have a dependency on them. */ - public static Set checkForeignFieldValueMatch( - Table table, - int lineNumber, + public static Set checkForeignRefExists( + LineContext lineContext, Field referenceField, ConditionalRequirement check, - String referenceFieldValue, - Set foreignFieldIds, - String entityId + TreeMultimap uniqueValuesForFields ) { Set errors = new HashSet<>(); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); // Expected reference in foreign field id list. 
String foreignFieldReference = String.join( ":", - check.conditionalFieldName, + check.dependentFieldName, referenceFieldValue ); - if (table.name.equals("fare_rules") && + if (lineContext.table.name.equals("fare_rules") && !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && - foreignFieldIds - .stream() - .noneMatch(id -> id.contains(foreignFieldReference)) + !uniqueValuesForFields.get(check.dependentFieldName).contains(foreignFieldReference) ) { - // The foreign key reference required by fields in fare rules is not available in stops. - String message = String.format( - "%s %s is conditionally required in stops when referenced by %s in %s.", - check.conditionalFieldName, - referenceFieldValue, - referenceField.name, - table.name - ); + // stop#zone_id does not exist in stops table, but is required by fare_rules records (e.g., origin_id). errors.add( NewGTFSError.forLine( - table, - lineNumber, - CONDITIONALLY_REQUIRED, - message).setEntityId(entityId) + lineContext, + REFERENTIAL_INTEGRITY, + String.join(":", referenceField.name, foreignFieldReference) + ).setEntityId(lineContext.getEntityId()) ); } return errors; diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index 898efaa98..08b257828 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -140,6 +140,25 @@ public boolean isForeignReference () { return this.referenceTable != null; } + /** + * Fluent method to flag field as having foreign references. If flagged, the field value is added to + * {@link ReferenceTracker#uniqueValuesForFields} to be used as a look-up for reference matches. Note: this is intended only for + * special cases (e.g., zone_id) where the field being referenced does not exist as the primary key of a table. + */ + Field hasForeignReferences() { + isForeign = true; + return this; + } + + /** + * Indicates that this field has foreign references. 
Note: this is intentionally distinct from + * {@link #isForeignReference()} and is intended only for special cases (e.g., zone_id) where the field being + * referenced does not exist as the primary key of a table. + */ + public boolean isForeign() { + return isForeign; + } + /** * Fluent method that indicates that a newly constructed field should be indexed after the table is loaded. * FIXME: should shouldBeIndexed be determined based on presence of referenceTable? @@ -155,7 +174,8 @@ public boolean shouldBeIndexed() { } /** - * Fluent method indicates that this field is a reference to an entry in the table provided as an argument. + * Fluent method indicates that this field is a reference to an entry in the table provided as an argument (i.e., it + * is a foreign reference). * @param table * @return this same Field instance */ @@ -205,20 +225,4 @@ public Field requireConditions(ConditionalRequirement...conditions) { public boolean isConditionallyRequired() { return isConditionallyRequired; } - - /** - * Flag this field as a foreign reference. If flagged the field value is added to - * {@link ReferenceTracker#foreignFieldIds} to be used as a look-up for reference matches. - */ - public Field foreign() { - isForeign = true; - return this; - } - - /** - * Indicates that this field is required as a foreign reference. - */ - public boolean isForeign() { - return isForeign; - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index e077f23d3..334b8040a 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -376,6 +376,7 @@ private int loadInternal(Table table) throws Exception { errorStorage.storeError(NewGTFSError.forTable(table, TABLE_TOO_LONG)); break; } + // Line 1 is considered the header row, so the first actual row of data will be line 2. 
int lineNumber = ((int) csvReader.getCurrentRecord()) + 2; if (lineNumber % 500_000 == 0) LOG.info("Processed {}", human(lineNumber)); if (csvReader.getColumnCount() != fields.length) { @@ -434,8 +435,9 @@ private int loadInternal(Table table) throws Exception { columnIndex += 1; } if (tableHasConditions) { + LineContext lineContext = new LineContext(table, fields, transformedStrings, lineNumber); errorStorage.storeErrors( - referenceTracker.checkConditionallyRequiredFields(table, fields, transformedStrings, lineNumber) + referenceTracker.checkConditionallyRequiredFields(lineContext) ); } if (postgresText) { diff --git a/src/main/java/com/conveyal/gtfs/loader/LineContext.java b/src/main/java/com/conveyal/gtfs/loader/LineContext.java new file mode 100644 index 000000000..80292fc0e --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/LineContext.java @@ -0,0 +1,43 @@ +package com.conveyal.gtfs.loader; + +/** + * Wrapper class that provides access to row values and line context (e.g., line number) for a particular row of GTFS + * data. + */ +public class LineContext { + public final Table table; + private final Field[] fields; + private final String[] rowData; + public final int lineNumber; + + public LineContext(Table table, Field[] fields, String[] rowData, int lineNumber) { + this.table = table; + this.fields = fields; + this.rowData = rowData; + this.lineNumber = lineNumber; + } + + /** + * Get value for a particular column index from a set of row data. Note: the row data here has one extra value at + * the beginning of the array that represents the line number (hence the +1). This is because the data is formatted + * for batch insertion into a postgres table. + */ + public String getValueForRow(int columnIndex) { + return rowData[columnIndex + 1]; + } + + /** + * Overloaded method to provide value for the current line for a particular field. 
+ */ + public String getValueForRow(String fieldName) { + int fieldIndex = Field.getFieldIndex(fields, fieldName); + return rowData[fieldIndex + 1]; + } + + /** + * Overloaded method to provide value for the current line for the key field. + */ + public String getEntityId() { + return getValueForRow(table.getKeyFieldIndex(fields)); + } +} diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 1e16ea2bd..b1ca27bad 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -1,6 +1,7 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; +import com.google.common.collect.TreeMultimap; import java.util.Collections; import java.util.HashSet; @@ -22,7 +23,7 @@ */ public class ReferenceTracker { public final Set transitIds = new HashSet<>(); - public final Set foreignFieldIds = new HashSet<>(); + public final TreeMultimap uniqueValuesForFields = TreeMultimap.create(); public final Set transitIdsWithSequence = new HashSet<>(); /** @@ -64,9 +65,11 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN : !table.hasUniqueKeyField ? null : keyField; String transitId = String.join(":", keyField, keyValue); - // Field value is required for referential integrity checks as part of conditionally required checks. - if (!"".equals(value) && field.isForeign()) { - foreignFieldIds.add(String.join(":", field.name, value)); + // Unique key values are needed for referential integrity checks as part of checks for fields that have + // conditional requirements. This also tracks "special" foreign keys like stop#zone_id that are not primary keys + // of the table they exist in. 
+ if ((field.name.equals(keyField) && keyField.equals(uniqueKeyField)) || field.isForeign()) { + uniqueValuesForFields.put(field.name, value); } // If the field is optional and there is no value present, skip check. @@ -150,75 +153,39 @@ public Set checkReferencesAndUniqueness(String keyValue, int lineN * to confirm if it meets the conditions whereby the conditional field is required. If the conditional field is * required confirm that a value has been provided, if not, log an error. */ - public Set checkConditionallyRequiredFields( - Table table, - Field[] fields, - String[] rowData, - int lineNumber - ) { + public Set checkConditionallyRequiredFields(LineContext lineContext) { Set errors = new HashSet<>(); - Map fieldsToCheck = table.getConditionalRequirements(); + Map fieldsToCheck = lineContext.table.getConditionalRequirements(); // Work through each field that has been assigned a conditional requirement. for (Map.Entry entry : fieldsToCheck.entrySet()) { Field referenceField = entry.getKey(); - // Extract reference field value from the row currently being processed. - String referenceFieldValue = - getValueForRow( - rowData, - Field.getFieldIndex(fields, referenceField.name) - ); - String entityId = - getValueForRow( - rowData, - table.getKeyFieldIndex(fields) - ); ConditionalRequirement[] conditionalRequirements = entry.getValue(); - // Work through each field's conditional requirements. for (ConditionalRequirement check : conditionalRequirements) { - // Extract conditional field value from the row currently being processed. 
- String conditionalFieldValue = - getValueForRow( - rowData, - Field.getFieldIndex(fields, check.conditionalFieldName) - ); - switch(check.referenceCheck) { - case ROW_COUNT_GREATER_THAN_ONE: + switch(check.referenceFieldCheck) { + case HAS_MULTIPLE_ROWS: errors.addAll( - ConditionalRequirement.checkRowCountGreaterThanOne( - table, - lineNumber, - foreignFieldIds, - check, - conditionalFieldValue, - entityId - ) + ConditionalRequirement.checkHasMultipleRows(lineContext, uniqueValuesForFields, check) + ); + break; + case FIELD_NOT_EMPTY: + errors.addAll( + ConditionalRequirement.checkFieldEmpty(lineContext, referenceField, uniqueValuesForFields, check) ); break; case FIELD_IN_RANGE: errors.addAll( - ConditionalRequirement.checkFieldInRange( - table, - lineNumber, - referenceField, - check, - referenceFieldValue, - conditionalFieldValue, - entityId - ) + ConditionalRequirement.checkFieldInRange(lineContext, referenceField, check) ); break; - case FOREIGN_FIELD_VALUE_MATCH: + case FOREIGN_REF_EXISTS: errors.addAll( - ConditionalRequirement.checkForeignFieldValueMatch( - table, - lineNumber, + ConditionalRequirement.checkForeignRefExists( + lineContext, referenceField, check, - referenceFieldValue, - foreignFieldIds, - entityId + uniqueValuesForFields ) ); break; @@ -227,13 +194,4 @@ public Set checkConditionallyRequiredFields( } return errors; } - - /** - * Get value for a particular column index from a set of row data. Note: the row data here has one extra value at - * the beginning of the array that represents the line number (hence the +1). This is because the data is formatted - * for batch insertion into a postgres table. 
- */ - private String getValueForRow(String[] rowData, int columnIndex) { - return rowData[columnIndex + 1]; - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 197fd79e1..7c43176c0 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -51,8 +51,8 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_FIELD_VALUE_MATCH; -import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_REF_EXISTS; +import static com.conveyal.gtfs.loader.ConditionalCheckType.HAS_MULTIPLE_ROWS; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -105,8 +105,8 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).requireConditions( // If there is more than one agency, the agency_id must be provided // https://developers.google.com/transit/gtfs/reference#agencytxt - new ConditionalRequirement("agency_id", ROW_COUNT_GREATER_THAN_ONE) - ).foreign(), + new ConditionalRequirement("agency_id", HAS_MULTIPLE_ROWS) + ).hasForeignReferences(), new StringField("agency_name", REQUIRED), new URLField("agency_url", REQUIRED), new StringField("agency_timezone", REQUIRED), // FIXME new field type for time zones? @@ -158,7 +158,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).requireConditions( // If there is more than one agency, this agency_id is required. 
// https://developers.google.com/transit/gtfs/reference#fare_attributestxt - new ConditionalRequirement("agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) + new ConditionalRequirement("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -186,7 +186,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( // If there is more than one agency, this agency_id is required. // https://developers.google.com/transit/gtfs/reference#routestxt - new ConditionalRequirement("agency_id", FIELD_NOT_EMPTY, ROW_COUNT_GREATER_THAN_ONE) + new ConditionalRequirement("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) ), new StringField("route_short_name", OPTIONAL), // one of short or long must be provided new StringField("route_long_name", OPTIONAL), @@ -240,7 +240,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("stop_desc", OPTIONAL), new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).requireConditions(), new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).requireConditions(), - new StringField("zone_id", OPTIONAL).foreign(), + new StringField("zone_id", OPTIONAL).hasForeignReferences(), new URLField("stop_url", OPTIONAL), new ShortField("location_type", OPTIONAL, 4).requireConditions( // If the location type is defined and within range, the conditional fields are required. @@ -261,19 +261,19 @@ public Table (String name, Class entityClass, Requirement requ new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), new StringField("origin_id", OPTIONAL).requireConditions( - // If the origin id is defined, the matching zone_id must be defined in stops. + // If the origin_id is defined, its value must exist as a zone_id in stops.txt. 
// https://developers.google.com/transit/gtfs/reference#fare_rulestxt - new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) + new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS) ), new StringField("destination_id", OPTIONAL).requireConditions( - // If the destination id is defined, the matching zone_id must be defined in stops. + // If the destination_id is defined, its value must exist as a zone_id in stops.txt. // https://developers.google.com/transit/gtfs/reference#fare_rulestxt - new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) + new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS) ), new StringField("contains_id", OPTIONAL).requireConditions( - // If the contains id is defined, the matching zone_id must be defined in stops. + // If the contains_id is defined, its value must exist as a zone_id in stops.txt. // https://developers.google.com/transit/gtfs/reference#fare_rulestxt - new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) + new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS) ) ) .withParentTable(FARE_ATTRIBUTES) diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 91d813494..77d0f979d 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -16,7 +16,7 @@ import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.GTFS.validate; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; public class ConditionallyRequiredTest { @@ -94,12 +94,12 @@ public void 
stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "Route","2", "21","agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21","agency_id is conditionally required when there is more than one agency."); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "FareAttribute","2", "1","agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "FareAttribute","2", "1","agency_id is conditionally required when there is more than one agency."); } /** From 6b54a66735024bacb91fa469cd6f64d575a7f2e0 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Fri, 7 May 2021 18:03:45 +0100 Subject: [PATCH 19/45] refactor(Added new GFTS tables and conditional checks): Added translation and attribution tables, co --- src/main/java/com/conveyal/gtfs/GTFSFeed.java | 5 +- .../gtfs/loader/ConditionalCheckType.java | 11 +- .../gtfs/loader/ConditionalRequirement.java | 99 ++++++++++++++- .../conveyal/gtfs/loader/EntityPopulator.java | 3 +- .../conveyal/gtfs/loader/FeedLoadResult.java | 4 + .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 2 + .../gtfs/loader/JdbcGtfsSnapshotter.java | 1 + .../gtfs/loader/ReferenceTracker.java | 26 ++++ .../java/com/conveyal/gtfs/loader/Table.java | 60 +++++++-- .../com/conveyal/gtfs/model/Attribution.java | 119 ++++++++++++++++++ .../java/com/conveyal/gtfs/model/Stop.java | 22 ++-- .../com/conveyal/gtfs/model/Translation.java | 107 ++++++++++++++++ .../com/conveyal/gtfs/dto/FeedInfoDTO.java | 3 + .../java/com/conveyal/gtfs/dto/StopDTO.java | 1 + .../loader/ConditionallyRequiredTest.java | 45 ++++--- 
.../gtfs/loader/JDBCTableWriterTest.java | 6 + .../attributions.txt | 2 + .../translations.txt | 4 + 18 files changed, 472 insertions(+), 48 deletions(-) create mode 100644 src/main/java/com/conveyal/gtfs/model/Attribution.java create mode 100644 src/main/java/com/conveyal/gtfs/model/Translation.java create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/attributions.txt create mode 100644 src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/translations.txt diff --git a/src/main/java/com/conveyal/gtfs/GTFSFeed.java b/src/main/java/com/conveyal/gtfs/GTFSFeed.java index c07c11a5f..8e6de45e2 100644 --- a/src/main/java/com/conveyal/gtfs/GTFSFeed.java +++ b/src/main/java/com/conveyal/gtfs/GTFSFeed.java @@ -14,7 +14,6 @@ import org.locationtech.jts.algorithm.ConvexHull; import org.locationtech.jts.geom.*; import org.locationtech.jts.index.strtree.STRtree; -import org.locationtech.jts.simplify.DouglasPeuckerSimplifier; import org.mapdb.BTreeMap; import org.mapdb.DB; import org.mapdb.DBMaker; @@ -66,6 +65,8 @@ public class GTFSFeed implements Cloneable, Closeable { public final Map stops; public final Map transfers; public final BTreeMap trips; + public final Map translations; + public final Map attributions; public final Set transitIds = new HashSet<>(); /** CRC32 of the GTFS file this was loaded from */ @@ -636,6 +637,8 @@ private GTFSFeed (DB db) { fares = db.getTreeMap("fares"); services = db.getTreeMap("services"); shape_points = db.getTreeMap("shape_points"); + translations = db.getTreeMap("translations"); + attributions = db.getTreeMap("attributions"); feedId = db.getAtomicString("feed_id").get(); checksum = db.getAtomicLong("checksum").get(); diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java index 8ec57e371..d800511d3 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java +++ 
b/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java @@ -23,5 +23,14 @@ public enum ConditionalCheckType { * If the reference table row count is greater than one, the conditionally required field values must not be empty. * This is used in conjunction with {@link ConditionalCheckType#FIELD_NOT_EMPTY}. */ - ROW_COUNT_GREATER_THAN_ONE + ROW_COUNT_GREATER_THAN_ONE, + /** + * If the conditionally required field value is empty, the reference field value must be provided. + */ + FIELD_IS_EMPTY, + /** + * If the conditionally required field value is not empty and matches an expected value, the reference field + * value must not be empty. + */ + FIELD_NOT_EMPTY_AND_MATCHES_VALUE } \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 428054820..a29a975e0 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -27,11 +27,14 @@ public class ConditionalRequirement { public ConditionalCheckType conditionalCheck; /** The name of the conditional field. */ public String conditionalFieldName; + /** The expected conditional field value. 
*/ + public String conditionalFieldValue; public ConditionalRequirement( int minReferenceValue, int maxReferenceValue, String conditionalFieldName, + String conditionalFieldValue, ConditionalCheckType conditionalCheck, ConditionalCheckType referenceCheck @@ -39,15 +42,27 @@ public ConditionalRequirement( this.minReferenceValue = minReferenceValue; this.maxReferenceValue = maxReferenceValue; this.conditionalFieldName = conditionalFieldName; + this.conditionalFieldValue = conditionalFieldValue; this.conditionalCheck = conditionalCheck; this.referenceCheck = referenceCheck; } public ConditionalRequirement( + int minReferenceValue, + int maxReferenceValue, String conditionalFieldName, + ConditionalCheckType conditionalCheck, ConditionalCheckType referenceCheck + ) { - this(0,0, conditionalFieldName, null, referenceCheck); + this(minReferenceValue,maxReferenceValue, conditionalFieldName, null, conditionalCheck, referenceCheck); + } + + public ConditionalRequirement( + String conditionalFieldName, + ConditionalCheckType referenceCheck + ) { + this(0,0, conditionalFieldName, null, null, referenceCheck); } public ConditionalRequirement( @@ -55,9 +70,18 @@ public ConditionalRequirement( ConditionalCheckType conditionalCheck, ConditionalCheckType referenceCheck ) { - this(0,0, conditionalFieldName, conditionalCheck, referenceCheck); + this(0,0, conditionalFieldName, null,conditionalCheck, referenceCheck); } + public ConditionalRequirement( + String conditionalFieldName, + String conditionalFieldValue, + ConditionalCheckType referenceCheck + ) { + this(0,0, conditionalFieldName, conditionalFieldValue, null, referenceCheck); + } + + /** * Flag an error if the number of rows in the agency table is greater than one and the agency_id has not been defined * for each row. @@ -215,6 +239,77 @@ public static Set checkForeignFieldValueMatch( return errors; } + /** + * Check the conditional field value, if it is empty the reference field value must be provided. 
+ */ + public static Set checkFieldIsEmpty( + Table table, + int lineNumber, + Field referenceField, + ConditionalRequirement check, + String referenceFieldValue, + String conditionalFieldValue, + String entityId + ) { + Set errors = new HashSet<>(); + if ( + POSTGRES_NULL_TEXT.equals(conditionalFieldValue) && + POSTGRES_NULL_TEXT.equals(referenceFieldValue) + ) { + // The reference field is required when the conditional field is empty. + String message = String.format( + "%s is conditionally required when %s is empty.", + referenceField.name, + check.conditionalFieldName + ); + errors.add( + NewGTFSError.forLine( + table, + lineNumber, + CONDITIONALLY_REQUIRED, + message).setEntityId(entityId) + ); + + } + return errors; + } + + /** + * Check the conditional field value is not empty and matches the expected value. + */ + public static Set checkFieldNotEmptyAndMatchesValue( + Table table, + int lineNumber, + Field referenceField, + ConditionalRequirement check, + String referenceFieldValue, + String conditionalFieldValue, + String entityId + ) { + Set errors = new HashSet<>(); + if ( + !POSTGRES_NULL_TEXT.equals(conditionalFieldValue) && + conditionalFieldValue.equals(check.conditionalFieldValue) && + POSTGRES_NULL_TEXT.equals(referenceFieldValue) + ) { + String message = String.format( + "%s is conditionally required when %s is provided and matches %s.", + referenceField.name, + check.conditionalFieldName, + check.conditionalFieldValue + ); + errors.add( + NewGTFSError.forLine( + table, + lineNumber, + CONDITIONALLY_REQUIRED, + message).setEntityId(entityId) + ); + + } + return errors; + } + /** * Check if the provided value is within the min and max values. If the field value can not be converted * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. 
diff --git a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java index 8de1a1cfa..4c44d3d64 100644 --- a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java +++ b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java @@ -168,7 +168,8 @@ public interface EntityPopulator { stop.stop_timezone = getStringIfPresent(result, "stop_timezone", columnForName); stop.stop_url = getUrlIfPresent (result, "stop_url", columnForName); stop.location_type = getIntIfPresent (result, "location_type", columnForName); - stop.wheelchair_boarding = Integer.toString(getIntIfPresent(result, "wheelchair_boarding", columnForName)); + stop.wheelchair_boarding = getIntIfPresent(result, "wheelchair_boarding", columnForName); + stop.platform_code = getStringIfPresent(result, "platform_code", columnForName); return stop; }; diff --git a/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java b/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java index 69d62d9f7..3a61fdd50 100644 --- a/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java +++ b/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java @@ -34,6 +34,8 @@ public class FeedLoadResult implements Serializable { public TableLoadResult stopTimes; public TableLoadResult transfers; public TableLoadResult trips; + public TableLoadResult translations; + public TableLoadResult attributions; public long loadTimeMillis; public long completionTime; @@ -59,5 +61,7 @@ public FeedLoadResult (boolean constructTableResults) { stopTimes = new TableLoadResult(); transfers = new TableLoadResult(); trips = new TableLoadResult(); + translations = new TableLoadResult(); + attributions = new TableLoadResult(); } } diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index e077f23d3..99bdb964f 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ 
b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -165,6 +165,8 @@ public FeedLoadResult loadTables() { result.trips = load(Table.TRIPS); // refs routes result.frequencies = load(Table.FREQUENCIES); // refs trips result.stopTimes = load(Table.STOP_TIMES); + result.translations = load(Table.TRANSLATIONS); + result.attributions = load(Table.ATTRIBUTIONS); result.errorCount = errorStorage.getErrorCount(); // This will commit and close the single connection that has been shared between all preceding load steps. errorStorage.commitAndClose(); diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java index 610a4f8ac..f734f8d19 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java @@ -112,6 +112,7 @@ public SnapshotResult copyTables() { result.stopTimes = copy(Table.STOP_TIMES, true); result.transfers = copy(Table.TRANSFERS, true); result.trips = copy(Table.TRIPS, true); + result.attributions = copy(Table.ATTRIBUTIONS, true); result.completionTime = System.currentTimeMillis(); result.loadTimeMillis = result.completionTime - startTime; LOG.info("Copying tables took {} sec", (result.loadTimeMillis) / 1000); diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 1e16ea2bd..9be589ddf 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -222,6 +222,32 @@ public Set checkConditionallyRequiredFields( ) ); break; + case FIELD_IS_EMPTY: + errors.addAll( + ConditionalRequirement.checkFieldIsEmpty( + table, + lineNumber, + referenceField, + check, + referenceFieldValue, + conditionalFieldValue, + entityId + ) + ); + break; + case FIELD_NOT_EMPTY_AND_MATCHES_VALUE: + errors.addAll( + 
ConditionalRequirement.checkFieldNotEmptyAndMatchesValue( + table, + lineNumber, + referenceField, + check, + referenceFieldValue, + conditionalFieldValue, + entityId + ) + ); + break; } } } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 197fd79e1..9681cfd58 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -3,6 +3,7 @@ import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.SQLErrorStorage; import com.conveyal.gtfs.model.Agency; +import com.conveyal.gtfs.model.Attribution; import com.conveyal.gtfs.model.Calendar; import com.conveyal.gtfs.model.CalendarDate; import com.conveyal.gtfs.model.Entity; @@ -18,6 +19,7 @@ import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.model.StopTime; import com.conveyal.gtfs.model.Transfer; +import com.conveyal.gtfs.model.Translation; import com.conveyal.gtfs.model.Trip; import com.conveyal.gtfs.storage.StorageException; import com.csvreader.CsvReader; @@ -50,7 +52,9 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IS_EMPTY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY_AND_MATCHES_VALUE; import static com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_FIELD_VALUE_MATCH; import static com.conveyal.gtfs.loader.ConditionalCheckType.ROW_COUNT_GREATER_THAN_ONE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; @@ -168,7 +172,7 @@ public Table (String name, Class entityClass, Requirement requ public static final Table FEED_INFO = new Table("feed_info", FeedInfo.class, OPTIONAL, new 
StringField("feed_publisher_name", REQUIRED), // feed_id is not the first field because that would label it as the key field, which we do not want because the - // key field cannot be optional. + // key field cannot be optional. feed_id is not part of the GTFS spec, but is required by OTP. new StringField("feed_id", OPTIONAL), new URLField("feed_publisher_url", REQUIRED), new LanguageField("feed_lang", REQUIRED), @@ -178,7 +182,10 @@ public Table (String name, Class entityClass, Requirement requ // Editor-specific field that represents default route values for use in editing. new ColorField("default_route_color", EDITOR), // FIXME: Should the route type max value be equivalent to GTFS spec's max? - new IntegerField("default_route_type", EDITOR, 999) + new IntegerField("default_route_type", EDITOR, 999), + new LanguageField("default_lang", OPTIONAL), + new StringField("feed_contact_email", OPTIONAL), + new URLField("feed_contact_url", OPTIONAL) ).keyFieldIsNotUnique(); public static final Table ROUTES = new Table("routes", Route.class, REQUIRED, @@ -252,27 +259,26 @@ public Table (String name, Class entityClass, Requirement requ ), new StringField("parent_station", OPTIONAL).requireConditions(), new StringField("stop_timezone", OPTIONAL), - new ShortField("wheelchair_boarding", OPTIONAL, 2) + new ShortField("wheelchair_boarding", OPTIONAL, 2), + new StringField("platform_code", OPTIONAL) ) .restrictDelete() .addPrimaryKey(); + // GTFS reference: https://developers.google.com/transit/gtfs/reference#fare_rulestxt public static final Table FARE_RULES = new Table("fare_rules", FareRule.class, OPTIONAL, new StringField("fare_id", REQUIRED).isReferenceTo(FARE_ATTRIBUTES), new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), new StringField("origin_id", OPTIONAL).requireConditions( // If the origin id is defined, the matching zone_id must be defined in stops. 
- // https://developers.google.com/transit/gtfs/reference#fare_rulestxt new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) ), new StringField("destination_id", OPTIONAL).requireConditions( // If the destination id is defined, the matching zone_id must be defined in stops. - // https://developers.google.com/transit/gtfs/reference#fare_rulestxt new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) ), new StringField("contains_id", OPTIONAL).requireConditions( // If the contains id is defined, the matching zone_id must be defined in stops. - // https://developers.google.com/transit/gtfs/reference#fare_rulestxt new ConditionalRequirement("zone_id", FOREIGN_FIELD_VALUE_MATCH) ) ) @@ -353,6 +359,43 @@ public Table (String name, Class entityClass, Requirement requ .withParentTable(TRIPS) .keyFieldIsNotUnique(); + // GTFS reference: https://developers.google.com/transit/gtfs/reference#translationstxt + public static final Table TRANSLATIONS = new Table("translations", Translation.class, OPTIONAL, + new StringField("table_name", REQUIRED), + new StringField("field_name", REQUIRED), + new LanguageField("language", REQUIRED), + new StringField("translation", REQUIRED), + new StringField("record_id", OPTIONAL).requireConditions( + // If the field_value is empty the record_id is required. + new ConditionalRequirement("field_value", FIELD_IS_EMPTY) + ), + new StringField("record_sub_id", OPTIONAL).requireConditions( + // If the record_id is not empty and the value is stop_times the record_sub_id is required. + new ConditionalRequirement( + "record_id", + "stop_times", + FIELD_NOT_EMPTY_AND_MATCHES_VALUE + ) + ), + new StringField("field_value", OPTIONAL).requireConditions( + // If the record_id is empty the field_value is required.
+ new ConditionalRequirement("record_id", FIELD_IS_EMPTY) + )) + .keyFieldIsNotUnique(); + + public static final Table ATTRIBUTIONS = new Table("attributions", Attribution.class, OPTIONAL, + new StringField("attribution_id", OPTIONAL), + new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY), + new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), + new StringField("trip_id", OPTIONAL).isReferenceTo(TRIPS), + new StringField("organization_name", REQUIRED), + new ShortField("is_producer", OPTIONAL, 1), + new ShortField("is_operator", OPTIONAL, 1), + new ShortField("is_authority", OPTIONAL, 1), + new URLField("attribution_url", OPTIONAL), + new StringField("attribution_email", OPTIONAL), + new StringField("attribution_phone", OPTIONAL)); + /** List of tables in order needed for checking referential integrity during load stage. */ public static final Table[] tablesInOrder = { AGENCY, @@ -370,7 +413,9 @@ public Table (String name, Class entityClass, Requirement requ TRANSFERS, TRIPS, STOP_TIMES, - FREQUENCIES + FREQUENCIES, + TRANSLATIONS, + ATTRIBUTIONS }; /** @@ -1050,4 +1095,5 @@ public Map getConditionalRequirements() { } return fieldsWithConditions; } + } diff --git a/src/main/java/com/conveyal/gtfs/model/Attribution.java b/src/main/java/com/conveyal/gtfs/model/Attribution.java new file mode 100644 index 000000000..deaa30be4 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/model/Attribution.java @@ -0,0 +1,119 @@ +package com.conveyal.gtfs.model; + +import com.conveyal.gtfs.GTFSFeed; + +import java.io.IOException; +import java.net.URL; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Iterator; + +public class Attribution extends Entity { + + String attribution_id; + String agency_id; + String route_id; + String trip_id; + String organization_name; + int is_producer; + int is_operator; + int is_authority; + URL attribution_url; + String attribution_email; + String attribution_phone; + + @Override + public String
getId () { + return attribution_id; + } + + /** + * Sets the parameters for a prepared statement following the parameter order defined in + * {@link com.conveyal.gtfs.loader.Table#ATTRIBUTIONS}. JDBC prepared statement parameters use a one-based index. + */ + @Override + public void setStatementParameters(PreparedStatement statement, boolean setDefaultId) throws SQLException { + int oneBasedIndex = 1; + if (!setDefaultId) statement.setInt(oneBasedIndex++, id); + statement.setString(oneBasedIndex++, attribution_id); + statement.setString(oneBasedIndex++, agency_id); + statement.setString(oneBasedIndex++, route_id); + statement.setString(oneBasedIndex++, trip_id); + statement.setString(oneBasedIndex++, organization_name); + setIntParameter(statement, oneBasedIndex++, is_producer); + setIntParameter(statement, oneBasedIndex++, is_operator); + setIntParameter(statement, oneBasedIndex++, is_authority); + statement.setString(oneBasedIndex++, attribution_url != null ? attribution_url .toString() : null); + statement.setString(oneBasedIndex++, attribution_email); + statement.setString(oneBasedIndex++, attribution_phone); + } + + public static class Loader extends Entity.Loader { + + public Loader(GTFSFeed feed) { + super(feed, "attributions"); + } + + @Override + protected boolean isRequired() { + return false; + } + + @Override + public void loadOneRow() throws IOException { + Attribution a = new Attribution(); + a.id = row + 1; // offset line number by 1 to account for 0-based row index + a.attribution_id = getStringField("attribution_id", false); + a.agency_id = getStringField("agency_id", false); + a.route_id = getStringField("route_id", false); + a.trip_id = getStringField("trip_id", false); + a.organization_name = getStringField("organization_name", true); + a.is_producer = getIntField("is_producer", false, 0, 1); + a.is_operator = getIntField("is_operator", false, 0, 1); + a.is_authority = getIntField("is_authority", false, 0, 1); + a.attribution_url =
getUrlField("attribution_url", false); + a.attribution_email = getStringField("attribution_email", false); + a.attribution_phone = getStringField("attribution_phone", false); + + // TODO kludge due to not being able to have null keys in mapdb + if (a.attribution_id == null) a.attribution_id = "NONE"; + + feed.attributions.put(a.attribution_id, a); + } + } + + public static class Writer extends Entity.Writer { + public Writer (GTFSFeed feed) { + super(feed, "attributions"); + } + + @Override + protected void writeHeaders() throws IOException { + writer.writeRecord(new String[] {"attribution_id", "agency_id", "route_id", "trip_id", "organization_name", + "is_producer", "is_operator", "is_authority", "attribution_url", "attribution_email", "attribution_phone"}); + } + + @Override + protected void writeOneRow(Attribution a) throws IOException { + writeStringField(a.attribution_id); + writeStringField(a.agency_id); + writeStringField(a.route_id); + writeStringField(a.trip_id); + writeStringField(a.organization_name); + writeIntField(a.is_producer); + writeIntField(a.is_operator); + writeIntField(a.is_authority); + writeUrlField(a.attribution_url); + writeStringField(a.attribution_email); + writeStringField(a.attribution_phone); + endRecord(); + } + + @Override + protected Iterator iterator() { + return feed.attributions.values().iterator(); + } + } + + +} diff --git a/src/main/java/com/conveyal/gtfs/model/Stop.java b/src/main/java/com/conveyal/gtfs/model/Stop.java index 25a3ce82a..24923380e 100644 --- a/src/main/java/com/conveyal/gtfs/model/Stop.java +++ b/src/main/java/com/conveyal/gtfs/model/Stop.java @@ -22,9 +22,9 @@ public class Stop extends Entity { public int location_type; public String parent_station; public String stop_timezone; - // TODO should be int - public String wheelchair_boarding; + public int wheelchair_boarding; public String feed_id; + public String platform_code; @Override public String getId () { @@ -39,12 +39,6 @@ public String getId () { public
void setStatementParameters(PreparedStatement statement, boolean setDefaultId) throws SQLException { int oneBasedIndex = 1; if (!setDefaultId) statement.setInt(oneBasedIndex++, id); - int wheelchairBoarding = 0; - try { - wheelchairBoarding = Integer.parseInt(wheelchair_boarding); - } catch (NumberFormatException e) { - // Do nothing, wheelchairBoarding will remain zero. - } statement.setString(oneBasedIndex++, stop_id); statement.setString(oneBasedIndex++, stop_code); statement.setString(oneBasedIndex++, stop_name); @@ -56,8 +50,8 @@ public void setStatementParameters(PreparedStatement statement, boolean setDefau setIntParameter(statement, oneBasedIndex++, location_type); statement.setString(oneBasedIndex++, parent_station); statement.setString(oneBasedIndex++, stop_timezone); - // FIXME: For some reason wheelchair boarding type is String - setIntParameter(statement, oneBasedIndex++, wheelchairBoarding); + setIntParameter(statement, oneBasedIndex++, wheelchair_boarding); + statement.setString(oneBasedIndex++, platform_code); } public static class Loader extends Entity.Loader { @@ -86,9 +80,10 @@ public void loadOneRow() throws IOException { s.location_type = getIntField("location_type", false, 0, 1); s.parent_station = getStringField("parent_station", false); s.stop_timezone = getStringField("stop_timezone", false); - s.wheelchair_boarding = getStringField("wheelchair_boarding", false); + s.wheelchair_boarding = getIntField("wheelchair_boarding", false, 0, 2); s.feed = feed; s.feed_id = feed.feedId; + s.platform_code = getStringField("platform_code", false); /* TODO check ref integrity later, this table self-references via parent_station */ // Attempting to put a null key or value will cause an NPE in BTreeMap if (s.stop_id != null) feed.stops.put(s.stop_id, s); @@ -104,7 +99,7 @@ public Writer (GTFSFeed feed) { @Override public void writeHeaders() throws IOException { writer.writeRecord(new String[] {"stop_id", "stop_code", "stop_name", "stop_desc", "stop_lat", 
"stop_lon", "zone_id", - "stop_url", "location_type", "parent_station", "stop_timezone", "wheelchair_boarding"}); + "stop_url", "location_type", "parent_station", "stop_timezone", "wheelchair_boarding", "platform_code"}); } @Override @@ -120,7 +115,8 @@ public void writeOneRow(Stop s) throws IOException { writeIntField(s.location_type); writeStringField(s.parent_station); writeStringField(s.stop_timezone); - writeStringField(s.wheelchair_boarding); + writeIntField(s.wheelchair_boarding); + writeStringField(s.platform_code); endRecord(); } diff --git a/src/main/java/com/conveyal/gtfs/model/Translation.java b/src/main/java/com/conveyal/gtfs/model/Translation.java new file mode 100644 index 000000000..13d9ad981 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/model/Translation.java @@ -0,0 +1,107 @@ +package com.conveyal.gtfs.model; + +import com.conveyal.gtfs.GTFSFeed; + +import java.io.IOException; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Iterator; + +public class Translation extends Entity { + + public String table_name; + public String field_name; + public String language; + public String translation; + public String record_id; + public String record_sub_id; + public String field_value; + + @Override + public String getId() { + return createId(table_name, field_name, language); + } + + /** + * Sets the parameters for a prepared statement following the parameter order defined in + * {@link com.conveyal.gtfs.loader.Table#TRANSLATIONS}. JDBC prepared statement parameters use a one-based index. 
+ */ + @Override + public void setStatementParameters(PreparedStatement statement, boolean setDefaultId) throws SQLException { + int oneBasedIndex = 1; + if (!setDefaultId) statement.setInt(oneBasedIndex++, id); + statement.setString(oneBasedIndex++, table_name); + statement.setString(oneBasedIndex++, field_name); + statement.setString(oneBasedIndex++, language); + statement.setString(oneBasedIndex++, translation); + statement.setString(oneBasedIndex++, record_id); + statement.setString(oneBasedIndex++, record_sub_id); + statement.setString(oneBasedIndex++, field_value); + } + + public static class Loader extends Entity.Loader { + + public Loader(GTFSFeed feed) { + super(feed, "translations"); + } + + @Override + protected boolean isRequired() { + return false; + } + + @Override + public void loadOneRow() throws IOException { + Translation t = new Translation(); + t.id = row + 1; // offset line number by 1 to account for 0-based row index + t.table_name = getStringField("table_name", true); + t.field_name = getStringField("field_name", true); + t.language = getStringField("language", true); + t.translation = getStringField("translation", true); + t.record_id = getStringField("record_id", false); + t.record_sub_id = getStringField("record_sub_id", false); + t.field_value = getStringField("field_value", false); + feed.translations.put( + createId(t.table_name, t.field_name, t.language), + t + ); + } + } + + public static class Writer extends Entity.Writer { + public Writer (GTFSFeed feed) { + super(feed, "translations"); + } + + @Override + protected void writeHeaders() throws IOException { + writer.writeRecord(new String[] {"table_name", "field_name", "language", "translation", "record_id", + "record_sub_id", "field_value"}); + } + + @Override + protected void writeOneRow(Translation t) throws IOException { + writeStringField(t.table_name); + writeStringField(t.field_name); + writeStringField(t.language); + writeStringField(t.translation); +
writeStringField(t.record_id); + writeStringField(t.record_sub_id); + writeStringField(t.field_value); + endRecord(); + } + + @Override + protected Iterator iterator() { + return feed.translations.values().iterator(); + } + } + + /** + * Translation entries have no ID in GTFS so we define one based on the fields in the translation entry. + */ + private static String createId(String table_name, String field_name, String language) { + return String.format("%s_%s_%s", table_name, field_name, language); + } + +} diff --git a/src/test/java/com/conveyal/gtfs/dto/FeedInfoDTO.java b/src/test/java/com/conveyal/gtfs/dto/FeedInfoDTO.java index fb3feac2b..52ede95c3 100644 --- a/src/test/java/com/conveyal/gtfs/dto/FeedInfoDTO.java +++ b/src/test/java/com/conveyal/gtfs/dto/FeedInfoDTO.java @@ -14,4 +14,7 @@ public class FeedInfoDTO { public String feed_version; public String default_route_color; public String default_route_type; + public String default_lang; + public String feed_contact_email; + public String feed_contact_url; } diff --git a/src/test/java/com/conveyal/gtfs/dto/StopDTO.java b/src/test/java/com/conveyal/gtfs/dto/StopDTO.java index 21ec9e4fa..66a333863 100644 --- a/src/test/java/com/conveyal/gtfs/dto/StopDTO.java +++ b/src/test/java/com/conveyal/gtfs/dto/StopDTO.java @@ -14,4 +14,5 @@ public class StopDTO { public String parent_station; public Integer location_type; public Integer wheelchair_boarding; + public String platform_code; } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 91d813494..29d6dd0d0 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -43,36 +43,39 @@ public static void tearDownClass() { } @Test - public void stopTableMissingConditionallyRequiredStopName() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","2", 
"4957","stop_name is conditionally required when location_type value is between 0 and 2."); - } - - @Test - public void stopTableMissingConditionallyRequiredParentStation() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","5", "1266","parent_station is conditionally required when location_type value is between 2 and 4."); + public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "StopTime","10", "1","First and last stop times are conditionally required to have both an arrival and departure time."); } - @Test - public void stopTableMissingConditionallyRequiredStopLat() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","3", "691","stop_lat is conditionally required when location_type value is between 0 and 2."); + @ParameterizedTest + @MethodSource("createStopTableChecks") + public void stopTableConditionallyRequiredTests(String entityType, String lineNumber, String entityId, String badValue) { + checkFeedHasOneError(CONDITIONALLY_REQUIRED, entityType, lineNumber, entityId, badValue); } - @Test - public void stopTableMissingConditionallyRequiredStopLon() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Stop","4", "692","stop_lon is conditionally required when location_type value is between 0 and 2."); + private static Stream createStopTableChecks() { + return Stream.of( + Arguments.of("Stop", "2", "4957", "stop_name is conditionally required when location_type value is between 0 and 2."), + Arguments.of("Stop", "5", "1266", "parent_station is conditionally required when location_type value is between 2 and 4."), + Arguments.of("Stop", "3", "691", "stop_lat is conditionally required when location_type value is between 0 and 2."), + Arguments.of("Stop", "4", "692", "stop_lon is conditionally required when location_type value is between 0 and 2."), + Arguments.of("FareRule", "3", "1", "zone_id 4 is conditionally required in stops when referenced by contains_id in fare_rules."), + 
Arguments.of("FareRule", "3", "1", "zone_id 3 is conditionally required in stops when referenced by destination_id in fare_rules."), + Arguments.of("FareRule", "3", "1", "zone_id 2 is conditionally required in stops when referenced by origin_id in fare_rules.") + ); } @ParameterizedTest - @MethodSource("createZoneIdDependencies") - public void stopTableMissingConditionallyRequiredZoneId(String entityType, String lineNumber, String entityId, String badValue) { + @MethodSource("createTranslationTableChecks") + public void translationTableConditionallyRequiredTests(String entityType, String lineNumber, String entityId, String badValue) { checkFeedHasOneError(CONDITIONALLY_REQUIRED, entityType, lineNumber, entityId, badValue); } - private static Stream createZoneIdDependencies() { + private static Stream createTranslationTableChecks() { return Stream.of( - Arguments.of("FareRule", "3", "1", "zone_id 4 is conditionally required in stops when referenced by contains_id in fare_rules."), - Arguments.of("FareRule", "3", "1", "zone_id 3 is conditionally required in stops when referenced by destination_id in fare_rules."), - Arguments.of("FareRule", "3", "1", "zone_id 2 is conditionally required in stops when referenced by origin_id in fare_rules.") + Arguments.of("Translation", "2", "stops", "record_id is conditionally required when field_value is empty."), + Arguments.of("Translation", "3", "stops", "field_value is conditionally required when record_id is empty."), + Arguments.of("Translation", "4", "stops", "record_sub_id is conditionally required when record_id is provided and matches stop_times.") ); } @@ -87,10 +90,6 @@ public void tripTableMissingConditionallyRequiredShapeId() { checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Trip","2", "1","shape_id is conditionally required when a trip has continuous behavior defined."); } - @Test - public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, 
"StopTime","10", "1","First and last stop times are conditionally required to have both an arrival and departure time."); - } @Test public void routeTableMissingConditionallyRequiredAgencyId() { diff --git a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java index 660c00798..e57107f5f 100644 --- a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java @@ -183,8 +183,14 @@ public void canPreventSQLInjection() throws IOException, SQLException, InvalidNa feedInfoInput.feed_publisher_name = publisherName; feedInfoInput.feed_publisher_url = "example.com"; feedInfoInput.feed_lang = "en"; + feedInfoInput.feed_start_date = "07052021"; + feedInfoInput.feed_end_date = "09052021"; + feedInfoInput.feed_lang = "en"; feedInfoInput.default_route_color = "1c8edb"; feedInfoInput.default_route_type = "3"; + feedInfoInput.default_lang = "en"; + feedInfoInput.feed_contact_email = "a@b.com"; + feedInfoInput.feed_contact_url = "example.com"; // convert object to json and save it JdbcTableWriter createTableWriter = createTestTableWriter(Table.FEED_INFO); diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/attributions.txt b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/attributions.txt new file mode 100644 index 000000000..42a18d2c5 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/attributions.txt @@ -0,0 +1,2 @@ +attribution_id,agency_id,route_id,trip_id,organization_name,is_producer,is_operator,is_authority,attribution_url,attribution_email,attribution_phone +1,1,,,VTA,1,,,https://www.vta.org,customer.service@vta.org, \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/translations.txt 
b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/translations.txt new file mode 100644 index 000000000..560db0832 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks/translations.txt @@ -0,0 +1,4 @@ +table_name,field_name,language,translation,record_id,record_sub_id,field_value +stops,stop_desc,FR,vers le sud,,, +stops,stop_desc,FR,en direction du nord,,, +stops,stop_desc,FR,en direction du nord,stop_times,, \ No newline at end of file From 367ad4ce4fa765e0e6ceff485b7ab2f2255cbd89 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Mon, 10 May 2021 10:13:49 +0100 Subject: [PATCH 20/45] refactor(ConditionallyRequiredTest.java): Fixed broken unit tests --- .../loader/ConditionallyRequiredTest.java | 48 +++++++------------ 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 77d0f979d..8b94bfa19 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -18,6 +18,7 @@ import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; public class ConditionallyRequiredTest { private static String testDBName; @@ -65,21 +66,21 @@ public void stopTableMissingConditionallyRequiredStopLon() { @ParameterizedTest @MethodSource("createZoneIdDependencies") public void stopTableMissingConditionallyRequiredZoneId(String entityType, String lineNumber, String entityId, String badValue) { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, entityType, lineNumber, entityId, badValue); + 
checkFeedHasOneError(REFERENTIAL_INTEGRITY, entityType, lineNumber, entityId, badValue); } private static Stream createZoneIdDependencies() { return Stream.of( - Arguments.of("FareRule", "3", "1", "zone_id 4 is conditionally required in stops when referenced by contains_id in fare_rules."), - Arguments.of("FareRule", "3", "1", "zone_id 3 is conditionally required in stops when referenced by destination_id in fare_rules."), - Arguments.of("FareRule", "3", "1", "zone_id 2 is conditionally required in stops when referenced by origin_id in fare_rules.") + Arguments.of("FareRule", "3", "1", "contains_id:zone_id:4"), + Arguments.of("FareRule", "3", "1", "destination_id:zone_id:3"), + Arguments.of("FareRule", "3", "1", "origin_id:zone_id:2") ); } @Test public void agencyTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Agency","3", "agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Agency","2", null, "agency_id"); } @Test @@ -94,42 +95,27 @@ public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21","agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21", null); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "FareAttribute","2", "1","agency_id is conditionally required when there is more than one agency."); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "FareAttribute","2", "1", null); } /** * Check that the test feed has exactly one error for the provided values. 
*/ private void checkFeedHasOneError(NewGTFSErrorType errorType, String entityType, String lineNumber, String entityId, String badValue) { - assertThatSqlCountQueryYieldsExpectedCount( - testDataSource, - String.format("select count(*) from %s.errors where error_type = '%s' and entity_type = '%s' and line_number = '%s' and entity_id = '%s' and bad_value = '%s'", - testNamespace, - errorType, - entityType, - lineNumber, - entityId, - badValue), - 1); - } + String sql = String.format("select count(*) from %s.errors where error_type = '%s' and entity_type = '%s' and line_number = '%s'", + testNamespace, + errorType, + entityType, + lineNumber); - /** - * Check that the test feed has exactly one error for the provided values. - */ - private void checkFeedHasOneError(NewGTFSErrorType errorType, String entityType, String lineNumber, String badValue) { - assertThatSqlCountQueryYieldsExpectedCount( - testDataSource, - String.format("select count(*) from %s.errors where error_type = '%s' and entity_type = '%s' and line_number = '%s' and bad_value = '%s'", - testNamespace, - errorType, - entityType, - lineNumber, - badValue), - 1); + if (entityId != null) sql += String.format(" and entity_id = '%s'", entityId); + if (badValue != null) sql += String.format(" and bad_value = '%s'", badValue); + + assertThatSqlCountQueryYieldsExpectedCount(testDataSource, sql,1); } } From aaf98be268d7ccb6fe18bb06e31c7fd04b60547e Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Mon, 10 May 2021 11:29:00 +0100 Subject: [PATCH 21/45] refactor(Updated param names and fixed merge issues): --- .../gtfs/loader/ConditionalRequirement.java | 89 ++++++++----------- .../gtfs/loader/ReferenceTracker.java | 20 +---- .../loader/ConditionallyRequiredTest.java | 21 ++--- 3 files changed, 47 insertions(+), 83 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 48ddb1682..fbf102b02 100644 --- 
a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -34,17 +34,16 @@ public class ConditionalRequirement { /** The name of the dependent field, which is a field that requires a specific value if the reference and * (in some cases) dependent field checks meet certain conditions.*/ public String dependentFieldName; - /** The expected conditional field value. */ - public String conditionalFieldValue; + /** The expected dependent field value. */ + public String dependentFieldValue; public ConditionalRequirement( int minReferenceValue, int maxReferenceValue, String dependentFieldName, ConditionalCheckType dependentFieldCheck, - ConditionalCheckType referenceFieldCheck - String conditionalFieldValue, - ConditionalCheckType referenceCheck + ConditionalCheckType referenceFieldCheck, + String dependentFieldValue ) { this.minReferenceValue = minReferenceValue; @@ -52,35 +51,25 @@ public ConditionalRequirement( this.dependentFieldName = dependentFieldName; this.dependentFieldCheck = dependentFieldCheck; this.referenceFieldCheck = referenceFieldCheck; + this.dependentFieldValue = dependentFieldValue; } public ConditionalRequirement( String dependentFieldName, ConditionalCheckType referenceFieldCheck ) { - this(0,0, dependentFieldName, null, referenceFieldCheck); - this.conditionalFieldName = conditionalFieldName; - this.conditionalFieldValue = conditionalFieldValue; - this.conditionalCheck = conditionalCheck; - this.referenceCheck = referenceCheck; + this(0,0, dependentFieldName, null, referenceFieldCheck, null); } public ConditionalRequirement( int minReferenceValue, int maxReferenceValue, - String conditionalFieldName, - ConditionalCheckType conditionalCheck, - ConditionalCheckType referenceCheck - - ) { - this(minReferenceValue,maxReferenceValue, conditionalFieldName, null, conditionalCheck, referenceCheck); - } + String dependentFieldName, + ConditionalCheckType 
dependentFieldCheck, + ConditionalCheckType referenceFieldCheck - public ConditionalRequirement( - String conditionalFieldName, - ConditionalCheckType referenceCheck ) { - this(0,0, conditionalFieldName, null, null, referenceCheck); + this(minReferenceValue,maxReferenceValue, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null); } public ConditionalRequirement( @@ -88,15 +77,15 @@ public ConditionalRequirement( ConditionalCheckType dependentFieldCheck, ConditionalCheckType referenceFieldCheck ) { - this(0,0, dependentFieldName, null, dependentFieldCheck, referenceFieldCheck); + this(0,0, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null); } public ConditionalRequirement( - String conditionalFieldName, - String conditionalFieldValue, - ConditionalCheckType referenceCheck + String dependentFieldName, + String dependentFieldValue, + ConditionalCheckType referenceFieldCheck ) { - this(0,0, conditionalFieldName, conditionalFieldValue, null, referenceCheck); + this(0,0, dependentFieldName, null, referenceFieldCheck, dependentFieldValue); } @@ -248,34 +237,31 @@ public static Set checkForeignRefExists( } /** - * Check the conditional field value, if it is empty the reference field value must be provided. + * Check the dependent field value, if it is empty the reference field value must be provided. 
*/ public static Set checkFieldIsEmpty( - Table table, - int lineNumber, + LineContext lineContext, Field referenceField, - ConditionalRequirement check, - String referenceFieldValue, - String conditionalFieldValue, - String entityId + ConditionalRequirement check ) { Set errors = new HashSet<>(); + String dependentFieldValue = lineContext.getValueForRow(check.dependentFieldName); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); if ( - POSTGRES_NULL_TEXT.equals(conditionalFieldValue) && + POSTGRES_NULL_TEXT.equals(dependentFieldValue) && POSTGRES_NULL_TEXT.equals(referenceFieldValue) ) { - // The reference field is required when the conditional field is empty. + // The reference field is required when the dependent field is empty. String message = String.format( "%s is conditionally required when %s is empty.", referenceField.name, - check.conditionalFieldName + check.dependentFieldName ); errors.add( NewGTFSError.forLine( - table, - lineNumber, + lineContext, CONDITIONALLY_REQUIRED, - message).setEntityId(entityId) + message).setEntityId(lineContext.getEntityId()) ); } @@ -283,35 +269,32 @@ public static Set checkFieldIsEmpty( } /** - * Check the conditional field value is not empty and matches the expected value. + * Check the dependent field value is not empty and matches the expected value. 
*/ public static Set checkFieldNotEmptyAndMatchesValue( - Table table, - int lineNumber, + LineContext lineContext, Field referenceField, - ConditionalRequirement check, - String referenceFieldValue, - String conditionalFieldValue, - String entityId + ConditionalRequirement check ) { Set errors = new HashSet<>(); + String dependentFieldValue = lineContext.getValueForRow(check.dependentFieldName); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); if ( - !POSTGRES_NULL_TEXT.equals(conditionalFieldValue) && - conditionalFieldValue.equals(check.conditionalFieldValue) && + !POSTGRES_NULL_TEXT.equals(dependentFieldValue) && + dependentFieldValue.equals(check.dependentFieldValue) && POSTGRES_NULL_TEXT.equals(referenceFieldValue) ) { String message = String.format( "%s is conditionally required when %s is provided and matches %s.", referenceField.name, - check.conditionalFieldName, - check.conditionalFieldValue + check.dependentFieldName, + check.dependentFieldValue ); errors.add( NewGTFSError.forLine( - table, - lineNumber, + lineContext, CONDITIONALLY_REQUIRED, - message).setEntityId(entityId) + message).setEntityId(lineContext.getEntityId()) ); } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index 0e0cc660f..bce255a5f 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -191,28 +191,12 @@ public Set checkConditionallyRequiredFields(LineContext lineContex break; case FIELD_IS_EMPTY: errors.addAll( - ConditionalRequirement.checkFieldIsEmpty( - table, - lineNumber, - referenceField, - check, - referenceFieldValue, - conditionalFieldValue, - entityId - ) + ConditionalRequirement.checkFieldIsEmpty(lineContext, referenceField, check) ); break; case FIELD_NOT_EMPTY_AND_MATCHES_VALUE: errors.addAll( - ConditionalRequirement.checkFieldNotEmptyAndMatchesValue( - table, - 
lineNumber, - referenceField, - check, - referenceFieldValue, - conditionalFieldValue, - entityId - ) + ConditionalRequirement.checkFieldNotEmptyAndMatchesValue(lineContext, referenceField, check) ); break; } diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 36814c394..8118b81e9 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -50,20 +50,19 @@ public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { @ParameterizedTest @MethodSource("createStopTableChecks") - public void stopTableConditionallyRequiredTests(String entityType, String lineNumber, String entityId, String badValue) { - // TODO: REFERENTIAL_INTEGRITY for last three tests. - checkFeedHasOneError(CONDITIONALLY_REQUIRED, entityType, lineNumber, entityId, badValue); + public void stopTableConditionallyRequiredTests(NewGTFSErrorType errorType, String entityType, String lineNumber, String entityId, String badValue) { + checkFeedHasOneError(errorType, entityType, lineNumber, entityId, badValue); } private static Stream createStopTableChecks() { return Stream.of( - Arguments.of("Stop", "2", "4957", "stop_name is conditionally required when location_type value is between 0 and 2."), - Arguments.of("Stop", "5", "1266", "parent_station is conditionally required when location_type value is between 2 and 4."), - Arguments.of("Stop", "3", "691", "stop_lat is conditionally required when location_type value is between 0 and 2."), - Arguments.of("Stop", "4", "692", "stop_lon is conditionally required when location_type value is between 0 and 2."), - Arguments.of("FareRule", "3", "1", "contains_id:zone_id:4"), - Arguments.of("FareRule", "3", "1", "destination_id:zone_id:3"), - Arguments.of("FareRule", "3", "1", "origin_id:zone_id:2") + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", 
"2", "4957", "stop_name is conditionally required when location_type value is between 0 and 2."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "5", "1266", "parent_station is conditionally required when location_type value is between 2 and 4."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "3", "691", "stop_lat is conditionally required when location_type value is between 0 and 2."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "4", "692", "stop_lon is conditionally required when location_type value is between 0 and 2."), + Arguments.of(REFERENTIAL_INTEGRITY, "FareRule", "3", "1", "contains_id:zone_id:4"), + Arguments.of(REFERENTIAL_INTEGRITY, "FareRule", "3", "1", "destination_id:zone_id:3"), + Arguments.of(REFERENTIAL_INTEGRITY, "FareRule", "3", "1", "origin_id:zone_id:2") ); } @@ -81,7 +80,6 @@ private static Stream createTranslationTableChecks() { ); } - @Test public void agencyTableMissingConditionallyRequiredAgencyId() { checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Agency","2", null, "agency_id"); @@ -92,7 +90,6 @@ public void tripTableMissingConditionallyRequiredShapeId() { checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Trip","2", "1","shape_id is conditionally required when a trip has continuous behavior defined."); } - @Test public void routeTableMissingConditionallyRequiredAgencyId() { checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21", null); From d194afa6e9b33374205bb9be77a57caa3d49b146 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Mon, 10 May 2021 16:05:49 +0100 Subject: [PATCH 22/45] refactor(FeedInfo.java): Added new optional fields --- .../java/com/conveyal/gtfs/model/FeedInfo.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/model/FeedInfo.java b/src/main/java/com/conveyal/gtfs/model/FeedInfo.java index 42fc61cbf..1cc94008d 100644 --- a/src/main/java/com/conveyal/gtfs/model/FeedInfo.java +++ 
b/src/main/java/com/conveyal/gtfs/model/FeedInfo.java @@ -23,6 +23,9 @@ public class FeedInfo extends Entity implements Cloneable { public LocalDate feed_start_date; public LocalDate feed_end_date; public String feed_version; + public String default_lang; + public String feed_contact_email; + public URL feed_contact_url; public FeedInfo clone () { try { @@ -50,6 +53,11 @@ public void setStatementParameters(PreparedStatement statement, boolean setDefau feedStartDateField.setParameter(statement, oneBasedIndex++, feed_start_date); feedEndDateField.setParameter(statement, oneBasedIndex++, feed_end_date); statement.setString(oneBasedIndex++, feed_version); + statement.setString(oneBasedIndex++, default_lang); + statement.setString(oneBasedIndex++, feed_contact_email); + String feedContactUrl = feed_contact_url != null ? feed_contact_url.toString() : null; + statement.setString(oneBasedIndex++, feedContactUrl); + } public static class Loader extends Entity.Loader { @@ -74,6 +82,9 @@ public void loadOneRow() throws IOException { fi.feed_start_date = getDateField("feed_start_date", false); fi.feed_end_date = getDateField("feed_end_date", false); fi.feed_version = getStringField("feed_version", false); + fi.default_lang = getStringField("default_lang", false); + fi.feed_contact_email = getStringField("feed_contact_email", false); + fi.feed_contact_url = getUrlField("feed_contact_url", false); fi.feed = feed; if (feed.feedInfo.isEmpty()) { feed.feedInfo.put("NONE", fi); @@ -93,7 +104,7 @@ public Writer(GTFSFeed feed) { @Override public void writeHeaders() throws IOException { writer.writeRecord(new String[] {"feed_id", "feed_publisher_name", "feed_publisher_url", "feed_lang", - "feed_start_date", "feed_end_date", "feed_version"}); + "feed_start_date", "feed_end_date", "feed_version", "default_lang", "feed_contact_email", "feed_contact_url"}); } @Override @@ -110,6 +121,9 @@ public void writeOneRow(FeedInfo i) throws IOException { else writeStringField(""); 
writeStringField(i.feed_version); + writeStringField(i.default_lang); + writeStringField(i.feed_contact_email); + writeUrlField(i.feed_contact_url); endRecord(); } From 8d7ce2545ba48d3a6296528aa4d67d0fecda94b2 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Tue, 11 May 2021 11:45:28 +0100 Subject: [PATCH 23/45] refactor(Addressed PR feedback): Addressed PR feedback --- .../com/conveyal/gtfs/error/NewGTFSError.java | 2 +- .../conveyal/gtfs/error/NewGTFSErrorType.java | 2 +- .../gtfs/loader/ConditionalRequirement.java | 53 +++++++++++-------- .../java/com/conveyal/gtfs/loader/Field.java | 8 +-- .../com/conveyal/gtfs/loader/LineContext.java | 13 +++-- .../gtfs/loader/ReferenceTracker.java | 12 +++-- .../java/com/conveyal/gtfs/loader/Table.java | 6 +-- .../gtfs/validator/NewTripTimesValidator.java | 4 +- .../loader/ConditionallyRequiredTest.java | 26 ++++----- 9 files changed, 72 insertions(+), 54 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java index c945b397c..98e61895f 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java @@ -84,7 +84,7 @@ public static NewGTFSError forLine (Table table, int lineNumber, NewGTFSErrorTyp } // Factory Builder for cases where an entity has not yet been constructed, but we know the line number. 
- public static NewGTFSError forLine (LineContext lineContext, NewGTFSErrorType errorType, String badValue) { + public static NewGTFSError forLine(LineContext lineContext, NewGTFSErrorType errorType, String badValue) { NewGTFSError error = new NewGTFSError(lineContext.table.getEntityClass(), errorType); error.lineNumber = lineContext.lineNumber; error.badValue = badValue; diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 49f44fa3f..04e393d49 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -8,7 +8,6 @@ */ public enum NewGTFSErrorType { // Standard errors. - AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), @@ -26,6 +25,7 @@ public enum NewGTFSErrorType { FEED_TRAVEL_TIMES_ROUNDED(Priority.LOW, "All travel times in the feed are rounded to the minute, which may cause unexpected results in routing applications where travel times are zero."), FLOATING_FORMAT(Priority.MEDIUM, "Incorrect floating point number format."), FREQUENCY_PERIOD_OVERLAP(Priority.MEDIUM, "A frequency for a trip overlaps with another frequency defined for the same trip."), + ID_REQUIRED_FOR_MULTI_FEEDS(Priority.HIGH, "For GTFS feeds with more than one row, the id is required."), ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."), INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."), LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."), diff --git 
a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index fbf102b02..30e248daa 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -7,8 +7,8 @@ import java.util.NavigableSet; import java.util.Set; -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.error.NewGTFSErrorType.ID_REQUIRED_FOR_MULTI_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; import static com.conveyal.gtfs.loader.ConditionalCheckType.HAS_MULTIPLE_ROWS; @@ -23,7 +23,7 @@ public class ConditionalRequirement { private static final int FIRST_ROW = 2; private static final int SECOND_ROW = 3; /** The type of check to be performed on a reference field. A reference field value is used to determine which check - * (e.g., {@link #checkHasMultipleRows}) should be applied to the field. */ + * (e.g., {@link #checkAgencyHasMultipleRows}) should be applied to the field. */ public ConditionalCheckType referenceFieldCheck; /** The minimum reference field value if a range check is being performed. */ public int minReferenceValue; @@ -36,6 +36,8 @@ public class ConditionalRequirement { public String dependentFieldName; /** The expected dependent field value. */ public String dependentFieldValue; + /** The reference table name required for checking foreign references. 
*/ + String referenceTableName; public ConditionalRequirement( int minReferenceValue, @@ -43,8 +45,8 @@ public ConditionalRequirement( String dependentFieldName, ConditionalCheckType dependentFieldCheck, ConditionalCheckType referenceFieldCheck, - String dependentFieldValue - + String dependentFieldValue, + String referenceTableName ) { this.minReferenceValue = minReferenceValue; this.maxReferenceValue = maxReferenceValue; @@ -52,13 +54,22 @@ public ConditionalRequirement( this.dependentFieldCheck = dependentFieldCheck; this.referenceFieldCheck = referenceFieldCheck; this.dependentFieldValue = dependentFieldValue; + this.referenceTableName = referenceTableName; } public ConditionalRequirement( String dependentFieldName, ConditionalCheckType referenceFieldCheck ) { - this(0,0, dependentFieldName, null, referenceFieldCheck, null); + this(0, 0, dependentFieldName, null, referenceFieldCheck, null, null); + } + + public ConditionalRequirement( + String dependentFieldName, + ConditionalCheckType referenceFieldCheck, + String referenceTableName + ) { + this(0, 0, dependentFieldName, null, referenceFieldCheck, null, referenceTableName); } public ConditionalRequirement( @@ -69,7 +80,7 @@ public ConditionalRequirement( ConditionalCheckType referenceFieldCheck ) { - this(minReferenceValue,maxReferenceValue, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null); + this(minReferenceValue, maxReferenceValue, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null, null); } public ConditionalRequirement( @@ -77,7 +88,7 @@ public ConditionalRequirement( ConditionalCheckType dependentFieldCheck, ConditionalCheckType referenceFieldCheck ) { - this(0,0, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null); + this(0, 0, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null, null); } public ConditionalRequirement( @@ -85,14 +96,14 @@ public ConditionalRequirement( String dependentFieldValue, ConditionalCheckType referenceFieldCheck ) 
{ - this(0,0, dependentFieldName, null, referenceFieldCheck, dependentFieldValue); + this(0, 0, dependentFieldName, null, referenceFieldCheck, dependentFieldValue, null); } /** * Flag an error if there are multiple rows (designed for agency.txt) and the agency_id is missing for any rows. */ - public static Set checkHasMultipleRows( + public static Set checkAgencyHasMultipleRows( LineContext lineContext, TreeMultimap uniqueValuesForFields, ConditionalRequirement check @@ -106,7 +117,7 @@ public static Set checkHasMultipleRows( boolean secondRowMissingId = firstOrSecondMissingId && currentRowMissingId; if (firstOrSecondMissingId || (lineContext.lineNumber > SECOND_ROW && currentRowMissingId)) { // The check on the agency table is carried out whilst the agency table is being loaded so it - // is possible to compare the number of transitIds added against the number of rows loaded to + // is possible to compare the number of agencyIdValues added against the number of rows loaded to // accurately determine missing agency_id values. int lineNumber = secondRowMissingId ? SECOND_ROW @@ -117,7 +128,7 @@ public static Set checkHasMultipleRows( NewGTFSError.forLine( lineContext.table, lineNumber, - AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + ID_REQUIRED_FOR_MULTI_FEEDS, check.dependentFieldName ) ); @@ -129,7 +140,7 @@ public static Set checkHasMultipleRows( * Checks that the reference field is not empty when the dependent field/table has multiple rows. This is * principally designed for checking that routes#agency_id is filled when multiple agencies exist. 
*/ - public static Set checkFieldEmpty( + public static Set checkWhetherReferenceFieldShouldBeEmpty( LineContext lineContext, Field referenceField, TreeMultimap uniqueValuesForFields, @@ -146,7 +157,7 @@ public static Set checkFieldEmpty( errors.add( NewGTFSError.forLine( lineContext, - AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + ID_REQUIRED_FOR_MULTI_FEEDS, null ).setEntityId(lineContext.getEntityId()) ); @@ -184,7 +195,7 @@ public static Set checkFieldInRange( if (conditionallyRequiredValueIsEmpty) { // Reference value in range and conditionally required field is empty. String message = String.format( - "%s is conditionally required when %s value is between %d and %d.", + "%s is required when %s value is between %d and %d.", check.dependentFieldName, referenceField.name, check.minReferenceValue, @@ -203,7 +214,8 @@ public static Set checkFieldInRange( /** * Check that an expected foreign field value matches a conditional field value. Selected foreign field values are * added to {@link ReferenceTracker#uniqueValuesForFields} as part of the load process and are used here to check - * conditional fields which have a dependency on them. + * conditional fields which have a dependency on them. e.g. stop#zone_id does not exist in stops table, but is + * required by fare_rules records (e.g. origin_id). */ public static Set checkForeignRefExists( LineContext lineContext, @@ -220,11 +232,10 @@ public static Set checkForeignRefExists( check.dependentFieldName, referenceFieldValue ); - if (lineContext.table.name.equals("fare_rules") && + if (lineContext.table.name.equals(check.referenceTableName) && !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && !uniqueValuesForFields.get(check.dependentFieldName).contains(foreignFieldReference) ) { - // stop#zone_id does not exist in stops table, but is required by fare_rules records (e.g., origin_id). 
errors.add( NewGTFSError.forLine( lineContext, @@ -253,7 +264,7 @@ public static Set checkFieldIsEmpty( ) { // The reference field is required when the dependent field is empty. String message = String.format( - "%s is conditionally required when %s is empty.", + "%s is required when %s is empty.", referenceField.name, check.dependentFieldName ); @@ -285,10 +296,10 @@ public static Set checkFieldNotEmptyAndMatchesValue( POSTGRES_NULL_TEXT.equals(referenceFieldValue) ) { String message = String.format( - "%s is conditionally required when %s is provided and matches %s.", + "%s is required and must match %s when %s is provided.", referenceField.name, - check.dependentFieldName, - check.dependentFieldValue + check.dependentFieldValue, + check.dependentFieldName ); errors.add( NewGTFSError.forLine( diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index 08b257828..e2c85e444 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -141,7 +141,7 @@ public boolean isForeignReference () { } /** - * Fluent method to flag field as having foreign references. If flagged, the field value is added to + * Fluent method to mark a field as having foreign references. If flagged, the field value is added to * {@link ReferenceTracker#uniqueValuesForFields} to be used as a look-up for reference matches. Note: this is intended only for * special cases (e.g., zone_id) where the field being referenced does not exist as the primary key of a table. */ @@ -174,8 +174,8 @@ public boolean shouldBeIndexed() { } /** - * Fluent method indicates that this field is a reference to an entry in the table provided as an argument (i.e., it - * is a foreign reference). + * Fluent method that indicates that this field is a reference to an entry in the table provided as an argument + * (i.e. it is a foreign reference). 
* @param table * @return this same Field instance */ @@ -211,7 +211,7 @@ public String getColumnExpression(String prefix, boolean csvOutput) { } /** - * Flag this field as conditionally required. If needed an optional list of conditions can be provided. + * Mark this field as conditionally required. If needed an optional list of conditions can be provided. */ public Field requireConditions(ConditionalRequirement...conditions) { this.isConditionallyRequired = true; diff --git a/src/main/java/com/conveyal/gtfs/loader/LineContext.java b/src/main/java/com/conveyal/gtfs/loader/LineContext.java index 80292fc0e..2ef0f1900 100644 --- a/src/main/java/com/conveyal/gtfs/loader/LineContext.java +++ b/src/main/java/com/conveyal/gtfs/loader/LineContext.java @@ -7,13 +7,16 @@ public class LineContext { public final Table table; private final Field[] fields; - private final String[] rowData; + /** + * The row data has one extra value at the beginning of the array that represents the line number. + */ + private final String[] rowDataWithLineNumber; public final int lineNumber; - public LineContext(Table table, Field[] fields, String[] rowData, int lineNumber) { + public LineContext(Table table, Field[] fields, String[] rowDataWithLineNumber, int lineNumber) { this.table = table; this.fields = fields; - this.rowData = rowData; + this.rowDataWithLineNumber = rowDataWithLineNumber; this.lineNumber = lineNumber; } @@ -23,7 +26,7 @@ public LineContext(Table table, Field[] fields, String[] rowData, int lineNumber * for batch insertion into a postgres table. 
*/ public String getValueForRow(int columnIndex) { - return rowData[columnIndex + 1]; + return rowDataWithLineNumber[columnIndex + 1]; } /** @@ -31,7 +34,7 @@ public String getValueForRow(int columnIndex) { */ public String getValueForRow(String fieldName) { int fieldIndex = Field.getFieldIndex(fields, fieldName); - return rowData[fieldIndex + 1]; + return rowDataWithLineNumber[fieldIndex + 1]; } /** diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index bce255a5f..f68ba7b65 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -160,18 +160,22 @@ public Set checkConditionallyRequiredFields(LineContext lineContex // Work through each field that has been assigned a conditional requirement. for (Map.Entry entry : fieldsToCheck.entrySet()) { Field referenceField = entry.getKey(); - ConditionalRequirement[] conditionalRequirements = entry.getValue(); // Work through each field's conditional requirements. 
- for (ConditionalRequirement check : conditionalRequirements) { + for (ConditionalRequirement check : entry.getValue()) { switch(check.referenceFieldCheck) { case HAS_MULTIPLE_ROWS: errors.addAll( - ConditionalRequirement.checkHasMultipleRows(lineContext, uniqueValuesForFields, check) + ConditionalRequirement.checkAgencyHasMultipleRows(lineContext, uniqueValuesForFields, check) ); break; case FIELD_NOT_EMPTY: errors.addAll( - ConditionalRequirement.checkFieldEmpty(lineContext, referenceField, uniqueValuesForFields, check) + ConditionalRequirement.checkWhetherReferenceFieldShouldBeEmpty( + lineContext, + referenceField, + uniqueValuesForFields, + check + ) ); break; case FIELD_IN_RANGE: diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index dcf0aa4f4..60ddda0a3 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -271,15 +271,15 @@ public Table (String name, Class entityClass, Requirement requ new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), new StringField("origin_id", OPTIONAL).requireConditions( // If the origin_id is defined, its value must exist as a zone_id in stops.txt. - new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS) + new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS, "fare_rules") ), new StringField("destination_id", OPTIONAL).requireConditions( // If the destination_id is defined, its value must exist as a zone_id in stops.txt. - new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS) + new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS, "fare_rules") ), new StringField("contains_id", OPTIONAL).requireConditions( // If the contains_id is defined, its value must exist as a zone_id in stops.txt. 
- new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS) + new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS, "fare_rules") ) ) .withParentTable(FARE_ATTRIBUTES) diff --git a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java index 3d80a5a6b..dffdc2c44 100644 --- a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java @@ -118,7 +118,7 @@ private boolean fixInitialFinal (StopTime stopTime) { fixMissingTimes(stopTime); if (missingEitherTime(stopTime)) { //TODO: Is this even needed? Already covered by MISSING_ARRIVAL_OR_DEPARTURE. - registerError(stopTime, CONDITIONALLY_REQUIRED, "First and last stop times are conditionally required to have both an arrival and departure time."); + registerError(stopTime, CONDITIONALLY_REQUIRED, "First and last stop times are required to have both an arrival and departure time."); return true; } } @@ -185,7 +185,7 @@ private void processTrip (List stopTimes) { registerError( trip, CONDITIONALLY_REQUIRED, - "shape_id is conditionally required when a trip has continuous behavior defined." + "shape_id is required when a trip has continuous behavior defined." ); } // Pass these same cleaned lists of stop_times and stops into each trip validator in turn. 
diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 8118b81e9..4932194a5 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -16,8 +16,8 @@ import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.GTFS.validate; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.error.NewGTFSErrorType.ID_REQUIRED_FOR_MULTI_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; public class ConditionallyRequiredTest { @@ -45,7 +45,7 @@ public static void tearDownClass() { @Test public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "StopTime","10", "1","First and last stop times are conditionally required to have both an arrival and departure time."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "StopTime","10", "1","First and last stop times are required to have both an arrival and departure time."); } @ParameterizedTest @@ -56,10 +56,10 @@ public void stopTableConditionallyRequiredTests(NewGTFSErrorType errorType, Stri private static Stream createStopTableChecks() { return Stream.of( - Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "2", "4957", "stop_name is conditionally required when location_type value is between 0 and 2."), - Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "5", "1266", "parent_station is conditionally required when location_type value is between 2 and 4."), - Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "3", "691", "stop_lat is conditionally required when location_type value is between 0 
and 2."), - Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "4", "692", "stop_lon is conditionally required when location_type value is between 0 and 2."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "2", "4957", "stop_name is required when location_type value is between 0 and 2."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "5", "1266", "parent_station is required when location_type value is between 2 and 4."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "3", "691", "stop_lat is required when location_type value is between 0 and 2."), + Arguments.of(CONDITIONALLY_REQUIRED, "Stop", "4", "692", "stop_lon is required when location_type value is between 0 and 2."), Arguments.of(REFERENTIAL_INTEGRITY, "FareRule", "3", "1", "contains_id:zone_id:4"), Arguments.of(REFERENTIAL_INTEGRITY, "FareRule", "3", "1", "destination_id:zone_id:3"), Arguments.of(REFERENTIAL_INTEGRITY, "FareRule", "3", "1", "origin_id:zone_id:2") @@ -74,30 +74,30 @@ public void translationTableConditionallyRequiredTests(String entityType, String private static Stream createTranslationTableChecks() { return Stream.of( - Arguments.of("Translation", "2", "stops", "record_id is conditionally required when field_value is empty."), - Arguments.of("Translation", "3", "stops", "field_value is conditionally required when record_id is empty."), - Arguments.of("Translation", "4", "stops", "record_sub_id is conditionally required when record_id is provided and matches stop_times.") + Arguments.of("Translation", "2", "stops", "record_id is required when field_value is empty."), + Arguments.of("Translation", "3", "stops", "field_value is required when record_id is empty."), + Arguments.of("Translation", "4", "stops", "record_sub_id is required and must match stop_times when record_id is provided.") ); } @Test public void agencyTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Agency","2", null, "agency_id"); + 
checkFeedHasOneError(ID_REQUIRED_FOR_MULTI_FEEDS, "Agency","2", null, "agency_id"); } @Test public void tripTableMissingConditionallyRequiredShapeId() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Trip","2", "1","shape_id is conditionally required when a trip has continuous behavior defined."); + checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Trip","2", "1","shape_id is required when a trip has continuous behavior defined."); } @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21", null); + checkFeedHasOneError(ID_REQUIRED_FOR_MULTI_FEEDS, "Route","2", "21", null); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "FareAttribute","2", "1", null); + checkFeedHasOneError(ID_REQUIRED_FOR_MULTI_FEEDS, "FareAttribute","2", "1", null); } /** From 45ee42c2112518e83b08128d1c2cd31ae553682f Mon Sep 17 00:00:00 2001 From: Evan Siroky Date: Tue, 11 May 2021 12:04:24 -0700 Subject: [PATCH 24/45] ci: remove codecov reporting --- .github/workflows/maven.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 695b467c4..d30bb6689 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -48,10 +48,6 @@ jobs: key: maven-local-repo - name: Build with Maven run: mvn --no-transfer-progress package - - name: Codecov - # this first codecov run will upload a report associated with the commit set through CI environment variables - uses: codecov/codecov-action@v1.2.0 - continue-on-error: true - name: Clear contents of the target directory # Avoids issues where maven-semantic-release attempts to upload # multiple versions/builds (and fails due to the pre-existence of the version on maven central). 
@@ -71,7 +67,3 @@ jobs: GH_TOKEN: ${{ secrets.GH_TOKEN }} run: | semantic-release --prepare @conveyal/maven-semantic-release --publish @semantic-release/github,@conveyal/maven-semantic-release --verify-conditions @semantic-release/github,@conveyal/maven-semantic-release --verify-release @conveyal/maven-semantic-release --use-conveyal-workflow --dev-branch=dev --skip-maven-deploy - if [[ "$GITHUB_REF_SLUG" = "master" ]]; then - bash <(curl -s https://codecov.io/bash) -C "$(git rev-parse HEAD)" - bash <(curl -s https://codecov.io/bash) -C "$(git rev-parse HEAD^)" - fi From 140d45a0945b7cfffd78f49041d1d8a53c176807 Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Wed, 12 May 2021 08:17:13 +0100 Subject: [PATCH 25/45] Update src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java Co-authored-by: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com> --- .../java/com/conveyal/gtfs/loader/ConditionalRequirement.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 30e248daa..10972a806 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -101,7 +101,7 @@ public ConditionalRequirement( /** - * Flag an error if there are multiple rows (designed for agency.txt) and the agency_id is missing for any rows. + * Flag an error if there are multiple rows in agency.txt and the agency_id is missing for any rows. 
*/ public static Set checkAgencyHasMultipleRows( LineContext lineContext, From 051c4696b8eb71dcd644f32e87809443ef846889 Mon Sep 17 00:00:00 2001 From: Rob Beer Date: Wed, 12 May 2021 08:36:44 +0100 Subject: [PATCH 26/45] refactor(Reverted GTFS error type): Restored the more specific agency id feed error type replacing t --- .../java/com/conveyal/gtfs/error/NewGTFSErrorType.java | 2 +- .../com/conveyal/gtfs/loader/ConditionalRequirement.java | 6 +++--- .../java/com/conveyal/gtfs/loader/ReferenceTracker.java | 3 ++- .../conveyal/gtfs/loader/ConditionallyRequiredTest.java | 8 ++++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 04e393d49..80ee7c5e0 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -8,6 +8,7 @@ */ public enum NewGTFSErrorType { // Standard errors. 
+ AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), @@ -25,7 +26,6 @@ public enum NewGTFSErrorType { FEED_TRAVEL_TIMES_ROUNDED(Priority.LOW, "All travel times in the feed are rounded to the minute, which may cause unexpected results in routing applications where travel times are zero."), FLOATING_FORMAT(Priority.MEDIUM, "Incorrect floating point number format."), FREQUENCY_PERIOD_OVERLAP(Priority.MEDIUM, "A frequency for a trip overlaps with another frequency defined for the same trip."), - ID_REQUIRED_FOR_MULTI_FEEDS(Priority.HIGH, "For GTFS feeds with more than one row, the id is required."), ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."), INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."), LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java index 10972a806..8709cd93d 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java @@ -7,8 +7,8 @@ import java.util.NavigableSet; import java.util.Set; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; -import static com.conveyal.gtfs.error.NewGTFSErrorType.ID_REQUIRED_FOR_MULTI_FEEDS; import static 
com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; import static com.conveyal.gtfs.loader.ConditionalCheckType.HAS_MULTIPLE_ROWS; @@ -128,7 +128,7 @@ public static Set checkAgencyHasMultipleRows( NewGTFSError.forLine( lineContext.table, lineNumber, - ID_REQUIRED_FOR_MULTI_FEEDS, + AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, check.dependentFieldName ) ); @@ -157,7 +157,7 @@ public static Set checkWhetherReferenceFieldShouldBeEmpty( errors.add( NewGTFSError.forLine( lineContext, - ID_REQUIRED_FOR_MULTI_FEEDS, + AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, null ).setEntityId(lineContext.getEntityId()) ); diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index f68ba7b65..bfefe1df7 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -160,8 +160,9 @@ public Set checkConditionallyRequiredFields(LineContext lineContex // Work through each field that has been assigned a conditional requirement. for (Map.Entry entry : fieldsToCheck.entrySet()) { Field referenceField = entry.getKey(); + ConditionalRequirement[] conditionalRequirements = entry.getValue(); // Work through each field's conditional requirements. 
- for (ConditionalRequirement check : entry.getValue()) { + for (ConditionalRequirement check : conditionalRequirements) { switch(check.referenceFieldCheck) { case HAS_MULTIPLE_ROWS: errors.addAll( diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 4932194a5..846b7daf0 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -17,8 +17,8 @@ import static com.conveyal.gtfs.GTFS.validate; import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; -import static com.conveyal.gtfs.error.NewGTFSErrorType.ID_REQUIRED_FOR_MULTI_FEEDS; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; public class ConditionallyRequiredTest { private static String testDBName; @@ -82,7 +82,7 @@ private static Stream createTranslationTableChecks() { @Test public void agencyTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(ID_REQUIRED_FOR_MULTI_FEEDS, "Agency","2", null, "agency_id"); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "Agency","2", null, "agency_id"); } @Test @@ -92,12 +92,12 @@ public void tripTableMissingConditionallyRequiredShapeId() { @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(ID_REQUIRED_FOR_MULTI_FEEDS, "Route","2", "21", null); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "Route","2", "21", null); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(ID_REQUIRED_FOR_MULTI_FEEDS, "FareAttribute","2", "1", null); + 
checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "FareAttribute","2", "1", null); } /** From 314f0295507ace10201055f9116f60db9174c54d Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Mon, 17 May 2021 19:17:23 +0100 Subject: [PATCH 27/45] refactor(Addressed PR feedback): PR feedback updates in particular changes related to making Conditi --- .../conveyal/gtfs/error/NewGTFSErrorType.java | 2 +- .../gtfs/loader/ConditionalRequirement.java | 328 ------------------ .../java/com/conveyal/gtfs/loader/Field.java | 1 + .../conveyal/gtfs/loader/JdbcGtfsLoader.java | 4 +- .../gtfs/loader/JdbcGtfsSnapshotter.java | 1 + .../gtfs/loader/ReferenceTracker.java | 52 +-- .../java/com/conveyal/gtfs/loader/Table.java | 49 +-- .../AgencyHasMultipleRowsCheck.java | 66 ++++ .../ConditionalCheckType.java | 2 +- .../conditions/ConditionalRequirement.java | 41 +++ .../loader/conditions/FieldInRangeCheck.java | 92 +++++ .../loader/conditions/FieldIsEmptyCheck.java | 60 ++++ .../FieldNotEmptyAndMatchesValueCheck.java | 62 ++++ .../conditions/ForeignRefExistsCheck.java | 65 ++++ .../ReferenceFieldShouldBeProvidedCheck.java | 60 ++++ .../loader/ConditionallyRequiredTest.java | 8 +- 16 files changed, 491 insertions(+), 402 deletions(-) delete mode 100644 src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java rename src/main/java/com/conveyal/gtfs/loader/{ => conditions}/ConditionalCheckType.java (96%) create mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java create mode 100644 
src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java create mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 80ee7c5e0..49f44fa3f 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -8,7 +8,7 @@ */ public enum NewGTFSErrorType { // Standard errors. - AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS(Priority.HIGH, "For GTFS feeds with more than one agency, agency_id is required."), BOOLEAN_FORMAT(Priority.MEDIUM, "A GTFS boolean field must contain the value 1 or 0."), COLOR_FORMAT(Priority.MEDIUM, "A color should be specified with six-characters (three two-digit hexadecimal numbers)."), COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."), diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java deleted file mode 100644 index 8709cd93d..000000000 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalRequirement.java +++ /dev/null @@ -1,328 +0,0 @@ -package com.conveyal.gtfs.loader; - -import com.conveyal.gtfs.error.NewGTFSError; -import com.google.common.collect.TreeMultimap; - -import java.util.HashSet; -import java.util.NavigableSet; -import java.util.Set; - -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; -import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; -import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; -import static 
com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.HAS_MULTIPLE_ROWS; -import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; - -/** - * These are the requirements that are checked inline with {@link ConditionalCheckType} to determine if the required - * conditions set forth for certain fields in the GTFS spec have been met. These requirements are applied directly to - * their "reference fields" with the help of {@link Field#requireConditions}. - */ -public class ConditionalRequirement { - private static final int FIRST_ROW = 2; - private static final int SECOND_ROW = 3; - /** The type of check to be performed on a reference field. A reference field value is used to determine which check - * (e.g., {@link #checkAgencyHasMultipleRows}) should be applied to the field. */ - public ConditionalCheckType referenceFieldCheck; - /** The minimum reference field value if a range check is being performed. */ - public int minReferenceValue; - /** The maximum reference field value if a range check is being performed. */ - public int maxReferenceValue; - /** The type of check to be performed on the dependent field. */ - public ConditionalCheckType dependentFieldCheck; - /** The name of the dependent field, which is a field that requires a specific value if the reference and - * (in some cases) dependent field checks meet certain conditions.*/ - public String dependentFieldName; - /** The expected dependent field value. */ - public String dependentFieldValue; - /** The reference table name required for checking foreign references. 
*/ - String referenceTableName; - - public ConditionalRequirement( - int minReferenceValue, - int maxReferenceValue, - String dependentFieldName, - ConditionalCheckType dependentFieldCheck, - ConditionalCheckType referenceFieldCheck, - String dependentFieldValue, - String referenceTableName - ) { - this.minReferenceValue = minReferenceValue; - this.maxReferenceValue = maxReferenceValue; - this.dependentFieldName = dependentFieldName; - this.dependentFieldCheck = dependentFieldCheck; - this.referenceFieldCheck = referenceFieldCheck; - this.dependentFieldValue = dependentFieldValue; - this.referenceTableName = referenceTableName; - } - - public ConditionalRequirement( - String dependentFieldName, - ConditionalCheckType referenceFieldCheck - ) { - this(0, 0, dependentFieldName, null, referenceFieldCheck, null, null); - } - - public ConditionalRequirement( - String dependentFieldName, - ConditionalCheckType referenceFieldCheck, - String referenceTableName - ) { - this(0, 0, dependentFieldName, null, referenceFieldCheck, null, referenceTableName); - } - - public ConditionalRequirement( - int minReferenceValue, - int maxReferenceValue, - String dependentFieldName, - ConditionalCheckType dependentFieldCheck, - ConditionalCheckType referenceFieldCheck - - ) { - this(minReferenceValue, maxReferenceValue, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null, null); - } - - public ConditionalRequirement( - String dependentFieldName, - ConditionalCheckType dependentFieldCheck, - ConditionalCheckType referenceFieldCheck - ) { - this(0, 0, dependentFieldName, dependentFieldCheck, referenceFieldCheck, null, null); - } - - public ConditionalRequirement( - String dependentFieldName, - String dependentFieldValue, - ConditionalCheckType referenceFieldCheck - ) { - this(0, 0, dependentFieldName, null, referenceFieldCheck, dependentFieldValue, null); - } - - - /** - * Flag an error if there are multiple rows in agency.txt and the agency_id is missing for any rows. 
- */ - public static Set checkAgencyHasMultipleRows( - LineContext lineContext, - TreeMultimap uniqueValuesForFields, - ConditionalRequirement check - ) { - String dependentFieldValue = lineContext.getValueForRow(check.dependentFieldName); - Set errors = new HashSet<>(); - NavigableSet agencyIdValues = uniqueValuesForFields.get(check.dependentFieldName); - // Do some awkward checks to determine if the first or second row (or another) is missing the agency_id. - boolean firstOrSecondMissingId = lineContext.lineNumber == SECOND_ROW && agencyIdValues.contains(""); - boolean currentRowMissingId = POSTGRES_NULL_TEXT.equals(dependentFieldValue); - boolean secondRowMissingId = firstOrSecondMissingId && currentRowMissingId; - if (firstOrSecondMissingId || (lineContext.lineNumber > SECOND_ROW && currentRowMissingId)) { - // The check on the agency table is carried out whilst the agency table is being loaded so it - // is possible to compare the number of agencyIdValues added against the number of rows loaded to - // accurately determine missing agency_id values. - int lineNumber = secondRowMissingId - ? SECOND_ROW - : firstOrSecondMissingId - ? FIRST_ROW - : lineContext.lineNumber; - errors.add( - NewGTFSError.forLine( - lineContext.table, - lineNumber, - AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, - check.dependentFieldName - ) - ); - } - return errors; - } - - /** - * Checks that the reference field is not empty when the dependent field/table has multiple rows. This is - * principally designed for checking that routes#agency_id is filled when multiple agencies exist. 
- */ - public static Set checkWhetherReferenceFieldShouldBeEmpty( - LineContext lineContext, - Field referenceField, - TreeMultimap uniqueValuesForFields, - ConditionalRequirement check - ) { - String referenceFieldValue = lineContext.getValueForRow(referenceField.name); - Set errors = new HashSet<>(); - int dependentFieldCount = uniqueValuesForFields.get(check.dependentFieldName).size(); - if (check.dependentFieldCheck == HAS_MULTIPLE_ROWS && dependentFieldCount > 1) { - // If there are multiple entries for the dependent field (including empty strings to account for any - // potentially missing values), the reference field must not be empty. - boolean referenceFieldIsEmpty = POSTGRES_NULL_TEXT.equals(referenceFieldValue); - if (referenceFieldIsEmpty) { - errors.add( - NewGTFSError.forLine( - lineContext, - AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, - null - ).setEntityId(lineContext.getEntityId()) - ); - } - } - return errors; - } - - /** - * If the reference field value is within a defined range and the conditional field value has not be defined, flag - * an error. - */ - public static Set checkFieldInRange( - LineContext lineContext, - Field referenceField, - ConditionalRequirement check - ) { - Set errors = new HashSet<>(); - String referenceFieldValue = lineContext.getValueForRow(referenceField.name); - String conditionalFieldValue = lineContext.getValueForRow(check.dependentFieldName); - boolean referenceValueMeetsRangeCondition = - !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && - // TODO use pre-existing method in ShortField? - isValueInRange(referenceFieldValue, check.minReferenceValue, check.maxReferenceValue); - - if (!referenceValueMeetsRangeCondition) { - // If ref value does not meet the range condition, there is no need to check this conditional - // value for (e.g.) an empty value. Continue to the next check. 
- return errors; - } - boolean conditionallyRequiredValueIsEmpty = - check.dependentFieldCheck == FIELD_NOT_EMPTY && - POSTGRES_NULL_TEXT.equals(conditionalFieldValue); - - if (conditionallyRequiredValueIsEmpty) { - // Reference value in range and conditionally required field is empty. - String message = String.format( - "%s is required when %s value is between %d and %d.", - check.dependentFieldName, - referenceField.name, - check.minReferenceValue, - check.maxReferenceValue - ); - errors.add( - NewGTFSError.forLine( - lineContext, - CONDITIONALLY_REQUIRED, - message).setEntityId(lineContext.getEntityId()) - ); - } - return errors; - } - - /** - * Check that an expected foreign field value matches a conditional field value. Selected foreign field values are - * added to {@link ReferenceTracker#uniqueValuesForFields} as part of the load process and are used here to check - * conditional fields which have a dependency on them. e.g. stop#zone_id does not exist in stops table, but is - * required by fare_rules records (e.g. origin_id). - */ - public static Set checkForeignRefExists( - LineContext lineContext, - Field referenceField, - ConditionalRequirement check, - TreeMultimap uniqueValuesForFields - ) { - Set errors = new HashSet<>(); - String referenceFieldValue = lineContext.getValueForRow(referenceField.name); - // Expected reference in foreign field id list. 
- String foreignFieldReference = - String.join( - ":", - check.dependentFieldName, - referenceFieldValue - ); - if (lineContext.table.name.equals(check.referenceTableName) && - !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && - !uniqueValuesForFields.get(check.dependentFieldName).contains(foreignFieldReference) - ) { - errors.add( - NewGTFSError.forLine( - lineContext, - REFERENTIAL_INTEGRITY, - String.join(":", referenceField.name, foreignFieldReference) - ).setEntityId(lineContext.getEntityId()) - ); - } - return errors; - } - - /** - * Check the dependent field value, if it is empty the reference field value must be provided. - */ - public static Set checkFieldIsEmpty( - LineContext lineContext, - Field referenceField, - ConditionalRequirement check - ) { - Set errors = new HashSet<>(); - String dependentFieldValue = lineContext.getValueForRow(check.dependentFieldName); - String referenceFieldValue = lineContext.getValueForRow(referenceField.name); - if ( - POSTGRES_NULL_TEXT.equals(dependentFieldValue) && - POSTGRES_NULL_TEXT.equals(referenceFieldValue) - ) { - // The reference field is required when the dependent field is empty. - String message = String.format( - "%s is required when %s is empty.", - referenceField.name, - check.dependentFieldName - ); - errors.add( - NewGTFSError.forLine( - lineContext, - CONDITIONALLY_REQUIRED, - message).setEntityId(lineContext.getEntityId()) - ); - - } - return errors; - } - - /** - * Check the dependent field value is not empty and matches the expected value. 
- */ - public static Set checkFieldNotEmptyAndMatchesValue( - LineContext lineContext, - Field referenceField, - ConditionalRequirement check - ) { - Set errors = new HashSet<>(); - String dependentFieldValue = lineContext.getValueForRow(check.dependentFieldName); - String referenceFieldValue = lineContext.getValueForRow(referenceField.name); - if ( - !POSTGRES_NULL_TEXT.equals(dependentFieldValue) && - dependentFieldValue.equals(check.dependentFieldValue) && - POSTGRES_NULL_TEXT.equals(referenceFieldValue) - ) { - String message = String.format( - "%s is required and must match %s when %s is provided.", - referenceField.name, - check.dependentFieldValue, - check.dependentFieldName - ); - errors.add( - NewGTFSError.forLine( - lineContext, - CONDITIONALLY_REQUIRED, - message).setEntityId(lineContext.getEntityId()) - ); - - } - return errors; - } - - /** - * Check if the provided value is within the min and max values. If the field value can not be converted - * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. 
- */ - private static boolean isValueInRange(String referenceFieldValue, int min, int max) { - try { - int fieldValue = Integer.parseInt(referenceFieldValue); - return fieldValue >= min && fieldValue <= max; - } catch (NumberFormatException e) { - return false; - } - } - -} diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index e2c85e444..52cda789c 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -2,6 +2,7 @@ import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; +import com.conveyal.gtfs.loader.conditions.ConditionalRequirement; import com.google.common.collect.ImmutableSet; import java.sql.PreparedStatement; diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index 06e83e139..b17a812c0 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -368,7 +368,7 @@ private int loadInternal(Table table) throws Exception { // When outputting text, accumulate transformed strings to allow skipping rows when errors are encountered. // One extra position in the array for the CSV line number. String[] transformedStrings = new String[cleanFields.length + 1]; - boolean tableHasConditions = table.hasConditionalRequirements(); + boolean tableHasConditionalRequirements = table.hasConditionalRequirements(); // Iterate over each record and prepare the record for storage in the table either through batch insert // statements or postgres text copy operation. while (csvReader.readRecord()) { @@ -436,7 +436,7 @@ private int loadInternal(Table table) throws Exception { // Increment column index. 
columnIndex += 1; } - if (tableHasConditions) { + if (tableHasConditionalRequirements) { LineContext lineContext = new LineContext(table, fields, transformedStrings, lineNumber); errorStorage.storeErrors( referenceTracker.checkConditionallyRequiredFields(lineContext) diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java index f734f8d19..3b36a9b8e 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java @@ -113,6 +113,7 @@ public SnapshotResult copyTables() { result.transfers = copy(Table.TRANSFERS, true); result.trips = copy(Table.TRIPS, true); result.attributions = copy(Table.ATTRIBUTIONS, true); + result.translations = copy(Table.TRANSLATIONS, true); result.completionTime = System.currentTimeMillis(); result.loadTimeMillis = result.completionTime - startTime; LOG.info("Copying tables took {} sec", (result.loadTimeMillis) / 1000); diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java index bfefe1df7..acd1178c7 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -1,7 +1,8 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; -import com.google.common.collect.TreeMultimap; +import com.conveyal.gtfs.loader.conditions.ConditionalRequirement; +import com.google.common.collect.HashMultimap; import java.util.Collections; import java.util.HashSet; @@ -23,7 +24,7 @@ */ public class ReferenceTracker { public final Set transitIds = new HashSet<>(); - public final TreeMultimap uniqueValuesForFields = TreeMultimap.create(); + public final HashMultimap uniqueValuesForFields = HashMultimap.create(); public final Set transitIdsWithSequence = new HashSet<>(); /** @@ -162,49 +163,10 @@ public Set 
checkConditionallyRequiredFields(LineContext lineContex Field referenceField = entry.getKey(); ConditionalRequirement[] conditionalRequirements = entry.getValue(); // Work through each field's conditional requirements. - for (ConditionalRequirement check : conditionalRequirements) { - switch(check.referenceFieldCheck) { - case HAS_MULTIPLE_ROWS: - errors.addAll( - ConditionalRequirement.checkAgencyHasMultipleRows(lineContext, uniqueValuesForFields, check) - ); - break; - case FIELD_NOT_EMPTY: - errors.addAll( - ConditionalRequirement.checkWhetherReferenceFieldShouldBeEmpty( - lineContext, - referenceField, - uniqueValuesForFields, - check - ) - ); - break; - case FIELD_IN_RANGE: - errors.addAll( - ConditionalRequirement.checkFieldInRange(lineContext, referenceField, check) - ); - break; - case FOREIGN_REF_EXISTS: - errors.addAll( - ConditionalRequirement.checkForeignRefExists( - lineContext, - referenceField, - check, - uniqueValuesForFields - ) - ); - break; - case FIELD_IS_EMPTY: - errors.addAll( - ConditionalRequirement.checkFieldIsEmpty(lineContext, referenceField, check) - ); - break; - case FIELD_NOT_EMPTY_AND_MATCHES_VALUE: - errors.addAll( - ConditionalRequirement.checkFieldNotEmptyAndMatchesValue(lineContext, referenceField, check) - ); - break; - } + for (ConditionalRequirement conditionalRequirement : conditionalRequirements) { + errors.addAll( + conditionalRequirement.check(lineContext, referenceField, uniqueValuesForFields) + ); } } return errors; diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 60ddda0a3..6662bf3e1 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -2,6 +2,13 @@ import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.SQLErrorStorage; +import com.conveyal.gtfs.loader.conditions.AgencyHasMultipleRowsCheck; +import com.conveyal.gtfs.loader.conditions.ConditionalRequirement; 
+import com.conveyal.gtfs.loader.conditions.FieldInRangeCheck; +import com.conveyal.gtfs.loader.conditions.FieldIsEmptyCheck; +import com.conveyal.gtfs.loader.conditions.FieldNotEmptyAndMatchesValueCheck; +import com.conveyal.gtfs.loader.conditions.ForeignRefExistsCheck; +import com.conveyal.gtfs.loader.conditions.ReferenceFieldShouldBeProvidedCheck; import com.conveyal.gtfs.model.Agency; import com.conveyal.gtfs.model.Attribution; import com.conveyal.gtfs.model.Calendar; @@ -51,12 +58,12 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IN_RANGE; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_IS_EMPTY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FOREIGN_REF_EXISTS; -import static com.conveyal.gtfs.loader.ConditionalCheckType.HAS_MULTIPLE_ROWS; -import static com.conveyal.gtfs.loader.ConditionalCheckType.FIELD_NOT_EMPTY_AND_MATCHES_VALUE; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_IN_RANGE; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_IS_EMPTY; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_NOT_EMPTY; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FOREIGN_REF_EXISTS; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.HAS_MULTIPLE_ROWS; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_NOT_EMPTY_AND_MATCHES_VALUE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -109,7 +116,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", 
OPTIONAL).requireConditions( // If there is more than one agency, the agency_id must be provided // https://developers.google.com/transit/gtfs/reference#agencytxt - new ConditionalRequirement("agency_id", HAS_MULTIPLE_ROWS) + new AgencyHasMultipleRowsCheck("agency_id", HAS_MULTIPLE_ROWS) ).hasForeignReferences(), new StringField("agency_name", REQUIRED), new URLField("agency_url", REQUIRED), @@ -162,7 +169,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).requireConditions( // If there is more than one agency, this agency_id is required. // https://developers.google.com/transit/gtfs/reference#fare_attributestxt - new ConditionalRequirement("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) + new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -172,7 +179,7 @@ public Table (String name, Class entityClass, Requirement requ public static final Table FEED_INFO = new Table("feed_info", FeedInfo.class, OPTIONAL, new StringField("feed_publisher_name", REQUIRED), // feed_id is not the first field because that would label it as the key field, which we do not want because the - // key field cannot be optional. feed_id is not part of the GTFS spec, but is required by OTP. + // key field cannot be optional. feed_id is not part of the GTFS spec, but is useful to OTP. new StringField("feed_id", OPTIONAL), new URLField("feed_publisher_url", REQUIRED), new LanguageField("feed_lang", REQUIRED), @@ -193,7 +200,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( // If there is more than one agency, this agency_id is required. 
// https://developers.google.com/transit/gtfs/reference#routestxt - new ConditionalRequirement("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) + new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) ), new StringField("route_short_name", OPTIONAL), // one of short or long must be provided new StringField("route_long_name", OPTIONAL), @@ -250,12 +257,12 @@ public Table (String name, Class entityClass, Requirement requ new StringField("zone_id", OPTIONAL).hasForeignReferences(), new URLField("stop_url", OPTIONAL), new ShortField("location_type", OPTIONAL, 4).requireConditions( - // If the location type is defined and within range, the conditional fields are required. + // If the location type is defined and within range, the dependent fields are required. // https://developers.google.com/transit/gtfs/reference#stopstxt - new ConditionalRequirement(0, 2, "stop_name", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new ConditionalRequirement(0, 2, "stop_lat", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new ConditionalRequirement(0, 2, "stop_lon", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new ConditionalRequirement(2, 4, "parent_station", FIELD_NOT_EMPTY, FIELD_IN_RANGE) + new FieldInRangeCheck(0, 2, "stop_name", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new FieldInRangeCheck(0, 2, "stop_lat", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new FieldInRangeCheck(0, 2, "stop_lon", FIELD_NOT_EMPTY, FIELD_IN_RANGE), + new FieldInRangeCheck(2, 4, "parent_station", FIELD_NOT_EMPTY, FIELD_IN_RANGE) ), new StringField("parent_station", OPTIONAL).requireConditions(), new StringField("stop_timezone", OPTIONAL), @@ -271,15 +278,15 @@ public Table (String name, Class entityClass, Requirement requ new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), new StringField("origin_id", OPTIONAL).requireConditions( // If the origin_id is defined, its value must exist as a zone_id in stops.txt. 
- new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS, "fare_rules") + new ForeignRefExistsCheck("zone_id", FOREIGN_REF_EXISTS, "fare_rules") ), new StringField("destination_id", OPTIONAL).requireConditions( // If the destination_id is defined, its value must exist as a zone_id in stops.txt. - new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS, "fare_rules") + new ForeignRefExistsCheck("zone_id", FOREIGN_REF_EXISTS, "fare_rules") ), new StringField("contains_id", OPTIONAL).requireConditions( // If the contains_id is defined, its value must exist as a zone_id in stops.txt. - new ConditionalRequirement("zone_id", FOREIGN_REF_EXISTS, "fare_rules") + new ForeignRefExistsCheck("zone_id", FOREIGN_REF_EXISTS, "fare_rules") ) ) .withParentTable(FARE_ATTRIBUTES) @@ -367,11 +374,11 @@ public Table (String name, Class entityClass, Requirement requ new StringField("translation", REQUIRED), new StringField("record_id", OPTIONAL).requireConditions( // If the field_value is empty the record_id is required. - new ConditionalRequirement("field_value", FIELD_IS_EMPTY) + new FieldIsEmptyCheck("field_value", FIELD_IS_EMPTY) ), new StringField("record_sub_id", OPTIONAL).requireConditions( // If the record_id is not empty and the value is stop_times the record_sub_id is required. - new ConditionalRequirement( + new FieldNotEmptyAndMatchesValueCheck( "record_id", "stop_times", FIELD_NOT_EMPTY_AND_MATCHES_VALUE @@ -379,7 +386,7 @@ public Table (String name, Class entityClass, Requirement requ ), new StringField("field_value", OPTIONAL).requireConditions( // If the record_id is empty the field_value is required. 
- new ConditionalRequirement("record_id", FIELD_IS_EMPTY) + new FieldIsEmptyCheck("record_id", FIELD_IS_EMPTY) )) .keyFieldIsNotUnique(); diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java new file mode 100644 index 000000000..78d1e5a94 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java @@ -0,0 +1,66 @@ +package com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.google.common.collect.HashMultimap; + +import java.util.HashSet; +import java.util.Set; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; + +/** + * Conditional requirement to check that an agency_id has been provided if more than one row exists in agency.txt. + */ +public class AgencyHasMultipleRowsCheck extends ConditionalRequirement { + + private final int FIRST_ROW = 2; + private final int SECOND_ROW = 3; + + public AgencyHasMultipleRowsCheck( + String dependentFieldName, + ConditionalCheckType dependentFieldCheck + ) { + this.dependentFieldName = dependentFieldName; + this.dependentFieldCheck = dependentFieldCheck; + } + + /** + * Flag an error if there are multiple rows in agency.txt and the agency_id is missing for any rows. + */ + public Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ) { + String dependentFieldValue = lineContext.getValueForRow(dependentFieldName); + Set errors = new HashSet<>(); + Set agencyIdValues = uniqueValuesForFields.get(dependentFieldName); + // Do some awkward checks to determine if the first or second row (or another) is missing the agency_id. 
+ boolean firstOrSecondMissingId = lineContext.lineNumber == SECOND_ROW && agencyIdValues.contains(""); + boolean currentRowMissingId = POSTGRES_NULL_TEXT.equals(dependentFieldValue); + boolean secondRowMissingId = firstOrSecondMissingId && currentRowMissingId; + if (firstOrSecondMissingId || (lineContext.lineNumber > SECOND_ROW && currentRowMissingId)) { + // The check on the agency table is carried out whilst the agency table is being loaded so it + // is possible to compare the number of agencyIdValues added against the number of rows loaded to + // accurately determine missing agency_id values. + int lineNumber = secondRowMissingId + ? SECOND_ROW + : firstOrSecondMissingId + ? FIRST_ROW + : lineContext.lineNumber; + errors.add( + NewGTFSError.forLine( + lineContext.table, + lineNumber, + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + dependentFieldName + ) + ); + } + return errors; + } + +} diff --git a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java similarity index 96% rename from src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java rename to src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java index 66c811af3..968f4b243 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ConditionalCheckType.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java @@ -1,4 +1,4 @@ -package com.conveyal.gtfs.loader; +package com.conveyal.gtfs.loader.conditions; /** * These are the conditionally required checks to be carried out inline with the values provided in diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java new file mode 100644 index 000000000..6395c947f --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java @@ -0,0 +1,41 @@ +package 
com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.google.common.collect.HashMultimap; + +import java.util.Set; + +/** + * These are the requirements that are checked inline with {@link ConditionalCheckType} to determine if the required + * conditions set forth for certain fields in the GTFS spec have been met. These requirements are applied directly to + * their "reference fields" with the help of {@link Field#requireConditions}. + */ +public abstract class ConditionalRequirement { + /** The type of check to be performed on a reference field. A reference field value is used to determine which check + * (e.g., {@link AgencyHasMultipleRowsCheck#check}) should be applied to the field. */ + protected ConditionalCheckType referenceFieldCheck; + /** The minimum reference field value if a range check is being performed. */ + protected int minReferenceValue; + /** The maximum reference field value if a range check is being performed. */ + protected int maxReferenceValue; + /** The type of check to be performed on the dependent field. */ + protected ConditionalCheckType dependentFieldCheck; + /** The name of the dependent field, which is a field that requires a specific value if the reference and + * (in some cases) dependent field checks meet certain conditions.*/ + protected String dependentFieldName; + /** The expected dependent field value. */ + protected String dependentFieldValue; + /** The reference table name required for checking foreign references. */ + protected String referenceTableName; + + /** + * All sub classes must implement this method and provide related conditional checks. 
+ */ + public abstract Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ); +} diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java new file mode 100644 index 000000000..474933bb2 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java @@ -0,0 +1,92 @@ +package com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.google.common.collect.HashMultimap; + +import java.util.HashSet; +import java.util.Set; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_NOT_EMPTY; + +/** + * Conditional requirement to check that a reference field value is within a defined range and the conditional field + * value has not be defined. + */ +public class FieldInRangeCheck extends ConditionalRequirement { + + public FieldInRangeCheck( + int minReferenceValue, + int maxReferenceValue, + String dependentFieldName, + ConditionalCheckType dependentFieldCheck, + ConditionalCheckType referenceFieldCheck + ) { + this.minReferenceValue = minReferenceValue; + this.maxReferenceValue = maxReferenceValue; + this.dependentFieldName = dependentFieldName; + this.dependentFieldCheck = dependentFieldCheck; + this.referenceFieldCheck = referenceFieldCheck; + } + + /** + * If the reference field value is within a defined range and the conditional field value has not be defined, flag + * an error. 
+ */ + public Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ) { + Set errors = new HashSet<>(); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + String conditionalFieldValue = lineContext.getValueForRow(dependentFieldName); + boolean referenceValueMeetsRangeCondition = + !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && + // TODO use pre-existing method in ShortField? + isValueInRange(referenceFieldValue, minReferenceValue, maxReferenceValue); + + if (!referenceValueMeetsRangeCondition) { + // If ref value does not meet the range condition, there is no need to check this conditional + // value for (e.g.) an empty value. Continue to the next check. + return errors; + } + boolean conditionallyRequiredValueIsEmpty = + dependentFieldCheck == FIELD_NOT_EMPTY && + POSTGRES_NULL_TEXT.equals(conditionalFieldValue); + + if (conditionallyRequiredValueIsEmpty) { + // Reference value in range and conditionally required field is empty. + String message = String.format( + "%s is required when %s value is between %d and %d.", + dependentFieldName, + referenceField.name, + minReferenceValue, + maxReferenceValue + ); + errors.add( + NewGTFSError.forLine( + lineContext, + CONDITIONALLY_REQUIRED, + message).setEntityId(lineContext.getEntityId()) + ); + } + return errors; + } + + /** + * Check if the provided value is within the min and max values. If the field value can not be converted + * to a number it is assumed that the value is not a number and will therefore never be within the min/max range. 
+ */ + private boolean isValueInRange(String referenceFieldValue, int min, int max) { + try { + int fieldValue = Integer.parseInt(referenceFieldValue); + return fieldValue >= min && fieldValue <= max; + } catch (NumberFormatException e) { + return false; + } + } +} diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java new file mode 100644 index 000000000..ec9436351 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java @@ -0,0 +1,60 @@ +package com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.google.common.collect.HashMultimap; + +import java.util.HashSet; +import java.util.Set; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; + +/** + * Conditional requirement to check that if a dependent field value is empty the reference field value is provided. + */ +public class FieldIsEmptyCheck extends ConditionalRequirement { + + public FieldIsEmptyCheck( + String dependentFieldName, + ConditionalCheckType referenceFieldCheck + + ) { + this.dependentFieldName = dependentFieldName; + this.referenceFieldCheck = referenceFieldCheck; + } + + /** + * Check the dependent field value, if it is empty the reference field value must be provided. 
+ */ + public Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ) { + Set errors = new HashSet<>(); + String dependentFieldValue = lineContext.getValueForRow(dependentFieldName); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + if ( + POSTGRES_NULL_TEXT.equals(dependentFieldValue) && + POSTGRES_NULL_TEXT.equals(referenceFieldValue) + ) { + // The reference field is required when the dependent field is empty. + String message = String.format( + "%s is required when %s is empty.", + referenceField.name, + dependentFieldName + ); + errors.add( + NewGTFSError.forLine( + lineContext, + CONDITIONALLY_REQUIRED, + message).setEntityId(lineContext.getEntityId()) + ); + + } + return errors; + } + +} diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java new file mode 100644 index 000000000..218dba776 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java @@ -0,0 +1,62 @@ +package com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.google.common.collect.HashMultimap; + +import java.util.HashSet; +import java.util.Set; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; + +/** + * Conditional requirement to check that a dependent field value is not empty and matches an expected value. 
+ */ +public class FieldNotEmptyAndMatchesValueCheck extends ConditionalRequirement { + + public FieldNotEmptyAndMatchesValueCheck( + String dependentFieldName, + String dependentFieldValue, + ConditionalCheckType referenceFieldCheck + ) { + this.dependentFieldName = dependentFieldName; + this.dependentFieldValue = dependentFieldValue; + this.referenceFieldCheck = referenceFieldCheck; + } + + /** + * Check the dependent field value is not empty and matches the expected value. + */ + public Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ) { + Set errors = new HashSet<>(); + String dependentFieldValue = lineContext.getValueForRow(dependentFieldName); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + if ( + !POSTGRES_NULL_TEXT.equals(dependentFieldValue) && + dependentFieldValue.equals(dependentFieldValue) && + POSTGRES_NULL_TEXT.equals(referenceFieldValue) + ) { + String message = String.format( + "%s is required and must match %s when %s is provided.", + referenceField.name, + dependentFieldValue, + dependentFieldName + ); + errors.add( + NewGTFSError.forLine( + lineContext, + CONDITIONALLY_REQUIRED, + message).setEntityId(lineContext.getEntityId()) + ); + + } + return errors; + } + +} diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java new file mode 100644 index 000000000..d1d4d9f8d --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java @@ -0,0 +1,65 @@ +package com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.conveyal.gtfs.loader.ReferenceTracker; +import com.google.common.collect.HashMultimap; + +import java.util.HashSet; +import java.util.Set; + +import static 
com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; + +/** + * Conditional requirement to check that an expected foreign field value matches a conditional field value. + */ +public class ForeignRefExistsCheck extends ConditionalRequirement { + + public ForeignRefExistsCheck( + String dependentFieldName, + ConditionalCheckType referenceFieldCheck, + String referenceTableName + ) { + this.dependentFieldName = dependentFieldName; + this.referenceFieldCheck = referenceFieldCheck; + this.referenceTableName = referenceTableName; + } + + /** + * Check that an expected foreign field value matches a conditional field value. Selected foreign field values are + * added to {@link ReferenceTracker#uniqueValuesForFields} as part of the load process and are used here to check + * conditional fields which have a dependency on them. e.g. stop#zone_id does not exist in stops table, but is + * required by fare_rules records (e.g. origin_id). + */ + public Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ) { + Set errors = new HashSet<>(); + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + // Expected reference in foreign field id list. 
+ String foreignFieldReference = + String.join( + ":", + dependentFieldName, + referenceFieldValue + ); + if (lineContext.table.name.equals(referenceTableName) && + !POSTGRES_NULL_TEXT.equals(referenceFieldValue) && + !uniqueValuesForFields.get(dependentFieldName).contains(foreignFieldReference) + ) { + errors.add( + NewGTFSError.forLine( + lineContext, + REFERENTIAL_INTEGRITY, + String.join(":", referenceField.name, foreignFieldReference) + ).setEntityId(lineContext.getEntityId()) + ); + } + return errors; + } + +} diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java new file mode 100644 index 000000000..03a9eb7ab --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java @@ -0,0 +1,60 @@ +package com.conveyal.gtfs.loader.conditions; + +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.loader.Field; +import com.conveyal.gtfs.loader.LineContext; +import com.google.common.collect.HashMultimap; + +import java.util.HashSet; +import java.util.Set; + +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.HAS_MULTIPLE_ROWS; + +/** + * Conditional requirement to check that the reference field is not empty when the dependent field/table has multiple + * rows. 
+ */ +public class ReferenceFieldShouldBeProvidedCheck extends ConditionalRequirement { + + public ReferenceFieldShouldBeProvidedCheck( + String dependentFieldName, + ConditionalCheckType dependentFieldCheck, + ConditionalCheckType referenceFieldCheck + ) { + this.dependentFieldName = dependentFieldName; + this.dependentFieldCheck = dependentFieldCheck; + this.referenceFieldCheck = referenceFieldCheck; + } + + /** + * Checks that the reference field is not empty when the dependent field/table has multiple rows. This is + * principally designed for checking that routes#agency_id is filled when multiple agencies exist. + */ + public Set check( + LineContext lineContext, + Field referenceField, + HashMultimap uniqueValuesForFields + ) { + String referenceFieldValue = lineContext.getValueForRow(referenceField.name); + Set errors = new HashSet<>(); + int dependentFieldCount = uniqueValuesForFields.get(dependentFieldName).size(); + if (dependentFieldCheck == HAS_MULTIPLE_ROWS && dependentFieldCount > 1) { + // If there are multiple entries for the dependent field (including empty strings to account for any + // potentially missing values), the reference field must not be empty. 
+ boolean referenceFieldIsEmpty = POSTGRES_NULL_TEXT.equals(referenceFieldValue); + if (referenceFieldIsEmpty) { + errors.add( + NewGTFSError.forLine( + lineContext, + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + null + ).setEntityId(lineContext.getEntityId()) + ); + } + } + return errors; + } + +} diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index 846b7daf0..f7e51dde9 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -18,7 +18,7 @@ import static com.conveyal.gtfs.TestUtils.assertThatSqlCountQueryYieldsExpectedCount; import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; -import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD; +import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; public class ConditionallyRequiredTest { private static String testDBName; @@ -82,7 +82,7 @@ private static Stream createTranslationTableChecks() { @Test public void agencyTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "Agency","2", null, "agency_id"); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Agency","2", null, "agency_id"); } @Test @@ -92,12 +92,12 @@ public void tripTableMissingConditionallyRequiredShapeId() { @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "Route","2", "21", null); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21", null); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - 
checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_TABLES_WITH_MORE_THAN_ONE_RECORD, "FareAttribute","2", "1", null); + checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "FareAttribute","2", "1", null); } /** From 593ed7306e6ddc2931d578a2174af7deccd7aaeb Mon Sep 17 00:00:00 2001 From: Evan Siroky Date: Tue, 18 May 2021 13:38:36 -0700 Subject: [PATCH 28/45] refactor: remove unneeded arguments from ConditionalRequirements classes --- .../java/com/conveyal/gtfs/loader/Table.java | 30 ++++++++----------- .../AgencyHasMultipleRowsCheck.java | 10 +++---- .../conditions/ConditionalRequirement.java | 11 ------- .../loader/conditions/FieldInRangeCheck.java | 8 +++-- .../loader/conditions/FieldIsEmptyCheck.java | 7 +---- .../FieldNotEmptyAndMatchesValueCheck.java | 12 ++++---- .../conditions/ForeignRefExistsCheck.java | 9 ++---- .../ReferenceFieldShouldBeProvidedCheck.java | 4 +-- 8 files changed, 33 insertions(+), 58 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 6662bf3e1..bf8ccb317 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -116,7 +116,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).requireConditions( // If there is more than one agency, the agency_id must be provided // https://developers.google.com/transit/gtfs/reference#agencytxt - new AgencyHasMultipleRowsCheck("agency_id", HAS_MULTIPLE_ROWS) + new AgencyHasMultipleRowsCheck() ).hasForeignReferences(), new StringField("agency_name", REQUIRED), new URLField("agency_url", REQUIRED), @@ -169,7 +169,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).requireConditions( // If there is more than one agency, this agency_id is required. 
// https://developers.google.com/transit/gtfs/reference#fare_attributestxt - new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) + new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS) ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -200,7 +200,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( // If there is more than one agency, this agency_id is required. // https://developers.google.com/transit/gtfs/reference#routestxt - new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS, FIELD_NOT_EMPTY) + new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS) ), new StringField("route_short_name", OPTIONAL), // one of short or long must be provided new StringField("route_long_name", OPTIONAL), @@ -259,10 +259,10 @@ public Table (String name, Class entityClass, Requirement requ new ShortField("location_type", OPTIONAL, 4).requireConditions( // If the location type is defined and within range, the dependent fields are required. 
// https://developers.google.com/transit/gtfs/reference#stopstxt - new FieldInRangeCheck(0, 2, "stop_name", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new FieldInRangeCheck(0, 2, "stop_lat", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new FieldInRangeCheck(0, 2, "stop_lon", FIELD_NOT_EMPTY, FIELD_IN_RANGE), - new FieldInRangeCheck(2, 4, "parent_station", FIELD_NOT_EMPTY, FIELD_IN_RANGE) + new FieldInRangeCheck(0, 2, "stop_name", FIELD_NOT_EMPTY), + new FieldInRangeCheck(0, 2, "stop_lat", FIELD_NOT_EMPTY), + new FieldInRangeCheck(0, 2, "stop_lon", FIELD_NOT_EMPTY), + new FieldInRangeCheck(2, 4, "parent_station", FIELD_NOT_EMPTY) ), new StringField("parent_station", OPTIONAL).requireConditions(), new StringField("stop_timezone", OPTIONAL), @@ -278,15 +278,15 @@ public Table (String name, Class entityClass, Requirement requ new StringField("route_id", OPTIONAL).isReferenceTo(ROUTES), new StringField("origin_id", OPTIONAL).requireConditions( // If the origin_id is defined, its value must exist as a zone_id in stops.txt. - new ForeignRefExistsCheck("zone_id", FOREIGN_REF_EXISTS, "fare_rules") + new ForeignRefExistsCheck("zone_id", "fare_rules") ), new StringField("destination_id", OPTIONAL).requireConditions( // If the destination_id is defined, its value must exist as a zone_id in stops.txt. - new ForeignRefExistsCheck("zone_id", FOREIGN_REF_EXISTS, "fare_rules") + new ForeignRefExistsCheck("zone_id", "fare_rules") ), new StringField("contains_id", OPTIONAL).requireConditions( // If the contains_id is defined, its value must exist as a zone_id in stops.txt. - new ForeignRefExistsCheck("zone_id", FOREIGN_REF_EXISTS, "fare_rules") + new ForeignRefExistsCheck("zone_id", "fare_rules") ) ) .withParentTable(FARE_ATTRIBUTES) @@ -374,19 +374,15 @@ public Table (String name, Class entityClass, Requirement requ new StringField("translation", REQUIRED), new StringField("record_id", OPTIONAL).requireConditions( // If the field_value is empty the record_id is required. 
- new FieldIsEmptyCheck("field_value", FIELD_IS_EMPTY) + new FieldIsEmptyCheck("field_value") ), new StringField("record_sub_id", OPTIONAL).requireConditions( // If the record_id is not empty and the value is stop_times the record_sub_id is required. - new FieldNotEmptyAndMatchesValueCheck( - "record_id", - "stop_times", - FIELD_NOT_EMPTY_AND_MATCHES_VALUE - ) + new FieldNotEmptyAndMatchesValueCheck("record_id", "stop_times") ), new StringField("field_value", OPTIONAL).requireConditions( // If the record_id is empty the field_value is required. - new FieldIsEmptyCheck("record_id", FIELD_IS_EMPTY) + new FieldIsEmptyCheck("record_id") )) .keyFieldIsNotUnique(); diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java index 78d1e5a94..81535307a 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java @@ -10,6 +10,7 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; +import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.HAS_MULTIPLE_ROWS; /** * Conditional requirement to check that an agency_id has been provided if more than one row exists in agency.txt. 
@@ -19,12 +20,9 @@ public class AgencyHasMultipleRowsCheck extends ConditionalRequirement { private final int FIRST_ROW = 2; private final int SECOND_ROW = 3; - public AgencyHasMultipleRowsCheck( - String dependentFieldName, - ConditionalCheckType dependentFieldCheck - ) { - this.dependentFieldName = dependentFieldName; - this.dependentFieldCheck = dependentFieldCheck; + public AgencyHasMultipleRowsCheck() { + this.dependentFieldName = "agency_id"; + this.dependentFieldCheck = HAS_MULTIPLE_ROWS; } /** diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java index 6395c947f..82171f005 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java @@ -13,22 +13,11 @@ * their "reference fields" with the help of {@link Field#requireConditions}. */ public abstract class ConditionalRequirement { - /** The type of check to be performed on a reference field. A reference field value is used to determine which check - * (e.g., {@link AgencyHasMultipleRowsCheck#check}) should be applied to the field. */ - protected ConditionalCheckType referenceFieldCheck; - /** The minimum reference field value if a range check is being performed. */ - protected int minReferenceValue; - /** The maximum reference field value if a range check is being performed. */ - protected int maxReferenceValue; /** The type of check to be performed on the dependent field. */ protected ConditionalCheckType dependentFieldCheck; /** The name of the dependent field, which is a field that requires a specific value if the reference and * (in some cases) dependent field checks meet certain conditions.*/ protected String dependentFieldName; - /** The expected dependent field value. 
*/ - protected String dependentFieldValue; - /** The reference table name required for checking foreign references. */ - protected String referenceTableName; /** * All sub classes must implement this method and provide related conditional checks. diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java index 474933bb2..4d3244c12 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java @@ -17,19 +17,21 @@ * value has not be defined. */ public class FieldInRangeCheck extends ConditionalRequirement { + /** The minimum reference field value if a range check is being performed. */ + protected int minReferenceValue; + /** The maximum reference field value if a range check is being performed. */ + protected int maxReferenceValue; public FieldInRangeCheck( int minReferenceValue, int maxReferenceValue, String dependentFieldName, - ConditionalCheckType dependentFieldCheck, - ConditionalCheckType referenceFieldCheck + ConditionalCheckType dependentFieldCheck ) { this.minReferenceValue = minReferenceValue; this.maxReferenceValue = maxReferenceValue; this.dependentFieldName = dependentFieldName; this.dependentFieldCheck = dependentFieldCheck; - this.referenceFieldCheck = referenceFieldCheck; } /** diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java index ec9436351..7b48d8781 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java @@ -16,13 +16,8 @@ */ public class FieldIsEmptyCheck extends ConditionalRequirement { - public FieldIsEmptyCheck( - String dependentFieldName, - ConditionalCheckType referenceFieldCheck - - ) { + public FieldIsEmptyCheck(String 
dependentFieldName) { this.dependentFieldName = dependentFieldName; - this.referenceFieldCheck = referenceFieldCheck; } /** diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java index 218dba776..c693120fe 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldNotEmptyAndMatchesValueCheck.java @@ -15,15 +15,15 @@ * Conditional requirement to check that a dependent field value is not empty and matches an expected value. */ public class FieldNotEmptyAndMatchesValueCheck extends ConditionalRequirement { + /** The expected dependent field value. */ + private String requiredDependentFieldValue; public FieldNotEmptyAndMatchesValueCheck( String dependentFieldName, - String dependentFieldValue, - ConditionalCheckType referenceFieldCheck + String requiredDependentFieldValue ) { this.dependentFieldName = dependentFieldName; - this.dependentFieldValue = dependentFieldValue; - this.referenceFieldCheck = referenceFieldCheck; + this.requiredDependentFieldValue = requiredDependentFieldValue; } /** @@ -39,13 +39,13 @@ public Set check( String referenceFieldValue = lineContext.getValueForRow(referenceField.name); if ( !POSTGRES_NULL_TEXT.equals(dependentFieldValue) && - dependentFieldValue.equals(dependentFieldValue) && + dependentFieldValue.equals(requiredDependentFieldValue) && POSTGRES_NULL_TEXT.equals(referenceFieldValue) ) { String message = String.format( "%s is required and must match %s when %s is provided.", referenceField.name, - dependentFieldValue, + requiredDependentFieldValue, dependentFieldName ); errors.add( diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java index d1d4d9f8d..23c9d5e5d 100644 --- 
a/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ForeignRefExistsCheck.java @@ -16,14 +16,11 @@ * Conditional requirement to check that an expected foreign field value matches a conditional field value. */ public class ForeignRefExistsCheck extends ConditionalRequirement { + /** The reference table name. */ + private String referenceTableName; - public ForeignRefExistsCheck( - String dependentFieldName, - ConditionalCheckType referenceFieldCheck, - String referenceTableName - ) { + public ForeignRefExistsCheck(String dependentFieldName, String referenceTableName) { this.dependentFieldName = dependentFieldName; - this.referenceFieldCheck = referenceFieldCheck; this.referenceTableName = referenceTableName; } diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java index 03a9eb7ab..cf86604f6 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java @@ -20,12 +20,10 @@ public class ReferenceFieldShouldBeProvidedCheck extends ConditionalRequirement public ReferenceFieldShouldBeProvidedCheck( String dependentFieldName, - ConditionalCheckType dependentFieldCheck, - ConditionalCheckType referenceFieldCheck + ConditionalCheckType dependentFieldCheck ) { this.dependentFieldName = dependentFieldName; this.dependentFieldCheck = dependentFieldCheck; - this.referenceFieldCheck = referenceFieldCheck; } /** From 3bfc8b563d8adf98bd28abfa3786a08909c2ccda Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Wed, 19 May 2021 13:30:41 +0100 Subject: [PATCH 29/45] refactor(Various PR feedback updates.): PR feedback and removed ConditionalCheckType.java as it is --- .../java/com/conveyal/gtfs/loader/Table.java | 22 ++++++------ 
.../AgencyHasMultipleRowsCheck.java | 2 -- .../conditions/ConditionalCheckType.java | 36 ------------------- .../conditions/ConditionalRequirement.java | 7 ++-- .../loader/conditions/FieldInRangeCheck.java | 10 ++---- .../ReferenceFieldShouldBeProvidedCheck.java | 9 ++--- 6 files changed, 16 insertions(+), 70 deletions(-) delete mode 100644 src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index bf8ccb317..f8e66f8ef 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -58,12 +58,6 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_IN_RANGE; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_IS_EMPTY; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_NOT_EMPTY; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FOREIGN_REF_EXISTS; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.HAS_MULTIPLE_ROWS; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_NOT_EMPTY_AND_MATCHES_VALUE; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; @@ -169,7 +163,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).requireConditions( // If there is more than one agency, this agency_id is required. 
// https://developers.google.com/transit/gtfs/reference#fare_attributestxt - new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS) + new ReferenceFieldShouldBeProvidedCheck("agency_id") ), new IntegerField("transfer_duration", OPTIONAL) ).addPrimaryKey(); @@ -200,7 +194,7 @@ public Table (String name, Class entityClass, Requirement requ new StringField("agency_id", OPTIONAL).isReferenceTo(AGENCY).requireConditions( // If there is more than one agency, this agency_id is required. // https://developers.google.com/transit/gtfs/reference#routestxt - new ReferenceFieldShouldBeProvidedCheck("agency_id", HAS_MULTIPLE_ROWS) + new ReferenceFieldShouldBeProvidedCheck("agency_id") ), new StringField("route_short_name", OPTIONAL), // one of short or long must be provided new StringField("route_long_name", OPTIONAL), @@ -250,20 +244,24 @@ public Table (String name, Class entityClass, Requirement requ public static final Table STOPS = new Table("stops", Stop.class, REQUIRED, new StringField("stop_id", REQUIRED), new StringField("stop_code", OPTIONAL), + // The actual conditions that will be acted upon are within the location_type field. new StringField("stop_name", OPTIONAL).requireConditions(), new StringField("stop_desc", OPTIONAL), + // The actual conditions that will be acted upon are within the location_type field. new DoubleField("stop_lat", OPTIONAL, -80, 80, 6).requireConditions(), + // The actual conditions that will be acted upon are within the location_type field. new DoubleField("stop_lon", OPTIONAL, -180, 180, 6).requireConditions(), new StringField("zone_id", OPTIONAL).hasForeignReferences(), new URLField("stop_url", OPTIONAL), new ShortField("location_type", OPTIONAL, 4).requireConditions( // If the location type is defined and within range, the dependent fields are required. 
// https://developers.google.com/transit/gtfs/reference#stopstxt - new FieldInRangeCheck(0, 2, "stop_name", FIELD_NOT_EMPTY), - new FieldInRangeCheck(0, 2, "stop_lat", FIELD_NOT_EMPTY), - new FieldInRangeCheck(0, 2, "stop_lon", FIELD_NOT_EMPTY), - new FieldInRangeCheck(2, 4, "parent_station", FIELD_NOT_EMPTY) + new FieldInRangeCheck(0, 2, "stop_name"), + new FieldInRangeCheck(0, 2, "stop_lat"), + new FieldInRangeCheck(0, 2, "stop_lon"), + new FieldInRangeCheck(2, 4, "parent_station") ), + // The actual conditions that will be acted upon are within the location_type field. new StringField("parent_station", OPTIONAL).requireConditions(), new StringField("stop_timezone", OPTIONAL), new ShortField("wheelchair_boarding", OPTIONAL, 2), diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java index 81535307a..3b3b6ca9b 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/AgencyHasMultipleRowsCheck.java @@ -10,7 +10,6 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.HAS_MULTIPLE_ROWS; /** * Conditional requirement to check that an agency_id has been provided if more than one row exists in agency.txt. 
@@ -22,7 +21,6 @@ public class AgencyHasMultipleRowsCheck extends ConditionalRequirement { public AgencyHasMultipleRowsCheck() { this.dependentFieldName = "agency_id"; - this.dependentFieldCheck = HAS_MULTIPLE_ROWS; } /** diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java deleted file mode 100644 index 968f4b243..000000000 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalCheckType.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.conveyal.gtfs.loader.conditions; - -/** - * These are the conditionally required checks to be carried out inline with the values provided in - * {@link ConditionalRequirement}. - */ -public enum ConditionalCheckType { - /** - * The conditionally required field value must not be empty. This is used in conjunction with - * {@link ConditionalCheckType#FIELD_IN_RANGE}. E.g. if the reference field is within a specified range, the - * dependent field must not be empty. - */ - FIELD_NOT_EMPTY, - /** - * The reference field value must be within a specified range. - */ - FIELD_IN_RANGE, - /** - * This checks that the foreign reference exists in the dependent field (e.g., stops#zone_id). - */ - FOREIGN_REF_EXISTS, - /** - * Check that the reference table has multiple records. This is sometimes used in conjunction with - * {@link ConditionalCheckType#FIELD_NOT_EMPTY} (e.g., to check that multiple agencies exist). - */ - HAS_MULTIPLE_ROWS, - /** - * If the conditionally required field value is empty, the reference field value must be provided. - */ - FIELD_IS_EMPTY, - /** - * If the conditionally required field value is not empty and matches an expected value, the reference field - * value must not be empty. 
- */ - FIELD_NOT_EMPTY_AND_MATCHES_VALUE -} \ No newline at end of file diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java index 82171f005..a8dcd0737 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java @@ -8,13 +8,10 @@ import java.util.Set; /** - * These are the requirements that are checked inline with {@link ConditionalCheckType} to determine if the required - * conditions set forth for certain fields in the GTFS spec have been met. These requirements are applied directly to - * their "reference fields" with the help of {@link Field#requireConditions}. + * An abstract class which primarily defines a method used by implementing classes to define specific conditional + * requirement checks. */ public abstract class ConditionalRequirement { - /** The type of check to be performed on the dependent field. 
*/ - protected ConditionalCheckType dependentFieldCheck; /** The name of the dependent field, which is a field that requires a specific value if the reference and * (in some cases) dependent field checks meet certain conditions.*/ protected String dependentFieldName; diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java index 4d3244c12..eb81836f1 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java @@ -10,7 +10,6 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.CONDITIONALLY_REQUIRED; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.FIELD_NOT_EMPTY; /** * Conditional requirement to check that a reference field value is within a defined range and the conditional field @@ -25,13 +24,11 @@ public class FieldInRangeCheck extends ConditionalRequirement { public FieldInRangeCheck( int minReferenceValue, int maxReferenceValue, - String dependentFieldName, - ConditionalCheckType dependentFieldCheck + String dependentFieldName ) { this.minReferenceValue = minReferenceValue; this.maxReferenceValue = maxReferenceValue; this.dependentFieldName = dependentFieldName; - this.dependentFieldCheck = dependentFieldCheck; } /** @@ -56,11 +53,8 @@ public Set check( // value for (e.g.) an empty value. Continue to the next check. return errors; } - boolean conditionallyRequiredValueIsEmpty = - dependentFieldCheck == FIELD_NOT_EMPTY && - POSTGRES_NULL_TEXT.equals(conditionalFieldValue); - if (conditionallyRequiredValueIsEmpty) { + if (POSTGRES_NULL_TEXT.equals(conditionalFieldValue)) { // Reference value in range and conditionally required field is empty. 
String message = String.format( "%s is required when %s value is between %d and %d.", diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java index cf86604f6..33fd55716 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ReferenceFieldShouldBeProvidedCheck.java @@ -10,7 +10,6 @@ import static com.conveyal.gtfs.error.NewGTFSErrorType.AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS; import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; -import static com.conveyal.gtfs.loader.conditions.ConditionalCheckType.HAS_MULTIPLE_ROWS; /** * Conditional requirement to check that the reference field is not empty when the dependent field/table has multiple @@ -18,12 +17,8 @@ */ public class ReferenceFieldShouldBeProvidedCheck extends ConditionalRequirement { - public ReferenceFieldShouldBeProvidedCheck( - String dependentFieldName, - ConditionalCheckType dependentFieldCheck - ) { + public ReferenceFieldShouldBeProvidedCheck(String dependentFieldName) { this.dependentFieldName = dependentFieldName; - this.dependentFieldCheck = dependentFieldCheck; } /** @@ -38,7 +33,7 @@ public Set check( String referenceFieldValue = lineContext.getValueForRow(referenceField.name); Set errors = new HashSet<>(); int dependentFieldCount = uniqueValuesForFields.get(dependentFieldName).size(); - if (dependentFieldCheck == HAS_MULTIPLE_ROWS && dependentFieldCount > 1) { + if (dependentFieldCount > 1) { // If there are multiple entries for the dependent field (including empty strings to account for any // potentially missing values), the reference field must not be empty. 
boolean referenceFieldIsEmpty = POSTGRES_NULL_TEXT.equals(referenceFieldValue); From f12c287c2727874a3d465edd1b9065fb5a950ad5 Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 21 May 2021 11:51:02 +0100 Subject: [PATCH 30/45] Update src/main/java/com/conveyal/gtfs/loader/Table.java Co-authored-by: Landon Reed --- src/main/java/com/conveyal/gtfs/loader/Table.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index f8e66f8ef..16ccac151 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -173,7 +173,7 @@ public Table (String name, Class entityClass, Requirement requ public static final Table FEED_INFO = new Table("feed_info", FeedInfo.class, OPTIONAL, new StringField("feed_publisher_name", REQUIRED), // feed_id is not the first field because that would label it as the key field, which we do not want because the - // key field cannot be optional. feed_id is not part of the GTFS spec, but is useful to OTP. + // key field cannot be optional. feed_id is not part of the GTFS spec, but is required by OTP to associate static GTFS with GTFS-rt feeds. 
new StringField("feed_id", OPTIONAL), new URLField("feed_publisher_url", REQUIRED), new LanguageField("feed_lang", REQUIRED), From 3b36c633bc785711dadb2ef78343f45f3993f7dc Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 21 May 2021 11:51:13 +0100 Subject: [PATCH 31/45] Update src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java Co-authored-by: Landon Reed --- .../com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java index 7b48d8781..e2c15a119 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java @@ -12,7 +12,7 @@ import static com.conveyal.gtfs.loader.JdbcGtfsLoader.POSTGRES_NULL_TEXT; /** - * Conditional requirement to check that if a dependent field value is empty the reference field value is provided. + * Conditional requirement to check that if a dependent field value is empty then the reference field value is provided. 
*/ public class FieldIsEmptyCheck extends ConditionalRequirement { From a1595aca2e504a09611e8e19b1ec8dd6b2c52a80 Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 21 May 2021 11:51:26 +0100 Subject: [PATCH 32/45] Update src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java Co-authored-by: Landon Reed --- .../com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java index e2c15a119..aeb596667 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java @@ -21,7 +21,7 @@ public FieldIsEmptyCheck(String dependentFieldName) { } /** - * Check the dependent field value, if it is empty the reference field value must be provided. + * Check the dependent field value. If it is empty, the reference field value must be provided. 
*/ public Set check( LineContext lineContext, From 0f1d5ca2ccef966a794486a0b825fd15c134423b Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 21 May 2021 15:05:16 +0100 Subject: [PATCH 33/45] refactor(Updated pattern stop): Pattern stop now has continuous pick up / drop off --- .../conveyal/gtfs/loader/EntityPopulator.java | 2 + .../conveyal/gtfs/loader/JdbcTableWriter.java | 2 + .../java/com/conveyal/gtfs/loader/Table.java | 4 +- .../com/conveyal/gtfs/model/PatternStop.java | 2 + .../loader/ConditionallyRequiredTest.java | 58 ++++++++++++++++--- 5 files changed, 59 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java index 4c44d3d64..7105c6c71 100644 --- a/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java +++ b/src/main/java/com/conveyal/gtfs/loader/EntityPopulator.java @@ -64,6 +64,8 @@ public interface EntityPopulator { patternStop.stop_sequence = getIntIfPresent(result, "stop_sequence", columnForName); patternStop.timepoint = getIntIfPresent(result, "timepoint", columnForName); patternStop.shape_dist_traveled = getDoubleIfPresent(result, "shape_dist_traveled", columnForName); + patternStop.continuous_pickup = getIntIfPresent (result, "continuous_pickup", columnForName); + patternStop.continuous_drop_off = getIntIfPresent (result, "continuous_drop_off", columnForName); return patternStop; }; diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java index 14665a7c0..172bf5501 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java @@ -1146,6 +1146,8 @@ private void insertBlankStopTimes( stopTime.pickup_type = patternStop.pickup_type; stopTime.timepoint = patternStop.timepoint; stopTime.shape_dist_traveled = patternStop.shape_dist_traveled; + stopTime.continuous_drop_off = 
patternStop.continuous_drop_off; + stopTime.continuous_pickup = patternStop.continuous_pickup; stopTime.stop_sequence = i; // Update stop time with each trip ID and add to batch. for (String tripId : tripIds) { diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index f8e66f8ef..91ae8ce0a 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -301,7 +301,9 @@ public Table (String name, Class entityClass, Requirement requ new IntegerField("drop_off_type", EDITOR, 2), new IntegerField("pickup_type", EDITOR, 2), new DoubleField("shape_dist_traveled", EDITOR, 0, Double.POSITIVE_INFINITY, -1), - new ShortField("timepoint", EDITOR, 1) + new ShortField("timepoint", EDITOR, 1), + new ShortField("continuous_pickup", OPTIONAL,3), + new ShortField("continuous_drop_off", OPTIONAL,3) ).withParentTable(PATTERNS); public static final Table TRANSFERS = new Table("transfers", Transfer.class, OPTIONAL, diff --git a/src/main/java/com/conveyal/gtfs/model/PatternStop.java b/src/main/java/com/conveyal/gtfs/model/PatternStop.java index 547280c23..62f343c66 100644 --- a/src/main/java/com/conveyal/gtfs/model/PatternStop.java +++ b/src/main/java/com/conveyal/gtfs/model/PatternStop.java @@ -23,6 +23,8 @@ public class PatternStop extends Entity { public int pickup_type; public int drop_off_type; public int timepoint; + public int continuous_pickup = INT_MISSING; + public int continuous_drop_off = INT_MISSING; public PatternStop () {} diff --git a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java index f7e51dde9..530f4499e 100644 --- a/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/ConditionallyRequiredTest.java @@ -32,7 +32,9 @@ public static void setUpClass() throws IOException { String dbConnectionUrl = 
String.format("jdbc:postgresql://localhost/%s", testDBName); testDataSource = TestUtils.createTestDataSource(dbConnectionUrl); // load feed into db - String zipFileName = TestUtils.zipFolderFiles("real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks", true); + String zipFileName = TestUtils.zipFolderFiles( + "real-world-gtfs-feeds/VTA-gtfs-conditionally-required-checks", + true); FeedLoadResult feedLoadResult = load(zipFileName, testDataSource); testNamespace = feedLoadResult.uniqueIdentifier; validate(testNamespace, testDataSource); @@ -45,12 +47,24 @@ public static void tearDownClass() { @Test public void stopTimeTableMissingConditionallyRequiredArrivalDepartureTimes() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "StopTime","10", "1","First and last stop times are required to have both an arrival and departure time."); + checkFeedHasOneError( + CONDITIONALLY_REQUIRED, + "StopTime", + "10", + "1", + "First and last stop times are required to have both an arrival and departure time." 
+ ); } @ParameterizedTest @MethodSource("createStopTableChecks") - public void stopTableConditionallyRequiredTests(NewGTFSErrorType errorType, String entityType, String lineNumber, String entityId, String badValue) { + public void stopTableConditionallyRequiredTests( + NewGTFSErrorType errorType, + String entityType, + String lineNumber, + String entityId, + String badValue + ) { checkFeedHasOneError(errorType, entityType, lineNumber, entityId, badValue); } @@ -68,7 +82,12 @@ private static Stream createStopTableChecks() { @ParameterizedTest @MethodSource("createTranslationTableChecks") - public void translationTableConditionallyRequiredTests(String entityType, String lineNumber, String entityId, String badValue) { + public void translationTableConditionallyRequiredTests( + String entityType, + String lineNumber, + String entityId, + String badValue + ) { checkFeedHasOneError(CONDITIONALLY_REQUIRED, entityType, lineNumber, entityId, badValue); } @@ -82,22 +101,45 @@ private static Stream createTranslationTableChecks() { @Test public void agencyTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Agency","2", null, "agency_id"); + checkFeedHasOneError( + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + "Agency", + "2", + null, + "agency_id"); } @Test public void tripTableMissingConditionallyRequiredShapeId() { - checkFeedHasOneError(CONDITIONALLY_REQUIRED, "Trip","2", "1","shape_id is required when a trip has continuous behavior defined."); + checkFeedHasOneError( + CONDITIONALLY_REQUIRED, + "Trip", + "2", + "1", + "shape_id is required when a trip has continuous behavior defined." 
+ ); } @Test public void routeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "Route","2", "21", null); + checkFeedHasOneError( + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + "Route", + "2", + "21", + null + ); } @Test public void fareAttributeTableMissingConditionallyRequiredAgencyId() { - checkFeedHasOneError(AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, "FareAttribute","2", "1", null); + checkFeedHasOneError( + AGENCY_ID_REQUIRED_FOR_MULTI_AGENCY_FEEDS, + "FareAttribute", + "2", + "1", + null + ); } /** From 38b5e34175dfa04e1ce51d2604ad4706a21f8900 Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 21 May 2021 16:59:25 +0100 Subject: [PATCH 34/45] refactor(Fixed broken unit tests): Fixed broken unit tests as a result of pattern stop updates BREAKING CHANGE: This PR contains schema changes to the following tables routes, stop times, stop and feed info --- src/main/java/com/conveyal/gtfs/TripPatternKey.java | 4 ++++ .../com/conveyal/gtfs/validator/PatternFinderValidator.java | 2 ++ src/test/java/com/conveyal/gtfs/dto/PatternStopDTO.java | 2 ++ 3 files changed, 8 insertions(+) diff --git a/src/main/java/com/conveyal/gtfs/TripPatternKey.java b/src/main/java/com/conveyal/gtfs/TripPatternKey.java index e06a5f6ab..9331483e0 100644 --- a/src/main/java/com/conveyal/gtfs/TripPatternKey.java +++ b/src/main/java/com/conveyal/gtfs/TripPatternKey.java @@ -24,6 +24,8 @@ public class TripPatternKey { public TIntList arrivalTimes = new TIntArrayList(); public TIntList departureTimes = new TIntArrayList(); public TIntList timepoints = new TIntArrayList(); + public TIntList continuous_pickup = new TIntArrayList(); + public TIntList continuous_drop_off = new TIntArrayList(); public TDoubleList shapeDistances = new TDoubleArrayList(); public TripPatternKey (String routeId) { @@ -39,6 +41,8 @@ public void addStopTime (StopTime st) { departureTimes.add(st.departure_time); timepoints.add(st.timepoint); 
shapeDistances.add(st.shape_dist_traveled); + continuous_pickup.add(st.continuous_pickup); + continuous_drop_off.add(st.continuous_drop_off); } @Override diff --git a/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java b/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java index 9584b9e61..f24ac1c44 100644 --- a/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java @@ -158,6 +158,8 @@ public void complete(ValidationResult validationResult) { setIntParameter(insertPatternStopStatement,7, key.pickupTypes.get(i)); setDoubleParameter(insertPatternStopStatement, 8, key.shapeDistances.get(i)); setIntParameter(insertPatternStopStatement,9, key.timepoints.get(i)); + setIntParameter(insertPatternStopStatement,10, key.continuous_pickup.get(i)); + setIntParameter(insertPatternStopStatement,11, key.continuous_drop_off.get(i)); patternStopTracker.addBatch(); } // Finally, update all trips on this pattern to reference this pattern's ID. 
diff --git a/src/test/java/com/conveyal/gtfs/dto/PatternStopDTO.java b/src/test/java/com/conveyal/gtfs/dto/PatternStopDTO.java index 12888f4d6..f61b68cb5 100644 --- a/src/test/java/com/conveyal/gtfs/dto/PatternStopDTO.java +++ b/src/test/java/com/conveyal/gtfs/dto/PatternStopDTO.java @@ -11,6 +11,8 @@ public class PatternStopDTO { public Integer pickup_type; public Integer stop_sequence; public Integer timepoint; + public Integer continuous_pickup; + public Integer continuous_drop_off; /** Empty constructor for deserialization */ public PatternStopDTO() {} From e9934692bc7ed9f478b4686ceb182117a36d7592 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Tue, 25 May 2021 13:45:34 -0400 Subject: [PATCH 35/45] refactor(GraphQLGtfsSchema): update GraphQL for spec changes --- .../com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index e752660cb..b35020de4 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -144,9 +144,12 @@ public class GraphQLGtfsSchema { .description("A GTFS feed_info object") .field(MapFetcher.field("id", GraphQLInt)) .field(MapFetcher.field("feed_id")) + .field(MapFetcher.field("feed_contact_email")) + .field(MapFetcher.field("feed_contact_url")) .field(MapFetcher.field("feed_publisher_name")) .field(MapFetcher.field("feed_publisher_url")) .field(MapFetcher.field("feed_lang")) + .field(MapFetcher.field("default_lang")) .field(MapFetcher.field("feed_start_date")) .field(MapFetcher.field("feed_end_date")) .field(MapFetcher.field("feed_version")) @@ -257,7 +260,8 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("timepoint", GraphQLInt)) .field(MapFetcher.field("drop_off_type", GraphQLInt)) .field(MapFetcher.field("pickup_type", GraphQLInt)) - // 
Editor-specific fields + .field(MapFetcher.field("continuous_drop_off", GraphQLInt)) + .field(MapFetcher.field("continuous_pickup", GraphQLInt)) .field(MapFetcher.field("shape_dist_traveled", GraphQLFloat)) .build(); @@ -272,7 +276,8 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("route_desc")) .field(MapFetcher.field("route_url")) .field(MapFetcher.field("route_branding_url")) - // TODO route_type as enum or int + .field(MapFetcher.field("continuous_drop_off", GraphQLInt)) + .field(MapFetcher.field("continuous_pickup", GraphQLInt)) .field(MapFetcher.field("route_type", GraphQLInt)) .field(MapFetcher.field("route_color")) .field(MapFetcher.field("route_text_color")) @@ -341,6 +346,7 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("stop_url")) .field(MapFetcher.field("stop_timezone")) .field(MapFetcher.field("parent_station")) + .field(MapFetcher.field("platform_code")) .field(MapFetcher.field("location_type", GraphQLInt)) .field(MapFetcher.field("wheelchair_boarding", GraphQLInt)) // Returns all stops that reference parent stop's stop_id @@ -403,6 +409,8 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("shape_dist_traveled", GraphQLFloat)) .field(MapFetcher.field("drop_off_type", GraphQLInt)) .field(MapFetcher.field("pickup_type", GraphQLInt)) + .field(MapFetcher.field("continuous_drop_off", GraphQLInt)) + .field(MapFetcher.field("continuous_pickup", GraphQLInt)) .field(MapFetcher.field("stop_sequence", GraphQLInt)) .field(MapFetcher.field("timepoint", GraphQLInt)) // FIXME: This will only returns a list with one stop entity (unless there is a referential integrity issue) From 8d775012121636adf2b65bcc4489b27dfa1358ce Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Thu, 3 Jun 2021 09:12:57 -0400 Subject: [PATCH 36/45] fix(build): remove gpg signing --- maven-artifact-signing-key.asc.enc | Bin 5744 -> 0 bytes maven-settings.xml | 26 -------------------------- pom.xml | 16 ---------------- 3 files changed, 42 deletions(-) delete 
mode 100644 maven-artifact-signing-key.asc.enc delete mode 100644 maven-settings.xml diff --git a/maven-artifact-signing-key.asc.enc b/maven-artifact-signing-key.asc.enc deleted file mode 100644 index c611046ba863749d04550af86f6030d8da17bcfe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5744 zcmV-$7LVy^H?jcpIy$#%2|-Ft^ksQiO5qsF)XELP~63 ztoFH{1s4^x$$qUp)qyghAbwLBS%mgeuY-mU^#a&gNY=!AI9~gf>|L2gAgp|?C(%bn zi|A8vh^>q|jlw9385+fAG;_)5v69YxX-w%2U+JK=OT#q>o;Bd#@w?Jo2tn;-QHC_2 zUl4PeIRGlvjfW-TF>m6FQ0runXBPP&3t9R|cN_Xc*vH_Kc$S6wVM|vQEq>?b)#cUL zY^eB$4fMSs%*pclZ^+a#AyeH<_A_62fi2EctMgtX>`9!0T&ik7@UJmeklw0=;@BNp zqLA3uZCY)t0`BF+#wjM)#;02Kky$aPI@Eq0u&uNKtLefqoNpZ*)V?g#w{8Xj0*zN3 z&?N}q2^iZYE8_%S1>AHOh_)_#vpSH?M^7C0@IVq_B9>5x@K^{TiPfuLihVOZr|(#~RTr-nyr3 zSv8$Dq&sUT+2?`(Qben`^6q=6m%ACX#X{Mrt*lh~d2)}wqR)J6cp^$UQ?DrzEI3dg z=rlpnc67nyq$TDW{QZS6LgV1bGXa?U|K}B$)m41lreun&xpaejBEbL}q{Dk+HgCOc zdRy0o-rd%j1KO;=0?w6sk)uf>>>e=sz17A}ho<}e1My1~z^VW6KTsoHgG@hr577C} zK%d*B0Yr%jOEr}XR-9R>zF6F%+=j$@79@=b<;ZPh@iXV%;|<*MGAv**p)0=AqF`OA)$S&(Y4#9B|s#?FZQAW z46yRUG5o?!kH!fGk|%S|DvX6|RF-TWf9-+j5%Zqa} zlYFn1srOK|G(IY%w&;z+vIwM( zN&D-o(I>abYL9(!s&m#|kji_C^g4`Lnn1y%JzulX8U0eFmBov;2k54#p+gJjoL0J! 
zf970$Ig`>h6?9$3^a>>Pgx{MHu<>7QOkY3V!8$sNJq|KZ*p)H(RR-Ss^?r%O>{FVv zffFLES>8Ya8k)-eNoG}Vuq-c?0VNZJWc3+(dz`b&>izU^pUZ825uclM|(vhRg=L`=hT@2uWe5Ga?R zuk&=>t;C(`iCiUC(HKz^t{e9$=co>g==@)0MKj?*YAS!pl^r~?%>CS_h%(m1GlJB| zE2h7#xekUti^uJUmeB!*UgNQhbV-Jmw$kNAGJANCnj`ulmr6l(>%x*@1ho%AfAsM@ z=z>834C<;RS{`do<_dU-Mnjxcb!%8+?QKD=Ivx%l7riM%5tTulCk!{A zf25_ckIq}H!g@#EAu`Xqpxn?Keo(eV)K(X)H}jt1#)LZGO=3P-aR|>YPkR2L87pfg z%}U0>^zwjeu*GB;C1uqlb04Z5eJLb<{~z1p*%)CT!+t+qhfZ{W-lWp zfq)Ndlp^3buN_y9j^v{J(Vz9**j+6%!cNzZ8ogCyF1muJHVSjUV{;gU!gmd$}V zodMivXNv5sS)&|$ev@TVHI1#o94K%|;NyYH; z*l)5QRwW<~&6VOS5V@3IYGR9Bz{tZ;W3SZgV*_9hS5a<*?FA3u^NXKUWuy-*K<^84 zRrN79rE%#4!6+;Fm?_o&2`XqZ1j?*Z0^+{w@!v6Uzha2dp}tBMCAxuBv>nj6LMwQZ zGPxVM2U_3V$bH-?4-82n^^lT|?<7FIP+ff?d`#O1nYN}GGPI5UAM1z(Y~_e4Rigxg z%#tDmB{-_Mzg#YYU~?nlSz*qi<73Mrl}r}XsvW?ru|H* ziL6D#q%6{&z$vK!6c9xlr~uC~w&BT2j4JQ@snrGn(oe_VjvK?SFa7IjSeliVyTt%c z>qR4@N*_^^UN>2hQk@tOorUCT>P6Izz65DI8jY3wvE-An)6%?30)q=>Gfd~~`4oGI z_(@pVjrEh+KmGtWPqq%@hk(j;qi!wkfA=;o5CmqZSe4amVv3rmu2`+Uq~;j$_tK@- z_4vx)RnUd0z*AG;X%+flS8&wsX~DHji7FlRs%!Cdpcri*Q_?2(Esg(&7F<{iavVdS zkLT%fZ-`Zq{yy(0EHdY)@GuU7H2%m_c>_WeBU{`OpP>od=W3Y*xsy9+ONHxj5NjelqZl5s;GuwR63fF0kvuk&zSY0i|R=8ScG}ZzSP0Ftp$UNV4MVfdEB|N zH9b;SAj6l|xDm86G3|$PI*v{6b+q<*P}nbU3`8=RB8fuP5Wpu z60gdPAKz8lqymu}Th~MHF-AeE|2nuBxna#l-wjE=4h?iQWl{6@;E5*t1%2YwUP(t( z+?gUTV8>W5SbXvLywAXS@r;?!o)7G#)8ts5~s z*4TTDJb#WT*5ifYa7dGhx~hpIKPtYKQr0oV`nKC%2f+#^{JIuwP3pVt;xiz?-S{xQ zn#m%k)st?#w7pJVLGpXn0{1!@x6HpT*0P@XOIIz@KxS8ucB&KsRG=B|EK$l9Mx>q! 
zs>h0chEb|fP9z31DXX(YXVA=NIOqXX|C~gCC8XPtOnt(pYa^SM)gO(%J4{wA1Obz- z31Y~c$4SN7Np_A1d)BTBGN>9`m)N;%+*)pkh#|0x_m;?Rb@-TD!x4DjO~%_U5^kEZ z-H~A*J}sV(S%%@n2%;y&uw9B{Q2(T&>7hge8j57J;l!pEZ4nv6>AbH)umcp%+a#RZ zy?Xd6l?E0+Xs_in-Yhe1AO{x07%Z4B>&zj`?c}YDCA}8(@|8*G7eaiiuVZk#W71%> z9&){fYgn)-%NaPj)rxWj{-#Xmw4Z^#sOndUdDwT)XiaC*Jt%#6wE?q-GON7IuZ=2b0VTg zL@C_?n-r}$hCujipq3qceRHW%v#)yTW$so)*qX0o&T}~^_I?}7bB2y{fu!Y2`YwbM zaRNO5<8v1O2a>9*8-!9%fg`g^ zad7)Q_0poMWz*e{#R)%^*5@f`^OS7n-eu-MEdT=Z!uMUQ$83=P{G!8`1m$6$2u`LO36b>-F4nhgd7@pTP0AZd zcqdRRE|=3#rB)vw3TG8tK}*{D`y^4j9r?O1%b`fSx(1?X7)o>$v_O#C$$ z%?gSCwM~*G3pOts?$;@2ZxCWd@+j6!VG6f#hk`4LZL^`kl?!jaxrA=EbUWpcFrr2@y~Io}X*hrdgaHl*z#g=E zYM%N+kzGY9IzL`?zo0;tD^%P>{-#p?$+M30b1cs^n3uvuI+E4{bR;+W!1O zHbTFri&n-U15n~fPbv(bF7d~ELw(h1dT5Ap^if?t1bOX2j4AQ9H}UobW>mNDvtNX; z*jm^@hSo+DkSQBG1mm2@qkJ6_%krNT@X^pXH6*Gg#cp;a1+TJxRtn9h6pM>il1=ch zZkIu=;J1cbYR5Lv9EHog>nzV~rW5w5oN|m_uBOHU6CFdjk~n>(a1Uc$OGxHo$*zh- z+!7>D&Ovh^ybywGmfQEu|4fxo9Lq>W4%{rSqH(nJD}WQ4erP0Y6D8OBT9T5$&rztxs&|J6Ahi^_I4vgZmdG{~4sH2uly)Ob6H2vCiS)OaK29Z!&TE*or+pYMd+wnMazO*otoYCm6;cC;O62XxFQ>%MiVduq31#=ex+Zx92vkOQ~U?T2rU}N)PnhW z*S8NefCY#0t=AU#YCl)&pRj@fZXeA__gRPac+qhDNa3+P{@yAFY{8FK3SGijDc0iaT z4e-`1l``7A>+ zm5I-D>`|Cn-Rh#Xtqw@3nV|nFlV%eiJy+kMDNv9IlAaE20|j`Pxkk{N!90zRB)2%*2ij;jj#Qqd|0|KQb|cVL_EK_Fs~%XsXVNI0UJ1g?SX52`zJ^? 
zfAhq*bbhK8rLG{Zd$`b%^tm)suZjV*^}gFe5NfZ}dtcTZ)4(JW4+q?fe5bWjTHMq| zSPyru0yuDA($-}rMk4py9}I|bEg)D^VJMm7_;P04nypI$OewM(g-XUvPtu?v@}51; z1JBhV-Izmp)kZh?gxQErn^mQj9Oy7&O34Noji8&tnRv?k;nKV-Pm$44$QT7q-Hdz+ zv1FIYb%N%m-`U@=e4A{Rgk>_ZY=66zPA8tm01HSsmG~?jzG6?KUxGN zzw;YPs*mTEHAM1R*qDOY{WVQ>KnYj-FXg{C?fDUVaSGtF6Y0p;ngpd6D&x=;?{E>^ zn*6N&KRzSQPO1F_<`Dg@4%jHf`2rys12W=cPiH$Xn?g(ewbS=!^u@PLv?X$z%F)5o z__C4gU`?Vdcvq0JX%M9Sp9h@`K2h>Xk8+1r2;p&Af - - - - ossrh - ${env.OSSRH_JIRA_USERNAME} - ${env.OSSRH_JIRA_PASSWORD} - - - - - ossrh - - true - - - gpg - ${env.GPG_KEY_NAME} - ${env.GPG_PASSPHRASE} - - - - - diff --git a/pom.xml b/pom.xml index 6b4cfdd1f..237b1bf58 100644 --- a/pom.xml +++ b/pom.xml @@ -123,22 +123,6 @@ - - org.apache.maven.plugins - maven-gpg-plugin - 1.6 - - - sign-artifacts - - verify - - sign - - - - - org.apache.maven.plugins From a04294e4207c553f4ca592d5ac0bf76f4188e505 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Thu, 3 Jun 2021 09:18:32 -0400 Subject: [PATCH 37/45] docs: update usage instructions for jitpack --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1daff3de6..bd6be962a 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,12 @@ gtfs-lib can be used as a Java library or run via the command line. If using th ### Library (maven) +**Note:** gtfs-lib builds are hosted on [jitpack](https://jitpack.io/#conveyal/gtfs-lib). + +[Release versions](https://github.com/conveyal/gtfs-lib/releases) are available by default. Branch- or commit-specific snapshot builds can be triggered by clicking `Get` for the build of your choice on jitpack's website. 
```xml - com.conveyal + com.github.conveyal gtfs-lib ${choose-a-version} From e1a020db8a62fbae44772b3d4d04f5f5c245e1ec Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Thu, 3 Jun 2021 09:51:06 -0400 Subject: [PATCH 38/45] docs: update README with more jitpack info --- README.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bd6be962a..bd59f896f 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,7 @@ A gtfs-lib GTFSFeed object should faithfully represent the contents of a single gtfs-lib can be used as a Java library or run via the command line. If using this library with PostgreSQL for persistence, you must use at least version 9.6 of PostgreSQL. ### Library (maven) - -**Note:** gtfs-lib builds are hosted on [jitpack](https://jitpack.io/#conveyal/gtfs-lib). - -[Release versions](https://github.com/conveyal/gtfs-lib/releases) are available by default. Branch- or commit-specific snapshot builds can be triggered by clicking `Get` for the build of your choice on jitpack's website. +Include gtfs-lib as a library in your project with the following dependency in your `pom.xml`. ```xml com.github.conveyal @@ -31,6 +28,14 @@ gtfs-lib can be used as a Java library or run via the command line. If using th ``` +#### Jitpack + +gtfs-lib builds are hosted on [jitpack](https://jitpack.io/#conveyal/gtfs-lib). + +[Release versions](https://github.com/conveyal/gtfs-lib/releases) are available by default. + +Branch- (e.g. `dev-SNAPSHOT`) or commit-specific (using a 10 character commit ID like `a04294e420`) snapshot builds can be triggered by clicking `Get` for the build of your choice on jitpack's website or visiting https://jitpack.io/#conveyal/gtfs-lib/YOUR_VERSION. 
+ ### Command line ```bash From 315044fa63b0a2490ff736bb6c4883d65a07f0ad Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 4 Jun 2021 08:34:51 +0100 Subject: [PATCH 39/45] refactor(GraphQLGtfsSchema.java): Added attributions and translations tables --- .../gtfs/graphql/GraphQLGtfsSchema.java | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index b35020de4..bc7351d0e 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -265,6 +265,32 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("shape_dist_traveled", GraphQLFloat)) .build(); + // Represents rows from attributions.txt + public static final GraphQLObjectType attributionsType = newObject().name("attributions") + .field(MapFetcher.field("attribution_id")) + .field(MapFetcher.field("agency_id")) + .field(MapFetcher.field("route_id")) + .field(MapFetcher.field("trip_id")) + .field(MapFetcher.field("organization_name")) + .field(MapFetcher.field("is_producer", GraphQLInt)) + .field(MapFetcher.field("is_operator", GraphQLInt)) + .field(MapFetcher.field("is_authority", GraphQLInt)) + .field(MapFetcher.field("attribution_url")) + .field(MapFetcher.field("attribution_email")) + .field(MapFetcher.field("attribution_phone")) + .build(); + + // Represents rows from attributions.txt + public static final GraphQLObjectType translationsType = newObject().name("translations") + .field(MapFetcher.field("table_name")) + .field(MapFetcher.field("field_name")) + .field(MapFetcher.field("language")) + .field(MapFetcher.field("translation")) + .field(MapFetcher.field("record_id")) + .field(MapFetcher.field("record_sub_id")) + .field(MapFetcher.field("field_value")) + .build(); + // Represents rows from routes.txt public static final GraphQLObjectType routeType = 
newObject().name("route") .description("A line from a GTFS routes.txt table") @@ -771,6 +797,26 @@ public class GraphQLGtfsSchema { .dataFetcher(new JDBCFetcher("services")) .build() ) + .field(newFieldDefinition() + .name("attributions") + .type(new GraphQLList(GraphQLGtfsSchema.attributionsType)) + .argument(stringArg("namespace")) // FIXME maybe these nested namespace arguments are not doing anything. + .argument(intArg(ID_ARG)) + .argument(intArg(LIMIT_ARG)) + .argument(intArg(OFFSET_ARG)) + .dataFetcher(new JDBCFetcher("attributions")) + .build() + ) + .field(newFieldDefinition() + .name("translations") + .type(new GraphQLList(GraphQLGtfsSchema.translationsType)) + .argument(stringArg("namespace")) // FIXME maybe these nested namespace arguments are not doing anything. + .argument(intArg(ID_ARG)) + .argument(intArg(LIMIT_ARG)) + .argument(intArg(OFFSET_ARG)) + .dataFetcher(new JDBCFetcher("translations")) + .build() + ) .build(); /** From fce6e8e744bd39a7835c456c3200a2e3bda47fe8 Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Fri, 4 Jun 2021 08:51:37 +0100 Subject: [PATCH 40/45] refactor(GraphQLGtfsSchema.java): Corrected comment --- src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index bc7351d0e..acd077d3d 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -280,7 +280,7 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("attribution_phone")) .build(); - // Represents rows from attributions.txt + // Represents rows from translations.txt public static final GraphQLObjectType translationsType = newObject().name("translations") .field(MapFetcher.field("table_name")) .field(MapFetcher.field("field_name")) From 9d0908945fc2ffd69d1b9232461540ce070df6ab Mon 
Sep 17 00:00:00 2001 From: Robin Beer Date: Thu, 10 Jun 2021 09:53:47 +0100 Subject: [PATCH 41/45] refactor(Added GTFS graph QL tests): GTFS graph QL tests now include attributions and translations --- src/test/java/com/conveyal/gtfs/GTFSTest.java | 2 ++ .../gtfs/graphql/GTFSGraphQLTest.java | 16 +++++++++++++++ .../resources/fake-agency/attributions.txt | 2 ++ .../resources/fake-agency/translations.txt | 2 ++ .../resources/graphql/feedAttributions.txt | 18 +++++++++++++++++ .../resources/graphql/feedTranslations.txt | 14 +++++++++++++ .../canFetchAttributions-0.json | 20 +++++++++++++++++++ .../canFetchTranslations-0.json | 16 +++++++++++++++ 8 files changed, 90 insertions(+) create mode 100644 src/test/resources/fake-agency/attributions.txt create mode 100644 src/test/resources/fake-agency/translations.txt create mode 100644 src/test/resources/graphql/feedAttributions.txt create mode 100644 src/test/resources/graphql/feedTranslations.txt create mode 100644 src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchAttributions-0.json create mode 100644 src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchTranslations-0.json diff --git a/src/test/java/com/conveyal/gtfs/GTFSTest.java b/src/test/java/com/conveyal/gtfs/GTFSTest.java index 2932783bc..dca6a72bc 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSTest.java @@ -224,6 +224,8 @@ public void canLoadAndExportSimpleAgencyInSubDirectory() { new ErrorExpectation(NewGTFSErrorType.TABLE_IN_SUBDIRECTORY), new ErrorExpectation(NewGTFSErrorType.TABLE_IN_SUBDIRECTORY), new ErrorExpectation(NewGTFSErrorType.TABLE_IN_SUBDIRECTORY), + new ErrorExpectation(NewGTFSErrorType.TABLE_IN_SUBDIRECTORY), + new ErrorExpectation(NewGTFSErrorType.TABLE_IN_SUBDIRECTORY), new ErrorExpectation(NewGTFSErrorType.ROUTE_LONG_NAME_CONTAINS_SHORT_NAME), new ErrorExpectation(NewGTFSErrorType.FEED_TRAVEL_TIMES_ROUNDED), new 
ErrorExpectation(NewGTFSErrorType.STOP_UNUSED), diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index 8a9227003..240683306 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -148,6 +148,14 @@ public void canFetchAgencies() { }); } + /** Tests that the attributions of a feed can be fetched. */ + @Test + public void canFetchAttributions() { + assertTimeout(Duration.ofMillis(TEST_TIMEOUT), () -> { + MatcherAssert.assertThat(queryGraphQL("feedAttributions.txt"), matchesSnapshot()); + }); + } + /** Tests that the calendars of a feed can be fetched. */ @Test public void canFetchCalendars() { @@ -196,6 +204,14 @@ public void canFetchTrips() { }); } + /** Tests that the translations of a feed can be fetched. */ + @Test + public void canFetchTranslations() { + assertTimeout(Duration.ofMillis(TEST_TIMEOUT), () -> { + MatcherAssert.assertThat(queryGraphQL("feedTranslations.txt"), matchesSnapshot()); + }); + } + // TODO: make tests for schedule_exceptions / calendar_dates /** Tests that the stop times of a feed can be fetched. 
*/ diff --git a/src/test/resources/fake-agency/attributions.txt b/src/test/resources/fake-agency/attributions.txt new file mode 100644 index 000000000..1d77168ac --- /dev/null +++ b/src/test/resources/fake-agency/attributions.txt @@ -0,0 +1,2 @@ +attribution_id,agency_id,route_id,trip_id,organization_name,is_producer,is_operator,is_authority,attribution_url,attribution_email,attribution_phone +1,1,,,Fake Transit,1,,,https://www.faketransit.org,customer.service@faketransit.org, \ No newline at end of file diff --git a/src/test/resources/fake-agency/translations.txt b/src/test/resources/fake-agency/translations.txt new file mode 100644 index 000000000..da8fe2407 --- /dev/null +++ b/src/test/resources/fake-agency/translations.txt @@ -0,0 +1,2 @@ +table_name,field_name,language,translation,record_id,record_sub_id,field_value +stops,stop_desc,FR,en direction du nord,4u6g,, \ No newline at end of file diff --git a/src/test/resources/graphql/feedAttributions.txt b/src/test/resources/graphql/feedAttributions.txt new file mode 100644 index 000000000..6972e2757 --- /dev/null +++ b/src/test/resources/graphql/feedAttributions.txt @@ -0,0 +1,18 @@ +query ($namespace: String) { + feed(namespace: $namespace) { + feed_version + attributions { + attribution_id + agency_id + route_id + trip_id + organization_name + is_producer + is_operator + is_authority + attribution_url + attribution_email + attribution_phone + } + } +} \ No newline at end of file diff --git a/src/test/resources/graphql/feedTranslations.txt b/src/test/resources/graphql/feedTranslations.txt new file mode 100644 index 000000000..4933e2e12 --- /dev/null +++ b/src/test/resources/graphql/feedTranslations.txt @@ -0,0 +1,14 @@ +query ($namespace: String) { + feed(namespace: $namespace) { + feed_version + translations { + table_name + field_name + language + translation + record_id + record_sub_id + field_value + } + } +} \ No newline at end of file diff --git 
a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchAttributions-0.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchAttributions-0.json new file mode 100644 index 000000000..3092058c5 --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchAttributions-0.json @@ -0,0 +1,20 @@ +{ + "data" : { + "feed" : { + "attributions" : [ { + "attribution_id" : 1, + "agency_id" : "1", + "route_id" : null, + "trip_id" : null, + "organization_name" : "Fake Transit", + "is_producer" : 1, + "is_operator" : null, + "is_authority" : null, + "attribution_url" : "https://www.faketransit.org", + "attribution_email" : "customer.service@faketransit.org", + "attribution_phone" : null + } ], + "feed_version" : "1.0" + } + } +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchTranslations-0.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchTranslations-0.json new file mode 100644 index 000000000..1bae5c5e1 --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchTranslations-0.json @@ -0,0 +1,16 @@ +{ + "data" : { + "feed" : { + "translations" : [ { + "table_name" : "stops", + "field_name" : "stop_desc", + "language" : "FR", + "translation" : "en direction du nord", + "record_id" : "4u6g", + "record_sub_id" : null, + "field_value" : null + } ], + "feed_version" : "1.0" + } + } +} \ No newline at end of file From 1b644d6edc640ebeba7d0a6cff6978fdb150d1b9 Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Mon, 14 Jun 2021 11:10:58 -0400 Subject: [PATCH 42/45] docs: update README with local dev instructions --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index bd59f896f..8dc117d4b 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ gtfs-lib can be used as a Java library or run via the command line. 
If using th Include gtfs-lib as a library in your project with the following dependency in your `pom.xml`. ```xml + com.github.conveyal gtfs-lib ${choose-a-version} From 5e7f9d8dbd78693e13612c044a14a4eea936bc04 Mon Sep 17 00:00:00 2001 From: Robin Beer Date: Wed, 16 Jun 2021 15:11:12 +0100 Subject: [PATCH 43/45] refactor(Addressed PR feedback): Fixed typo and formatting. --- .../gtfs/loader/conditions/ConditionalRequirement.java | 3 ++- .../com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java | 2 +- .../com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java index a8dcd0737..25fe7087d 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/ConditionalRequirement.java @@ -13,7 +13,8 @@ */ public abstract class ConditionalRequirement { /** The name of the dependent field, which is a field that requires a specific value if the reference and - * (in some cases) dependent field checks meet certain conditions.*/ + * (in some cases) dependent field checks meet certain conditions. 
+ */ protected String dependentFieldName; /** diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java index eb81836f1..47f752c45 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldInRangeCheck.java @@ -32,7 +32,7 @@ public FieldInRangeCheck( } /** - * If the reference field value is within a defined range and the conditional field value has not be defined, flag + * If the reference field value is within a defined range and the conditional field value has not been defined, flag * an error. */ public Set check( diff --git a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java index aeb596667..3ba3c291d 100644 --- a/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java +++ b/src/main/java/com/conveyal/gtfs/loader/conditions/FieldIsEmptyCheck.java @@ -33,7 +33,7 @@ public Set check( String referenceFieldValue = lineContext.getValueForRow(referenceField.name); if ( POSTGRES_NULL_TEXT.equals(dependentFieldValue) && - POSTGRES_NULL_TEXT.equals(referenceFieldValue) + POSTGRES_NULL_TEXT.equals(referenceFieldValue) ) { // The reference field is required when the dependent field is empty. String message = String.format( From f8cd0ca8f7960d7c6e7b9a4e4c2c5ddec7cb7f21 Mon Sep 17 00:00:00 2001 From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com> Date: Tue, 20 Jul 2021 16:21:49 -0400 Subject: [PATCH 44/45] fix(JDBCTableWriter): Add stop_times linked fields continuous_pickup, continuous_drop_off. 
--- .../conveyal/gtfs/loader/JdbcTableWriter.java | 2 ++ .../gtfs/loader/JDBCTableWriterTest.java | 34 +++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java index 172bf5501..666ba77bf 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java @@ -634,6 +634,8 @@ private String updateChildTable( "timepoint", "drop_off_type", "pickup_type", + "continuous_pickup", + "continuous_drop_off", "shape_dist_traveled" ); } diff --git a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java index e57107f5f..0837da48a 100644 --- a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java @@ -427,6 +427,14 @@ public void canCreateUpdateAndDeleteScheduleExceptions() throws IOException, SQL */ @Test public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IOException, SQLException, InvalidNamespaceException { + final String[] STOP_TIMES_LINKED_FIELDS = new String[] { + "shape_dist_traveled", + "timepoint", + "drop_off_type", + "pickup_type", + "continuous_pickup", + "continuous_drop_off" + }; String routeId = newUUID(); String patternId = newUUID(); int startTime = 6 * 60 * 60; // 6 AM @@ -459,10 +467,12 @@ public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IO assertNotNull(uuid); // Check that trip exists. assertThatSqlQueryYieldsRowCount(getColumnsForId(createdTrip.id, Table.TRIPS), 1); - // Check the stop_time's initial shape_dist_traveled value. TODO test that other linked fields are updated? + + // Check the stop_time's initial shape_dist_traveled value and other linked fields. 
PreparedStatement statement = testDataSource.getConnection().prepareStatement( String.format( - "select shape_dist_traveled from %s.stop_times where stop_sequence=1 and trip_id='%s'", + "select %s from %s.stop_times where stop_sequence=1 and trip_id='%s'", + String.join(", ", STOP_TIMES_LINKED_FIELDS), testNamespace, createdTrip.trip_id ) @@ -472,10 +482,23 @@ public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IO while (resultSet.next()) { // First stop_time shape_dist_traveled should be zero. assertThat(resultSet.getInt(1), equalTo(0)); + + // Other linked fields should be interpreted as zero. + for (int i = 2; i <= STOP_TIMES_LINKED_FIELDS.length; i++) { + assertThat(resultSet.getInt(i), equalTo(0)); + } } + // Update pattern_stop#shape_dist_traveled and check that the stop_time's shape_dist value is updated. final double updatedShapeDistTraveled = 45.5; - pattern.pattern_stops[1].shape_dist_traveled = updatedShapeDistTraveled; + PatternStopDTO pattern_stop = pattern.pattern_stops[1]; + pattern_stop.shape_dist_traveled = updatedShapeDistTraveled; + // Assign an arbitrary value (the order of appearance in STOP_TIMES_LINKED_FIELDS) for the other linked fields. + pattern_stop.timepoint = 2; + pattern_stop.drop_off_type = 3; + pattern_stop.pickup_type = 4; + pattern_stop.continuous_pickup = 5; + pattern_stop.continuous_drop_off = 6; JdbcTableWriter patternUpdater = createTestTableWriter(Table.PATTERNS); String updatedPatternOutput = patternUpdater.update(pattern.id, mapper.writeValueAsString(pattern), true); LOG.info("Updated pattern: {}", updatedPatternOutput); @@ -483,6 +506,11 @@ public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IO while (resultSet2.next()) { // First stop_time shape_dist_traveled should be updated. assertThat(resultSet2.getDouble(1), equalTo(updatedShapeDistTraveled)); + + // Other linked fields should be as set above. 
+ for (int i = 2; i <= STOP_TIMES_LINKED_FIELDS.length; i++) { + assertThat(resultSet2.getInt(i), equalTo(i)); + } } } From ab6dfe754f8264c52b70f8144045841053aa306f Mon Sep 17 00:00:00 2001 From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com> Date: Tue, 20 Jul 2021 16:33:11 -0400 Subject: [PATCH 45/45] refactor(JDBCTableWriterTest): Refactor linked field test. --- .../com/conveyal/gtfs/loader/JDBCTableWriterTest.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java index 0837da48a..c9f396301 100644 --- a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java @@ -422,11 +422,11 @@ public void canCreateUpdateAndDeleteScheduleExceptions() throws IOException, SQL } /** - * This test verifies that stop_times#shape_dist_traveled (and other "linked fields") are updated when a pattern + * This test verifies that stop_times#shape_dist_traveled and other linked fields are updated when a pattern * is updated. */ @Test - public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IOException, SQLException, InvalidNamespaceException { + public void shouldUpdateStopTimeOnPatternStopUpdate() throws IOException, SQLException, InvalidNamespaceException { final String[] STOP_TIMES_LINKED_FIELDS = new String[] { "shape_dist_traveled", "timepoint", @@ -481,10 +481,8 @@ public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IO ResultSet resultSet = statement.executeQuery(); while (resultSet.next()) { // First stop_time shape_dist_traveled should be zero. - assertThat(resultSet.getInt(1), equalTo(0)); - - // Other linked fields should be interpreted as zero. - for (int i = 2; i <= STOP_TIMES_LINKED_FIELDS.length; i++) { + // Other linked fields should be interpreted as zero too. 
+ for (int i = 1; i <= STOP_TIMES_LINKED_FIELDS.length; i++) { assertThat(resultSet.getInt(i), equalTo(0)); } }