diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 000000000..3ab207f24
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,4 @@
+# See https://help.github.com/articles/about-codeowners/
+
+# A Conveyal employee is required to approve PR merges
+* @conveyal/employees
diff --git a/.github/issue_template.md b/.github/issue_template.md
new file mode 100644
index 000000000..30edd8a3f
--- /dev/null
+++ b/.github/issue_template.md
@@ -0,0 +1,17 @@
+_**NOTE:** This issue system is intended for reporting bugs and tracking progress in software development. Although this software is licensed with an open-source license, any issue opened here may not be responded to in a timely manner. [Conveyal](https://www.conveyal.com) is unable to provide technical support for custom deployments of this software unless your company has a support contract with us. Please remove this note when creating the issue._
+
+## Observed behavior
+
+Please explain what is being observed within the application here.
+
+## Expected behavior
+
+Please explain what should happen instead.
+
+## Steps to reproduce the problem
+
+Please be as specific as possible.
+
+## Notes on configuration and files required to reproduce the issue
+
+Please describe any applicable config files that were used. If this issue is reporting behavior that is related to the data format of a particular GTFS file, please upload that GTFS file and note the exact files and lines that are causing the issue. If this issue is reproducible in [datatools-server](https://github.com/conveyal/datatools-server) or [datatools-ui](https://github.com/conveyal/datatools-ui), please include the exact commits of those libraries and any configuration of those libraries that differs from the default configuration.
\ No newline at end of file
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 000000000..0a0f36b9d
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,13 @@
+### Checklist
+
+- [ ] Appropriate branch selected _(all PRs must first be merged to `dev` before they can be merged to `master`)_
+- [ ] Any modified or new methods or classes have helpful JavaDoc and code is thoroughly commented
+- [ ] The description lists all applicable issues this PR seeks to resolve
+- [ ] The description lists any configuration setting(s) that differ from the default settings
+- [ ] All tests and CI builds passing
+- [ ] The description lists all relevant PRs included in this release _(remove this if not merging to master)_
+- [ ] Code coverage does not significantly worsen (ideally it improves) _(remove this if not merging to master)_
+
+### Description
+
+Please explain the changes you made here and, if not immediately obvious from the code, how they resolve any referenced issues. Be sure to include all issues being resolved and any special configuration settings that are needed for the software to run properly with these changes. If merging to master, please also list the PRs that are to be included.
diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java
index b4abdedbd..58ab2039a 100644
--- a/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java
+++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSError.java
@@ -17,7 +17,7 @@ public class NewGTFSError {
 
     /** The class of the table in which the error was encountered. */
-    public final Class<? extends Entity> entityType;
+    public Class<? extends Entity> entityType;
 
     /** The kind of error encountered. */
    public final NewGTFSErrorType errorType;
diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java
index b35037642..d0a900030 100644
--- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java
+++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java
@@ -10,6 +10,7 @@ public enum NewGTFSErrorType {
     TIME_FORMAT(Priority.MEDIUM, "Time format should be HH:MM:SS."),
     URL_FORMAT(Priority.MEDIUM, "URL format should be <scheme>://<authority><path>?<query>#<fragment>"),
     LANGUAGE_FORMAT(Priority.LOW, "Language should be specified with a valid BCP47 tag."),
+    ILLEGAL_FIELD_VALUE(Priority.MEDIUM, "Fields may not contain tabs, carriage returns or new lines."),
     INTEGER_FORMAT(Priority.MEDIUM, "Incorrect integer format."),
     FLOATING_FORMAT(Priority.MEDIUM, "Incorrect floating point number format."),
     COLUMN_NAME_UNSAFE(Priority.HIGH, "Column header contains characters not safe in SQL, it was renamed."),
@@ -59,7 +60,7 @@ public enum NewGTFSErrorType {
     TRAVEL_TIME_ZERO(Priority.HIGH, "The vehicle arrives at this stop at the same time it departs from the previous stop."),
     MISSING_ARRIVAL_OR_DEPARTURE(Priority.MEDIUM, "First and last stop times are required to have both an arrival and departure time."),
     TRIP_TOO_FEW_STOP_TIMES(Priority.MEDIUM, "A trip must have at least two stop times to represent travel."),
-    TRIP_OVERLAP_IN_BLOCK(Priority.MEDIUM, "Blocks"),
+    TRIP_OVERLAP_IN_BLOCK(Priority.MEDIUM, "A trip overlaps another trip and shares the same block_id."),
     TRAVEL_TOO_SLOW(Priority.MEDIUM, "The vehicle is traveling very slowly to reach this stop from the previous one."),
     TRAVEL_TOO_FAST(Priority.MEDIUM, "The vehicle travels extremely fast to reach this stop from the previous one."),
     VALIDATOR_FAILED(Priority.HIGH, "The specified validation stage failed due to an error encountered during loading. This is likely due to an error encountered during loading (e.g., a date or number field is formatted incorrectly.)."),
diff --git a/src/main/java/com/conveyal/gtfs/error/OverlappingTripsInBlockError.java b/src/main/java/com/conveyal/gtfs/error/OverlappingTripsInBlockError.java
deleted file mode 100644
index 62a5a374a..000000000
--- a/src/main/java/com/conveyal/gtfs/error/OverlappingTripsInBlockError.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package com.conveyal.gtfs.error;
-
-import com.conveyal.gtfs.model.Route;
-import com.conveyal.gtfs.validator.model.Priority;
-
-import java.io.Serializable;
-
-/**
- * Created by landon on 5/6/16.
- */
-public class OverlappingTripsInBlockError extends GTFSError implements Serializable {
-    public static final long serialVersionUID = 1L;
-
-    public final String[] tripIds;
-    public final Priority priority = Priority.HIGH;
-    public final String routeId;
-
-    public OverlappingTripsInBlockError(long line, String field, String affectedEntityId, String routeId, String[] tripIds) {
-        super("trips", line, field, affectedEntityId);
-        this.tripIds = tripIds;
-        this.routeId = routeId;
-    }
-
-    @Override public String getMessage() {
-        return String.format("Trip Ids %s overlap (route: %s) and share block ID %s", String.join(" & ", tripIds), routeId, affectedEntityId);
-    }
-}
diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java
index a8c2405fd..828a68278 100644
--- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java
+++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java
@@ -333,6 +333,18 @@ public class GraphQLGtfsSchema {
             .field(MapFetcher.field("parent_station"))
             .field(MapFetcher.field("location_type", GraphQLInt))
             .field(MapFetcher.field("wheelchair_boarding", GraphQLInt))
+            // Returns all stops that reference parent stop's stop_id
+            .field(newFieldDefinition()
+                    .name("child_stops")
+                    .type(new GraphQLList(new GraphQLTypeReference("stop")))
+                    .dataFetcher(new JDBCFetcher(
+                            "stops",
+                            "stop_id",
+                            null,
+                            false,
+                            "parent_station"
+                    ))
+                    .build())
             .field(RowCountFetcher.field("stop_time_count", "stop_times", "stop_id"))
             .field(newFieldDefinition()
                     .name("patterns")
diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java
index d0a3e8806..a7ece5138 100644
--- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java
+++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java
@@ -69,6 +69,7 @@ public class JDBCFetcher implements DataFetcher<List<Map<String, Object>>> {
     final String parentJoinField;
     private final String sortField;
     private final boolean autoLimit;
+    private final String childJoinField;
 
     /**
      * Constructor for tables that need neither restriction by a where clause nor sorting based on the enclosing entity.
@@ -96,10 +97,22 @@ public JDBCFetcher (String tableName, String parentJoinField) {
      * tables that it is unnatural to expect a limit (e.g., shape points or pattern stops).
      */
     public JDBCFetcher (String tableName, String parentJoinField, String sortField, boolean autoLimit) {
+        this(tableName, parentJoinField, sortField, autoLimit, null);
+    }
+
+    /**
+     *
+     * @param childJoinField The child table field that should be joined to the parent join field. This enables joining
+     *                       where references from the child table to the parent table do not share the same field name,
+     *                       e.g., stops#stop_id -> transfers#from_stop_id. This value defaults to parentJoinField if
+     *                       argument is null.
+     */
+    public JDBCFetcher (String tableName, String parentJoinField, String sortField, boolean autoLimit, String childJoinField) {
         this.tableName = tableName;
         this.parentJoinField = parentJoinField;
         this.sortField = sortField;
         this.autoLimit = autoLimit;
+        this.childJoinField = childJoinField != null ? childJoinField : parentJoinField;
     }
 
     // We can't automatically generate JDBCFetcher based field definitions for inclusion in a GraphQL schema (as we
@@ -198,8 +211,8 @@ List<Map<String, Object>> getResults (
         // If we are fetching an item nested within a GTFS entity in the GraphQL query, we want to add an SQL "where"
         // clause. This could conceivably be done automatically, but it's clearer to just express the intent.
        // Note, this is assuming the type of the field in the parent is a string.
-        if (parentJoinField != null && parentJoinValues != null && !parentJoinValues.isEmpty()) {
-            whereConditions.add(makeInClause(parentJoinField, parentJoinValues, preparedStatementParameters));
+        if (childJoinField != null && parentJoinValues != null && !parentJoinValues.isEmpty()) {
+            whereConditions.add(makeInClause(childJoinField, parentJoinValues, preparedStatementParameters));
         }
         if (sortField != null) {
             // Sort field is not provided by user input, so it's ok to add here (i.e., it's not prone to SQL injection).
@@ -341,7 +354,7 @@ List<Map<String, Object>> getResults (
             }
             // This logging produces a lot of noise during testing due to large numbers of joined sub-queries
             // LOG.info("table name={}", tableName);
-            LOG.debug("SQL: {}", preparedStatement.toString());
+            LOG.info("SQL: {}", preparedStatement.toString());
             if (preparedStatement.execute()) {
                 ResultSet resultSet = preparedStatement.getResultSet();
                 ResultSetMetaData meta = resultSet.getMetaData();
diff --git a/src/main/java/com/conveyal/gtfs/loader/BatchTracker.java b/src/main/java/com/conveyal/gtfs/loader/BatchTracker.java
index 19d64aad9..2d1ae540d 100644
--- a/src/main/java/com/conveyal/gtfs/loader/BatchTracker.java
+++ b/src/main/java/com/conveyal/gtfs/loader/BatchTracker.java
@@ -34,7 +34,10 @@ public void addBatch() throws SQLException {
         }
     }
 
-    public void executeRemaining() throws SQLException {
+    /**
+     * Execute any remaining statements and return the total records processed.
+     */
+    public int executeRemaining() throws SQLException {
         if (currentBatchSize > 0) {
             totalRecordsProcessed += currentBatchSize;
             preparedStatement.executeBatch();
@@ -42,7 +45,8 @@ public void executeRemaining() throws SQLException {
         }
         // Avoid reuse, signal that this was cleanly closed.
         preparedStatement = null;
-        LOG.info(String.format("Inserted %d %s records", totalRecordsProcessed, recordType));
+        LOG.info(String.format("Processed %d %s records", totalRecordsProcessed, recordType));
+        return totalRecordsProcessed;
     }
 
     public void finalize () {
diff --git a/src/main/java/com/conveyal/gtfs/loader/BooleanField.java b/src/main/java/com/conveyal/gtfs/loader/BooleanField.java
index b4a0ce265..5a22a27c0 100644
--- a/src/main/java/com/conveyal/gtfs/loader/BooleanField.java
+++ b/src/main/java/com/conveyal/gtfs/loader/BooleanField.java
@@ -1,11 +1,13 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
 import com.conveyal.gtfs.error.NewGTFSErrorType;
 import com.conveyal.gtfs.storage.StorageException;
 
 import java.sql.JDBCType;
 import java.sql.PreparedStatement;
 import java.sql.SQLType;
+import java.util.Set;
 
 /**
  * A GTFS boolean field, coded as a single character string 0 or 1.
@@ -17,17 +19,21 @@ public BooleanField (String name, Requirement requirement) {
         super(name, requirement);
     }
 
-    private boolean validate (String string) {
+    private ValidateFieldResult<Boolean> validate (String string) {
+        ValidateFieldResult<Boolean> result = new ValidateFieldResult<>();
         if ( ! ("0".equals(string) || "1".equals(string))) {
("0".equals(string) || "1".equals(string))) { - throw new StorageException(NewGTFSErrorType.BOOLEAN_FORMAT, string); + result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.BOOLEAN_FORMAT, string)); } - return "1".equals(string); + result.clean = "1".equals(string); + return result; } @Override - public void setParameter (PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter (PreparedStatement preparedStatement, int oneBasedIndex, String string) { try { - preparedStatement.setBoolean(oneBasedIndex, validate(string)); + ValidateFieldResult result = validate(string); + preparedStatement.setBoolean(oneBasedIndex, result.clean); + return result.errors; } catch (Exception ex) { throw new StorageException(ex); } @@ -37,8 +43,8 @@ public void setParameter (PreparedStatement preparedStatement, int oneBasedIndex * The 0 or 1 will be converted to the string "true" or "false" for SQL COPY. */ @Override - public String validateAndConvert (String string) { - return Boolean.toString(validate(string)); + public ValidateFieldResult validateAndConvert (String string) { + return ValidateFieldResult.from(validate(string)); } @Override diff --git a/src/main/java/com/conveyal/gtfs/loader/ColorField.java b/src/main/java/com/conveyal/gtfs/loader/ColorField.java index 45ea49462..6ba19e6ae 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ColorField.java +++ b/src/main/java/com/conveyal/gtfs/loader/ColorField.java @@ -1,11 +1,13 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.storage.StorageException; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLType; +import java.util.Set; import static com.conveyal.gtfs.loader.Field.cleanString; @@ -21,21 +23,24 @@ public ColorField(String name, Requirement requirement) { } /** Check that a string can be properly parsed and is in range. */ - public String validateAndConvert (String string) { + public ValidateFieldResult validateAndConvert (String string) { + ValidateFieldResult result = new ValidateFieldResult<>(string); try { if (string.length() != 6) { - throw new StorageException(NewGTFSErrorType.COLOR_FORMAT, string); + result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.COLOR_FORMAT, string)); } int integer = Integer.parseInt(string, 16); - return string; // Could also store the integer. + return result; // Could also store the integer. 
         } catch (Exception ex) {
             throw new StorageException(NewGTFSErrorType.COLOR_FORMAT, string);
         }
     }
 
-    public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
+    public Set<NewGTFSError> setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
         try {
-            preparedStatement.setString(oneBasedIndex, validateAndConvert(string));
+            ValidateFieldResult<String> result = validateAndConvert(string);
+            preparedStatement.setString(oneBasedIndex, result.clean);
+            return result.errors;
         } catch (Exception ex) {
             throw new StorageException(ex);
         }
diff --git a/src/main/java/com/conveyal/gtfs/loader/CurrencyField.java b/src/main/java/com/conveyal/gtfs/loader/CurrencyField.java
index d506fc3f3..4d00ec4ee 100644
--- a/src/main/java/com/conveyal/gtfs/loader/CurrencyField.java
+++ b/src/main/java/com/conveyal/gtfs/loader/CurrencyField.java
@@ -1,5 +1,6 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
 import com.conveyal.gtfs.error.NewGTFSErrorType;
 import com.conveyal.gtfs.storage.StorageException;
 import com.google.common.collect.Sets;
@@ -198,21 +199,24 @@ public CurrencyField (String name, Requirement requirement) {
         super(name, requirement);
     }
 
-    private String validate (String string) {
+    private ValidateFieldResult<String> validate (String string) {
+        ValidateFieldResult<String> result = new ValidateFieldResult<>(string);
         if (!CURRENCY_CODES.contains(string)) {
-            throw new StorageException(NewGTFSErrorType.CURRENCY_UNKNOWN, string);
+            result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.CURRENCY_UNKNOWN, string));
         }
-        return string;
+        return result;
     }
 
     /** Check that a string can be properly parsed and is in range. */
-    public String validateAndConvert (String string) {
+    public ValidateFieldResult<String> validateAndConvert (String string) {
         return validate(string);
     }
 
-    public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
+    public Set<NewGTFSError> setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
         try {
-            preparedStatement.setString(oneBasedIndex, validateAndConvert(string));
+            ValidateFieldResult<String> result = validateAndConvert(string);
+            preparedStatement.setString(oneBasedIndex, result.clean);
+            return result.errors;
         } catch (Exception ex) {
             throw new StorageException(ex);
         }
diff --git a/src/main/java/com/conveyal/gtfs/loader/DateField.java b/src/main/java/com/conveyal/gtfs/loader/DateField.java
index 3072e3aa6..ec7d6d128 100644
--- a/src/main/java/com/conveyal/gtfs/loader/DateField.java
+++ b/src/main/java/com/conveyal/gtfs/loader/DateField.java
@@ -1,5 +1,6 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
 import com.conveyal.gtfs.error.NewGTFSErrorType;
 import com.conveyal.gtfs.storage.StorageException;
 
@@ -9,6 +10,8 @@
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeParseException;
+import java.util.Collections;
+import java.util.Set;
 
 /**
  * A GTFS date in the format YYYYMMDD.
@@ -26,26 +29,33 @@ public DateField (String name, Requirement requirement) {
         super(name, requirement);
     }
 
-    public static String validate (String string) {
+    public static ValidateFieldResult<String> validate (String string) {
+        // Initialize default value as null (i.e., don't use the input value).
+        ValidateFieldResult<String> result = new ValidateFieldResult<>();
        // Parse the date out of the supplied string.
        LocalDate date;
        try {
            date = LocalDate.parse(string, GTFS_DATE_FORMATTER);
+            // Only set the clean result after the date parse is successful.
+            result.clean = string;
        } catch (DateTimeParseException ex) {
-            throw new StorageException(NewGTFSErrorType.DATE_FORMAT, string);
+            result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.DATE_FORMAT, string));
+            return result;
        }
        // Range check on year. Parsing operation above should already have checked month and day ranges.
        int year = date.getYear();
        if (year < 2000 || year > 2100) {
-            throw new StorageException(NewGTFSErrorType.DATE_RANGE, string);
+            result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.DATE_RANGE, string));
        }
-        return string;
+        return result;
    }
 
    @Override
-    public void setParameter (PreparedStatement preparedStatement, int oneBasedIndex, String string) {
+    public Set<NewGTFSError> setParameter (PreparedStatement preparedStatement, int oneBasedIndex, String string) {
        try {
-            preparedStatement.setString(oneBasedIndex, validate(string));
+            ValidateFieldResult<String> result = validate(string);
+            preparedStatement.setString(oneBasedIndex, result.clean);
+            return result.errors;
        } catch (Exception ex) {
            throw new StorageException(ex);
        }
@@ -54,17 +64,18 @@ public void setParameter (PreparedStatement preparedStatement, int oneBasedIndex
    /**
     * DateField specific method to set a statement parameter from a {@link LocalDate}.
     */
-    public void setParameter (PreparedStatement preparedStatement, int oneBasedIndex, LocalDate localDate) {
+    public Set<NewGTFSError> setParameter (PreparedStatement preparedStatement, int oneBasedIndex, LocalDate localDate) {
        try {
            if (localDate == null) setNull(preparedStatement, oneBasedIndex);
            else preparedStatement.setString(oneBasedIndex, localDate.format(GTFS_DATE_FORMATTER));
+            return Collections.EMPTY_SET;
        } catch (Exception e) {
            throw new StorageException(e);
        }
    }
 
    @Override
-    public String validateAndConvert (String string) {
+    public ValidateFieldResult<String> validateAndConvert (String string) {
        return validate(string);
    }
 
diff --git a/src/main/java/com/conveyal/gtfs/loader/DateListField.java b/src/main/java/com/conveyal/gtfs/loader/DateListField.java
index 66b80dfec..7071fb7c1 100644
--- a/src/main/java/com/conveyal/gtfs/loader/DateListField.java
+++ b/src/main/java/com/conveyal/gtfs/loader/DateListField.java
@@ -1,5 +1,6 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
 import com.conveyal.gtfs.storage.StorageException;
 
 import java.sql.Array;
@@ -7,10 +8,13 @@
 import java.sql.PreparedStatement;
 import java.sql.SQLType;
 import java.util.Arrays;
-import java.util.stream.Collectors;
-
-import static com.conveyal.gtfs.loader.DateField.validate;
+import java.util.Collections;
+import java.util.Set;
 
+/**
+ * Field type for a list of date strings. This type is ONLY used for the editor-specific schedule exceptions table, so
+ * not all methods are supported (e.g., validateAndConvert has no functionality because it is only called on GTFS tables).
+ */
 public class DateListField extends Field {
 
    public DateListField(String name, Requirement requirement) {
@@ -18,19 +22,23 @@ public DateListField(String name, Requirement requirement) {
    }
 
    @Override
-    public String validateAndConvert(String original) {
-        // FIXME
-        return null;
+    public ValidateFieldResult<String> validateAndConvert(String original) {
+        // This function is currently only used during feed loading. Because the DateListField only exists for the
+        // schedule exceptions table (which is not a GTFS spec table), we do not expect to ever call this function on
+        // this table/field.
+        // TODO: is there any reason we may want to add support for validation?
+        throw new UnsupportedOperationException("Cannot call validateAndConvert on field of type DateListField because this is not a supported GTFS field type.");
    }
 
    @Override
-    public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
+    public Set<NewGTFSError> setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
        try {
            String[] dateStrings = Arrays.stream(string.split(","))
                    .map(s -> DateField.validate(s).clean)
                    .toArray(String[]::new);
            Array array = preparedStatement.getConnection().createArrayOf("text", dateStrings);
            preparedStatement.setArray(oneBasedIndex, array);
+            return Collections.EMPTY_SET;
        } catch (Exception ex) {
            throw new StorageException(ex);
        }
diff --git a/src/main/java/com/conveyal/gtfs/loader/DoubleField.java b/src/main/java/com/conveyal/gtfs/loader/DoubleField.java
index 74044bef1..d14e9acaf 100644
--- a/src/main/java/com/conveyal/gtfs/loader/DoubleField.java
+++ b/src/main/java/com/conveyal/gtfs/loader/DoubleField.java
@@ -1,10 +1,12 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
 import com.conveyal.gtfs.storage.StorageException;
 
 import java.sql.JDBCType;
 import java.sql.PreparedStatement;
 import java.sql.SQLType;
+import java.util.Set;
 
 import static com.conveyal.gtfs.error.NewGTFSErrorType.NUMBER_PARSING;
 import static com.conveyal.gtfs.error.NewGTFSErrorType.NUMBER_TOO_LARGE;
@@ -29,31 +31,32 @@ public DoubleField (String name, Requirement requirement, double minValue, doubl
        this.outputPrecision = outputPrecision;
    }
 
-    private double validate(String string) {
-        double d;
+    private ValidateFieldResult<Double> validate(String string) {
+        ValidateFieldResult<Double> result = new ValidateFieldResult<>();
        try {
-            d = Double.parseDouble(string);
+            result.clean = Double.parseDouble(string);
        } catch (NumberFormatException e) {
            throw new StorageException(NUMBER_PARSING, string);
        }
-        if (d < minValue) throw new StorageException(NUMBER_TOO_SMALL, Double.toString(d));
-        if (d > maxValue) throw new StorageException(NUMBER_TOO_LARGE, Double.toString(d));
-        return d;
+        if (result.clean < minValue) result.errors.add(NewGTFSError.forFeed(NUMBER_TOO_SMALL, string));
+        if (result.clean > maxValue) result.errors.add(NewGTFSError.forFeed(NUMBER_TOO_LARGE, string));
+        return result;
    }
 
    @Override
-    public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
+    public Set<NewGTFSError> setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) {
        try {
-            preparedStatement.setDouble(oneBasedIndex, validate(string));
+            ValidateFieldResult<Double> result = validate(string);
+            preparedStatement.setDouble(oneBasedIndex, result.clean);
+            return result.errors;
        } catch (Exception ex) {
            throw new StorageException(ex);
        }
    }
 
    @Override
-    public String validateAndConvert(String string) {
-        validate(string);
-        return string;
+    public ValidateFieldResult<String> validateAndConvert(String string) {
+        return ValidateFieldResult.from(validate(string));
    }
 
    @Override
diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java
index ebc99b0df..49d312be2 100644
--- a/src/main/java/com/conveyal/gtfs/loader/Field.java
+++ b/src/main/java/com/conveyal/gtfs/loader/Field.java
@@ -1,8 +1,13 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
+import com.conveyal.gtfs.error.NewGTFSErrorType;
+import com.google.common.collect.ImmutableSet;
+
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 import java.sql.SQLType;
+import java.util.Set;
 
 /**
  * Field subclasses process an incoming String that represents a single GTFS CSV field value.
@@ -22,12 +27,25 @@ public abstract class Field {
 
     public final String name;
-    final Requirement requirement;
+    /**
+     * Keep any illegal character sequences and their respective replacements here.
+     *
+     * TODO: Add other illegal character sequences (e.g., HTML tags, comments or escape sequences).
+     */
+    public static final Set<IllegalCharacter> ILLEGAL_CHARACTERS = ImmutableSet.of(
+        // Backslashes, newlines, and tabs have special meaning to Postgres. Also, new lines, tabs, and carriage returns are
+        // prohibited by GTFS.
+        new IllegalCharacter("\\", "\\\\", "Unescaped backslash"),
+        new IllegalCharacter("\t", " ", "Tab"),
+        new IllegalCharacter("\n", " ", "New line"),
+        new IllegalCharacter("\r", " ", "Carriage return")
+    );
+    public final Requirement requirement;
     /**
      * Indicates that this field acts as a foreign key to this referenced table. This is used when checking referential
      * integrity when loading a feed.
      * */
-    Table referenceTable = null;
+    public Table referenceTable = null;
     private boolean shouldBeIndexed;
     private boolean emptyValuePermitted;
 
@@ -43,14 +61,24 @@ public Field(String name, Requirement requirement) {
      * @param original a non-null String
      * @return a string that is parseable as this field's type, or null if it is not parseable
      */
-    public abstract String validateAndConvert(String original);
+    public abstract ValidateFieldResult<String> validateAndConvert(String original);
 
-    public abstract void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string);
+    public abstract Set<NewGTFSError> setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string);
 
     public void setNull(PreparedStatement preparedStatement, int oneBasedIndex) throws SQLException {
         preparedStatement.setNull(oneBasedIndex, getSqlType().getVendorTypeNumber());
     }
 
+    /**
+     * Finds the index of the field given a string name.
+     * @return the index of the field or -1 if no match is found
+     */
+    public static int getFieldIndex (Field[] fields, String name) {
+        // Linear search, assuming a small number of fields per table.
+        for (int i = 0; i < fields.length; i++) if (fields[i].name.equals(name)) return i;
+        return -1;
+    }
+
     public abstract SQLType getSqlType ();
 
     // Overridden to create exception for "double precision", since its enum value is just called DOUBLE, and ARRAY types,
@@ -64,19 +92,24 @@ public String getSqlDeclaration() {
     }
 
     // TODO test for input with tabs, newlines, carriage returns, and slashes in it.
-    protected static String cleanString (String string) {
-        // Backslashes, newlines, and tabs have special meaning to Postgres.
-        // String.contains is significantly faster than using a regex or replace, and has barely any speed impact.
-        if (string.contains("\\")) {
-            string = string.replace("\\", "\\\\");
-        }
-        if (string.contains("\t") || string.contains("\n") || string.contains("\r")) {
-            // TODO record error and recover, and use a single regex
-            string = string.replace("\t", " ");
-            string = string.replace("\n", " ");
-            string = string.replace("\r", " ");
+    protected static ValidateFieldResult<String> cleanString (String string) {
+        return cleanString(new ValidateFieldResult<>(string));
+    }
+
+    protected static ValidateFieldResult<String> cleanString (ValidateFieldResult<String> previousResult) {
+        ValidateFieldResult<String> result = ValidateFieldResult.from(previousResult);
+        // Check for illegal character sequences and replace them as needed.
+        for (IllegalCharacter illegalChar: ILLEGAL_CHARACTERS) {
+            // String.contains is significantly faster than using a regex or replace, and has barely any speed impact.
+            if (previousResult.clean.contains(illegalChar.illegalSequence)) {
+                // Use the result string value to ensure that each iteration is cleaned up properly.
+                result.clean = result.clean.replace(illegalChar.illegalSequence, illegalChar.replacement);
+                // We don't know the Table or line number here, but when the errors bubble up, these values should be
+                // assigned to the errors.
+                result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.ILLEGAL_FIELD_VALUE, illegalChar.description));
+            }
         }
-        return string;
+        return result;
     }
 
     /**
diff --git a/src/main/java/com/conveyal/gtfs/loader/IllegalCharacter.java b/src/main/java/com/conveyal/gtfs/loader/IllegalCharacter.java
new file mode 100644
index 000000000..6c3a5ada2
--- /dev/null
+++ b/src/main/java/com/conveyal/gtfs/loader/IllegalCharacter.java
@@ -0,0 +1,18 @@
+package com.conveyal.gtfs.loader;
+
+/**
+ * This class contains a convenient way to describe illegal character sequences during GTFS feed loading, as well as the
+ * appropriate replacement value and a human-readable description (rather than inputting the bad sequence into the
+ * database and having to deal with downstream issues).
+ */
+public class IllegalCharacter {
+    public String illegalSequence;
+    public String replacement;
+    public String description;
+
+    public IllegalCharacter(String illegalSequence, String replacement, String description) {
+        this.illegalSequence = illegalSequence;
+        this.replacement = replacement;
+        this.description = description;
+    }
+}
diff --git a/src/main/java/com/conveyal/gtfs/loader/IntegerField.java b/src/main/java/com/conveyal/gtfs/loader/IntegerField.java
index 2e4e1c519..e6631865c 100644
--- a/src/main/java/com/conveyal/gtfs/loader/IntegerField.java
+++ b/src/main/java/com/conveyal/gtfs/loader/IntegerField.java
@@ -1,12 +1,13 @@
 package com.conveyal.gtfs.loader;
 
+import com.conveyal.gtfs.error.NewGTFSError;
 import com.conveyal.gtfs.error.NewGTFSErrorType;
-import com.conveyal.gtfs.error.SQLErrorStorage;
 import com.conveyal.gtfs.storage.StorageException;
 
 import java.sql.JDBCType;
 import java.sql.PreparedStatement;
 import java.sql.SQLType;
+import java.util.Set;
 
 public class IntegerField extends Field {
 
@@ -28,31 +29,32 @@ public IntegerField(String name, Requirement requirement, int minValue, int maxV
        this.maxValue = maxValue;
    }
 
-    private int validate (String string) {
-        int i;
+    private ValidateFieldResult<Integer> validate (String string) {
+        ValidateFieldResult<Integer> result = new ValidateFieldResult<>();
        try {
-            i = Integer.parseInt(string);
+            result.clean = Integer.parseInt(string);
        } catch (NumberFormatException e) {
-            throw new StorageException(NewGTFSErrorType.NUMBER_PARSING, string);
+            result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_PARSING, string));
        }
-        if (i < minValue) throw new StorageException(NewGTFSErrorType.NUMBER_TOO_SMALL, string);
-        if (i > maxValue) throw new StorageException(NewGTFSErrorType.NUMBER_TOO_LARGE, string);
-        return i;
+        if (result.clean < minValue) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_TOO_SMALL, string));
+        if (result.clean > maxValue) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_TOO_LARGE, string));
+        return result;
    }
 
    @Override
-    public void setParameter (PreparedStatement preparedStatement, int oneBasedIndex, String string) {
+    public Set<NewGTFSError> setParameter (PreparedStatement preparedStatement, int oneBasedIndex, String string) {
        try {
-            preparedStatement.setInt(oneBasedIndex, validate(string));
+            ValidateFieldResult<Integer> result = validate(string);
+            preparedStatement.setInt(oneBasedIndex, result.clean);
+            return result.errors;
        } catch (Exception ex) {
            throw new StorageException(ex);
        }
    }
 
    @Override
-    public String validateAndConvert (String string) {
-        validate(string);
-        return string;
+    public ValidateFieldResult<String> validateAndConvert (String string) {
+        return ValidateFieldResult.from(validate(string));
    }
 
    @Override
diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java
index 0a536b5ce..9ec8e266f 100644
--- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java
+++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java
@@ -1,6 +1,7 @@
 package com.conveyal.gtfs.loader;
 
 import com.conveyal.gtfs.error.NewGTFSError;
+import com.conveyal.gtfs.error.NewGTFSErrorType;
 import com.conveyal.gtfs.error.SQLErrorStorage;
 import com.conveyal.gtfs.storage.StorageException;
 import com.csvreader.CsvReader;
@@ -8,7 +9,6 @@
 import com.google.common.hash.Hashing;
 import com.google.common.io.Files;
 import org.apache.commons.dbutils.DbUtils;
-import org.apache.commons.io.input.BOMInputStream;
 import org.postgresql.copy.CopyManager;
 import org.postgresql.core.BaseConnection;
 import org.slf4j.Logger;
@@ -16,7 +16,6 @@
 import javax.sql.DataSource;
 import java.io.*;
-import java.nio.charset.Charset;
 import java.sql.*;
 import java.util.*;
 import java.util.zip.ZipEntry;
@@ -86,7 +85,7 @@ public class JdbcGtfsLoader {
     private SQLErrorStorage errorStorage;
 
     // Contains references to unique entity IDs during load stage used for referential integrity check.
-    private ReferenceTracker referenceTracker;
+    private ReferenceTracker referenceTracker = new ReferenceTracker();
 
     public JdbcGtfsLoader(String gtfsFilePath, DataSource dataSource) {
         this.gtfsFilePath = gtfsFilePath;
@@ -130,9 +129,9 @@ public FeedLoadResult loadTables () {
             result.filename = gtfsFilePath;
             result.uniqueIdentifier = tablePrefix;
-            //The order of the following four lines should not be changed because the schema needs to be in place
-            //before the error storage can be constructed, which in turn needs to exist in case any errors are
-            //encountered during the loading process.
+            // The order of the following four lines should not be changed because the schema needs to be in place
+            // before the error storage can be constructed, which in turn needs to exist in case any errors are
+            // encountered during the loading process.
             {
                 createSchema(connection, tablePrefix);
                 //the SQLErrorStorage constructor expects the tablePrefix to contain the dot separator.
@@ -143,7 +142,6 @@ public FeedLoadResult loadTables () {
                 // This allows everything to work even when there's no prefix.
                 this.tablePrefix += ".";
             }
-            this.referenceTracker = new ReferenceTracker(errorStorage);
             // Load each table in turn, saving some summary information about what happened during each table load
             result.agency = load(Table.AGENCY);
             result.calendar = load(Table.CALENDAR);
@@ -175,19 +173,18 @@
     }
 
     /**
-     * Creates a schema/namespace in the database.
+     * Creates a schema/namespace in the database WITHOUT committing the changes.
     * This does *not* setup any other tables or enter the schema name in a registry (@see #registerFeed).
     *
     * @param connection Connection to the database to create the schema on.
     * @param schemaName Name of the schema (i.e. table prefix). Should not include the dot suffix.
     */
-    private static void createSchema (Connection connection, String schemaName) {
+    static void createSchema (Connection connection, String schemaName) {
        try {
            Statement statement = connection.createStatement();
            // FIXME do the following only on databases that support schemas.
            // SQLite does not support them. Is there any advantage of schemas over flat tables?
            statement.execute("create schema " + schemaName);
-            connection.commit();
            LOG.info("Created new feed schema: {}", statement);
        } catch (Exception ex) {
            LOG.error("Exception while registering new feed namespace in feeds table: {}", ex.getMessage());
@@ -210,7 +207,7 @@ private void registerFeed (File gtfsFile) {
        // FIXME is this extra CSV reader used anymore? Check comment below.
        // First, inspect feed_info.txt to extract the ID and version.
        // We could get this with SQL after loading, but feed_info, feed_id and feed_version are all optional.
-        CsvReader csvReader = getCsvReader(Table.FEED_INFO);
+        CsvReader csvReader = Table.FEED_INFO.getCsvReader(zip, errorStorage);
        String feedId = "", feedVersion = "";
        if (csvReader != null) {
            // feed_info.txt has been found and opened.
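Note: the ValidateFieldResult type returned throughout the field classes above is added by this changeset but is not shown in this excerpt. Judging purely from its usage here (a public clean value, a public errors set, a constructor seeding the clean value from the original string, and a static from() helper that converts a typed result into a String result), it presumably looks roughly like the sketch below; any member not visible in the diff is an assumption.

    package com.conveyal.gtfs.loader;

    import com.conveyal.gtfs.error.NewGTFSError;

    import java.util.HashSet;
    import java.util.Set;

    /** Sketch only (reconstructed from usage): pairs a cleaned field value with any validation errors. */
    public class ValidateFieldResult<T> {
        // The value to actually store, possibly coerced from the raw input (null if the input was unusable).
        public T clean;
        // Errors found during validation; the loader later attaches line numbers and entity types to these.
        public Set<NewGTFSError> errors = new HashSet<>();

        public ValidateFieldResult() { }

        // Seed the clean value with the raw input; validators may overwrite or null it out.
        public ValidateFieldResult(T clean) {
            this.clean = clean;
        }

        // Convert a typed result into a String result, carrying its errors along.
        public static ValidateFieldResult<String> from(ValidateFieldResult<?> result) {
            ValidateFieldResult<String> stringResult = new ValidateFieldResult<>();
            stringResult.clean = result.clean == null ? null : String.valueOf(result.clean);
            stringResult.errors.addAll(result.errors);
            return stringResult;
        }
    }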
@@ -230,15 +227,12 @@ private void registerFeed (File gtfsFile) {
            String md5Hex = md5.toString();
            HashCode sha1 = Files.hash(gtfsFile, Hashing.sha1());
            String shaHex = sha1.toString();
-            Statement statement = connection.createStatement();
+            createFeedRegistryIfNotExists(connection);
            // TODO try to get the feed_id and feed_version out of the feed_info table
            // statement.execute("select * from feed_info");
            // current_timestamp seems to be the only standard way to get the current time across all common databases.
            // Record total load processing time?
-            statement.execute("create table if not exists feeds (namespace varchar primary key, md5 varchar, " +
-                    "sha1 varchar, feed_id varchar, feed_version varchar, filename varchar, loaded_date timestamp, " +
-                    "snapshot_of varchar)");
            PreparedStatement insertStatement = connection.prepareStatement(
                    "insert into feeds values (?, ?, ?, ?, ?, ?, current_timestamp, null)");
            insertStatement.setString(1, tablePrefix);
@@ -257,40 +251,14 @@
    }
 
    /**
-     * In GTFS feeds, all files are supposed to be in the root of the zip file, but feed producers often put them
-     * in a subdirectory. This function will search subdirectories if the entry is not found in the root.
-     * It records an error if the entry is in a subdirectory.
-     * It then creates a CSV reader for that table if it's found.
+     * Creates the feed registry table if it does not already exist. This must occur before the first attempt to load a
+     * GTFS feed or create an empty snapshot. Note: the connection MUST be committed after this method call.
     */
-    private CsvReader getCsvReader (Table table) {
-        final String tableFileName = table.name + ".txt";
-        ZipEntry entry = zip.getEntry(tableFileName);
-        if (entry == null) {
-            // Table was not found, check if it is in a subdirectory.
-            Enumeration<? extends ZipEntry> entries = zip.entries();
-            while (entries.hasMoreElements()) {
-                ZipEntry e = entries.nextElement();
-                if (e.getName().endsWith(tableFileName)) {
-                    entry = e;
-                    errorStorage.storeError(NewGTFSError.forTable(table, TABLE_IN_SUBDIRECTORY));
-                    break;
-                }
-            }
-        }
-        if (entry == null) return null;
-        try {
-            InputStream zipInputStream = zip.getInputStream(entry);
-            // Skip any byte order mark that may be present. Files must be UTF-8,
-            // but the GTFS spec says that "files that include the UTF byte order mark are acceptable".
-            InputStream bomInputStream = new BOMInputStream(zipInputStream);
-            CsvReader csvReader = new CsvReader(bomInputStream, ',', Charset.forName("UTF8"));
-            csvReader.readHeaders();
-            return csvReader;
-        } catch (IOException e) {
-            LOG.error("Exception while opening zip entry: {}", e);
-            e.printStackTrace();
-            return null;
-        }
+    static void createFeedRegistryIfNotExists(Connection connection) throws SQLException {
+        Statement statement = connection.createStatement();
+        statement.execute("create table if not exists feeds (namespace varchar primary key, md5 varchar, " +
+                "sha1 varchar, feed_id varchar, feed_version varchar, filename varchar, loaded_date timestamp, " +
+                "snapshot_of varchar)");
    }
 
    /**
@@ -339,7 +307,7 @@ private int getTableSize(Table table) {
     * @return number of rows that were loaded.
     */
    private int loadInternal (Table table) throws Exception {
-        CsvReader csvReader = getCsvReader(table);
+        CsvReader csvReader = table.getCsvReader(zip, errorStorage);
        if (csvReader == null) {
            LOG.info(String.format("file %s.txt not found in gtfs zipfile", table.name));
            // This GTFS table could not be opened in the zip, even in a subdirectory.
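The helper above works in tandem with createSchema, which deliberately no longer commits: callers create the registry table, create the schema, insert the feed's row, and commit once (this is exactly the order registerSnapshot follows later in this diff). Below is a minimal sketch of that intended sequence, assuming a non-auto-committing connection; the wrapper class and method name are hypothetical.

    package com.conveyal.gtfs.loader;

    import java.sql.Connection;
    import java.sql.SQLException;

    // Hypothetical illustration of the intended call order; not part of this changeset.
    class FeedRegistrationSketch {
        static void registerNamespace(Connection connection, String tablePrefix) throws SQLException {
            // 1. Ensure the feeds registry table exists (no-op if already present).
            JdbcGtfsLoader.createFeedRegistryIfNotExists(connection);
            // 2. Create the per-feed schema; createSchema intentionally does not commit.
            JdbcGtfsLoader.createSchema(connection, tablePrefix);
            // 3. ...insert this feed's row into the feeds table here...
            // 4. A single commit covers both DDL statements and the insert.
            connection.commit();
        }
    }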
@@ -353,25 +321,8 @@ private int loadInternal (Table table) throws Exception {
        // TODO Strip out line returns, tabs in field contents.
        // By default the CSV reader trims leading and trailing whitespace in fields.
        // Build up a list of fields in the same order they appear in this GTFS CSV file.
-        int headerCount = csvReader.getHeaderCount();
-        Field[] fields = new Field[headerCount];
-        Set<String> fieldsSeen = new HashSet<>();
-        String keyField = table.getKeyFieldName();
-        int keyFieldIndex = -1;
-        for (int h = 0; h < headerCount; h++) {
-            String header = sanitize(csvReader.getHeader(h));
-            if (fieldsSeen.contains(header) || "id".equals(header)) {
-                // FIXME: add separate error for tables containing ID field.
-                errorStorage.storeError(NewGTFSError.forTable(table, DUPLICATE_HEADER).setBadValue(header));
-                fields[h] = null;
-            } else {
-                fields[h] = table.getFieldForName(header);
-                fieldsSeen.add(header);
-                if (keyField.equals(header)) {
-                    keyFieldIndex = h;
-                }
-            }
-        }
+        Field[] fields = table.getFieldsFromFieldHeaders(csvReader.getHeaders(), errorStorage);
+        int keyFieldIndex = table.getKeyFieldIndex(fields);
        // Create separate fields array with filtered list that does not include null values (for duplicate headers or
        // ID field). This is solely used to construct the table and array of values to load.
        Field[] cleanFields = Arrays.stream(fields).filter(field -> field != null).toArray(Field[]::new);
@@ -439,7 +390,33 @@ private int loadInternal (Table table) throws Exception {
                    // CSV reader get on an empty field will be an empty string literal.
                    String string = csvReader.get(f);
                    // Use spec table to check that references are valid and IDs are unique.
-                    table.checkReferencesAndUniqueness(keyValue, lineNumber, field, string, referenceTracker);
+                    Set<NewGTFSError> errors = table.checkReferencesAndUniqueness(keyValue, lineNumber, field, string, referenceTracker);
+                    // Check for special case with calendar_dates where added service should not trigger ref. integrity
+                    // error.
+                    if (
+                        table.name.equals("calendar_dates") &&
+                        "service_id".equals(field.name) &&
+                        "1".equals(csvReader.get(Field.getFieldIndex(fields, "exception_type")))
+                    ) {
+                        for (NewGTFSError error : errors) {
+                            if (NewGTFSErrorType.REFERENTIAL_INTEGRITY.equals(error.errorType)) {
+                                // Do not record bad service_id reference errors for calendar date entries that add service
+                                // (exception type=1) because a corresponding service_id in calendars.txt is not required in
+                                // this case.
+                                LOG.info(
+                                    "A calendar_dates.txt entry added service (exception_type=1) for service_id={}, which does not have (or necessarily need) a corresponding entry in calendars.txt.",
+                                    keyValue
+                                );
+                            } else {
+                                errorStorage.storeError(error);
+                            }
+                        }
+                    }
+                    // In all other cases (i.e., outside of the calendar_dates special case), store the reference errors found.
+                    else {
+                        errorStorage.storeErrors(errors);
+                    }
                    // Add value for entry into table
                    setValueForField(table, columnIndex, lineNumber, field, string, postgresText, transformedStrings);
                    // Increment column index.
@@ -520,13 +497,29 @@ public void setValueForField(Table table, int fieldIndex, int lineNumber, Field
        // rather than setObject with a type code. I think some databases don't have setObject though.
        // The Field objects throw exceptions to avoid passing the line number, table name etc. into them.
        try {
-            // FIXME we need to set the transformed string element even when an error occurs.
-            // This means the validation and insertion step need to happen separately.
-            // or the errors should not be signaled with exceptions.
-            // Also, we should probably not be converting any GTFS field values.
-            // We should be saving it as-is in the database and converting upon load into our model objects.
-            if (postgresText) transformedStrings[fieldIndex + 1] = field.validateAndConvert(string);
-            else field.setParameter(insertStatement, fieldIndex + 2, string);
+            // Here, we set the transformed string element even when an error occurs.
+            // Ideally, no errors should be signaled with exceptions, but this happens in a try/catch in case
+            // something goes wrong (we don't necessarily want to abort loading the feed altogether).
+            // FIXME Also, we should probably not be converting any GTFS field values, but some of them are coerced
+            // to null if they are unparseable (e.g., DateField).
+            // We should be saving it as-is in the database and converting upon load into our model objects.
+            Set<NewGTFSError> errors;
+            if (postgresText) {
+                ValidateFieldResult<String> result = field.validateAndConvert(string);
+                // If the result is null, use the null-setting method.
+                if (result.clean == null) setFieldToNull(postgresText, transformedStrings, fieldIndex, field);
+                // Otherwise, set the cleaned field according to its index.
+                else transformedStrings[fieldIndex + 1] = result.clean;
+                errors = result.errors;
+            } else {
+                errors = field.setParameter(insertStatement, fieldIndex + 2, string);
+            }
+            // Store any errors encountered after field value has been set.
+            for (NewGTFSError error : errors) {
+                error.entityType = table.getEntityClass();
+                error.lineNumber = lineNumber;
+                if (errorStorage != null) errorStorage.storeError(error);
+            }
        } catch (StorageException ex) {
            // FIXME many exceptions don't have an error type
            if (errorStorage != null) {
@@ -562,24 +555,12 @@ private void setFieldToNull(boolean postgresText, String[] transformedStrings, i
     *
     * TODO add a test including SQL injection text (quote and semicolon)
     */
-    public String sanitize (String string) throws SQLException {
+    public static String sanitize (String string, SQLErrorStorage errorStorage) {
        String clean = string.replaceAll("[^\\p{Alnum}_]", "");
        if (!clean.equals(string)) {
            LOG.warn("SQL identifier '{}' was sanitized to '{}'", string, clean);
-            if (errorStorage != null) {
-                errorStorage.storeError(NewGTFSError.forFeed(COLUMN_NAME_UNSAFE, string));
-            }
+            if (errorStorage != null) errorStorage.storeError(NewGTFSError.forFeed(COLUMN_NAME_UNSAFE, string));
        }
        return clean;
    }
-
-    public class ReferenceTracker {
-        public final Set<String> transitIds = new HashSet<>();
-        public final Set<String> transitIdsWithSequence = new HashSet<>();
-        public final SQLErrorStorage errorStorage;
-
-        public ReferenceTracker(SQLErrorStorage errorStorage) {
-            this.errorStorage = errorStorage;
-        }
-    }
 }
diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java
index 415df8aa5..a152579b1 100644
--- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java
+++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java
@@ -20,6 +20,8 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import static com.conveyal.gtfs.loader.JdbcGtfsLoader.createFeedRegistryIfNotExists;
+import static com.conveyal.gtfs.loader.JdbcGtfsLoader.createSchema;
 import static com.conveyal.gtfs.util.Util.randomIdString;
 
 /**
@@ -415,15 +417,12 @@ private void populateDefaultEditorValues(Connection connection, String tablePref
     */
    private void registerSnapshot () {
        try {
-            Statement statement = connection.createStatement();
+            // We cannot simply insert into the feeds table because if we are creating an empty snapshot (to create/edit
+            // a GTFS feed from scratch), the feed registry table will not exist.
            // TODO copy over feed_id and feed_version from source namespace?
-
-            // FIXME do the following only on databases that support schemas.
-            // SQLite does not support them. Is there any advantage of schemas over flat tables?
-            statement.execute("create schema " + tablePrefix);
            // TODO: Record total snapshot processing time?
-            // Simply insert into feeds table (no need for table creation) because making a snapshot presumes that the
-            // feeds table already exists.
+            createFeedRegistryIfNotExists(connection);
+            createSchema(connection, tablePrefix);
            PreparedStatement insertStatement = connection.prepareStatement(
                    "insert into feeds values (?, null, null, null, null, null, current_timestamp, ?)");
            insertStatement.setString(1, tablePrefix);
diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java
index 67168e7d7..bc5fbac84 100644
--- a/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java
+++ b/src/main/java/com/conveyal/gtfs/loader/JdbcTableWriter.java
@@ -31,8 +31,10 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.UUID;
 import java.util.stream.Collectors;
@@ -139,7 +141,7 @@ public String update(Integer id, String json, boolean autoCommit) throws SQLExce
        // Cast JsonNode to ObjectNode to allow mutations (e.g., updating the ID field).
        ObjectNode jsonObject = (ObjectNode) jsonNode;
        // Ensure that the key field is unique and that referencing tables are updated if the value is updated.
-        ensureReferentialIntegrity(connection, jsonObject, tablePrefix, specTable, id);
+        ensureReferentialIntegrity(jsonObject, tablePrefix, specTable, id);
        // Parse the fields/values into a Field -> String map (drops ALL fields not explicitly listed in spec table's
        // fields)
        // Note, this must follow referential integrity check because some tables will modify the jsonObject (e.g.,
@@ -252,6 +254,39 @@ public String update(Integer id, String json, boolean autoCommit) throws SQLExce
        }
    }
 
+    /**
+     * For a given pattern id and starting stop sequence (inclusive), normalize all stop times to match the pattern
+     * stops' travel times.
+     * @return number of stop times updated
+     */
+    public int normalizeStopTimesForPattern (int id, int beginWithSequence) throws SQLException {
+        try {
+            JDBCTableReader<PatternStop> patternStops = new JDBCTableReader(
+                Table.PATTERN_STOP,
+                dataSource,
+                tablePrefix + ".",
+                EntityPopulator.PATTERN_STOP
+            );
+            String patternId = getValueForId(id, "pattern_id", tablePrefix, Table.PATTERNS, connection);
+            List<PatternStop> patternStopsToNormalize = new ArrayList<>();
+            for (PatternStop patternStop : patternStops.getOrdered(patternId)) {
+                // Normalize stop times for any pattern stop whose stop_sequence is at or after beginWithSequence.
+                if (patternStop.stop_sequence >= beginWithSequence) {
+                    patternStopsToNormalize.add(patternStop);
+                }
+            }
+            int stopTimesUpdated = updateStopTimesForPatternStops(patternStopsToNormalize);
+            connection.commit();
+            return stopTimesUpdated;
+        } catch (Exception e) {
+            e.printStackTrace();
+            throw e;
+        } finally {
+            DbUtils.closeQuietly(connection);
+        }
+    }
+
    /**
     * Updates linked fields with values from entity being updated. This is used to update identical fields in related
This is used to update identical fields in related * tables (for now just fields in trips and stop_times) where the reference table's value should take precedence over @@ -464,7 +499,7 @@ private String updateChildTable( throw new IllegalStateException("Cannot create or update frequency entries for a timetable-based pattern."); } // Reconciling pattern stops MUST happen before original pattern stops are deleted in below block (with - // getUpdateReferencesSql) + // #deleteChildEntities) if (Table.PATTERN_STOP.name.equals(subTable.name)) { List newPatternStops = new ArrayList<>(); // Clean up pattern stop ID fields (passed in as string ID from datatools-ui to avoid id collision) @@ -628,10 +663,9 @@ private String updateChildTable( */ private void deleteChildEntities(Table childTable, Field keyField, String keyValue) throws SQLException { String childTableName = String.join(".", tablePrefix, childTable.name); - String deleteSql = getUpdateReferencesSql(SqlMethod.DELETE, childTableName, keyField, keyValue, null); - LOG.info(deleteSql); - Statement deleteStatement = connection.createStatement(); - int result = deleteStatement.executeUpdate(deleteSql); + PreparedStatement deleteStatement = getUpdateReferencesStatement(SqlMethod.DELETE, childTableName, keyField, keyValue, null); + LOG.info(deleteStatement.toString()); + int result = deleteStatement.executeUpdate(); LOG.info("Deleted {} {}", result, childTable.name); // FIXME: are there cases when an update should not return zero? // if (result == 0) throw new SQLException("No stop times found for trip ID"); @@ -662,6 +696,8 @@ private int updateStopTimesForPatternStop(ObjectNode patternStop, int previousTr statement.setInt(oneBasedIndex++, arrivalTime + dwellTime); // Set "where clause" with value for pattern_id and stop_sequence statement.setString(oneBasedIndex++, patternStop.get("pattern_id").asText()); + // In the editor, we can depend on stop_times#stop_sequence matching pattern_stops#stop_sequence because we + // normalize stop sequence values for stop times during snapshotting for the editor. statement.setInt(oneBasedIndex++, patternStop.get("stop_sequence").asInt()); // Log query, execute statement, and log result. LOG.debug(statement.toString()); @@ -670,6 +706,68 @@ private int updateStopTimesForPatternStop(ObjectNode patternStop, int previousTr return travelTime + dwellTime; } + /** + * Normalizes all stop times' arrivals and departures for an ordered set of pattern stops. This set can be the full + * set of stops for a pattern or just a subset. Typical usage for this method would be to overwrite the arrival and + * departure times for existing trips after a pattern stop has been added or inserted into a pattern or if a + * pattern stop's default travel or dwell time were updated and the stop times need to reflect this update. + * @param patternStops list of pattern stops for which to update stop times (ordered by increasing stop_sequence) + * @throws SQLException + * + * TODO? add param Set serviceIdFilters service_id values to filter trips on + */ + private int updateStopTimesForPatternStops(List patternStops) throws SQLException { + PatternStop firstPatternStop = patternStops.iterator().next(); + int firstStopSequence = firstPatternStop.stop_sequence; + // Prepare SQL query to determine the time that should form the basis for adding the travel time values. + int previousStopSequence = firstStopSequence > 0 ? firstStopSequence - 1 : 0; + String timeField = firstStopSequence > 0 ? 
"departure_time" : "arrival_time"; + String getFirstTravelTimeSql = String.format( + "select t.trip_id, %s from %s.stop_times st, %s.trips t where stop_sequence = ? " + + "and t.pattern_id = ? " + + "and t.trip_id = st.trip_id", + timeField, + tablePrefix, + tablePrefix + ); + PreparedStatement statement = connection.prepareStatement(getFirstTravelTimeSql); + statement.setInt(1, previousStopSequence); + statement.setString(2, firstPatternStop.pattern_id); + LOG.info(statement.toString()); + ResultSet resultSet = statement.executeQuery(); + Map timesForTripIds = new HashMap<>(); + while (resultSet.next()) { + timesForTripIds.put(resultSet.getString(1), resultSet.getInt(2)); + } + // Update stop times for individual trips with normalized travel times. + String updateTravelTimeSql = String.format( + "update %s.stop_times set arrival_time = ?, departure_time = ? where trip_id = ? and stop_sequence = ?", + tablePrefix + ); + PreparedStatement updateStopTimeStatement = connection.prepareStatement(updateTravelTimeSql); + LOG.info(updateStopTimeStatement.toString()); + final BatchTracker stopTimesTracker = new BatchTracker("stop_times", updateStopTimeStatement); + for (String tripId : timesForTripIds.keySet()) { + // Initialize travel time with previous stop time value. + int cumulativeTravelTime = timesForTripIds.get(tripId); + for (PatternStop patternStop : patternStops) { + // Gather travel/dwell time for pattern stop (being sure to check for missing values). + int travelTime = patternStop.default_travel_time == Entity.INT_MISSING ? 0 : patternStop.default_travel_time; + int dwellTime = patternStop.default_dwell_time == Entity.INT_MISSING ? 0 : patternStop.default_dwell_time; + int oneBasedIndex = 1; + // Increase travel time by current pattern stop's travel and dwell times (and set values for update). + cumulativeTravelTime += travelTime; + updateStopTimeStatement.setInt(oneBasedIndex++, cumulativeTravelTime); + cumulativeTravelTime += dwellTime; + updateStopTimeStatement.setInt(oneBasedIndex++, cumulativeTravelTime); + updateStopTimeStatement.setString(oneBasedIndex++, tripId); + updateStopTimeStatement.setInt(oneBasedIndex++, patternStop.stop_sequence); + stopTimesTracker.addBatch(); + } + } + return stopTimesTracker.executeRemaining(); + } + /** * Checks that a set of string references to a set of reference tables are all valid. For each set of references * mapped to a reference table, the method queries for all of the references. If there are any references that were @@ -1038,8 +1136,8 @@ private void insertBlankStopTimes( /** * For a given condition (fieldName = 'value'), delete all entities that match the condition. Because this uses the - * primary delete method, it also will delete any "child" entities that reference any entities matching the original - * query. + * {@link #delete(Integer, boolean)} method, it also will delete any "child" entities that reference any entities + * matching the original query. */ @Override public int deleteWhere(String fieldName, String value, boolean autoCommit) throws SQLException { @@ -1064,14 +1162,14 @@ public int deleteWhere(String fieldName, String value, boolean autoCommit) throw LOG.info("Deleted {} {} entities", results.size(), specTable.name); return results.size(); } catch (Exception e) { + // Rollback changes on failure. connection.rollback(); LOG.error("Could not delete {} entity where {}={}", specTable.name, fieldName, value); e.printStackTrace(); throw e; } finally { if (autoCommit) { - // Always rollback and close if auto-committing. 
- connection.rollback(); + // Always close connection if auto-committing. connection.close(); } } @@ -1086,7 +1184,8 @@ public int delete(Integer id, boolean autoCommit) throws SQLException { // Handle "cascading" delete or constraints on deleting entities that other entities depend on // (e.g., keep a calendar from being deleted if trips reference it). // FIXME: actually add "cascading"? Currently, it just deletes one level down. - deleteFromReferencingTables(tablePrefix, specTable, connection, id); + deleteFromReferencingTables(tablePrefix, specTable, id); + // Next, delete the actual record specified by id. PreparedStatement statement = connection.prepareStatement(specTable.generateDeleteSql(tablePrefix)); statement.setInt(1, id); LOG.info(statement.toString()); @@ -1102,13 +1201,12 @@ public int delete(Integer id, boolean autoCommit) throws SQLException { } catch (Exception e) { LOG.error("Could not delete {} entity with id: {}", specTable.name, id); e.printStackTrace(); + // Rollback changes if errors encountered. + connection.rollback(); throw e; } finally { - if (autoCommit) { - // Always rollback and close if auto-committing. - connection.rollback(); - connection.close(); - } + // Always close connection if auto-committing. Otherwise, leave open (for potential further updates). + if (autoCommit) connection.close(); } } @@ -1119,12 +1217,20 @@ public void commit() throws SQLException { connection.close(); } + /** + * Ensure that database connection closes. This should be called once the table writer is no longer needed. + */ + @Override + public void close() { + DbUtils.closeQuietly(connection); + } + /** * Delete entities from any referencing tables (if required). This method is defined for convenience and clarity, but * essentially just runs updateReferencingTables with a null value for newKeyValue param. */ - private static void deleteFromReferencingTables(String namespace, Table table, Connection connection, int id) throws SQLException { - updateReferencingTables(namespace, table, connection, id, null); + private void deleteFromReferencingTables(String namespace, Table table, int id) throws SQLException { + updateReferencingTables(namespace, table, id, null); } /** @@ -1161,8 +1267,7 @@ private static long handleStatementExecution(PreparedStatement statement, boolea * * FIXME: add more detail/precise language on what this method actually does */ - private static void ensureReferentialIntegrity( - Connection connection, + private void ensureReferentialIntegrity( ObjectNode jsonObject, String namespace, Table table, @@ -1196,7 +1301,7 @@ private static void ensureReferentialIntegrity( // FIXME: Need to update referencing tables because entity has changed ID. // Entity key value is being changed to an entirely new one. If there are entities that // reference this value, we need to update them. - updateReferencingTables(namespace, table, connection, id, keyValue); + updateReferencingTables(namespace, table, id, keyValue); } } else { // Conflict. The different conflict conditions are outlined below. @@ -1204,7 +1309,12 @@ private static void ensureReferentialIntegrity( // There was one match found. if (isCreating) { // Under no circumstance should a new entity have a conflict with existing key field. 
- throw new SQLException("New entity's key field must not match existing value."); + throw new SQLException( + String.format("New %s's %s value (%s) conflicts with an existing record in the table.", + table.entityClass.getSimpleName(), + keyField, + keyValue) + ); } if (!uniqueIds.contains(id)) { // There are two circumstances we could encounter here. @@ -1269,23 +1379,16 @@ private static TIntSet getIdsForCondition( * Finds the set of tables that reference the parent entity being updated. */ private static Set getReferencingTables(Table table) { - String keyField = table.getKeyFieldName(); Set
referencingTables = new HashSet<>(); for (Table gtfsTable : Table.tablesInOrder) { // IMPORTANT: Skip the table for the entity we're modifying or if loop table does not have field. if (table.name.equals(gtfsTable.name)) continue; -// || !gtfsTable.hasField(keyField) for (Field field : gtfsTable.fields) { if (field.isForeignReference() && field.referenceTable.name.equals(table.name)) { // If any of the table's fields are foreign references to the specified table, add to the return set. referencingTables.add(gtfsTable); } } -// Field tableField = gtfsTable.getFieldForName(keyField); -// // If field is not a foreign reference, continue. (This should probably never be the case because a field -// // that shares the key field's name ought to refer to the key field. -// if (!tableField.isForeignReference()) continue; - } return referencingTables; } @@ -1316,16 +1419,15 @@ private static String getValueForId(int id, String fieldName, String namespace, * referencing the entity being updated. * * FIXME: add custom logic/hooks. Right now entity table checks are hard-coded in (e.g., if Agency, skip all. OR if - * Calendar, rollback transaction if there are referencing trips). + * Calendar, rollback transaction if there are referencing trips). * * FIXME: Do we need to clarify the impact of the direction of the relationship (e.g., if we delete a trip, that should - * not necessarily delete a shape that is shared by multiple trips)? I think not because we are skipping foreign refs - * found in the table for the entity being updated/deleted. [Leaving this comment in place for now though.] + * not necessarily delete a shape that is shared by multiple trips)? I think not because we are skipping foreign refs + * found in the table for the entity being updated/deleted. [Leaving this comment in place for now though.] */ - private static void updateReferencingTables( + private void updateReferencingTables( String namespace, Table table, - Connection connection, int id, String newKeyValue ) throws SQLException { @@ -1347,47 +1449,48 @@ private static void updateReferencingTables( String refTableName = String.join(".", namespace, referencingTable.name); for (Field field : referencingTable.editorFields()) { if (field.isForeignReference() && field.referenceTable.name.equals(table.name)) { - // FIXME: Are there other references that are not being captured??? - // Cascade delete stop times and frequencies for trips. This must happen before trips are deleted - // below. Otherwise, there are no trips with which to join. - if ("trips".equals(referencingTable.name)) { + if ( + Table.TRIPS.name.equals(referencingTable.name) && + sqlMethod.equals(SqlMethod.DELETE) && + table.name.equals(Table.PATTERNS.name) + ) { + // If deleting a pattern, cascade delete stop times and frequencies for trips first. This must + // happen before trips are deleted in the block below. Otherwise, the queries to select + // stop_times and frequencies to delete would fail because there would be no trip records to join + // with. 
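+ // For illustration, with a hypothetical namespace "abc_123", the statement built below takes the form:
+ // delete from abc_123.stop_times using abc_123.trips
+ //     where abc_123.stop_times.trip_id = abc_123.trips.trip_id and abc_123.trips.pattern_id = ?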
String stopTimesTable = String.join(".", namespace, "stop_times"); String frequenciesTable = String.join(".", namespace, "frequencies"); String tripsTable = String.join(".", namespace, "trips"); // Delete stop times and frequencies for trips for pattern String deleteStopTimes = String.format( - "delete from %s using %s where %s.trip_id = %s.trip_id and %s.pattern_id = '%s'", - stopTimesTable, tripsTable, stopTimesTable, tripsTable, tripsTable, keyValue); - LOG.info(deleteStopTimes); + "delete from %s using %s where %s.trip_id = %s.trip_id and %s.pattern_id = ?", + stopTimesTable, tripsTable, stopTimesTable, tripsTable, tripsTable); PreparedStatement deleteStopTimesStatement = connection.prepareStatement(deleteStopTimes); + deleteStopTimesStatement.setString(1, keyValue); + LOG.info(deleteStopTimesStatement.toString()); int deletedStopTimes = deleteStopTimesStatement.executeUpdate(); LOG.info("Deleted {} stop times for pattern {}", deletedStopTimes, keyValue); String deleteFrequencies = String.format( - "delete from %s using %s where %s.trip_id = %s.trip_id and %s.pattern_id = '%s'", - frequenciesTable, tripsTable, frequenciesTable, tripsTable, tripsTable, keyValue); - LOG.info(deleteFrequencies); + "delete from %s using %s where %s.trip_id = %s.trip_id and %s.pattern_id = ?", + frequenciesTable, tripsTable, frequenciesTable, tripsTable, tripsTable); PreparedStatement deleteFrequenciesStatement = connection.prepareStatement(deleteFrequencies); + deleteFrequenciesStatement.setString(1, keyValue); + LOG.info(deleteFrequenciesStatement.toString()); int deletedFrequencies = deleteFrequenciesStatement.executeUpdate(); LOG.info("Deleted {} frequencies for pattern {}", deletedFrequencies, keyValue); } - // Get unique IDs before delete (for logging/message purposes). - // TIntSet uniqueIds = getIdsForCondition(refTableName, keyField, keyValue, connection); - String updateRefSql = getUpdateReferencesSql(sqlMethod, refTableName, field, keyValue, newKeyValue); - LOG.info(updateRefSql); - Statement updateStatement = connection.createStatement(); - int result = updateStatement.executeUpdate(updateRefSql); + // Get statement to update or delete entities that reference the key value. + PreparedStatement updateStatement = getUpdateReferencesStatement(sqlMethod, refTableName, field, keyValue, newKeyValue); + LOG.info(updateStatement.toString()); + int result = updateStatement.executeUpdate(); if (result > 0) { // FIXME: is this where a delete hook should go? (E.g., CalendarController subclass would override - // deleteEntityHook). - // deleteEntityHook(); + // deleteEntityHook). if (sqlMethod.equals(SqlMethod.DELETE)) { // Check for restrictions on delete. if (table.isCascadeDeleteRestricted()) { // The entity must not have any referencing entities in order to delete it. connection.rollback(); - // List idStrings = new ArrayList<>(); - // uniqueIds.forEach(uniqueId -> idStrings.add(String.valueOf(uniqueId))); - // String message = String.format("Cannot delete %s %s=%s. %d %s reference this %s (%s).", entityClass.getSimpleName(), keyField, keyValue, result, referencingTable.name, entityClass.getSimpleName(), String.join(",", idStrings)); String message = String.format( "Cannot delete %s %s=%s. %d %s reference this %s.", entityClass.getSimpleName(), @@ -1411,57 +1514,66 @@ private static void updateReferencingTables( } /** - * Constructs SQL string based on method provided. 
+ * Constructs prepared statement to update or delete (depending on the method specified) records with foreign + * references to the provided key value. */ - private static String getUpdateReferencesSql( + private PreparedStatement getUpdateReferencesStatement( SqlMethod sqlMethod, String refTableName, Field keyField, String keyValue, String newKeyValue ) throws SQLException { + String sql; + PreparedStatement statement; boolean isArrayField = keyField.getSqlType().equals(JDBCType.ARRAY); switch (sqlMethod) { case DELETE: if (isArrayField) { - return String.format( - "delete from %s where %s @> ARRAY['%s']::text[]", + sql = String.format( + "delete from %s where %s @> ARRAY[?]::text[]", refTableName, - keyField.name, - keyValue + keyField.name ); } else { - return String.format("delete from %s where %s = '%s'", refTableName, keyField.name, keyValue); + sql = String.format("delete from %s where %s = ?", refTableName, keyField.name); } + statement = connection.prepareStatement(sql); + statement.setString(1, keyValue); + return statement; case UPDATE: if (isArrayField) { // If the field to be updated is an array field (of which there are only text[] types in the db), // replace the old value with the new value using array contains clause. - // FIXME This is probably horribly postgres specific. - return String.format( - "update %s set %s = array_replace(%s, '%s', '%s') where %s @> ARRAY['%s']::text[]", + // NOTE: We have to cast the string values to the text type because the StringListField uses arrays + // of text. + sql = String.format( + "update %s set %s = array_replace(%s, ?::text, ?::text) where %s @> ?::text[]", refTableName, keyField.name, keyField.name, - keyValue, - newKeyValue, - keyField.name, - keyValue + keyField.name ); + statement = connection.prepareStatement(sql); + statement.setString(1, keyValue); + statement.setString(2, newKeyValue); + String[] values = new String[]{keyValue}; + statement.setArray(3, connection.createArrayOf("text", values)); } else { - return String.format( - "update %s set %s = '%s' where %s = '%s'", + sql = String.format( + "update %s set %s = ? 
where %s = ?", refTableName, keyField.name, - newKeyValue, - keyField.name, - keyValue + keyField.name ); + statement = connection.prepareStatement(sql); + statement.setString(1, newKeyValue); + statement.setString(2, keyValue); } -// case CREATE: -// return String.format("insert into %s "); + return statement; default: throw new SQLException("SQL Method must be DELETE or UPDATE."); + } } } diff --git a/src/main/java/com/conveyal/gtfs/loader/LanguageField.java b/src/main/java/com/conveyal/gtfs/loader/LanguageField.java index 6e844e471..37ffc4faa 100644 --- a/src/main/java/com/conveyal/gtfs/loader/LanguageField.java +++ b/src/main/java/com/conveyal/gtfs/loader/LanguageField.java @@ -1,5 +1,6 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.storage.StorageException; @@ -7,6 +8,7 @@ import java.sql.PreparedStatement; import java.sql.SQLType; import java.util.Locale; +import java.util.Set; import java.util.regex.Pattern; import static com.conveyal.gtfs.error.NewGTFSErrorType.LANGUAGE_FORMAT; @@ -20,24 +22,27 @@ public LanguageField (String name, Requirement requirement) { super(name, requirement); } - private String validate (String string) { + private ValidateFieldResult validate (String string) { + ValidateFieldResult result = new ValidateFieldResult<>(string); Locale locale = Locale.forLanguageTag(string); String generatedTag = locale.toLanguageTag(); // This works except for hierarchical sublanguages like zh-cmn and zh-yue which get flattened to the sublanguage. if (!generatedTag.equalsIgnoreCase(string)) { - throw new StorageException(LANGUAGE_FORMAT, string); + result.errors.add(NewGTFSError.forFeed(LANGUAGE_FORMAT, string)); } - return string; + return result; } /** Check that a string can be properly parsed and is in range. */ - public String validateAndConvert (String string) { + public ValidateFieldResult validateAndConvert (String string) { return cleanString(validate(string)); } - public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { try { - preparedStatement.setString(oneBasedIndex, validateAndConvert(string)); + ValidateFieldResult result = validateAndConvert(string); + preparedStatement.setString(oneBasedIndex, result.clean); + return result.errors; } catch (Exception ex) { throw new StorageException(LANGUAGE_FORMAT, string); } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java new file mode 100644 index 000000000..7f98c1ea8 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -0,0 +1,14 @@ +package com.conveyal.gtfs.loader; + +import java.util.HashSet; +import java.util.Set; + +/** + * This class is used during feed loads to track the unique keys that are encountered in a GTFS feed. It has two sets of + * strings that it tracks, one for single field keys (e.g., route_id or stop_id) and one for keys that are compound, + * usually made up of a string ID with a sequence field (e.g., trip_id + stop_sequence for tracking unique stop times). 
+ */ +public class ReferenceTracker { + public final Set transitIds = new HashSet<>(); + public final Set transitIdsWithSequence = new HashSet<>(); +} diff --git a/src/main/java/com/conveyal/gtfs/loader/ShortField.java b/src/main/java/com/conveyal/gtfs/loader/ShortField.java index 3f2eb2a1e..278bf05da 100644 --- a/src/main/java/com/conveyal/gtfs/loader/ShortField.java +++ b/src/main/java/com/conveyal/gtfs/loader/ShortField.java @@ -1,11 +1,13 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.storage.StorageException; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLType; +import java.util.Set; /** * Created by abyrd on 2017-03-31 @@ -19,27 +21,34 @@ public ShortField (String name, Requirement requirement, int maxValue) { this.maxValue = maxValue; } - private short validate (String string) { - if (string == null || string.isEmpty()) return 0; // Default numeric fields to zero. - short s = Short.parseShort(string); - if (s < 0) throw new StorageException(NewGTFSErrorType.NUMBER_NEGATIVE, string); - if (s > maxValue) throw new StorageException(NewGTFSErrorType.NUMBER_TOO_LARGE, string); - return s; + private ValidateFieldResult validate (String string) { + ValidateFieldResult result = new ValidateFieldResult<>(); + if (string == null || string.isEmpty()) { + // Default numeric fields to zero. + result.clean = 0; + return result; + } + result.clean = Short.parseShort(string); + if (result.clean < 0) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_NEGATIVE, string)); + if (result.clean > maxValue) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_TOO_LARGE, string)); + return result; } @Override - public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { try { - preparedStatement.setShort(oneBasedIndex, validate(string)); + ValidateFieldResult result = validate(string); + preparedStatement.setShort(oneBasedIndex, result.clean); + return result.errors; } catch (Exception ex) { throw new StorageException(ex); } } @Override - public String validateAndConvert(String string) { - validate(string); - return string; + public ValidateFieldResult validateAndConvert(String string) { + ValidateFieldResult result = ValidateFieldResult.from(validate(string)); + return result; } @Override diff --git a/src/main/java/com/conveyal/gtfs/loader/StringField.java b/src/main/java/com/conveyal/gtfs/loader/StringField.java index 1be0c252a..cf42baf54 100644 --- a/src/main/java/com/conveyal/gtfs/loader/StringField.java +++ b/src/main/java/com/conveyal/gtfs/loader/StringField.java @@ -1,10 +1,12 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.storage.StorageException; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLType; +import java.util.Set; /** * Created by abyrd on 2017-03-31 @@ -16,13 +18,15 @@ public StringField (String name, Requirement requirement) { } /** Check that a string can be properly parsed and is in range. 
*/ - public String validateAndConvert (String string) { + public ValidateFieldResult validateAndConvert (String string) { return cleanString(string); } - public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { try { - preparedStatement.setString(oneBasedIndex, validateAndConvert(string)); + ValidateFieldResult result = validateAndConvert(string); + preparedStatement.setString(oneBasedIndex, result.clean); + return result.errors; } catch (Exception ex) { throw new StorageException(ex); } diff --git a/src/main/java/com/conveyal/gtfs/loader/StringListField.java b/src/main/java/com/conveyal/gtfs/loader/StringListField.java index 921b9c9c2..6319afe67 100644 --- a/src/main/java/com/conveyal/gtfs/loader/StringListField.java +++ b/src/main/java/com/conveyal/gtfs/loader/StringListField.java @@ -1,12 +1,20 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.storage.StorageException; import java.sql.Array; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLType; - +import java.util.Collections; +import java.util.Set; + +/** + * Field type for a list of strings. This type is ONLY used for the storage of fare rules (i.e., the field type is not + * found or expected in CSV tables), so not all methods are supported (e.g., validateAndConvert is unsupported + * because it is only called on GTFS spec tables). + */ public class StringListField extends Field { public StringListField(String name, Requirement requirement) { @@ -14,17 +22,21 @@ public StringListField(String name, Requirement requirement) { } @Override - public String validateAndConvert(String original) { - // FIXME - return null; + public ValidateFieldResult validateAndConvert(String original) { + // This function is currently only used during feed loading. Because the StringListField only exists for the + // schedule exceptions table (which is not a GTFS spec table), we do not expect to ever call this function on + // this table/field. + // TODO: is there any reason we may want to add support for validation?
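+ // Until such validation is added, fail fast below so that any unexpected call is surfaced immediately
+ // rather than silently returning null (the previous behavior).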
+ throw new UnsupportedOperationException("Cannot call validateAndConvert on field of type StringListField because this is not a supported GTFS field type."); } @Override - public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { // FIXME try { Array array = preparedStatement.getConnection().createArrayOf("text", string.split(",")); preparedStatement.setArray(oneBasedIndex, array); + return Collections.EMPTY_SET; } catch (Exception e) { throw new StorageException(e); } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index e5ecf86ec..9d1bb8392 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -1,6 +1,7 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.error.SQLErrorStorage; import com.conveyal.gtfs.model.Agency; import com.conveyal.gtfs.model.Calendar; import com.conveyal.gtfs.model.CalendarDate; @@ -19,9 +20,14 @@ import com.conveyal.gtfs.model.Transfer; import com.conveyal.gtfs.model.Trip; import com.conveyal.gtfs.storage.StorageException; +import com.csvreader.CsvReader; +import org.apache.commons.io.input.BOMInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -31,12 +37,19 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Enumeration; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; +import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; import static com.conveyal.gtfs.loader.Requirement.OPTIONAL; @@ -70,6 +83,11 @@ public class Table { private boolean usePrimaryKey = false; /** Indicates whether the table has unique key field. */ private boolean hasUniqueKeyField = true; + /** + * Indicates whether the table has a compound key that must be used in conjunction with the key field to determine + * table uniqueness (e.g., transfers#to_stop_id). + */ + private boolean compoundKey; public Table (String name, Class entityClass, Requirement required, Field...
fields) { // TODO: verify table name is OK for use in constructing dynamic SQL queries @@ -118,7 +136,7 @@ public Table (String name, Class entityClass, Requirement requ ); public static final Table CALENDAR_DATES = new Table("calendar_dates", CalendarDate.class, OPTIONAL, - new StringField("service_id", REQUIRED), + new StringField("service_id", REQUIRED).isReferenceTo(CALENDAR), new DateField("date", REQUIRED), new IntegerField("exception_type", REQUIRED, 1, 2) ).keyFieldIsNotUnique(); @@ -241,7 +259,8 @@ public Table (String name, Class entityClass, Requirement requ new StringField("transfer_type", REQUIRED), new StringField("min_transfer_time", OPTIONAL)) .addPrimaryKey() - .keyFieldIsNotUnique(); + .keyFieldIsNotUnique() + .hasCompoundKey(); public static final Table TRIPS = new Table("trips", Trip.class, REQUIRED, new StringField("trip_id", REQUIRED), @@ -321,13 +340,20 @@ public Table restrictDelete () { } /** - * Fluent method to de-set the + * Fluent method to unset the hasUniqueKeyField flag for tables in which the first field should not be considered a + * primary key. */ - private Table keyFieldIsNotUnique() { + public Table keyFieldIsNotUnique() { this.hasUniqueKeyField = false; return this; } + /** Fluent method to set whether the table has a compound key, e.g., transfers#to_stop_id. */ + private Table hasCompoundKey() { + this.compoundKey = true; + return this; + } + /** * Fluent method that indicates that the integer ID field should be made a primary key. This should generally only * be used for tables that would ever need to be queried on the unique integer ID (which represents row number for @@ -452,10 +478,52 @@ public String generateInsertSql (String namespace, boolean setDefaultId) { String tableName = namespace == null ? name : String.join(".", namespace, name); - String questionMarks = String.join(", ", Collections.nCopies(editorFields().size(), "?")); String joinedFieldNames = commaSeparatedNames(editorFields()); String idValue = setDefaultId ? "DEFAULT" : "?"; - return String.format("insert into %s (id, %s) values (%s, %s)", tableName, joinedFieldNames, idValue, questionMarks); + return String.format( + "insert into %s (id, %s) values (%s, %s)", + tableName, + joinedFieldNames, + idValue, + String.join(", ", Collections.nCopies(editorFields().size(), "?")) + ); + } + + /** + * In GTFS feeds, all files are supposed to be in the root of the zip file, but feed producers often put them + * in a subdirectory. This function will search subdirectories if the entry is not found in the root. + * It records an error if the entry is in a subdirectory (as long as errorStorage is not null). + * It then creates a CSV reader for that table if it's found. + */ + public CsvReader getCsvReader(ZipFile zipFile, SQLErrorStorage sqlErrorStorage) { + final String tableFileName = this.name + ".txt"; + ZipEntry entry = zipFile.getEntry(tableFileName); + if (entry == null) { + // Table was not found; check if it is in a subdirectory. + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry e = entries.nextElement(); + if (e.getName().endsWith(tableFileName)) { + entry = e; + if (sqlErrorStorage != null) sqlErrorStorage.storeError(NewGTFSError.forTable(this, TABLE_IN_SUBDIRECTORY)); + break; + } + } + } + if (entry == null) return null; + try { + InputStream zipInputStream = zipFile.getInputStream(entry); + // Skip any byte order mark that may be present.
Files must be UTF-8, + // but the GTFS spec says that "files that include the UTF byte order mark are acceptable". + InputStream bomInputStream = new BOMInputStream(zipInputStream); + CsvReader csvReader = new CsvReader(bomInputStream, ',', Charset.forName("UTF8")); + csvReader.readHeaders(); + return csvReader; + } catch (IOException e) { + LOG.error("Exception while opening zip entry: {}", e); + e.printStackTrace(); + return null; + } } /** @@ -646,13 +714,14 @@ public String getKeyFieldName () { } /** - * Returns field name that defines order for grouped entities. WARNING: this MUST be called on a spec table (i.e., - * one of the constant tables defined in this class). Otherwise, it could return null even if the table has an order - * field defined. + * Returns field name that defines order for grouped entities or that defines the compound key field (e.g., + * transfers#to_stop_id). WARNING: this field must be in the 1st position (base zero) of the fields array; hence, + * this MUST be called on a spec table (i.e., one of the constant tables defined in this class). Otherwise, it could + * return null even if the table has an order field defined. */ public String getOrderFieldName () { String name = fields[1].name; - if (name.contains("_sequence")) return name; + if (name.contains("_sequence") || compoundKey) return name; else return null; } @@ -673,13 +742,11 @@ public Class getEntityClass() { /** - * Finds the index of the field given a string name. + * Finds the index of the field for this table given a string name. * @return the index of the field or -1 if no match is found */ public int getFieldIndex (String name) { - // Linear search, assuming a small number of fields per table. - for (int i = 0; i < fields.length; i++) if (fields[i].name.equals(name)) return i; - return -1; + return Field.getFieldIndex(fields, name); } /** @@ -693,6 +760,14 @@ public boolean isRequired () { return required == REQUIRED; } + /** + * Checks whether the table is part of the GTFS specification, i.e., it is not an internal table used for the editor + * (e.g., Patterns or PatternStops). + */ + public boolean isSpecTable() { + return required == REQUIRED || required == OPTIONAL; + } + /** * Create indexes for table using shouldBeIndexed(), key field, and/or sequence field. WARNING: this MUST be called * on a spec table (i.e., one of the constant tables defined in this class). Otherwise, the getIndexFields method @@ -870,75 +945,128 @@ public Table getParentTable() { return parentTable; } + /** + * During table load, checks the uniqueness of the entity ID and that references are valid. NOTE: This method + * defaults the key field and order field names to this table's values. + * @param keyValue key value for the record being checked + * @param lineNumber line number of the record being checked + * @param field field currently being checked + * @param value value that corresponds to field + * @param referenceTracker reference tracker in which to store references + * @return any duplicate or bad reference errors. + */ + public Set checkReferencesAndUniqueness(String keyValue, int lineNumber, Field field, String value, ReferenceTracker referenceTracker) { + return checkReferencesAndUniqueness(keyValue, lineNumber, field, value, referenceTracker, getKeyFieldName(), getOrderFieldName()); + } + /** * During table load, checks the uniqueness of the entity ID and that references are valid. These references are - * stored in the provided reference tracker. 
Any non-unique IDs or invalid references will store an error. NOTE: + * this instance of checkReferencesAndUniqueness allows for arbitrarily setting the keyField and orderField, which + * is helpful for checking uniqueness of fields that are not the standard primary key (e.g., route_short_name). */ - public void checkReferencesAndUniqueness(String keyValue, int lineNumber, Field field, String string, JdbcGtfsLoader.ReferenceTracker referenceTracker) { + public Set checkReferencesAndUniqueness(String keyValue, int lineNumber, Field field, String value, ReferenceTracker referenceTracker, String keyField, String orderField) { + Set errors = new HashSet<>(); // Store field-scoped transit ID for referential integrity check. (Note, entity scoping doesn't work here because // we need to cross-check multiple entity types for valid references, e.g., stop times and trips both share trip // id.) - String keyField = getKeyFieldName(); - String orderField = getOrderFieldName(); - // If table has no unique key field (e.g., calendar_dates or transfers), there is no need to check for - // duplicates. If it has an order field, that order field should supersede the key field as the "unique" field. - String uniqueKeyField = !hasUniqueKeyField ? null : orderField != null ? orderField : keyField; + // If table has an order field, that order field should supersede the key field as the "unique" field. In other + // words, if it has an order field, the unique key is actually compound -- made up of the keyField + orderField. + String uniqueKeyField = orderField != null + ? orderField + // If table has no unique key field (e.g., calendar_dates or transfers), there is no need to check for + // duplicates. + : !hasUniqueKeyField + ? null + : keyField; String transitId = String.join(":", keyField, keyValue); // If the field is optional and there is no value present, skip check. - if (!field.isRequired() && "".equals(string)) return; + if (!field.isRequired() && "".equals(value)) return Collections.EMPTY_SET; // First, handle referential integrity check. boolean isOrderField = field.name.equals(orderField); if (field.isForeignReference()) { - // Check referential integrity if applicable + // Check referential integrity if the field is a foreign reference. Note: the reference table must be loaded + // before the table/value being currently checked. String referenceField = field.referenceTable.getKeyFieldName(); - String referenceTransitId = String.join(":", referenceField, string); + String referenceTransitId = String.join(":", referenceField, value); if (!referenceTracker.transitIds.contains(referenceTransitId)) { // If the reference tracker does not contain the referenced key, record a referential integrity error. NewGTFSError referentialIntegrityError = NewGTFSError.forLine( this, lineNumber, REFERENTIAL_INTEGRITY, referenceTransitId) .setEntityId(keyValue); - if (isOrderField) { - // If the field is an order field, set the sequence for the new error. - referentialIntegrityError.setSequence(string); - } - referenceTracker.errorStorage.storeError(referentialIntegrityError); + // If the field is an order field, set the sequence for the new error. + if (isOrderField) referentialIntegrityError.setSequence(value); + errors.add(referentialIntegrityError); } } - + // Next, handle duplicate ID check. + // In most cases there is no need to check for duplicate IDs if the field is a foreign reference.
However, + // transfers#to_stop_id is defined as an order field, so we need to check that this field (which is both a + // foreign ref and order field) is dataset unique in conjunction with the key field. // These hold references to the set of IDs to check for duplicates and the ID to check. These depend on // whether an order field is part of the "unique ID." Set listOfUniqueIds = referenceTracker.transitIds; String uniqueId = transitId; - // There is no need to check for duplicate IDs if the field is a foreign reference. - if (field.isForeignReference()) return; - // Next, check that the ID is table-unique. + // Next, check that the ID is table-unique. For example, the trip_id field is table unique in trips.txt and + // the stop_sequence field (joined with trip_id) is table unique in stop_times.txt. if (field.name.equals(uniqueKeyField)) { // Check for duplicate IDs and store entity-scoped IDs for referential integrity check if (isOrderField) { // Check duplicate reference in set of field-scoped id:sequence (e.g., stop_sequence:12345:2) // This should not be scoped by key field because there may be conflicts (e.g., with trip_id="12345:2" listOfUniqueIds = referenceTracker.transitIdsWithSequence; - uniqueId = String.join(":", field.name, keyValue, string); + uniqueId = String.join(":", field.name, keyValue, value); } // Add ID and check duplicate reference in entity-scoped IDs (e.g., stop_id:12345) boolean valueAlreadyExists = !listOfUniqueIds.add(uniqueId); if (valueAlreadyExists) { // If the value is a duplicate, add an error. NewGTFSError duplicateIdError = NewGTFSError.forLine(this, lineNumber, DUPLICATE_ID, uniqueId) - .setEntityId(keyValue); - if (isOrderField) { - duplicateIdError.setSequence(string); - } - referenceTracker.errorStorage.storeError(duplicateIdError); + .setEntityId(keyValue); + if (isOrderField) duplicateIdError.setSequence(value); + errors.add(duplicateIdError); } - } else { - // If the field is not the table unique key field, skip the duplicate ID check and simply add the ID to the - // list of unique IDs. - listOfUniqueIds.add(uniqueId); + } else if (field.name.equals(keyField) && !field.isForeignReference()) { + // We arrive here if the field is not a foreign reference and not the unique key field on the table (e.g., + // shape_pt_sequence), but is still a key on the table. For example, this is where we add shape_id from + // the shapes table, so that when we check the referential integrity of trips#shape_id, we know that the + // shape_id exists in the shapes table. It also handles tracking calendar_dates#service_id values. + referenceTracker.transitIds.add(uniqueId); } + return errors; + } + + /** + * For an array of field headers, returns the matching set of {@link Field}s for a {@link Table}. If errorStorage is + * not null, errors related to unexpected or duplicate header names will be stored. + */ + public Field[] getFieldsFromFieldHeaders(String[] headers, SQLErrorStorage errorStorage) { + Field[] fields = new Field[headers.length]; + Set fieldsSeen = new HashSet<>(); + for (int h = 0; h < headers.length; h++) { + String header = sanitize(headers[h], errorStorage); + if (fieldsSeen.contains(header) || "id".equals(header)) { + // FIXME: add separate error for tables containing ID field.
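+ // Note: a header named "id" is rejected because the editor schema reserves an integer id column on
+ // every table (see generateInsertSql), so a CSV column with that name would collide with it.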
+ if (errorStorage != null) errorStorage.storeError(NewGTFSError.forTable(this, DUPLICATE_HEADER).setBadValue(header)); + fields[h] = null; + } else { + fields[h] = getFieldForName(header); + fieldsSeen.add(header); + } + } + return fields; + } + + /** + * Returns the index of the key field within the array of fields provided for a given table. + * @param fields array of fields (intended to be derived from the headers of a csv text file) + */ + public int getKeyFieldIndex(Field[] fields) { + String keyField = getKeyFieldName(); + return Field.getFieldIndex(fields, keyField); } } diff --git a/src/main/java/com/conveyal/gtfs/loader/TableWriter.java b/src/main/java/com/conveyal/gtfs/loader/TableWriter.java index 21bece4c1..159232165 100644 --- a/src/main/java/com/conveyal/gtfs/loader/TableWriter.java +++ b/src/main/java/com/conveyal/gtfs/loader/TableWriter.java @@ -21,4 +21,6 @@ public interface TableWriter { int deleteWhere (String fieldName, String value, boolean autoCommit) throws SQLException; void commit () throws SQLException; + + void close (); } diff --git a/src/main/java/com/conveyal/gtfs/loader/TimeField.java b/src/main/java/com/conveyal/gtfs/loader/TimeField.java index b398204e3..78769ee1a 100644 --- a/src/main/java/com/conveyal/gtfs/loader/TimeField.java +++ b/src/main/java/com/conveyal/gtfs/loader/TimeField.java @@ -1,11 +1,14 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.storage.StorageException; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLType; +import java.util.Collections; +import java.util.Set; /** * A field in the format HH:MM:SS, which will be stored as a number of seconds after midnight. @@ -17,9 +20,11 @@ public TimeField(String name, Requirement requirement) { } @Override - public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { try { - preparedStatement.setInt(oneBasedIndex, getSeconds(string)); + ValidateFieldResult result = getSeconds(string); + preparedStatement.setInt(oneBasedIndex, result.clean); + return result.errors; } catch (Exception ex) { throw new StorageException(ex); } @@ -27,30 +32,34 @@ public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, // Actually this is converting the string. Can we use some JDBC existing functions for this? @Override - public String validateAndConvert(String hhmmss) { - return Integer.toString(getSeconds(hhmmss)); + public ValidateFieldResult validateAndConvert(String hhmmss) { + return ValidateFieldResult.from(getSeconds(hhmmss)); } - private static int getSeconds (String hhmmss) { + private static ValidateFieldResult getSeconds (String hhmmss) { + ValidateFieldResult result = new ValidateFieldResult<>(); // Accept hh:mm:ss or h:mm:ss for single-digit hours. 
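// For example, "08:30:00" (length 8) and "8:30:00" (length 7) pass this check; any other length is
// recorded as a TIME_FORMAT error below.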
if (hhmmss.length() != 8 && hhmmss.length() != 7) { - throw new StorageException(NewGTFSErrorType.TIME_FORMAT, hhmmss); + result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.TIME_FORMAT, hhmmss)); + return result; } String[] fields = hhmmss.split(":"); if (fields.length != 3) { - throw new StorageException(NewGTFSErrorType.TIME_FORMAT, hhmmss); + result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.TIME_FORMAT, hhmmss)); + return result; } int h = Integer.parseInt(fields[0]); int m = Integer.parseInt(fields[1]); int s = Integer.parseInt(fields[2]); // Other than the Moscow-Pyongyang route at 8.5 days, most of the longest services are around 6 days. - if (h < 0) throw new StorageException(NewGTFSErrorType.NUMBER_NEGATIVE, hhmmss); - if (h > 150) throw new StorageException(NewGTFSErrorType.NUMBER_TOO_LARGE, hhmmss); - if (m < 0) throw new StorageException(NewGTFSErrorType.NUMBER_NEGATIVE, hhmmss); - if (m > 59) throw new StorageException(NewGTFSErrorType.NUMBER_TOO_LARGE, hhmmss); - if (s < 0) throw new StorageException(NewGTFSErrorType.NUMBER_NEGATIVE, hhmmss); - if (s > 59) throw new StorageException(NewGTFSErrorType.NUMBER_TOO_LARGE, hhmmss); - return ((h * 60) + m) * 60 + s; + if (h < 0) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_NEGATIVE, hhmmss)); + if (h > 150) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_TOO_LARGE, hhmmss)); + if (m < 0) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_NEGATIVE, hhmmss)); + if (m > 59) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_TOO_LARGE, hhmmss)); + if (s < 0) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_NEGATIVE, hhmmss)); + if (s > 59) result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.NUMBER_TOO_LARGE, hhmmss)); + result.clean = ((h * 60) + m) * 60 + s; + return result; } @Override diff --git a/src/main/java/com/conveyal/gtfs/loader/URLField.java b/src/main/java/com/conveyal/gtfs/loader/URLField.java index a16017b78..f0e2adc06 100644 --- a/src/main/java/com/conveyal/gtfs/loader/URLField.java +++ b/src/main/java/com/conveyal/gtfs/loader/URLField.java @@ -1,10 +1,14 @@ package com.conveyal.gtfs.loader; +import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.storage.StorageException; +import java.net.URL; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLType; +import java.util.Set; /** * Created by abyrd on 2017-03-31 @@ -16,19 +20,22 @@ public URLField(String name, Requirement requirement) { } /** Check that a string can be properly parsed and is in range. 
*/ - public String validateAndConvert (String string) { + public ValidateFieldResult validateAndConvert (String string) { + ValidateFieldResult result = cleanString(string); try { - string = cleanString(string); - // new URL(cleanString); TODO call this to validate, but we can't default to zero - return string; + new URL(result.clean); // Parse to validate the URL format; a failure is caught below and recorded as an error. + return result; } catch (Exception ex) { - throw new StorageException(ex); + result.errors.add(NewGTFSError.forFeed(NewGTFSErrorType.URL_FORMAT, string)); + return result; } } - public void setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { + public Set setParameter(PreparedStatement preparedStatement, int oneBasedIndex, String string) { try { - preparedStatement.setString(oneBasedIndex, validateAndConvert(string)); + ValidateFieldResult result = validateAndConvert(string); + preparedStatement.setString(oneBasedIndex, result.clean); + return result.errors; } catch (Exception ex) { throw new StorageException(ex); } diff --git a/src/main/java/com/conveyal/gtfs/loader/ValidateFieldResult.java b/src/main/java/com/conveyal/gtfs/loader/ValidateFieldResult.java new file mode 100644 index 000000000..14bcb2d04 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ValidateFieldResult.java @@ -0,0 +1,35 @@ +package com.conveyal.gtfs.loader; + +import com.conveyal.gtfs.error.NewGTFSError; + +import java.util.HashSet; +import java.util.Set; + +/** + * This mini helper class is used during feed loading to return both: + * - a cleaned value of arbitrary type T and + * - any errors encountered while validating the original value. + * + * Previously we resorted to returning a single validated value and throwing exceptions if bad values were encountered, + * but this kept us from being able to do both things at once: repair the value AND collect errors on the offending input. + */ +public class ValidateFieldResult { + public T clean; + public Set errors = new HashSet<>(); + + public ValidateFieldResult() {} + + /** Constructor used to set a default value (which may then be updated with the clean value). */ + public ValidateFieldResult(T defaultValue) { + this.clean = defaultValue; + } + + /** Builder method that constructs a ValidateFieldResult with type String from the input result. */ + public static ValidateFieldResult from(ValidateFieldResult result) { + ValidateFieldResult stringResult = new ValidateFieldResult<>(); + stringResult.clean = String.valueOf(result.clean); + stringResult.errors.addAll(result.errors); + return stringResult; + } + +} diff --git a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java index c50cf929f..3b6cf40ff 100644 --- a/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/NewTripTimesValidator.java @@ -46,7 +46,6 @@ public NewTripTimesValidator(Feed feed, SQLErrorStorage errorStorage) { tripValidators = new TripValidator[] { new SpeedTripValidator(feed, errorStorage), new ReferencesTripValidator(feed, errorStorage), - //new OverlappingTripValidator(feed, errorStorage), new ReversedTripValidator(feed, errorStorage), new ServiceValidator(feed, errorStorage), new PatternFinderValidator(feed, errorStorage) @@ -164,11 +163,10 @@ private void processTrip (List stopTimes) { // Repair the case where an arrival or departure time is provided, but not both.
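// (fixMissingTimes is expected to copy the known time into the missing field, so both arrival and
// departure are set whenever either one was provided.)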
for (StopTime stopTime : stopTimes) fixMissingTimes(stopTime); // TODO check characteristics of timepoints - // All bad references should have been recorded at import, we can just ignore nulls. - Route route = null; - if (trip != null) route = routeById.get(trip.route_id); + // All bad references should have been recorded at import and null trip check is handled above, we can just + // ignore nulls. + Route route = routeById.get(trip.route_id); // Pass these same cleaned lists of stop_times and stops into each trip validator in turn. - for (TripValidator tripValidator : tripValidators) tripValidator.validateTrip(trip, route, stopTimes, stops); } @@ -177,7 +175,9 @@ private void processTrip (List stopTimes) { */ public void complete (ValidationResult validationResult) { for (TripValidator tripValidator : tripValidators) { + LOG.info("Running complete stage for {}", tripValidator.getClass().getSimpleName()); tripValidator.complete(validationResult); + LOG.info("{} finished", tripValidator.getClass().getSimpleName()); } } diff --git a/src/main/java/com/conveyal/gtfs/validator/OverlappingTripValidator.java b/src/main/java/com/conveyal/gtfs/validator/OverlappingTripValidator.java deleted file mode 100644 index c3603be72..000000000 --- a/src/main/java/com/conveyal/gtfs/validator/OverlappingTripValidator.java +++ /dev/null @@ -1,126 +0,0 @@ -package com.conveyal.gtfs.validator; - -import com.conveyal.gtfs.error.NewGTFSError; -import com.conveyal.gtfs.error.OverlappingTripsInBlockError; -import com.conveyal.gtfs.error.SQLErrorStorage; -import com.conveyal.gtfs.loader.Feed; -import com.conveyal.gtfs.model.*; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; - -import java.time.LocalDate; -import java.util.*; - -import static com.conveyal.gtfs.error.NewGTFSErrorType.TRIP_OVERLAP_IN_BLOCK; - -/** - * REVIEW - * whoa: feed.trips.values().stream().iterator().forEachRemaining(trip -> {}) - * should be for (Trip trip : feed.trips) {} - * We're fetching all the stop times for every trip in at least three different validators. - * - * Created by landon on 5/2/16. - */ -public class OverlappingTripValidator extends TripValidator { - - // check for overlapping trips within block - HashMap> blockIntervals = new HashMap<>(); - - public OverlappingTripValidator(Feed feed, SQLErrorStorage errorStorage) { - super(feed, errorStorage); - } - - @Override - public void validateTrip(Trip trip, Route route, List stopTimes, List stops) { - if (trip.block_id != null) { - BlockInterval blockInterval = new BlockInterval(); - blockInterval.trip = trip; - StopTime firstStopTime = stopTimes.get(0); - blockInterval.startTime = firstStopTime.departure_time; - blockInterval.firstStop = firstStopTime; - blockInterval.lastStop = stopTimes.get(stopTimes.size() - 1); - List intervals = blockIntervals.get(trip.block_id); - if (intervals == null) { - intervals = new ArrayList<>(); - blockIntervals.put(trip.block_id, intervals); - } - intervals.add(blockInterval); - } - } - - @Override - public void complete (ValidationResult validationResult) { - for (String blockId : blockIntervals.keySet()) { - List intervals = blockIntervals.get(blockId); - Collections.sort(intervals, Comparator.comparingInt(i -> i.startTime)); - int i2offset = 0; - // FIXME this has complexity of n^2, there has to be a better way. - for (BlockInterval i1 : intervals) { - // Compare the interval at position N with all other intervals at position N+1 to the end of the list. 
- i2offset += 1; // Fixme replace with integer iteration and get(n) - for(BlockInterval i2 : intervals.subList(i2offset, intervals.size() - 1)) { - String tripId1 = i1.trip.trip_id; - String tripId2 = i2.trip.trip_id; - if (tripId1.equals(tripId2)) { - // Why would this happen? Just duplicating a case covered by the original implementation. - // TODO can this happen at all? - continue; - } - // if trips don't overlap, skip FIXME can't this be simplified? - if (i1.lastStop.departure_time <= i2.firstStop.arrival_time || i2.lastStop.departure_time <= i1.firstStop.arrival_time) { - continue; - } - if (i1.trip.service_id.equals(i2.trip.service_id)) { - // Trips overlap. If they have the same service_id they overlap. - registerError(i1.trip, TRIP_OVERLAP_IN_BLOCK, i2.trip.trip_id); - } else { - // Trips overlap but don't have the same service_id. - // Check to see if service days fall on the same days of the week. - // FIXME requires more complex Service implementation - Service s1 = null; //feed.services.get(i1.trip.service_id); - Service s2 = null; //feed.services.get(i2.trip.service_id); - boolean overlap = Service.checkOverlap(s1, s2); - for (Map.Entry d1 : s1.calendar_dates.entrySet()) { - LocalDate date = d1.getKey(); - boolean activeOnDate = s1.activeOn(date) && s2.activeOn(date); - if (activeOnDate || overlap) { // FIXME this will always be true if overlap is true. - registerError(i1.trip, TRIP_OVERLAP_IN_BLOCK, i2.trip.trip_id); - break; - } - } - } - } - } - } - } - - private class BlockInterval { - Trip trip; - Integer startTime; - StopTime firstStop; - StopTime lastStop; - } - - // FIXME what is this patternId? This seems like a subset of block overlap errors (within a service day). -// String patternId = feed.tripPatternMap.get(tripId); -// String patternName = feed.patterns.get(patternId).name; -// int firstDeparture = Iterables.get(stopTimes, 0).departure_time; -// int lastArrival = Iterables.getLast(stopTimes).arrival_time; -// -// String tripKey = trip.service_id + "_"+ blockId + "_" + firstDeparture +"_" + lastArrival + "_" + patternId; -// -// if (duplicateTripHash.containsKey(tripKey)) { -// String firstDepartureString = LocalTime.ofSecondOfDay(Iterables.get(stopTimes, 0).departure_time % 86399).toString(); -// String lastArrivalString = LocalTime.ofSecondOfDay(Iterables.getLast(stopTimes).arrival_time % 86399).toString(); -// String duplicateTripId = duplicateTripHash.get(tripKey); -// Trip duplicateTrip = feed.trips.get(duplicateTripId); -// long line = trip.id > duplicateTrip.id ? 
trip.id : duplicateTrip.id; -// feed.errors.add(new DuplicateTripError(trip, line, duplicateTripId, patternName, firstDepartureString, lastArrivalString)); -// isValid = false; -// } else { -// duplicateTripHash.put(tripKey, tripId); -// } - - -} - diff --git a/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java b/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java index 3b1639c26..e842e0f75 100644 --- a/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java @@ -27,13 +27,17 @@ import java.time.DayOfWeek; import java.time.LocalDate; import java.time.temporal.ChronoUnit; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import static com.conveyal.gtfs.error.NewGTFSErrorType.TRIP_OVERLAP_IN_BLOCK; + /** * This will validate that service date information is coherent, and attempt to deduce or validate the range of dates * covered by a GTFS feed. @@ -52,7 +56,7 @@ public class ServiceValidator extends TripValidator { private static final Logger LOG = LoggerFactory.getLogger(ServiceValidator.class); - + private HashMap> blockIntervals = new HashMap<>(); private Map serviceInfoForServiceId = new HashMap<>(); private Map dateInfoForDate = new HashMap<>(); @@ -63,6 +67,19 @@ public ServiceValidator(Feed feed, SQLErrorStorage errorStorage) { @Override public void validateTrip(Trip trip, Route route, List stopTimes, List stops) { + if (trip.block_id != null) { + // If the trip has a block_id, add a new block interval to the map. + BlockInterval blockInterval = new BlockInterval(); + blockInterval.trip = trip; + StopTime firstStopTime = stopTimes.get(0); + blockInterval.startTime = firstStopTime.departure_time; + blockInterval.firstStop = firstStopTime; + blockInterval.lastStop = stopTimes.get(stopTimes.size() - 1); + // Construct new list of intervals if none exists for encountered block_id. + blockIntervals + .computeIfAbsent(trip.block_id, k -> new ArrayList<>()) + .add(blockInterval); + } int firstStopDeparture = stopTimes.get(0).departure_time; int lastStopArrival = stopTimes.get(stopTimes.size() - 1).arrival_time; if (firstStopDeparture == Entity.INT_MISSING || lastStopArrival == Entity.INT_MISSING) { @@ -95,7 +112,11 @@ public void validateTrip(Trip trip, Route route, List stopTimes, List< */ @Override public void complete(ValidationResult validationResult) { + validateServiceInfo(validationResult); + validateBlocks(); + } + private void validateServiceInfo(ValidationResult validationResult) { LOG.info("Merging calendars and calendar_dates..."); // First handle the calendar entries, which define repeating weekly schedules. 
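The computeIfAbsent call added in validateTrip above is the standard idiom for grouping values under a key without a separate null check. A minimal standalone sketch of the same pattern (class and variable names here are illustrative only, not part of the patch):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class GroupingSketch {
        public static void main(String[] args) {
            Map<String, List<Integer>> startTimesByBlockId = new HashMap<>();
            // computeIfAbsent creates the list on first use of a key, then returns it for appending.
            startTimesByBlockId.computeIfAbsent("block-1", k -> new ArrayList<>()).add(28800);
            startTimesByBlockId.computeIfAbsent("block-1", k -> new ArrayList<>()).add(30600);
            System.out.println(startTimesByBlockId); // {block-1=[28800, 30600]}
        }
    }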
@@ -106,12 +127,12 @@ public void complete(ValidationResult validationResult) { for (LocalDate date = calendar.start_date; date.isBefore(endDate) || date.isEqual(endDate); date = date.plusDays(1)) { DayOfWeek dayOfWeek = date.getDayOfWeek(); if ( (dayOfWeek == DayOfWeek.MONDAY && calendar.monday > 0) || - (dayOfWeek == DayOfWeek.TUESDAY && calendar.tuesday > 0) || - (dayOfWeek == DayOfWeek.WEDNESDAY && calendar.wednesday > 0) || - (dayOfWeek == DayOfWeek.THURSDAY && calendar.thursday > 0) || - (dayOfWeek == DayOfWeek.FRIDAY && calendar.friday > 0) || - (dayOfWeek == DayOfWeek.SATURDAY && calendar.saturday > 0) || - (dayOfWeek == DayOfWeek.SUNDAY && calendar.sunday > 0)) { + (dayOfWeek == DayOfWeek.TUESDAY && calendar.tuesday > 0) || + (dayOfWeek == DayOfWeek.WEDNESDAY && calendar.wednesday > 0) || + (dayOfWeek == DayOfWeek.THURSDAY && calendar.thursday > 0) || + (dayOfWeek == DayOfWeek.FRIDAY && calendar.friday > 0) || + (dayOfWeek == DayOfWeek.SATURDAY && calendar.saturday > 0) || + (dayOfWeek == DayOfWeek.SUNDAY && calendar.sunday > 0)) { // Service is active on this date. serviceInfoForServiceId.computeIfAbsent(calendar.service_id, ServiceInfo::new).datesActive.add(date); } @@ -155,7 +176,7 @@ select durations.service_id, duration_seconds, days_active from ( registerError(NewGTFSError.forFeed(NewGTFSErrorType.SERVICE_NEVER_ACTIVE, serviceInfo.serviceId)); for (String tripId : serviceInfo.tripIds) { registerError( - NewGTFSError.forTable(Table.TRIPS, NewGTFSErrorType.TRIP_NEVER_ACTIVE) + NewGTFSError.forTable(Table.TRIPS, NewGTFSErrorType.TRIP_NEVER_ACTIVE) .setEntityId(tripId) .setBadValue(tripId)); } @@ -220,7 +241,7 @@ select durations.service_id, duration_seconds, days_active from ( // Check for low or zero service, which seems to happen even when services are defined. // This will also catch cases where dateInfo was null and the new instance contains no service. registerError(NewGTFSError.forFeed(NewGTFSErrorType.DATE_NO_SERVICE, - DateField.GTFS_DATE_FORMATTER.format(date))); + DateField.GTFS_DATE_FORMATTER.format(date))); } } } @@ -293,7 +314,7 @@ select durations.service_id, duration_seconds, days_active from ( String serviceDurationsTableName = feed.tablePrefix + "service_durations"; sql = String.format("create table %s (service_id varchar, route_type integer, " + - "duration_seconds integer, primary key (service_id, route_type))", serviceDurationsTableName); + "duration_seconds integer, primary key (service_id, route_type))", serviceDurationsTableName); LOG.info(sql); statement.execute(sql); sql = String.format("insert into %s values (?, ?, ?)", serviceDurationsTableName); @@ -328,7 +349,7 @@ select durations.service_id, duration_seconds, days_active from ( LOG.info("Done."); } - private static class ServiceInfo { + static class ServiceInfo { final String serviceId; TIntIntHashMap durationByRouteType = new TIntIntHashMap(); @@ -345,7 +366,7 @@ public int getTotalServiceDurationSeconds() { } - private static class DateInfo { + static class DateInfo { final LocalDate date; TIntIntHashMap durationByRouteType = new TIntIntHashMap(); @@ -369,4 +390,59 @@ public void add (ServiceInfo serviceInfo) { tripCount += serviceInfo.tripIds.size(); } } + + /** + * Checks that trips which run on the same block (i.e., share a block_id) do not overlap. The block_id + * represents a vehicle in service, so there must not be any trips on the same block interval that start while another + * block trip is running. 
+ * + * NOTE: This validation check happens in the {@link ServiceValidator} because it depends on information derived + * about which service calendars operate on which feed dates ({@link #serviceInfoForServiceId}). + */ + private void validateBlocks () { + // Iterate over each block and determine if there are any trips that overlap one another. + for (String blockId : blockIntervals.keySet()) { + List<BlockInterval> intervals = blockIntervals.get(blockId); + intervals.sort(Comparator.comparingInt(i -> i.startTime)); + // Iterate over each interval (except for the last) comparing it to every other interval (so the last interval + // is handled through the course of iteration). + // FIXME this has complexity of n^2, there has to be a better way. + for (int n = 0; n < intervals.size() - 1; n++) { + BlockInterval interval1 = intervals.get(n); + // Compare the interval at position N with all other intervals at position N+1 to the end of the list. + for (BlockInterval interval2 : intervals.subList(n + 1, intervals.size())) { + if (interval1.lastStop.departure_time <= interval2.firstStop.arrival_time || interval2.lastStop.departure_time <= interval1.firstStop.arrival_time) { + continue; + } + // Reaching this point means each trip's last departure occurs after the other's first arrival, + // so the two intervals overlap in time. We still need to determine if they operate on the same day though. + if (interval1.trip.service_id.equals(interval2.trip.service_id)) { + // If the overlapping trips share a service_id, record an error. + registerError(interval1.trip, TRIP_OVERLAP_IN_BLOCK, interval2.trip.trip_id); + } else { + // Trips overlap but don't have the same service_id. + // Check to see if service days fall on the same days of the week. + ServiceValidator.ServiceInfo info1 = serviceInfoForServiceId.get(interval1.trip.service_id); + ServiceValidator.ServiceInfo info2 = serviceInfoForServiceId.get(interval2.trip.service_id); + Set<LocalDate> overlappingDates = new HashSet<>(info1.datesActive); // use the copy constructor + overlappingDates.retainAll(info2.datesActive); + if (overlappingDates.size() > 0) { + registerError(interval1.trip, TRIP_OVERLAP_IN_BLOCK, interval2.trip.trip_id); + } + } + } + } + } + } + + + /** + * A simple class used during validation to store details of the run interval for a block trip. + */ + private class BlockInterval { + Trip trip; + Integer startTime; + StopTime firstStop; + StopTime lastStop; + } } diff --git a/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java b/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java index 96170bbff..ef28671a7 100644 --- a/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java @@ -61,7 +61,7 @@ public void validateTrip(Trip trip, Route route, List<StopTime> stopTimes, List< registerError(currStopTime, DEPARTURE_BEFORE_ARRIVAL); } // Detect if travel times are rounded off to minutes. - boolean bothTravelTimesRounded = areTravelTimesRounded(prevStopTime); + boolean bothTravelTimesRounded = areTravelTimesRounded(prevStopTime) && areTravelTimesRounded(currStopTime); double travelTimeSeconds = currStopTime.arrival_time - prevStopTime.departure_time; // If travel times are rounded and travel time is zero, determine the maximum and minimum possible speed // by adding/removing one minute of slack.
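To make the one-minute slack idea concrete before moving on, here is a minimal standalone sketch of the speed bounds it implies. This is illustrative only and not code from this diff: the class and method names are invented, and the 60-second constant simply mirrors the "one minute of slack" described in the comment above.

    // Sketch: if arrival/departure times are rounded to the minute, a recorded
    // travel time of t seconds could truly be anywhere in [t - 60, t + 60].
    // Speed checks should therefore use the fastest and slowest plausible speeds.
    class RoundedTravelTimeSketch {
        static double maxPlausibleSpeed(double distanceMeters, int travelTimeSeconds) {
            // Remove one minute of slack, clamping to one second to avoid dividing by zero.
            return distanceMeters / Math.max(1, travelTimeSeconds - 60);
        }
        static double minPlausibleSpeed(double distanceMeters, int travelTimeSeconds) {
            // Add one minute of slack to obtain the slowest plausible speed.
            return distanceMeters / (travelTimeSeconds + 60);
        }
    }

Under this reading, a hop covering 1000 meters with a recorded travel time of zero is checked against 1000 / 60 ≈ 16.7 m/s rather than an infinite speed.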
diff --git a/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java b/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java index e908c7619..38bad0505 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java @@ -252,7 +252,8 @@ public void canGetSpatialIndex() { GTFSFeed feed = GTFSFeed.fromFile(simpleGtfsZipFileName); assertThat( feed.getSpatialIndex().size(), - equalTo(2) + // This should reflect the number of stops in src/test/resources/fake-agency/stops.txt + equalTo(4) ); } @@ -279,4 +280,4 @@ public void canGetTripSpeedUsingStraightLine() { is(closeTo(5.18, 0.01)) ); } -} \ No newline at end of file +} diff --git a/src/test/java/com/conveyal/gtfs/GTFSTest.java b/src/test/java/com/conveyal/gtfs/GTFSTest.java index 3e2c8fe69..84f568548 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSTest.java @@ -4,6 +4,7 @@ import com.conveyal.gtfs.error.NewGTFSErrorType; import com.conveyal.gtfs.loader.FeedLoadResult; import com.conveyal.gtfs.loader.SnapshotResult; +import com.conveyal.gtfs.storage.ErrorExpectation; import com.conveyal.gtfs.storage.ExpectedFieldType; import com.conveyal.gtfs.storage.PersistenceExpectation; import com.conveyal.gtfs.storage.RecordExpectation; @@ -125,6 +126,25 @@ public void canLoadFeedWithBadDates () { ); } + /** + * Tests that a GTFS feed with overlapping block trips will record the appropriate error. + */ + @Test + public void canLoadFeedWithOverlappingTrips () { + PersistenceExpectation[] expectations = PersistenceExpectation.list( + new PersistenceExpectation( + new ErrorExpectation[]{ + new ErrorExpectation("error_type", NewGTFSErrorType.TRIP_OVERLAP_IN_BLOCK.toString()) + } + ) + ); + assertThat( + "Integration test passes", + runIntegrationTestOnFolder("fake-agency-overlapping-trips", nullValue(), expectations), + equalTo(true) + ); + } + /** * Tests whether or not "fake-agency" GTFS can be placed in a zipped subdirectory and loaded/exported successfully. 
*/ @@ -231,6 +251,7 @@ private boolean runIntegrationTestOnFolder( Matcher<Object> fatalExceptionExpectation, PersistenceExpectation[] persistenceExpectations ) { + LOG.info("Running integration test on folder {}", folderName); // zip up test folder into temp zip file String zipFileName = null; try { @@ -267,7 +288,7 @@ private boolean runIntegrationTestOnZipFile( // Verify that loading the feed completes and data is stored properly try { // load and validate feed - LOG.info("load and validate feed"); + LOG.info("load and validate GTFS file {}", zipFileName); FeedLoadResult loadResult = GTFS.load(zipFileName, dataSource); ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource); @@ -339,38 +360,6 @@ private void assertThatSnapshotIsErrorFree(SnapshotResult snapshotResult) { assertThat(snapshotResult.scheduleExceptions.fatalException, is(nullValue())); } - private String zipFolderAndLoadGTFS(String folderName, DataSource dataSource, PersistenceExpectation[] persistenceExpectations) { - // zip up test folder into temp zip file - String zipFileName; - try { - zipFileName = TestUtils.zipFolderFiles(folderName, true); - } catch (IOException e) { - e.printStackTrace(); - return null; - } - String namespace; - // Verify that loading the feed completes and data is stored properly - try { - // load and validate feed - LOG.info("load and validate feed"); - FeedLoadResult loadResult = GTFS.load(zipFileName, dataSource); - ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource); - DataSource ds = GTFS.createDataSource( - dataSource.getConnection().getMetaData().getURL(), - null, - null - ); - assertThatLoadIsErrorFree(loadResult); - assertThat(validationResult.fatalException, is(nullValue())); - namespace = loadResult.uniqueIdentifier; - assertThatImportedGtfsMeetsExpectations(ds.getConnection(), namespace, persistenceExpectations); - } catch (SQLException | AssertionError e) { - e.printStackTrace(); - return null; - } - return namespace; - } - /** * Helper function to export a GTFS from the database to a temporary zip file.
*/ @@ -421,28 +410,20 @@ private void assertThatImportedGtfsMeetsExpectations( int numRecordsSearched = 0; while (rs.next()) { numRecordsSearched++; - LOG.info(String.format("record %d in ResultSet", numRecordsSearched)); + LOG.info("record {} in ResultSet", numRecordsSearched); boolean allFieldsMatch = true; for (RecordExpectation recordExpectation: persistenceExpectation.recordExpectations) { switch (recordExpectation.expectedFieldType) { case DOUBLE: double doubleVal = rs.getDouble(recordExpectation.fieldName); - LOG.info(String.format( - "%s: %f", - recordExpectation.fieldName, - doubleVal - )); + LOG.info("{}: {}", recordExpectation.fieldName, doubleVal); if (doubleVal != recordExpectation.doubleExpectation) { allFieldsMatch = false; } break; case INT: int intVal = rs.getInt(recordExpectation.fieldName); - LOG.info(String.format( - "%s: %d", - recordExpectation.fieldName, - intVal - )); + LOG.info("{}: {}", recordExpectation.fieldName, intVal); if (intVal != recordExpectation.intExpectation) { fieldsWithMismatches.put( recordExpectation.fieldName, @@ -453,17 +434,10 @@ private void assertThatImportedGtfsMeetsExpectations( break; case STRING: String strVal = rs.getString(recordExpectation.fieldName); - LOG.info(String.format( - "%s: %s", - recordExpectation.fieldName, - strVal - )); + LOG.info("{}: {}", recordExpectation.fieldName, strVal); if (strVal == null && recordExpectation.stringExpectation == null) { break; - } else if ( - (strVal == null && recordExpectation.stringExpectation != null) || - !strVal.equals(recordExpectation.stringExpectation) - ) { + } else if (strVal == null || !strVal.equals(recordExpectation.stringExpectation)) { fieldsWithMismatches.put( recordExpectation.fieldName, new ValuePair(recordExpectation.stringExpectation, strVal) @@ -521,6 +495,8 @@ private void assertThatExportedGtfsMeetsExpectations( // iterate through all expectations for (PersistenceExpectation persistenceExpectation : persistenceExpectations) { + // No need to check that errors were exported because it is an internal table only. 
+ if ("errors".equals(persistenceExpectation.tableName)) continue; final String tableFileName = persistenceExpectation.tableName + ".txt"; LOG.info(String.format("reading table: %s", tableFileName)); @@ -618,8 +594,8 @@ private void assertThatPersistenceExpectationRecordWasFound( for (ValuePair valuePair : valuePairs) { assertThat( String.format("The value expected for %s was not found", field), - valuePair.expected, - equalTo(valuePair.found) + valuePair.found, + equalTo(valuePair.expected) ); } } diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index 421da273d..168c3310b 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -2,7 +2,6 @@ import com.conveyal.gtfs.TestUtils; import com.conveyal.gtfs.loader.FeedLoadResult; -import com.fasterxml.jackson.databind.ObjectMapper; import graphql.GraphQL; import org.apache.commons.io.IOUtils; import org.junit.AfterClass; @@ -40,10 +39,8 @@ public class GTFSGraphQLTest { private static DataSource testInjectionDataSource; private static String testInjectionNamespace; - private ObjectMapper mapper = new ObjectMapper(); - @BeforeClass - public static void setUpClass() throws SQLException, IOException { + public static void setUpClass() throws IOException { // create a new database testDBName = TestUtils.generateNewDB(); String dbConnectionUrl = String.format("jdbc:postgresql://localhost/%s", testDBName); @@ -74,74 +71,80 @@ public static void tearDownClass() { TestUtils.dropDB(testInjectionDBName); } - // tests that the graphQL schema can initialize + /** Tests that the graphQL schema can initialize. */ @Test(timeout=5000) public void canInitialize() { GTFSGraphQL.initialize(testDataSource); GraphQL graphQL = GTFSGraphQL.getGraphQl(); } - // tests that the root element of a feed can be fetched + /** Tests that the root element of a feed can be fetched. */ @Test(timeout=5000) public void canFetchFeed() throws IOException { assertThat(queryGraphQL("feed.txt"), matchesSnapshot()); } - // tests that the row counts of a feed can be fetched + /** Tests that the row counts of a feed can be fetched. */ @Test(timeout=5000) public void canFetchFeedRowCounts() throws IOException { assertThat(queryGraphQL("feedRowCounts.txt"), matchesSnapshot()); } - // tests that the errors of a feed can be fetched + /** Tests that the errors of a feed can be fetched. */ @Test(timeout=5000) public void canFetchErrors() throws IOException { assertThat(queryGraphQL("feedErrors.txt"), matchesSnapshot()); } - // tests that the feed_info of a feed can be fetched + /** Tests that the feed_info of a feed can be fetched. */ @Test(timeout=5000) public void canFetchFeedInfo() throws IOException { assertThat(queryGraphQL("feedFeedInfo.txt"), matchesSnapshot()); } - // tests that the patterns of a feed can be fetched + /** Tests that the patterns of a feed can be fetched. */ @Test(timeout=5000) public void canFetchPatterns() throws IOException { assertThat(queryGraphQL("feedPatterns.txt"), matchesSnapshot()); } - // tests that the agencies of a feed can be fetched + /** Tests that the agencies of a feed can be fetched. */ @Test(timeout=5000) public void canFetchAgencies() throws IOException { assertThat(queryGraphQL("feedAgencies.txt"), matchesSnapshot()); } - // tests that the calendars of a feed can be fetched + /** Tests that the calendars of a feed can be fetched. 
*/ @Test(timeout=5000) public void canFetchCalendars() throws IOException { assertThat(queryGraphQL("feedCalendars.txt"), matchesSnapshot()); } - // tests that the fares of a feed can be fetched + /** Tests that the fares of a feed can be fetched. */ @Test(timeout=5000) public void canFetchFares() throws IOException { assertThat(queryGraphQL("feedFares.txt"), matchesSnapshot()); } - // tests that the routes of a feed can be fetched + /** Tests that the routes of a feed can be fetched. */ @Test(timeout=5000) public void canFetchRoutes() throws IOException { assertThat(queryGraphQL("feedRoutes.txt"), matchesSnapshot()); } - // tests that the stops of a feed can be fetched + /** Tests that the stops of a feed can be fetched. */ @Test(timeout=5000) public void canFetchStops() throws IOException { assertThat(queryGraphQL("feedStops.txt"), matchesSnapshot()); } - // tests that the trips of a feed can be fetched + /** Tests that a stop and its child stops can be fetched. */ + @Test(timeout=5000) + public void canFetchStopWithChildren() throws IOException { + assertThat(queryGraphQL("feedStopWithChildren.txt"), matchesSnapshot()); + } + + /** Tests that the trips of a feed can be fetched. */ @Test(timeout=5000) public void canFetchTrips() throws IOException { assertThat(queryGraphQL("feedTrips.txt"), matchesSnapshot()); @@ -149,19 +152,19 @@ public void canFetchTrips() throws IOException { // TODO: make tests for schedule_exceptions / calendar_dates - // tests that the stop times of a feed can be fetched + /** Tests that the stop times of a feed can be fetched. */ @Test(timeout=5000) public void canFetchStopTimes() throws IOException { assertThat(queryGraphQL("feedStopTimes.txt"), matchesSnapshot()); } - // tests that the stop times of a feed can be fetched + /** Tests that the services of a feed can be fetched. */ @Test(timeout=5000) public void canFetchServices() throws IOException { assertThat(queryGraphQL("feedServices.txt"), matchesSnapshot()); } - // tests that the stop times of a feed can be fetched + /** Tests that routes can be fetched with their trips filtered by date and time. */ @Test(timeout=5000) public void canFetchRoutesAndFilterTripsByDateAndTime() throws IOException { Map<String, Object> variables = new HashMap<>(); @@ -175,29 +178,32 @@ public void canFetchRoutesAndFilterTripsByDateAndTime() throws IOException { ); } - // tests that the limit argument applies properly to a fetcher defined with autolimit set to false + /** Tests that the limit argument applies properly to a fetcher defined with autolimit set to false. */ @Test(timeout=5000) public void canFetchNestedEntityWithLimit() throws IOException { assertThat(queryGraphQL("feedStopsStopTimeLimit.txt"), matchesSnapshot()); } - // tests whether a graphQL query that has superflous and redundant nesting can find the right result + /** Tests whether a graphQL query that has superfluous and redundant nesting can find the right result. */ // if the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs @Test(timeout=5000) public void canFetchMultiNestedEntities() throws IOException { assertThat(queryGraphQL("superNested.txt"), matchesSnapshot()); } - // tests whether a graphQL query that has superflous and redundant nesting can find the right result - // if the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs - // furthermore, some queries should have been combined together + + /** + * Tests whether a graphQL query that has superfluous and redundant nesting can find the right result.
+ * If the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs. + * Furthermore, some queries should have been combined together. + */ @Test(timeout=5000) public void canFetchMultiNestedEntitiesWithoutLimits() throws IOException { assertThat(queryGraphQL("superNestedNoLimits.txt"), matchesSnapshot()); } /** - * attempt to fetch more than one record with SQL injection as inputs - * the graphql library should properly escape the string and return 0 results for stops + * Attempt to fetch more than one record with SQL injection as inputs. + * The graphql library should properly escape the string and return 0 results for stops. */ @Test(timeout=5000) public void canSanitizeSQLInjectionSentAsInput() throws IOException { @@ -215,8 +221,8 @@ public void canSanitizeSQLInjectionSentAsInput() throws IOException { } /** - * attempt run a graphql query when one of the pieces of data contains a SQL injection - * the graphql library should properly escape the string and complete the queries + * Attempt to run a graphql query when one of the pieces of data contains a SQL injection. + * The graphql library should properly escape the string and complete the queries. */ @Test(timeout=5000) public void canSanitizeSQLInjectionSentAsKeyValue() throws IOException, SQLException { @@ -238,7 +244,7 @@ /** - * Helper method to make a query with default variables + * Helper method to make a query with default variables. * * @param queryFilename the filename that should be used to generate the GraphQL query. This file must be present * in the `src/test/resources/graphql` folder @@ -250,7 +256,7 @@ private Map<String, Object> queryGraphQL(String queryFilename) throws IOException } /** - * Helper method to execute a GraphQL query and return the result + * Helper method to execute a GraphQL query and return the result. * * @param queryFilename the filename that should be used to generate the GraphQL query. This file must be present * in the `src/test/resources/graphql` folder diff --git a/src/test/java/com/conveyal/gtfs/loader/FieldTests.java b/src/test/java/com/conveyal/gtfs/loader/FieldTests.java index 9e6823bde..520ecee9f 100644 --- a/src/test/java/com/conveyal/gtfs/loader/FieldTests.java +++ b/src/test/java/com/conveyal/gtfs/loader/FieldTests.java @@ -1,13 +1,10 @@ package com.conveyal.gtfs.loader; -import com.conveyal.gtfs.GTFSFeed; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.NewGTFSErrorType; -import com.conveyal.gtfs.stats.FeedStats; -import com.conveyal.gtfs.storage.StorageException; import org.junit.Test; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; /** * Unit tests to verify functionality of classes that load fields from GTFS tables.
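A brief aside on the two SQL-injection tests above: they rest on the usual distinction between concatenating untrusted input into a SQL string and binding it as a parameter. The JDBC sketch below illustrates only that principle; it is not the graphql library's internal code, and the table and column names are placeholders.

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    class ParameterBindingSketch {
        // A bound value is treated strictly as data by the driver, so an input
        // such as "x' OR '1'='1" matches zero rows instead of rewriting the query.
        static ResultSet findStop(Connection conn, String namespace, String stopId) throws SQLException {
            // Identifiers (like the schema namespace) cannot be bound as parameters,
            // which is why namespaces are formatted into the SQL separately.
            PreparedStatement statement = conn.prepareStatement(
                    String.format("select * from %s.stops where stop_id = ?", namespace));
            statement.setString(1, stopId);
            return statement.executeQuery();
        }
    }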
@@ -30,24 +27,54 @@ public void dateFieldParseTest() { "23000527", // Very distant future year "790722" // Two digit year }; - + DateField dateField = new DateField("date", Requirement.REQUIRED); for (String badDate : badDates) { - try { - new DateField("date", Requirement.REQUIRED).validateAndConvert(badDate); - assertThat("Parsing bad dates should throw an exception and never reach this point.", false); - } catch (StorageException ex) { - assertThat("Error type should be date-related.", - ex.errorType == NewGTFSErrorType.DATE_FORMAT || ex.errorType == NewGTFSErrorType.DATE_RANGE); - assertThat("Error's bad value should be the input value (the bad date).", ex.badValue.equals(badDate)); - } + ValidateFieldResult<String> result = dateField.validateAndConvert(badDate); + assertThat("Parsing bad dates should result in an error.", result.errors.size() > 0); + NewGTFSError error = result.errors.iterator().next(); + assertThat("Error type should be date-related.", + error.errorType == NewGTFSErrorType.DATE_FORMAT || error.errorType == NewGTFSErrorType.DATE_RANGE); + assertThat("Error's bad value should be the input value (the bad date).", error.badValue.equals(badDate)); } String[] goodDates = { "20160522", "20180717", "20221212", "20190505" }; for (String goodDate : goodDates) { - String convertedValue = new DateField("date", Requirement.REQUIRED).validateAndConvert(goodDate); - assertThat("Returned value matches the well-formed date.", convertedValue.equals(goodDate)); + ValidateFieldResult<String> result = dateField.validateAndConvert(goodDate); + assertThat("Returned value matches the well-formed date.", result.clean.equals(goodDate)); } } + + /** + * Make sure {@link Field#cleanString(ValidateFieldResult)} catches and removes illegal character sequences. + */ + @Test + public void illegalCharacterParseTest() { + String[] badStrings = { + "\n", // simple new line + "\t", // simple tab + "\t\n\r", // tab, new line, carriage return + "Hello\\world", // backslashes not permitted (replaced with escaped slash) + "Downtown via Peachtree\n\nSt" // new lines within string + }; + StringField stringField = new StringField("any", Requirement.REQUIRED); + for (String badString : badStrings) { + ValidateFieldResult<String> result = stringField.validateAndConvert(badString); + assertThat("Input with illegal characters should result in an error.", result.errors.size() > 0); + NewGTFSError error = result.errors.iterator().next(); + assertThat("Error type should be illegal field value.", + error.errorType == NewGTFSErrorType.ILLEGAL_FIELD_VALUE); + for (IllegalCharacter illegalCharacter : Field.ILLEGAL_CHARACTERS) { + // Check that string is clean. Note: for backslash, we check that the clean string contains an escaped + // backslash because checking for a single backslash will yield true even after a successful + // substitution.
+ boolean stringIsClean = !result.clean.contains(illegalCharacter.illegalSequence); + if (illegalCharacter.illegalSequence.equals("\\") && !stringIsClean) { + stringIsClean = result.clean.contains(illegalCharacter.replacement); + } + assertThat(String.format("The cleaned string '%s' should not contain illegal character (%s).", result.clean, illegalCharacter.illegalSequence), stringIsClean); + } + } + } } diff --git a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java index b95253689..2a9077caf 100644 --- a/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java +++ b/src/test/java/com/conveyal/gtfs/loader/JDBCTableWriterTest.java @@ -6,6 +6,7 @@ import com.conveyal.gtfs.dto.FareRuleDTO; import com.conveyal.gtfs.dto.FeedInfoDTO; import com.conveyal.gtfs.dto.FrequencyDTO; +import com.conveyal.gtfs.model.StopTime; import com.conveyal.gtfs.util.InvalidNamespaceException; import com.conveyal.gtfs.dto.PatternDTO; import com.conveyal.gtfs.dto.PatternStopDTO; @@ -24,16 +25,19 @@ import javax.sql.DataSource; import java.io.IOException; import java.sql.Connection; +import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.UUID; import static com.conveyal.gtfs.GTFS.createDataSource; +import static com.conveyal.gtfs.GTFS.load; import static com.conveyal.gtfs.GTFS.makeSnapshot; +import static com.conveyal.gtfs.GTFS.validate; +import static com.conveyal.gtfs.TestUtils.getResourceFileName; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; -import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; /** * This class contains CRUD tests for {@link JdbcTableWriter} (i.e., editing GTFS entities in the RDBMS). 
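Before the table-writer tests, one note on the illegalCharacterParseTest above: the cleaning contract it exercises pairs each illegal sequence with a replacement, and reports each substitution as an ILLEGAL_FIELD_VALUE error. The sketch below captures only the shape of that idea; the authoritative sequence/replacement pairs live in Field.ILLEGAL_CHARACTERS, and the replacement values shown here are assumptions for illustration.

    import java.util.LinkedHashMap;
    import java.util.Map;

    class FieldCleaningSketch {
        // Assumed pairs for illustration; the library's IllegalCharacter entries are authoritative.
        static final Map<String, String> REPLACEMENTS = new LinkedHashMap<>();
        static {
            REPLACEMENTS.put("\t", " ");    // tab (assumed replacement)
            REPLACEMENTS.put("\n", " ");    // new line (assumed replacement)
            REPLACEMENTS.put("\r", " ");    // carriage return (assumed replacement)
            REPLACEMENTS.put("\\", "\\\\"); // backslash is escaped rather than dropped
        }
        // Apply every substitution in order and return the cleaned value.
        static String clean(String input) {
            String result = input;
            for (Map.Entry<String, String> entry : REPLACEMENTS.entrySet()) {
                result = result.replace(entry.getKey(), entry.getValue());
            }
            return result;
        }
    }

This also explains the backslash special case in the test: after cleaning, the string legitimately contains a backslash (as part of the escape), so the test checks for the replacement sequence instead of the raw character.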
Set up @@ -48,6 +52,7 @@ public class JDBCTableWriterTest { private static String testDBName; private static DataSource testDataSource; private static String testNamespace; + private static String testGtfsGLSnapshotNamespace; private static String simpleServiceId = "1"; private static String firstStopId = "1"; private static String lastStopId = "2"; @@ -83,6 +88,22 @@ public static void setUpClass() throws SQLException, IOException, InvalidNamespa createWeekdayCalendar(simpleServiceId, "20180103", "20180104"); createSimpleStop(firstStopId, "First Stop", firstStopLat, firstStopLon); createSimpleStop(lastStopId, "Last Stop", lastStopLat, lastStopLon); + + /** Load the following real-life GTFS for use with {@link JDBCTableWriterTest#canUpdateServiceId()} **/ + // load feed into db + FeedLoadResult feedLoadResult = load(getResourceFileName("real-world-gtfs-feeds/gtfs_GL.zip"), testDataSource); + String testGtfsGLNamespace = feedLoadResult.uniqueIdentifier; + // validate feed to create additional tables + validate(testGtfsGLNamespace, testDataSource); + // load into editor via snapshot + JdbcGtfsSnapshotter snapshotter = new JdbcGtfsSnapshotter(testGtfsGLNamespace, testDataSource); + SnapshotResult snapshotResult = snapshotter.copyTables(); + testGtfsGLSnapshotNamespace = snapshotResult.uniqueIdentifier; + } + + @AfterClass + public static void tearDownClass() { + TestUtils.dropDB(testDBName); } @Test @@ -261,18 +282,6 @@ public void canCreateUpdateAndDeleteFares() throws IOException, SQLException, In )); } - void assertThatSqlQueryYieldsZeroRows(String sql) throws SQLException { - assertThatSqlQueryYieldsRowCount(sql, 0); - } - - private void assertThatSqlQueryYieldsRowCount(String sql, int expectedRowCount) throws SQLException { - LOG.info(sql); - int recordCount = 0; - ResultSet rs = testDataSource.getConnection().prepareStatement(sql).executeQuery(); - while (rs.next()) recordCount++; - assertThat("Records matching query should equal expected count.", recordCount, equalTo(expectedRowCount)); - } - @Test public void canCreateUpdateAndDeleteRoutes() throws IOException, SQLException, InvalidNamespaceException { // Store Table and Class values for use in test. @@ -327,54 +336,107 @@ public void canCreateUpdateAndDeleteRoutes() throws IOException, SQLException, I } /** - * Create and store a simple route for testing. - */ - private static RouteDTO createSimpleTestRoute(String routeId, String agencyId, String shortName, String longName, int routeType) throws InvalidNamespaceException, IOException, SQLException { - RouteDTO input = new RouteDTO(); - input.route_id = routeId; - input.agency_id = agencyId; - // Empty value should be permitted for transfers and transfer_duration - input.route_short_name = shortName; - input.route_long_name = longName; - input.route_type = routeType; - // convert object to json and save it - JdbcTableWriter createTableWriter = createTestTableWriter(Table.ROUTES); - String output = createTableWriter.create(mapper.writeValueAsString(input), true); - LOG.info("create {} output:", Table.ROUTES.name); - LOG.info(output); - // parse output - return mapper.readValue(output, RouteDTO.class); - } - - /** - * Creates a pattern by first creating a route and then a pattern for that route. + * This test verifies that stop_times#shape_dist_traveled (and other "linked fields") are updated when a pattern + * is updated. 
*/ - private static PatternDTO createRouteAndSimplePattern(String routeId, String patternId, String name) throws InvalidNamespaceException, SQLException, IOException { - return createRouteAndPattern(routeId, patternId, name, null, new ShapePointDTO[]{}, new PatternStopDTO[]{}, 0); + @Test + public void shouldUpdateStopTimeShapeDistTraveledOnPatternStopUpdate() throws IOException, SQLException, InvalidNamespaceException { + String routeId = newUUID(); + String patternId = newUUID(); + int startTime = 6 * 60 * 60; // 6 AM + PatternDTO pattern = createRouteAndPattern( + routeId, + patternId, + "pattern name", + null, + new ShapePointDTO[]{}, + new PatternStopDTO[]{ + new PatternStopDTO(patternId, firstStopId, 0), + new PatternStopDTO(patternId, lastStopId, 1) + }, + 0 + ); + // Make sure saved data matches expected data. + assertThat(pattern.route_id, equalTo(routeId)); + // Create trip so we can check that the stop_time values are updated after the patter update. + TripDTO tripInput = constructTimetableTrip(pattern.pattern_id, pattern.route_id, startTime, 60); + JdbcTableWriter createTripWriter = createTestTableWriter(Table.TRIPS); + String createdTripOutput = createTripWriter.create(mapper.writeValueAsString(tripInput), true); + TripDTO createdTrip = mapper.readValue(createdTripOutput, TripDTO.class); + // Check the stop_time's initial shape_dist_traveled value. TODO test that other linked fields are updated? + PreparedStatement statement = testDataSource.getConnection().prepareStatement( + String.format( + "select shape_dist_traveled from %s.stop_times where stop_sequence=1 and trip_id='%s'", + testNamespace, + createdTrip.trip_id + ) + ); + ResultSet resultSet = statement.executeQuery(); + while (resultSet.next()) { + // First stop_time shape_dist_traveled should be zero. + assertThat(resultSet.getInt(1), equalTo(0)); + } + // Update pattern_stop#shape_dist_traveled and check that the stop_time's shape_dist value is updated. + final double updatedShapeDistTraveled = 45.5; + pattern.pattern_stops[1].shape_dist_traveled = updatedShapeDistTraveled; + JdbcTableWriter patternUpdater = createTestTableWriter(Table.PATTERNS); + String updatedPatternOutput = patternUpdater.update(pattern.id, mapper.writeValueAsString(pattern), true); + LOG.info("Updated pattern: {}", updatedPatternOutput); + ResultSet resultSet2 = statement.executeQuery(); + while (resultSet2.next()) { + // First stop_time shape_dist_traveled should be updated. + assertThat(resultSet2.getDouble(1), equalTo(updatedShapeDistTraveled)); + } } - /** - * Creates a pattern by first creating a route and then a pattern for that route. 
- */ - private static PatternDTO createRouteAndPattern(String routeId, String patternId, String name, String shapeId, ShapePointDTO[] shapes, PatternStopDTO[] patternStops, int useFrequency) throws InvalidNamespaceException, SQLException, IOException { - // Create new route - createSimpleTestRoute(routeId, "RTA", "500", "Hollingsworth", 3); - // Create new pattern for route - PatternDTO input = new PatternDTO(); - input.pattern_id = patternId; - input.route_id = routeId; - input.name = name; - input.use_frequency = useFrequency; - input.shape_id = shapeId; - input.shapes = shapes; - input.pattern_stops = patternStops; - // Write the pattern to the database - JdbcTableWriter createPatternWriter = createTestTableWriter(Table.PATTERNS); - String output = createPatternWriter.create(mapper.writeValueAsString(input), true); - LOG.info("create {} output:", Table.PATTERNS.name); - LOG.info(output); - // Parse output - return mapper.readValue(output, PatternDTO.class); + @Test + public void shouldDeleteReferencingTripsAndStopTimesOnPatternDelete() throws IOException, SQLException, InvalidNamespaceException { + String routeId = "9834914"; + int startTime = 6 * 60 * 60; // 6 AM + PatternDTO pattern = createRouteAndSimplePattern(routeId, "9901900", "The Line"); + // make sure saved data matches expected data + assertThat(pattern.route_id, equalTo(routeId)); + TripDTO tripInput = constructTimetableTrip(pattern.pattern_id, pattern.route_id, startTime, 60); + JdbcTableWriter createTripWriter = createTestTableWriter(Table.TRIPS); + String createdTripOutput = createTripWriter.create(mapper.writeValueAsString(tripInput), true); + TripDTO createdTrip = mapper.readValue(createdTripOutput, TripDTO.class); + assertThatSqlQueryYieldsRowCount( + String.format( + "select * from %s.%s where id=%d", + testNamespace, + Table.TRIPS.name, + createdTrip.id + ), + 1 + ); + // Delete pattern record + JdbcTableWriter deletePatternWriter = createTestTableWriter(Table.PATTERNS); + int deleteOutput = deletePatternWriter.delete(pattern.id, true); + LOG.info("deleted {} records from {}", deleteOutput, Table.PATTERNS.name); + // Check that pattern record does not exist in DB + assertThatSqlQueryYieldsZeroRows( + String.format( + "select * from %s.%s where id=%d", + testNamespace, + Table.PATTERNS.name, + pattern.id + )); + // Check that trip records for pattern do not exist in DB + assertThatSqlQueryYieldsZeroRows( + String.format( + "select * from %s.%s where pattern_id='%s'", + testNamespace, + Table.TRIPS.name, + pattern.pattern_id + )); + // Check that stop_times records for trip do not exist in DB + assertThatSqlQueryYieldsZeroRows( + String.format( + "select * from %s.%s where trip_id='%s'", + testNamespace, + Table.STOP_TIMES.name, + createdTrip.trip_id + )); } /** @@ -456,34 +518,140 @@ public void canCreateUpdateAndDeleteFrequencyTripForFrequencyPattern() throws IO LOG.info("Updated pattern output: {}", updatedPatternOutput); // Create new trip for the pattern String createTripOutput = createTripWriter.create(mapper.writeValueAsString(tripInput), true); + LOG.info(createTripOutput); TripDTO createdTrip = mapper.readValue(createTripOutput, TripDTO.class); // Update trip // TODO: Add update and delete tests for updating pattern stops, stop_times, and frequencies. 
String updatedTripId = "100A"; createdTrip.trip_id = updatedTripId; JdbcTableWriter updateTripWriter = createTestTableWriter(tripsTable); - String updateTripOutput = updateTripWriter.update(tripInput.id, mapper.writeValueAsString(createdTrip), true); + String updateTripOutput = updateTripWriter.update(createdTrip.id, mapper.writeValueAsString(createdTrip), true); + LOG.info(updateTripOutput); TripDTO updatedTrip = mapper.readValue(updateTripOutput, TripDTO.class); // Check that saved data matches expected data assertThat(updatedTrip.frequencies[0].start_time, equalTo(startTime)); assertThat(updatedTrip.trip_id, equalTo(updatedTripId)); // Delete trip record JdbcTableWriter deleteTripWriter = createTestTableWriter(tripsTable); - int deleteOutput = deleteTripWriter.delete( - createdTrip.id, - true - ); + int deleteOutput = deleteTripWriter.delete(createdTrip.id, true); LOG.info("deleted {} records from {}", deleteOutput, tripsTable.name); - // Check that record does not exist in DB + // Check that trip record does not exist in DB assertThatSqlQueryYieldsZeroRows( String.format( "select * from %s.%s where id=%d", testNamespace, tripsTable.name, - createdTrip.id + updatedTrip.id )); + // Check that stop_times records do not exist in DB + assertThatSqlQueryYieldsZeroRows( + String.format( + "select * from %s.%s where trip_id='%s'", + testNamespace, + Table.STOP_TIMES.name, + updatedTrip.trip_id )); } + /** + * Checks that {@link JdbcTableWriter#normalizeStopTimesForPattern(int, int)} can normalize stop times to a pattern's + * default travel times. + */ + @Test + public void canNormalizePatternStopTimes() throws IOException, SQLException, InvalidNamespaceException { + // Store Table and Class values for use in test. + final Table tripsTable = Table.TRIPS; + int initialTravelTime = 60; // one minute + int startTime = 6 * 60 * 60; // 6AM + String patternId = "123456"; + PatternStopDTO[] patternStops = new PatternStopDTO[]{ + new PatternStopDTO(patternId, firstStopId, 0), + new PatternStopDTO(patternId, lastStopId, 1) + }; + patternStops[1].default_travel_time = initialTravelTime; + PatternDTO pattern = createRouteAndPattern(newUUID(), + patternId, + "Pattern A", + null, + new ShapePointDTO[]{}, + patternStops, + 0); + // Create trip with travel times that match pattern stops. + TripDTO tripInput = constructTimetableTrip(pattern.pattern_id, pattern.route_id, startTime, initialTravelTime); + JdbcTableWriter createTripWriter = createTestTableWriter(tripsTable); + String createTripOutput = createTripWriter.create(mapper.writeValueAsString(tripInput), true); + LOG.info(createTripOutput); + TripDTO createdTrip = mapper.readValue(createTripOutput, TripDTO.class); + // Update pattern stop with new travel time. + JdbcTableWriter patternUpdater = createTestTableWriter(Table.PATTERNS); + int updatedTravelTime = 3600; // one hour + pattern.pattern_stops[1].default_travel_time = updatedTravelTime; + String updatedPatternOutput = patternUpdater.update(pattern.id, mapper.writeValueAsString(pattern), true); + LOG.info("Updated pattern output: {}", updatedPatternOutput); + // Normalize stop times. + JdbcTableWriter updateTripWriter = createTestTableWriter(tripsTable); + updateTripWriter.normalizeStopTimesForPattern(pattern.id, 0); + // Read stop times from the database and check that the arrivals/departures have been updated.
+ JDBCTableReader<StopTime> stopTimesTable = new JDBCTableReader<>(Table.STOP_TIMES, + testDataSource, + testNamespace + ".", + EntityPopulator.STOP_TIME); + int index = 0; + for (StopTime stopTime : stopTimesTable.getOrdered(createdTrip.trip_id)) { + LOG.info("stop times i={} arrival={} departure={}", index, stopTime.arrival_time, stopTime.departure_time); + assertThat(stopTime.arrival_time, equalTo(startTime + index * updatedTravelTime)); + index++; + } + // Ensure that the number of updated stop times equals the number of pattern stops. + assertThat(index, equalTo(patternStops.length)); + } + + /** + * This test makes sure that updating the service_id will properly update affected referenced entities. + * This test case was initially developed to prove that https://github.com/conveyal/gtfs-lib/issues/203 is + * happening. + */ + @Test + public void canUpdateServiceId() throws InvalidNamespaceException, IOException, SQLException { + // change the service id + JdbcTableWriter tableWriter = new JdbcTableWriter(Table.CALENDAR, testDataSource, testGtfsGLSnapshotNamespace); + tableWriter.update( + 2, + "{\"id\":2,\"service_id\":\"test\",\"description\":\"MoTuWeThFrSaSu\",\"monday\":1,\"tuesday\":1,\"wednesday\":1,\"thursday\":1,\"friday\":1,\"saturday\":1,\"sunday\":1,\"start_date\":\"20180526\",\"end_date\":\"20201231\"}", + true + ); + + // assert that the number of stop times equals the original number of stop times in the feed + assertThatSqlQueryYieldsRowCount( + String.format( + "select * from %s.%s", + testGtfsGLSnapshotNamespace, + Table.STOP_TIMES.name + ), + 53 + ); + } + + /***************************************************************************************************************** + * End tests, begin helpers + ****************************************************************************************************************/ + + private static String newUUID() { + return UUID.randomUUID().toString(); + } + + private void assertThatSqlQueryYieldsRowCount(String sql, int expectedRowCount) throws SQLException { + LOG.info(sql); + int recordCount = 0; + ResultSet rs = testDataSource.getConnection().prepareStatement(sql).executeQuery(); + while (rs.next()) recordCount++; + assertThat("Records matching query should equal expected count.", recordCount, equalTo(expectedRowCount)); + } + + void assertThatSqlQueryYieldsZeroRows(String sql) throws SQLException { + assertThatSqlQueryYieldsRowCount(sql, 0); + } /** * Construct (without writing to the database) a trip with a frequency entry. */ @@ -504,6 +672,53 @@ private TripDTO constructFrequencyTrip(String patternId, String routeId, int sta return tripInput; } + /** + * Construct (without writing to the database) a timetable trip. + */ + private TripDTO constructTimetableTrip(String patternId, String routeId, int startTime, int travelTime) { + TripDTO tripInput = new TripDTO(); + tripInput.pattern_id = patternId; + tripInput.route_id = routeId; + tripInput.service_id = simpleServiceId; + tripInput.stop_times = new StopTimeDTO[]{ + new StopTimeDTO(firstStopId, startTime, startTime, 0), + new StopTimeDTO(lastStopId, startTime + travelTime, startTime + travelTime, 1) + }; + tripInput.frequencies = new FrequencyDTO[]{}; + return tripInput; + } + + /** + * Creates a pattern by first creating a route and then a pattern for that route.
+ */ + private static PatternDTO createRouteAndPattern(String routeId, String patternId, String name, String shapeId, ShapePointDTO[] shapes, PatternStopDTO[] patternStops, int useFrequency) throws InvalidNamespaceException, SQLException, IOException { + // Create new route + createSimpleTestRoute(routeId, "RTA", "500", "Hollingsworth", 3); + // Create new pattern for route + PatternDTO input = new PatternDTO(); + input.pattern_id = patternId; + input.route_id = routeId; + input.name = name; + input.use_frequency = useFrequency; + input.shape_id = shapeId; + input.shapes = shapes; + input.pattern_stops = patternStops; + // Write the pattern to the database + JdbcTableWriter createPatternWriter = createTestTableWriter(Table.PATTERNS); + String output = createPatternWriter.create(mapper.writeValueAsString(input), true); + LOG.info("create {} output:", Table.PATTERNS.name); + LOG.info(output); + // Parse output + return mapper.readValue(output, PatternDTO.class); + } + + /** + * Creates a pattern by first creating a route and then a pattern for that route. + */ + private static PatternDTO createRouteAndSimplePattern(String routeId, String patternId, String name) throws InvalidNamespaceException, SQLException, IOException { + return createRouteAndPattern(routeId, patternId, name, null, new ShapePointDTO[]{}, new PatternStopDTO[]{}, 0); + } + /** * Create and store a simple stop entity. */ @@ -520,6 +735,26 @@ private static StopDTO createSimpleStop(String stopId, String stopName, double l return mapper.readValue(output, StopDTO.class); } + /** + * Create and store a simple route for testing. + */ + private static RouteDTO createSimpleTestRoute(String routeId, String agencyId, String shortName, String longName, int routeType) throws InvalidNamespaceException, IOException, SQLException { + RouteDTO input = new RouteDTO(); + input.route_id = routeId; + input.agency_id = agencyId; + // Empty value should be permitted for transfers and transfer_duration + input.route_short_name = shortName; + input.route_long_name = longName; + input.route_type = routeType; + // convert object to json and save it + JdbcTableWriter createTableWriter = createTestTableWriter(Table.ROUTES); + String output = createTableWriter.create(mapper.writeValueAsString(input), true); + LOG.info("create {} output:", Table.ROUTES.name); + LOG.info(output); + // parse output + return mapper.readValue(output, RouteDTO.class); + } + /** * Create and store a simple calendar that runs on each weekday. 
*/ @@ -541,9 +776,4 @@ private static CalendarDTO createWeekdayCalendar(String serviceId, String startD LOG.info(output); return mapper.readValue(output, CalendarDTO.class); } - - @AfterClass - public static void tearDownClass() { - TestUtils.dropDB(testDBName); - } } diff --git a/src/test/java/com/conveyal/gtfs/storage/ErrorExpectation.java b/src/test/java/com/conveyal/gtfs/storage/ErrorExpectation.java new file mode 100644 index 000000000..d4ad653ff --- /dev/null +++ b/src/test/java/com/conveyal/gtfs/storage/ErrorExpectation.java @@ -0,0 +1,7 @@ +package com.conveyal.gtfs.storage; + +public class ErrorExpectation extends RecordExpectation { + public ErrorExpectation(String fieldName, String stringExpectation) { + super(fieldName, stringExpectation); + } +} diff --git a/src/test/java/com/conveyal/gtfs/storage/PersistenceExpectation.java b/src/test/java/com/conveyal/gtfs/storage/PersistenceExpectation.java index 1c1c1be7d..60bdc80a9 100644 --- a/src/test/java/com/conveyal/gtfs/storage/PersistenceExpectation.java +++ b/src/test/java/com/conveyal/gtfs/storage/PersistenceExpectation.java @@ -1,7 +1,5 @@ package com.conveyal.gtfs.storage; -import com.conveyal.gtfs.GTFSTest; - /** * A helper class to verify that data got stored in a particular table. */ @@ -20,6 +18,11 @@ public PersistenceExpectation(String tableName, RecordExpectation[] recordExpect this.recordExpectations = recordExpectations; } + public PersistenceExpectation(ErrorExpectation[] errorExpectations) { + this.tableName = "errors"; + this.recordExpectations = errorExpectations; + } + public static PersistenceExpectation[] list (PersistenceExpectation... expectations) { return expectations; } diff --git a/src/test/resources/fake-agency-bad-calendar-date/calendar_dates.txt b/src/test/resources/fake-agency-bad-calendar-date/calendar_dates.txt index 546ad12b5..692bd894c 100755 --- a/src/test/resources/fake-agency-bad-calendar-date/calendar_dates.txt +++ b/src/test/resources/fake-agency-bad-calendar-date/calendar_dates.txt @@ -1,3 +1,7 @@ service_id,date,exception_type 04100312-8fe1-46a5-a9f2-556f39478f57,bad_date,2 -04100312-8fe1-46a5-a9f2-556f39478f57,bad_date2,1 \ No newline at end of file +04100312-8fe1-46a5-a9f2-556f39478f57,bad_date2,1 +123_ID_NOT_EXISTS,20190301,2 +123_ID_NOT_EXISTS,20190301,1 +123_ID_NOT_EXISTS,bad_date,1 +123_ID_NOT_EXISTS,bad_date,2 \ No newline at end of file diff --git a/src/test/resources/fake-agency-overlapping-trips/agency.txt b/src/test/resources/fake-agency-overlapping-trips/agency.txt new file mode 100755 index 000000000..fc4cc2732 --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/agency.txt @@ -0,0 +1,2 @@ +agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url +100000000,Fake Transit,http://example.com,,,,America/Los_Angeles, diff --git a/src/test/resources/fake-agency-overlapping-trips/calendar.txt b/src/test/resources/fake-agency-overlapping-trips/calendar.txt new file mode 100755 index 000000000..3959f46da --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/calendar.txt @@ -0,0 +1,2 @@ +service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date +A,1,1,1,1,1,1,1,20190313,20190917 diff --git a/src/test/resources/fake-agency-overlapping-trips/calendar_dates.txt b/src/test/resources/fake-agency-overlapping-trips/calendar_dates.txt new file mode 100755 index 000000000..74c1ef632 --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/calendar_dates.txt @@ -0,0 +1 @@ 
+service_id,date,exception_type diff --git a/src/test/resources/fake-agency-overlapping-trips/routes.txt b/src/test/resources/fake-agency-overlapping-trips/routes.txt new file mode 100755 index 000000000..8a0f40172 --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/routes.txt @@ -0,0 +1,2 @@ +agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url +100000000,10000000,100,,,3,,7CE6E7,FFFFFF, diff --git a/src/test/resources/fake-agency-overlapping-trips/stop_times.txt b/src/test/resources/fake-agency-overlapping-trips/stop_times.txt new file mode 100755 index 000000000..2ce4c8570 --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/stop_times.txt @@ -0,0 +1,5 @@ +trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type +1A00000,07:00:00,07:00:00,A000000,1,,0,0 +1A00000,07:10:30,07:10:30,B000000,2,,0,0 +2A00000,07:00:00,07:00:00,A000000,1,,0,0 +2A00000,07:02:00,07:02:00,B000000,2,,0,0 diff --git a/src/test/resources/fake-agency-overlapping-trips/stops.txt b/src/test/resources/fake-agency-overlapping-trips/stops.txt new file mode 100755 index 000000000..0f35743a2 --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/stops.txt @@ -0,0 +1,3 @@ +stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding +A000000,,Butler Ln,,37.0612132,-122.0074332,,,0,,, +B000000,,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, diff --git a/src/test/resources/fake-agency-overlapping-trips/trips.txt b/src/test/resources/fake-agency-overlapping-trips/trips.txt new file mode 100755 index 000000000..b1b70c9d7 --- /dev/null +++ b/src/test/resources/fake-agency-overlapping-trips/trips.txt @@ -0,0 +1,3 @@ +route_id,trip_id,direction_id,block_id,bikes_allowed,wheelchair_accessible,service_id +10000000,1A00000,0,BLOCK_1,0,0,A +10000000,2A00000,0,BLOCK_1,0,0,A diff --git a/src/test/resources/fake-agency/stops.txt b/src/test/resources/fake-agency/stops.txt index 8e71f7b2e..ba368047e 100755 --- a/src/test/resources/fake-agency/stops.txt +++ b/src/test/resources/fake-agency/stops.txt @@ -1,3 +1,5 @@ stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding 4u6g,,Butler Ln,,37.0612132,-122.0074332,,,0,,, johv,,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,, +123,,Parent Station,,37.0666,-122.0777,,,1,,, +1234,,Child Stop,,37.06662,-122.07772,,,0,123,, \ No newline at end of file diff --git a/src/test/resources/graphql/feedStopWithChildren.txt b/src/test/resources/graphql/feedStopWithChildren.txt new file mode 100644 index 000000000..39094eea6 --- /dev/null +++ b/src/test/resources/graphql/feedStopWithChildren.txt @@ -0,0 +1,13 @@ +query ($namespace: String, $stop_id: [String]) { + feed(namespace: $namespace) { + feed_version + stops(stop_id: $stop_id) { + stop_id + stop_name + child_stops { + stop_id + stop_name + } + } + } +} \ No newline at end of file diff --git a/src/test/resources/real-world-gtfs-feeds/README.md b/src/test/resources/real-world-gtfs-feeds/README.md new file mode 100644 index 000000000..4ba0ffdc1 --- /dev/null +++ b/src/test/resources/real-world-gtfs-feeds/README.md @@ -0,0 +1 @@ +This package contains real-life GTFS feeds. 
While it's best to reduce data to the simplest form so problems can be isolated more easily, adding these feeds can be very useful for creating test cases that prove bugs exist in certain edge cases. diff --git a/src/test/resources/real-world-gtfs-feeds/gtfs_GL.zip b/src/test/resources/real-world-gtfs-feeds/gtfs_GL.zip new file mode 100644 index 000000000..3d2704172 Binary files /dev/null and b/src/test/resources/real-world-gtfs-feeds/gtfs_GL.zip differ diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json index 274b5b3f3..b3ba1911d 100644 --- a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json @@ -25,12 +25,28 @@ "error_id" : 2, "error_type" : "FEED_TRAVEL_TIMES_ROUNDED", "line_number" : null + }, { + "bad_value" : "123", + "entity_id" : "123", + "entity_sequence" : null, + "entity_type" : "Stop", + "error_id" : 3, + "error_type" : "STOP_UNUSED", + "line_number" : 4 + }, { + "bad_value" : "1234", + "entity_id" : "1234", + "entity_sequence" : null, + "entity_type" : "Stop", + "error_id" : 4, + "error_type" : "STOP_UNUSED", + "line_number" : 5 }, { "bad_value" : "20170916", "entity_id" : null, "entity_sequence" : null, "entity_type" : null, - "error_id" : 3, + "error_id" : 5, "error_type" : "DATE_NO_SERVICE", "line_number" : null } ], diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json index dcca06a39..d941c3b59 100644 --- a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json @@ -6,10 +6,10 @@ "agency" : 1, "calendar" : 1, "calendar_dates" : 1, - "errors" : 4, + "errors" : 6, "routes" : 1, "stop_times" : 4, - "stops" : 2, + "stops" : 4, "trips" : 2 }, "snapshot_of" : null diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchNestedEntityWithLimit.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchNestedEntityWithLimit.json index c0b299ffa..1d0933aa9 100644 --- a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchNestedEntityWithLimit.json +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchNestedEntityWithLimit.json @@ -16,6 +16,12 @@ "stop_sequence" : 2, "trip_id" : "a30277f8-e50a-4a85-9141-b1e0da9d429d" } ] + }, { + "stop_id" : "123", + "stop_times" : [ ] + }, { + "stop_id" : "1234", + "stop_times" : [ ] } ] } } diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStopWithChildren.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStopWithChildren.json new file mode 100644 index 000000000..d8b0084c3 --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStopWithChildren.json @@ -0,0 +1,27 @@ +{ + "data" : { + "feed" : { + "feed_version" : "1.0", + "stops" : [ { + "child_stops" : [ ], + "stop_id" : "4u6g", + "stop_name" : "Butler Ln" + }, { + "child_stops" : [ ], + "stop_id" : "johv", + "stop_name" : "Scotts Valley Dr & Victor Sq" + }, { + "child_stops" : [ { + 
"stop_id" : "1234", + "stop_name" : "Child Stop" + } ], + "stop_id" : "123", + "stop_name" : "Parent Station" + }, { + "child_stops" : [ ], + "stop_id" : "1234", + "stop_name" : "Child Stop" + } ] + } + } +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStops.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStops.json index 8a248661f..eb640d0e3 100644 --- a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStops.json +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchStops.json @@ -62,6 +62,42 @@ "stop_url" : null, "wheelchair_boarding" : null, "zone_id" : null + }, { + "id" : 4, + "location_type" : 1, + "parent_station" : null, + "patterns" : [ ], + "routes" : [ ], + "stop_code" : null, + "stop_desc" : null, + "stop_id" : "123", + "stop_lat" : 37.0666, + "stop_lon" : -122.0777, + "stop_name" : "Parent Station", + "stop_time_count" : 0, + "stop_times" : [ ], + "stop_timezone" : null, + "stop_url" : null, + "wheelchair_boarding" : null, + "zone_id" : null + }, { + "id" : 5, + "location_type" : 0, + "parent_station" : "123", + "patterns" : [ ], + "routes" : [ ], + "stop_code" : null, + "stop_desc" : null, + "stop_id" : "1234", + "stop_lat" : 37.06662, + "stop_lon" : -122.07772, + "stop_name" : "Child Stop", + "stop_time_count" : 0, + "stop_times" : [ ], + "stop_timezone" : null, + "stop_url" : null, + "wheelchair_boarding" : null, + "zone_id" : null } ] } }