diff --git a/.travis.yml b/.travis.yml index 987c3cb8e..518f41399 100644 --- a/.travis.yml +++ b/.travis.yml @@ -65,4 +65,4 @@ notifications: # Push results to codecov.io and run semantic-release (releases only created on pushes to the master branch). after_success: - bash <(curl -s https://codecov.io/bash) - - semantic-release --prepare @conveyal/maven-semantic-release --publish @semantic-release/github,@conveyal/maven-semantic-release --verify-conditions @semantic-release/github --verify-release @conveyal/maven-semantic-release + - semantic-release --prepare @conveyal/maven-semantic-release --publish @semantic-release/github,@conveyal/maven-semantic-release --verify-conditions @semantic-release/github,@conveyal/maven-semantic-release --verify-release @conveyal/maven-semantic-release --use-conveyal-workflow --dev-branch=dev diff --git a/README.md b/README.md index ba5169000..1daff3de6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Based on observations over several years of experience using the OneBusAway GTFS The main design goals are: -- Avoid all reflection tricks and work imperatively even if it is a bit verbose +- Avoid all reflection tricks and use imperative code style even if it is a bit verbose - Allow loading and processing GTFS feeds (much) bigger than available memory - Introduce the concept of feed IDs, and do not use agency IDs for this purpose. - Perform extensive syntax and semantic validation of feed contents @@ -197,4 +197,4 @@ or the optional directory specified with the `--json` option. 
"validationTime" : 16319 } -``` \ No newline at end of file +``` diff --git a/pom.xml b/pom.xml index caac53044..d91327cd4 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ com.conveyal gtfs-lib - 4.0.1-SNAPSHOT + 4.1.3-SNAPSHOT jar @@ -353,7 +353,7 @@ com.graphql-java graphql-java - 4.2 + 11.0 diff --git a/src/main/java/com/conveyal/gtfs/GTFS.java b/src/main/java/com/conveyal/gtfs/GTFS.java index 11c8f951b..32d1147c3 100644 --- a/src/main/java/com/conveyal/gtfs/GTFS.java +++ b/src/main/java/com/conveyal/gtfs/GTFS.java @@ -185,7 +185,7 @@ public static void main (String[] args) throws IOException { try { cmd = new DefaultParser().parse(options, args); } catch (ParseException e) { - LOG.error("Error parsing command line: " + e.getMessage()); + LOG.error("Error parsing command line", e); printHelp(options); return; } diff --git a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java index 8fa452d46..02fc5075d 100644 --- a/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java +++ b/src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java @@ -36,6 +36,7 @@ public enum NewGTFSErrorType { TABLE_TOO_LONG(Priority.MEDIUM, "Table is too long to record line numbers with a 32-bit integer, overflow will occur."), TIME_ZONE_FORMAT(Priority.MEDIUM, "Time zone format should be X."), REQUIRED_TABLE_EMPTY(Priority.MEDIUM, "This table is required by the GTFS specification but is empty."), + FEED_TRAVEL_TIMES_ROUNDED(Priority.LOW, "All travel times in the feed are rounded to the minute, which may cause unexpected results in routing applications where travel times are zero."), ROUTE_DESCRIPTION_SAME_AS_NAME(Priority.LOW, "The description of a route is identical to its name, so does not add any information."), ROUTE_LONG_NAME_CONTAINS_SHORT_NAME(Priority.LOW, "The long name of a route should complement the short name, not include it."), ROUTE_SHORT_AND_LONG_NAME_MISSING(Priority.MEDIUM, "A route has neither a 
long nor a short name."), diff --git a/src/main/java/com/conveyal/gtfs/error/SQLErrorStorage.java b/src/main/java/com/conveyal/gtfs/error/SQLErrorStorage.java index f523ede6c..99b8a0df1 100644 --- a/src/main/java/com/conveyal/gtfs/error/SQLErrorStorage.java +++ b/src/main/java/com/conveyal/gtfs/error/SQLErrorStorage.java @@ -11,6 +11,7 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.Map; +import java.util.Set; import static com.conveyal.gtfs.util.Util.ensureValidNamespace; @@ -86,6 +87,12 @@ public void storeError (NewGTFSError error) { } } + public void storeErrors (Set errors) { + for (NewGTFSError error : errors) { + storeError(error); + } + } + /** * Commits any outstanding error inserts and returns the error count via a SQL query. */ diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java index 23ff013a7..b5702b85e 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLGtfsSchema.java @@ -119,6 +119,7 @@ public class GraphQLGtfsSchema { public static final GraphQLObjectType fareType = newObject().name("fare_attributes") .description("A GTFS agency object") .field(MapFetcher.field("id", GraphQLInt)) + .field(MapFetcher.field("agency_id")) .field(MapFetcher.field("fare_id")) .field(MapFetcher.field("price", GraphQLFloat)) .field(MapFetcher.field("currency_type")) @@ -269,6 +270,7 @@ public class GraphQLGtfsSchema { .field(MapFetcher.field("wheelchair_accessible")) .field(MapFetcher.field("publicly_visible", GraphQLInt)) .field(MapFetcher.field("status", GraphQLInt)) + .field(MapFetcher.field("route_sort_order")) // FIXME ^^ .field(RowCountFetcher.field("trip_count", "trips", "route_id")) .field(RowCountFetcher.field("pattern_count", "patterns", "route_id")) @@ -347,6 +349,7 @@ public class GraphQLGtfsSchema { .type(new GraphQLList(routeType)) .argument(stringArg("namespace")) 
.argument(stringArg(SEARCH_ARG)) + .argument(intArg(LIMIT_ARG)) .dataFetcher(new NestedJDBCFetcher( new JDBCFetcher("pattern_stops", "stop_id", null, false), new JDBCFetcher("patterns", "pattern_id", null, false), diff --git a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java index d74180a82..5002e5ccc 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GraphQLUtil.java @@ -1,9 +1,9 @@ package com.conveyal.gtfs.graphql; -import graphql.schema.FieldDataFetcher; import graphql.schema.GraphQLArgument; import graphql.schema.GraphQLFieldDefinition; import graphql.schema.GraphQLList; +import graphql.schema.PropertyDataFetcher; import static graphql.Scalars.GraphQLFloat; import static graphql.Scalars.GraphQLInt; @@ -17,7 +17,7 @@ public static GraphQLFieldDefinition string (String name) { return newFieldDefinition() .name(name) .type(GraphQLString) - .dataFetcher(new FieldDataFetcher(name)) + .dataFetcher(new PropertyDataFetcher(name)) .build(); } @@ -25,7 +25,7 @@ public static GraphQLFieldDefinition intt (String name) { return newFieldDefinition() .name(name) .type(GraphQLInt) - .dataFetcher(new FieldDataFetcher(name)) + .dataFetcher(new PropertyDataFetcher(name)) .build(); } diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java index 4e505315b..5b5386529 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/FeedFetcher.java @@ -14,6 +14,8 @@ import java.util.HashMap; import java.util.Map; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.validateNamespace; + /** * Fetch the summary row for a particular loaded feed, based on its namespace. * This essentially gets the row from the top-level summary table of all feeds that have been loaded into the database. 
@@ -25,6 +27,7 @@ public class FeedFetcher implements DataFetcher { @Override public Map get (DataFetchingEnvironment environment) { String namespace = environment.getArgument("namespace"); // This is the unique table prefix (the "schema"). + validateNamespace(namespace); StringBuilder sqlBuilder = new StringBuilder(); sqlBuilder.append(String.format("select * from feeds where namespace = '%s'", namespace)); Connection connection = null; diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java index 6ab6c90e9..3eac44404 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/JDBCFetcher.java @@ -167,12 +167,16 @@ public List> get (DataFetchingEnvironment environment) { * Handle fetching functionality for a given namespace, set of join values, and arguments. This is broken out from * the standard get function so that it can be reused in other fetchers (i.e., NestedJdbcFetcher) */ - List> getResults (String namespace, List parentJoinValues, Map arguments) { + List> getResults ( + String namespace, + List parentJoinValues, + Map graphQLQueryArguments + ) { // Track the parameters for setting prepared statement parameters - List parameters = new ArrayList<>(); + List preparedStatementParameters = new ArrayList<>(); // This will contain one Map for each row fetched from the database table. List> results = new ArrayList<>(); - if (arguments == null) arguments = new HashMap<>(); + if (graphQLQueryArguments == null) graphQLQueryArguments = new HashMap<>(); // Ensure namespace exists and is clean. Note: FeedFetcher will have executed before this and validated that an // entry exists in the feeds table and the schema actually exists in the database. 
validateNamespace(namespace); @@ -188,31 +192,32 @@ List> getResults (String namespace, List parentJoinV sqlBuilder.append("select *"); // We will build up additional sql clauses in this List (note: must be a List so that the order is preserved). - List conditions = new ArrayList<>(); + List whereConditions = new ArrayList<>(); // The order by clause will go here. String sortBy = ""; // If we are fetching an item nested within a GTFS entity in the GraphQL query, we want to add an SQL "where" // clause. This could conceivably be done automatically, but it's clearer to just express the intent. // Note, this is assuming the type of the field in the parent is a string. if (parentJoinField != null && parentJoinValues != null && !parentJoinValues.isEmpty()) { - conditions.add(makeInClause(parentJoinField, parentJoinValues, parameters)); + whereConditions.add(makeInClause(parentJoinField, parentJoinValues, preparedStatementParameters)); } if (sortField != null) { // Sort field is not provided by user input, so it's ok to add here (i.e., it's not prone to SQL injection). sortBy = String.format(" order by %s", sortField); } - Set argumentKeys = arguments.keySet(); + Set argumentKeys = graphQLQueryArguments.keySet(); for (String key : argumentKeys) { // The pagination, bounding box, and date/time args should all be skipped here because they are handled // separately below from standard args (pagination becomes limit/offset clauses, bounding box applies to // stops table, and date/time args filter stop times. All other args become "where X in A, B, C" clauses. 
if (argsToSkip.contains(key)) continue; if (ID_ARG.equals(key)) { - Integer value = (Integer) arguments.get(key); - conditions.add(String.join(" = ", "id", value.toString())); + Integer value = (Integer) graphQLQueryArguments.get(key); + whereConditions.add(String.join(" = ", "id", value.toString())); } else { - List values = (List) arguments.get(key); - if (values != null && !values.isEmpty()) conditions.add(makeInClause(key, values, parameters)); + List values = (List) graphQLQueryArguments.get(key); + if (values != null && !values.isEmpty()) + whereConditions.add(makeInClause(key, values, preparedStatementParameters)); } } if (argumentKeys.containsAll(boundingBoxArgs)) { @@ -222,7 +227,7 @@ List> getResults (String namespace, List parentJoinV // operating on the patterns table, a SELECT DISTINCT patterns query will be constructed with a join to // stops and pattern stops. for (String bound : boundingBoxArgs) { - Double value = (Double) arguments.get(bound); + Double value = (Double) graphQLQueryArguments.get(bound); // Determine delimiter/equality operator based on min/max String delimiter = bound.startsWith("max") ? 
" <= " : " >= "; // Determine field based on lat/lon @@ -232,7 +237,7 @@ List> getResults (String namespace, List parentJoinV boundsConditions.add(String.join(delimiter, fieldWithNamespace, value.toString())); } if ("stops".equals(tableName)) { - conditions.addAll(boundsConditions); + whereConditions.addAll(boundsConditions); } else if ("patterns".equals(tableName)) { // Add from table as unique_pattern_ids_in_bounds to match patterns table -> pattern stops -> stops fromTables.add( @@ -243,7 +248,7 @@ List> getResults (String namespace, List parentJoinV String.join(" and ", boundsConditions), namespace )); - conditions.add(String.format("%s.patterns.pattern_id = unique_pattern_ids_in_bounds.pattern_id", namespace)); + whereConditions.add(String.format("%s.patterns.pattern_id = unique_pattern_ids_in_bounds.pattern_id", namespace)); } } if (argumentKeys.contains(DATE_ARG)) { @@ -253,7 +258,7 @@ List> getResults (String namespace, List parentJoinV // service_dates table. In other words, feeds generated by the editor cannot be queried with the date/time args. String tripsTable = String.format("%s.trips", namespace); fromTables.add(tripsTable); - String date = getDateArgument(arguments); + String date = getDateArgument(graphQLQueryArguments); // Gather all service IDs that run on the provided date. fromTables.add(String.format( "(select distinct service_id from %s.service_dates where service_date = ?) as unique_service_ids_in_operation", @@ -261,11 +266,11 @@ List> getResults (String namespace, List parentJoinV ); // Add date to beginning of parameters list (it is used to pre-select a table in the from clause before any // other conditions or parameters are appended). - parameters.add(0, date); + preparedStatementParameters.add(0, date); if (argumentKeys.contains(FROM_ARG) && argumentKeys.contains(TO_ARG)) { // Determine which trips start in the specified time window by joining to filtered stop times. 
String timeFilteredTrips = "trips_beginning_in_time_period"; - conditions.add(String.format("%s.trip_id = %s.trip_id", timeFilteredTrips, tripsTable)); + whereConditions.add(String.format("%s.trip_id = %s.trip_id", timeFilteredTrips, tripsTable)); // Select all trip IDs that start during the specified time window. Note: the departure and arrival times // are divided by 86399 to account for trips that begin after midnight. FIXME: Should this be 86400? fromTables.add(String.format( @@ -273,16 +278,16 @@ List> getResults (String namespace, List parentJoinV "from (select distinct on (trip_id) * from %s.stop_times order by trip_id, stop_sequence) as first_stop_times " + "where departure_time %% 86399 >= %d and departure_time %% 86399 <= %d) as %s", namespace, - (int) arguments.get(FROM_ARG), - (int) arguments.get(TO_ARG), + (int) graphQLQueryArguments.get(FROM_ARG), + (int) graphQLQueryArguments.get(TO_ARG), timeFilteredTrips)); } // Join trips to service_dates (unique_service_ids_in_operation). - conditions.add(String.format("%s.service_id = unique_service_ids_in_operation.service_id", tripsTable)); + whereConditions.add(String.format("%s.service_id = unique_service_ids_in_operation.service_id", tripsTable)); } if (argumentKeys.contains(SEARCH_ARG)) { // Handle string search argument - String value = (String) arguments.get(SEARCH_ARG); + String value = (String) graphQLQueryArguments.get(SEARCH_ARG); if (!value.isEmpty()) { // Only apply string search if string is not empty. Set searchFields = getSearchFields(namespace); @@ -292,23 +297,23 @@ List> getResults (String namespace, List parentJoinV // FIXME: is ILIKE compatible with non-Postgres? LIKE doesn't work well enough (even when setting // the strings to lower case). 
searchClauses.add(String.format("%s ILIKE ?", field)); - parameters.add(String.format("%%%s%%", value)); + preparedStatementParameters.add(String.format("%%%s%%", value)); } if (!searchClauses.isEmpty()) { // Wrap string search in parentheses to isolate from other conditions. - conditions.add(String.format(("(%s)"), String.join(" OR ", searchClauses))); + whereConditions.add(String.format(("(%s)"), String.join(" OR ", searchClauses))); } } } sqlBuilder.append(String.format(" from %s", String.join(", ", fromTables))); - if (!conditions.isEmpty()) { + if (!whereConditions.isEmpty()) { sqlBuilder.append(" where "); - sqlBuilder.append(String.join(" and ", conditions)); + sqlBuilder.append(String.join(" and ", whereConditions)); } // The default value for sortBy is an empty string, so it's safe to always append it here. Also, there is no // threat of SQL injection because the sort field value is not user input. sqlBuilder.append(sortBy); - Integer limit = (Integer) arguments.get(LIMIT_ARG); + Integer limit = (Integer) graphQLQueryArguments.get(LIMIT_ARG); if (limit == null) { limit = autoLimit ? 
DEFAULT_ROWS_TO_FETCH : -1; } @@ -322,7 +327,7 @@ List> getResults (String namespace, List parentJoinV } else { sqlBuilder.append(" limit ").append(limit); } - Integer offset = (Integer) arguments.get(OFFSET_ARG); + Integer offset = (Integer) graphQLQueryArguments.get(OFFSET_ARG); if (offset != null && offset >= 0) { sqlBuilder.append(" offset ").append(offset); } @@ -331,7 +336,7 @@ List> getResults (String namespace, List parentJoinV connection = GTFSGraphQL.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(sqlBuilder.toString()); int oneBasedIndex = 1; - for (String parameter : parameters) { + for (String parameter : preparedStatementParameters) { preparedStatement.setString(oneBasedIndex++, parameter); } // This logging produces a lot of noise during testing due to large numbers of joined sub-queries @@ -381,7 +386,7 @@ private static String getDateArgument(Map arguments) { * @param namespace database schema namespace/table prefix * @return */ - private static void validateNamespace(String namespace) { + public static void validateNamespace(String namespace) { if (namespace == null) { // If namespace is null, do no attempt a query on a namespace that does not exist. throw new IllegalArgumentException("Namespace prefix must be provided."); @@ -437,7 +442,7 @@ private Set getSearchFields(String namespace) { /** * Construct filter clause with '=' (single string) and add values to list of parameters. * */ - static String makeInClause(String filterField, String string, List parameters) { + static String filterEquals(String filterField, String string, List parameters) { // Add string to list of parameters (to be later used to set parameters for prepared statement). 
parameters.add(string); return String.format("%s = ?", filterField); @@ -448,7 +453,7 @@ static String makeInClause(String filterField, String string, List param * */ static String makeInClause(String filterField, List strings, List parameters) { if (strings.size() == 1) { - return makeInClause(filterField, strings.get(0), parameters); + return filterEquals(filterField, strings.get(0), parameters); } else { // Add strings to list of parameters (to be later used to set parameters for prepared statement). parameters.addAll(strings); diff --git a/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java b/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java index e1a38a1aa..2c7b45e74 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java +++ b/src/main/java/com/conveyal/gtfs/graphql/fetchers/RowCountFetcher.java @@ -15,7 +15,6 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -23,7 +22,7 @@ import static com.conveyal.gtfs.graphql.GraphQLUtil.intt; import static com.conveyal.gtfs.graphql.GraphQLUtil.string; import static com.conveyal.gtfs.graphql.GraphQLUtil.stringArg; -import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.makeInClause; +import static com.conveyal.gtfs.graphql.fetchers.JDBCFetcher.filterEquals; import static graphql.Scalars.GraphQLInt; import static graphql.schema.GraphQLFieldDefinition.newFieldDefinition; import static graphql.schema.GraphQLObjectType.newObject; @@ -71,8 +70,13 @@ public Object get(DataFetchingEnvironment environment) { // FIXME Does this handle null cases? // Add where clause to filter out non-matching results. 
String filterValue = (String) parentFeedMap.get(filterField); - String inClause = makeInClause(filterField, filterValue, parameters); - clauses.add(String.join(" ", "where", inClause)); + clauses.add( + String.join( + " ", + "where", + filterEquals(filterField, filterValue, parameters) + ) + ); } else if (groupByField != null) { // Handle group by field and optionally handle any filter arguments passed in. if (!argKeys.isEmpty()) { @@ -86,8 +90,13 @@ public Object get(DataFetchingEnvironment environment) { } String filterValue = (String) arguments.get(groupedFilterField); if (filterValue != null) { - String inClause = makeInClause(groupedFilterField, filterValue, parameters); - clauses.add(String.join(" ", "where", inClause)); + clauses.add( + String.join( + " ", + "where", + filterEquals(groupedFilterField, filterValue, parameters) + ) + ); } } // Finally, add group by clause. diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index b777a3997..0a536b5ce 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -129,11 +129,20 @@ public FeedLoadResult loadTables () { this.tablePrefix = randomIdString(); result.filename = gtfsFilePath; result.uniqueIdentifier = tablePrefix; - registerFeed(gtfsFile); - // Include the dot separator in the table prefix. - // This allows everything to work even when there's no prefix. - this.tablePrefix += "."; - this.errorStorage = new SQLErrorStorage(connection, tablePrefix, true); + + //The order of the following four lines should not be changed because the schema needs to be in place + //before the error storage can be constructed, which in turn needs to exist in case any errors are + //encountered during the loading process. + { + createSchema(connection, tablePrefix); + //the SQLErrorStorage constructor expects the tablePrefix to contain the dot separator. 
+ this.errorStorage = new SQLErrorStorage(connection, tablePrefix + ".", true); + //registerFeed accesses this.tablePrefix which shouldn't contain the dot separator. + registerFeed(gtfsFile); + // Include the dot separator in the table prefix from this point onwards. + // This allows everything to work even when there's no prefix. + this.tablePrefix += "."; + } this.referenceTracker = new ReferenceTracker(errorStorage); // Load each table in turn, saving some summary information about what happened during each table load result.agency = load(Table.AGENCY); @@ -164,6 +173,27 @@ public FeedLoadResult loadTables () { } return result; } + + /** + * Creates a schema/namespace in the database. + * This does *not* setup any other tables or enter the schema name in a registry (@see #registerFeed). + * + * @param connection Connection to the database to create the schema on. + * @param schemaName Name of the schema (i.e. table prefix). Should not include the dot suffix. + */ + private static void createSchema (Connection connection, String schemaName) { + try { + Statement statement = connection.createStatement(); + // FIXME do the following only on databases that support schemas. + // SQLite does not support them. Is there any advantage of schemas over flat tables? + statement.execute("create schema " + schemaName); + connection.commit(); + LOG.info("Created new feed schema: {}", statement); + } catch (Exception ex) { + LOG.error("Exception while registering new feed namespace in feeds table: {}", ex.getMessage()); + DbUtils.closeQuietly(connection); + } + } /** * Add a line to the list of loaded feeds showing that this feed has been loaded. @@ -204,9 +234,6 @@ private void registerFeed (File gtfsFile) { // TODO try to get the feed_id and feed_version out of the feed_info table // statement.execute("select * from feed_info"); - // FIXME do the following only on databases that support schemas. - // SQLite does not support them. 
Is there any advantage of schemas over flat tables? - statement.execute("create schema " + tablePrefix); // current_timestamp seems to be the only standard way to get the current time across all common databases. // Record total load processing time? statement.execute("create table if not exists feeds (namespace varchar primary key, md5 varchar, " + diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java index 2ae00d35c..415df8aa5 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsSnapshotter.java @@ -2,6 +2,9 @@ import com.conveyal.gtfs.model.Calendar; import com.conveyal.gtfs.model.CalendarDate; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; import org.apache.commons.dbutils.DbUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -13,10 +16,8 @@ import java.sql.SQLException; import java.sql.Statement; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; import static com.conveyal.gtfs.util.Util.randomIdString; @@ -209,13 +210,12 @@ private TableLoadResult createScheduleExceptionsTable() { ); Iterable calendarDates = calendarDatesReader.getAll(); - // keep track of calendars by service id in case we need to add dummy calendar entries + // Keep track of calendars by service id in case we need to add dummy calendar entries. Map calendarsByServiceId = new HashMap<>(); - // iterate through calendar dates to build up to get list of dates with exceptions - HashMap> removedServiceDays = new HashMap<>(); - HashMap> addedServiceDays = new HashMap<>(); - List datesWithExceptions = new ArrayList<>(); + // Iterate through calendar dates to build up to get maps from exceptions to their dates. 
+ Multimap removedServiceForDate = HashMultimap.create(); + Multimap addedServiceForDate = HashMultimap.create(); for (CalendarDate calendarDate : calendarDates) { // Skip any null dates if (calendarDate.date == null) { @@ -223,12 +223,8 @@ private TableLoadResult createScheduleExceptionsTable() { continue; } String date = calendarDate.date.format(DateTimeFormatter.BASIC_ISO_DATE); - datesWithExceptions.add(date); if (calendarDate.exception_type == 1) { - List dateAddedServices = addedServiceDays.getOrDefault(date, new ArrayList<>()); - dateAddedServices.add(calendarDate.service_id); - addedServiceDays.put(date, dateAddedServices); - + addedServiceForDate.put(date, calendarDate.service_id); // create (if needed) and extend range of dummy calendar that would need to be created if we are // copying from a feed that doesn't have the calendar.txt file Calendar calendar = calendarsByServiceId.getOrDefault(calendarDate.service_id, new Calendar()); @@ -241,33 +237,24 @@ private TableLoadResult createScheduleExceptionsTable() { } calendarsByServiceId.put(calendarDate.service_id, calendar); } else { - List dateRemovedServices = removedServiceDays.getOrDefault(date, new ArrayList<>()); - dateRemovedServices.add(calendarDate.service_id); - removedServiceDays.put(date, dateRemovedServices); + removedServiceForDate.put(date, calendarDate.service_id); } } - - // iterate through days and add to database - // for usability and simplicity of code, don't attempt to find all dates with similar - // added and removed services, but simply create an entry for each found date - for (String dateWithException : datesWithExceptions) { - scheduleExceptionsStatement.setString(1, dateWithException); - String[] dates = {dateWithException}; + // Iterate through dates with added or removed service and add to database. + // For usability and simplicity of code, don't attempt to find all dates with similar + // added and removed services, but simply create an entry for each found date. 
+ for (String date : Sets.union(removedServiceForDate.keySet(), addedServiceForDate.keySet())) { + scheduleExceptionsStatement.setString(1, date); + String[] dates = {date}; scheduleExceptionsStatement.setArray(2, connection.createArrayOf("text", dates)); scheduleExceptionsStatement.setInt(3, 9); // FIXME use better static type scheduleExceptionsStatement.setArray( 4, - connection.createArrayOf( - "text", - addedServiceDays.getOrDefault(dateWithException, new ArrayList<>()).toArray() - ) + connection.createArrayOf("text", addedServiceForDate.get(date).toArray()) ); scheduleExceptionsStatement.setArray( 5, - connection.createArrayOf( - "text", - removedServiceDays.getOrDefault(dateWithException, new ArrayList<>()).toArray() - ) + connection.createArrayOf("text", removedServiceForDate.get(date).toArray()) ); scheduleExceptionsTracker.addBatch(); } diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 616f494d3..9c6b94b7b 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -695,7 +695,14 @@ public boolean isRequired () { * FIXME: add foreign reference indexes? */ public void createIndexes(Connection connection, String namespace) throws SQLException { - LOG.info("Indexing..."); + if ("agency".equals(name) || "feed_info".equals(name)) { + // Skip indexing for the small tables that have so few records that indexes are unlikely to + // improve query performance or that are unlikely to be joined to other tables. NOTE: other tables could be + // added here in the future as needed. 
+ LOG.info("Skipping indexes for {} table", name); + return; + } + LOG.info("Indexing {}...", name); String tableName; if (namespace == null) { throw new IllegalStateException("Schema namespace must be provided!"); diff --git a/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java b/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java index 5ab5b2ed1..3b1639c26 100644 --- a/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/ServiceValidator.java @@ -189,6 +189,9 @@ select durations.service_id, duration_seconds, days_active from ( if (date.isAfter(lastDate)) lastDate = date; } // Copy some useful information into the ValidationResult object to return to the caller. + // These variables are actually not directly tied to data in the calendar_dates.txt file. Instead, they + // represent the first and last date respectively of any entry in the calendar.txt and calendar_dates.txt + // files. validationResult.firstCalendarDate = firstDate; validationResult.lastCalendarDate = lastDate; // Is this any different? 
firstDate.until(lastDate, ChronoUnit.DAYS); diff --git a/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java b/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java index 72c22fdf1..46bc39d14 100644 --- a/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java @@ -1,14 +1,16 @@ package com.conveyal.gtfs.validator; +import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.SQLErrorStorage; import com.conveyal.gtfs.loader.Feed; import com.conveyal.gtfs.model.Route; -import com.conveyal.gtfs.model.ShapePoint; import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.model.StopTime; import com.conveyal.gtfs.model.Trip; +import java.util.HashSet; import java.util.List; +import java.util.Set; import static com.conveyal.gtfs.error.NewGTFSErrorType.*; import static com.conveyal.gtfs.util.Util.fastDistance; @@ -20,6 +22,8 @@ public class SpeedTripValidator extends TripValidator { public static final double MIN_SPEED_KPH = 0.5; + private boolean allTravelTimesAreRounded = true; + private Set travelTimeZeroErrors = new HashSet<>(); public SpeedTripValidator(Feed feed, SQLErrorStorage errorStorage) { super(feed, errorStorage); @@ -53,9 +57,14 @@ public void validateTrip(Trip trip, Route route, List stopTimes, List< if (currStopTime.departure_time < currStopTime.arrival_time) { registerError(currStopTime, DEPARTURE_BEFORE_ARRIVAL); } - // TODO detect the case where all travel times are rounded off to minutes (i.e. tt % 60 == 0) - // In that case determine the maximum and minimum possible speed by adding/removing one minute of slack. + // Detect if travel times are rounded off to minutes. 
+ boolean bothTravelTimesRounded = areTravelTimesRounded(prevStopTime); double travelTimeSeconds = currStopTime.arrival_time - prevStopTime.departure_time; + // If travel times are rounded and travel time is zero, determine the maximum and minimum possible speed + // by adding/removing one minute of slack. + if (bothTravelTimesRounded && travelTimeSeconds == 0) { + travelTimeSeconds += 60; + } if (checkDistanceAndTime(distanceMeters, travelTimeSeconds, currStopTime)) { // If distance and time are OK, we've got valid numbers to calculate a travel speed. double kph = (distanceMeters / 1000D) / (travelTimeSeconds / 60D / 60D); @@ -73,6 +82,26 @@ public void validateTrip(Trip trip, Route route, List stopTimes, List< } } + /** + * Completing this feed validator means checking if there were any unrounded travel times in the feed and (if so) + * registering any zero travel time errors that were passed over before the first unrounded travel time was + * encountered. If in fact all travel times are rounded to the minute, store a special feed-wide error in this case. + */ + public void complete (ValidationResult validationResult) { + if (!allTravelTimesAreRounded) storeErrors(travelTimeZeroErrors); + else registerError(NewGTFSError.forFeed(FEED_TRAVEL_TIMES_ROUNDED, null)); + } + + /** + * Check that arrival and departure time for a stop time are rounded to the minute and update + * {@link #allTravelTimesAreRounded} accordingly. + */ + private boolean areTravelTimesRounded(StopTime stopTime) { + boolean bothTravelTimesAreRounded = stopTime.departure_time % 60 == 0 && stopTime.arrival_time % 60 == 0; + if (!bothTravelTimesAreRounded) this.allTravelTimesAreRounded = false; + return bothTravelTimesAreRounded; + } + /** * This just pulls some of the range checking logic out of the main trip checking loop so it's more readable. 
* @return true if all values are OK @@ -88,7 +117,10 @@ private boolean checkDistanceAndTime (double distanceMeters, double travelTimeSe registerError(stopTime, TRAVEL_TIME_NEGATIVE, travelTimeSeconds); good = false; } else if (travelTimeSeconds == 0) { - registerError(stopTime, TRAVEL_TIME_ZERO); + // Only register the travel time zero error if not all travel times are rounded. Otherwise, hold onto the + // error in the travelTimeZeroErrors collection until the completion of this validator. + if (!allTravelTimesAreRounded) registerError(stopTime, TRAVEL_TIME_ZERO); + else travelTimeZeroErrors.add(createUnregisteredError(stopTime, TRAVEL_TIME_ZERO)); good = false; } return good; @@ -97,7 +129,7 @@ private boolean checkDistanceAndTime (double distanceMeters, double travelTimeSe /** * @return max speed in km/hour. */ - private double getMaxSpeedKph (Route route) { + private static double getMaxSpeedKph (Route route) { int type = -1; if (route != null) type = route.route_type; switch (type) { diff --git a/src/main/java/com/conveyal/gtfs/validator/TripValidator.java b/src/main/java/com/conveyal/gtfs/validator/TripValidator.java index d0164bd84..296742568 100644 --- a/src/main/java/com/conveyal/gtfs/validator/TripValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/TripValidator.java @@ -1,10 +1,8 @@ package com.conveyal.gtfs.validator; -import com.conveyal.gtfs.error.NewGTFSError; import com.conveyal.gtfs.error.SQLErrorStorage; import com.conveyal.gtfs.loader.Feed; import com.conveyal.gtfs.model.Route; -import com.conveyal.gtfs.model.ShapePoint; import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.model.StopTime; import com.conveyal.gtfs.model.Trip; diff --git a/src/main/java/com/conveyal/gtfs/validator/ValidationResult.java b/src/main/java/com/conveyal/gtfs/validator/ValidationResult.java index 51bc514b1..d79a52444 100644 --- a/src/main/java/com/conveyal/gtfs/validator/ValidationResult.java +++ 
b/src/main/java/com/conveyal/gtfs/validator/ValidationResult.java @@ -1,13 +1,10 @@ package com.conveyal.gtfs.validator; -import java.awt.geom.Rectangle2D; -import java.io.Serializable; -import com.conveyal.gtfs.model.Pattern; -import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.awt.geom.Rectangle2D; +import java.io.Serializable; import java.time.LocalDate; -import java.util.List; /** * An instance of this class is returned by the validator. @@ -26,8 +23,15 @@ public class ValidationResult implements Serializable { public int errorCount; public LocalDate declaredStartDate; public LocalDate declaredEndDate; + + /** + * the actual first and last date of service which is calculated in {@link ServiceValidator#complete} + * These variables are actually not directly tied to data in the calendar_dates.txt file. Instead, they represent + * the first and last date respectively of any entry in the calendar.txt and calendar_dates.txt files. + */ public LocalDate firstCalendarDate; public LocalDate lastCalendarDate; + public int[] dailyBusSeconds; public int[] dailyTramSeconds; public int[] dailyMetroSeconds; diff --git a/src/main/java/com/conveyal/gtfs/validator/Validator.java b/src/main/java/com/conveyal/gtfs/validator/Validator.java index 66d432579..1c4824539 100644 --- a/src/main/java/com/conveyal/gtfs/validator/Validator.java +++ b/src/main/java/com/conveyal/gtfs/validator/Validator.java @@ -6,9 +6,7 @@ import com.conveyal.gtfs.loader.Feed; import com.conveyal.gtfs.model.Entity; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; +import java.util.Set; /** * A Validator examines a whole GTFS feed or a single trip within a GTFS feed. It accumulates error messages for @@ -39,6 +37,22 @@ public void registerError(Entity entity, NewGTFSErrorType errorType) { errorStorage.storeError(NewGTFSError.forEntity(entity, errorType)); } + /** + * Stores a set of errors. 
+ */ + public void storeErrors(Set errors) { + errorStorage.storeErrors(errors); + } + + /** + * WARNING: this method creates but DOES NOT STORE a new GTFS error. It should only be used in cases where a + * collection of errors need to be temporarily held before storing in batch (e.g., waiting to store travel time zero + * errors before it is determined that the entire feed uses travel times rounded to the minute). + */ + NewGTFSError createUnregisteredError (Entity entity, NewGTFSErrorType errorType) { + return NewGTFSError.forEntity(entity, errorType); + } + // /** // * Store an error that affects a single line of a single table. Add a single key-value pair to it. Wraps the // * underlying error constructor. diff --git a/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java b/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java index cb7299163..e908c7619 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSFeedTest.java @@ -56,7 +56,7 @@ public static void setUpClass() { //executed only once, before the first test simpleGtfsZipFileName = null; try { - simpleGtfsZipFileName = TestUtils.zipFolderFiles("fake-agency"); + simpleGtfsZipFileName = TestUtils.zipFolderFiles("fake-agency", true); } catch (IOException e) { e.printStackTrace(); } @@ -190,7 +190,7 @@ public void canGetAgencyTimeZoneForStop() { public void canGetInterpolatedTimes() throws GTFSFeed.FirstAndLastStopsDoNotHaveTimes, IOException { String tripId = "a30277f8-e50a-4a85-9141-b1e0da9d429d"; - String gtfsZipFileName = TestUtils.zipFolderFiles("fake-agency-interpolated-stop-times"); + String gtfsZipFileName = TestUtils.zipFolderFiles("fake-agency-interpolated-stop-times", true); GTFSFeed feed = GTFSFeed.fromFile(gtfsZipFileName); Iterable stopTimes = feed.getInterpolatedStopTimesForTrip(tripId); diff --git a/src/test/java/com/conveyal/gtfs/GTFSMainTest.java b/src/test/java/com/conveyal/gtfs/GTFSMainTest.java index 558c946a3..4220eaeb5 100644 --- 
a/src/test/java/com/conveyal/gtfs/GTFSMainTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSMainTest.java @@ -29,7 +29,7 @@ public static void setUpClass() { //executed only once, before the first test simpleGtfsZipFileName = null; try { - simpleGtfsZipFileName = TestUtils.zipFolderFiles("fake-agency"); + simpleGtfsZipFileName = TestUtils.zipFolderFiles("fake-agency", true); } catch (IOException e) { e.printStackTrace(); } diff --git a/src/test/java/com/conveyal/gtfs/GTFSTest.java b/src/test/java/com/conveyal/gtfs/GTFSTest.java index 0bf4c2237..f655ee783 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSTest.java @@ -11,7 +11,10 @@ import com.csvreader.CsvReader; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; +import com.google.common.io.Files; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.input.BOMInputStream; +import org.hamcrest.Matcher; import org.hamcrest.comparator.ComparatorMatcherBuilder; import org.junit.Before; import org.junit.Test; @@ -92,140 +95,12 @@ public void requiresActionCommand() throws Exception { */ @Test public void canLoadAndExportSimpleAgency() { - PersistenceExpectation[] persistenceExpectations = new PersistenceExpectation[]{ - new PersistenceExpectation( - "agency", - new RecordExpectation[]{ - new RecordExpectation("agency_id", "1"), - new RecordExpectation("agency_name", "Fake Transit"), - new RecordExpectation("agency_timezone", "America/Los_Angeles") - } - ), - new PersistenceExpectation( - "calendar", - new RecordExpectation[]{ - new RecordExpectation( - "service_id", "04100312-8fe1-46a5-a9f2-556f39478f57" - ), - new RecordExpectation("monday", 1), - new RecordExpectation("tuesday", 1), - new RecordExpectation("wednesday", 1), - new RecordExpectation("thursday", 1), - new RecordExpectation("friday", 1), - new RecordExpectation("saturday", 1), - new RecordExpectation("sunday", 1), - new 
RecordExpectation("start_date", "20170915"), - new RecordExpectation("end_date", "20170917") - } - ), - new PersistenceExpectation( - "calendar_dates", - new RecordExpectation[]{ - new RecordExpectation( - "service_id", "04100312-8fe1-46a5-a9f2-556f39478f57" - ), - new RecordExpectation("date", 20170916), - new RecordExpectation("exception_type", 2) - } - ), - new PersistenceExpectation( - "fare_attributes", - new RecordExpectation[]{ - new RecordExpectation("fare_id", "route_based_fare"), - new RecordExpectation("price", 1.23, 0), - new RecordExpectation("currency_type", "USD") - } - ), - new PersistenceExpectation( - "fare_rules", - new RecordExpectation[]{ - new RecordExpectation("fare_id", "route_based_fare"), - new RecordExpectation("route_id", "1") - } - ), - new PersistenceExpectation( - "feed_info", - new RecordExpectation[]{ - new RecordExpectation("feed_publisher_name", "Conveyal" - ), - new RecordExpectation( - "feed_publisher_url", "http://www.conveyal.com" - ), - new RecordExpectation("feed_lang", "en"), - new RecordExpectation("feed_version", "1.0") - } - ), - new PersistenceExpectation( - "frequencies", - new RecordExpectation[]{ - new RecordExpectation("trip_id", "frequency-trip"), - new RecordExpectation("start_time", 28800, "08:00:00"), - new RecordExpectation("end_time", 32400, "09:00:00"), - new RecordExpectation("headway_secs", 1800), - new RecordExpectation("exact_times", 0) - } - ), - new PersistenceExpectation( - "routes", - new RecordExpectation[]{ - new RecordExpectation("agency_id", "1"), - new RecordExpectation("route_id", "1"), - new RecordExpectation("route_short_name", "1"), - new RecordExpectation("route_long_name", "Route 1"), - new RecordExpectation("route_type", 3), - new RecordExpectation("route_color", "7CE6E7") - } - ), - new PersistenceExpectation( - "shapes", - new RecordExpectation[]{ - new RecordExpectation( - "shape_id", "5820f377-f947-4728-ac29-ac0102cbc34e" - ), - new RecordExpectation("shape_pt_lat", 37.061172, 
0.00001), - new RecordExpectation("shape_pt_lon", -122.007500, 0.00001), - new RecordExpectation("shape_pt_sequence", 2), - new RecordExpectation("shape_dist_traveled", 7.4997067, 0.01) - } - ), - new PersistenceExpectation( - "stop_times", - new RecordExpectation[]{ - new RecordExpectation( - "trip_id", "a30277f8-e50a-4a85-9141-b1e0da9d429d" - ), - new RecordExpectation("arrival_time", 25200, "07:00:00"), - new RecordExpectation("departure_time", 25200, "07:00:00"), - new RecordExpectation("stop_id", "4u6g"), - // the string expectation for stop_sequence is different because of how stop_times are - // converted to 0-based indexes in Table.normalizeAndCloneStopTimes - new RecordExpectation("stop_sequence", 1, "1", "0"), - new RecordExpectation("pickup_type", 0), - new RecordExpectation("drop_off_type", 0), - new RecordExpectation("shape_dist_traveled", 0.0, 0.01) - } - ), - new PersistenceExpectation( - "trips", - new RecordExpectation[]{ - new RecordExpectation( - "trip_id", "a30277f8-e50a-4a85-9141-b1e0da9d429d" - ), - new RecordExpectation( - "service_id", "04100312-8fe1-46a5-a9f2-556f39478f57" - ), - new RecordExpectation("route_id", "1"), - new RecordExpectation("direction_id", 0), - new RecordExpectation( - "shape_id", "5820f377-f947-4728-ac29-ac0102cbc34e" - ), - new RecordExpectation("bikes_allowed", 0), - new RecordExpectation("wheelchair_accessible", 0) - } - ) - }; assertThat( - runIntegrationTest("fake-agency", persistenceExpectations), + runIntegrationTestOnFolder( + "fake-agency", + nullValue(), + fakeAgencyPersistenceExpectations + ), equalTo(true) ); } @@ -245,7 +120,33 @@ public void canLoadFeedWithBadDates () { ); assertThat( "Integration test passes", - runIntegrationTest("fake-agency-bad-calendar-date", expectations), + runIntegrationTestOnFolder("fake-agency-bad-calendar-date", nullValue(), expectations), + equalTo(true) + ); + } + + /** + * Tests whether or not "fake-agency" GTFS can be placed in a zipped subdirectory and loaded/exported 
successfully. + */ + @Test + public void canLoadAndExportSimpleAgencyInSubDirectory() { + String zipFileName = null; + // Get filename for fake-agency resource + String resourceFolder = TestUtils.getResourceFileName("fake-agency"); + // Recursively copy folder into temp directory, which we zip up and run the integration test on. + File tempDir = Files.createTempDir(); + tempDir.deleteOnExit(); + File nestedDir = new File(TestUtils.fileNameWithDir(tempDir.getAbsolutePath(), "fake-agency")); + LOG.info("Creating temp folder with nested subdirectory at {}", tempDir.getAbsolutePath()); + try { + FileUtils.copyDirectory(new File(resourceFolder), nestedDir); + zipFileName = TestUtils.zipFolderFiles(tempDir.getAbsolutePath(), false); + } catch (IOException e) { + e.printStackTrace(); + } + // TODO Add error expectations argument that expects NewGTFSErrorType.TABLE_IN_SUBDIRECTORY error type. + assertThat( + runIntegrationTestOnZipFile(zipFileName, nullValue(), fakeAgencyPersistenceExpectations), equalTo(true) ); } @@ -311,20 +212,48 @@ public void canLoadAndExportSimpleAgencyWithOnlyCalendarDates() { ) }; assertThat( - runIntegrationTest("fake-agency-only-calendar-dates", persistenceExpectations), + runIntegrationTestOnFolder( + "fake-agency-only-calendar-dates", + nullValue(), + persistenceExpectations + ), equalTo(true) ); } /** - * A helper method that will run GTFS.main with a certain zip file. + * A helper method that will zip a specified folder in test/main/resources and call + * {@link #runIntegrationTestOnZipFile(String, Matcher, PersistenceExpectation[])} on that file. 
+ */ + private boolean runIntegrationTestOnFolder( + String folderName, + Matcher fatalExceptionExpectation, + PersistenceExpectation[] persistenceExpectations + ) { + // zip up test folder into temp zip file + String zipFileName = null; + try { + zipFileName = TestUtils.zipFolderFiles(folderName, true); + } catch (IOException e) { + e.printStackTrace(); + return false; + } + return runIntegrationTestOnZipFile(zipFileName, fatalExceptionExpectation, persistenceExpectations); + } + + /** + * A helper method that will run GTFS#main with a certain zip file. * This tests whether a GTFS zip file can be loaded without any errors. * * After the GTFS is loaded, this will also initiate an export of a GTFS from the database and check * the integrity of the exported GTFS. */ - private boolean runIntegrationTest(String folderName, PersistenceExpectation[] persistenceExpectations) { + private boolean runIntegrationTestOnZipFile( + String zipFileName, + Matcher fatalExceptionExpectation, + PersistenceExpectation[] persistenceExpectations + ) { String newDBName = TestUtils.generateNewDB(); String dbConnectionUrl = String.format("jdbc:postgresql://localhost/%s", newDBName); DataSource dataSource = GTFS.createDataSource( @@ -333,11 +262,26 @@ private boolean runIntegrationTest(String folderName, PersistenceExpectation[] p null ); - String namespace = zipFolderAndLoadGTFS(folderName, dataSource, persistenceExpectations); - if (namespace == null) { - // Indicates that loading failed. 
+ String namespace; + + // Verify that loading the feed completes and data is stored properly + try { + // load and validate feed + LOG.info("load and validate feed"); + FeedLoadResult loadResult = GTFS.load(zipFileName, dataSource); + ValidationResult validationResult = GTFS.validate(loadResult.uniqueIdentifier, dataSource); + + assertThat(validationResult.fatalException, is(fatalExceptionExpectation)); + namespace = loadResult.uniqueIdentifier; + + assertThatImportedGtfsMeetsExpectations(dataSource.getConnection(), namespace, persistenceExpectations); + } catch (SQLException e) { TestUtils.dropDB(newDBName); + e.printStackTrace(); return false; + } catch (AssertionError e) { + TestUtils.dropDB(newDBName); + throw e; } // Verify that exporting the feed (in non-editor mode) completes and data is outputted properly @@ -399,7 +343,7 @@ private String zipFolderAndLoadGTFS(String folderName, DataSource dataSource, Pe // zip up test folder into temp zip file String zipFileName; try { - zipFileName = TestUtils.zipFolderFiles(folderName); + zipFileName = TestUtils.zipFolderFiles(folderName, true); } catch (IOException e) { e.printStackTrace(); return null; @@ -673,18 +617,154 @@ private void assertThatPersistenceExpectationRecordWasFound( Collection valuePairs = mismatches.get(field); for (ValuePair valuePair : valuePairs) { assertThat( - String.format("The value expected for %s was not found", field), - valuePair.expected, - equalTo(valuePair.found) + String.format("The value expected for %s was not found", field), + valuePair.expected, + equalTo(valuePair.found) ); } } } else { assertThat( - "The record as defined in the PersistenceExpectation was not found.", - foundRecord, - equalTo(true) + "The record as defined in the PersistenceExpectation was not found.", + foundRecord, + equalTo(true) ); } } + + /** + * Persistence expectations for use with the GTFS contained within the "fake-agency" resources folder. 
+ */ + private PersistenceExpectation[] fakeAgencyPersistenceExpectations = new PersistenceExpectation[]{ + new PersistenceExpectation( + "agency", + new RecordExpectation[]{ + new RecordExpectation("agency_id", "1"), + new RecordExpectation("agency_name", "Fake Transit"), + new RecordExpectation("agency_timezone", "America/Los_Angeles") + } + ), + new PersistenceExpectation( + "calendar", + new RecordExpectation[]{ + new RecordExpectation( + "service_id", "04100312-8fe1-46a5-a9f2-556f39478f57" + ), + new RecordExpectation("monday", 1), + new RecordExpectation("tuesday", 1), + new RecordExpectation("wednesday", 1), + new RecordExpectation("thursday", 1), + new RecordExpectation("friday", 1), + new RecordExpectation("saturday", 1), + new RecordExpectation("sunday", 1), + new RecordExpectation("start_date", "20170915"), + new RecordExpectation("end_date", "20170917") + } + ), + new PersistenceExpectation( + "calendar_dates", + new RecordExpectation[]{ + new RecordExpectation( + "service_id", "04100312-8fe1-46a5-a9f2-556f39478f57" + ), + new RecordExpectation("date", 20170916), + new RecordExpectation("exception_type", 2) + } + ), + new PersistenceExpectation( + "fare_attributes", + new RecordExpectation[]{ + new RecordExpectation("fare_id", "route_based_fare"), + new RecordExpectation("price", 1.23, 0), + new RecordExpectation("currency_type", "USD") + } + ), + new PersistenceExpectation( + "fare_rules", + new RecordExpectation[]{ + new RecordExpectation("fare_id", "route_based_fare"), + new RecordExpectation("route_id", "1") + } + ), + new PersistenceExpectation( + "feed_info", + new RecordExpectation[]{ + new RecordExpectation("feed_publisher_name", "Conveyal" + ), + new RecordExpectation( + "feed_publisher_url", "http://www.conveyal.com" + ), + new RecordExpectation("feed_lang", "en"), + new RecordExpectation("feed_version", "1.0") + } + ), + new PersistenceExpectation( + "frequencies", + new RecordExpectation[]{ + new RecordExpectation("trip_id", 
"frequency-trip"), + new RecordExpectation("start_time", 28800, "08:00:00"), + new RecordExpectation("end_time", 32400, "09:00:00"), + new RecordExpectation("headway_secs", 1800), + new RecordExpectation("exact_times", 0) + } + ), + new PersistenceExpectation( + "routes", + new RecordExpectation[]{ + new RecordExpectation("agency_id", "1"), + new RecordExpectation("route_id", "1"), + new RecordExpectation("route_short_name", "1"), + new RecordExpectation("route_long_name", "Route 1"), + new RecordExpectation("route_type", 3), + new RecordExpectation("route_color", "7CE6E7") + } + ), + new PersistenceExpectation( + "shapes", + new RecordExpectation[]{ + new RecordExpectation( + "shape_id", "5820f377-f947-4728-ac29-ac0102cbc34e" + ), + new RecordExpectation("shape_pt_lat", 37.061172, 0.00001), + new RecordExpectation("shape_pt_lon", -122.007500, 0.00001), + new RecordExpectation("shape_pt_sequence", 2), + new RecordExpectation("shape_dist_traveled", 7.4997067, 0.01) + } + ), + new PersistenceExpectation( + "stop_times", + new RecordExpectation[]{ + new RecordExpectation( + "trip_id", "a30277f8-e50a-4a85-9141-b1e0da9d429d" + ), + new RecordExpectation("arrival_time", 25200, "07:00:00"), + new RecordExpectation("departure_time", 25200, "07:00:00"), + new RecordExpectation("stop_id", "4u6g"), + // the string expectation for stop_sequence is different because of how stop_times are + // converted to 0-based indexes in Table.normalizeAndCloneStopTimes + new RecordExpectation("stop_sequence", 1, "1", "0"), + new RecordExpectation("pickup_type", 0), + new RecordExpectation("drop_off_type", 0), + new RecordExpectation("shape_dist_traveled", 0.0, 0.01) + } + ), + new PersistenceExpectation( + "trips", + new RecordExpectation[]{ + new RecordExpectation( + "trip_id", "a30277f8-e50a-4a85-9141-b1e0da9d429d" + ), + new RecordExpectation( + "service_id", "04100312-8fe1-46a5-a9f2-556f39478f57" + ), + new RecordExpectation("route_id", "1"), + new RecordExpectation("direction_id", 0), 
+ new RecordExpectation( + "shape_id", "5820f377-f947-4728-ac29-ac0102cbc34e" + ), + new RecordExpectation("bikes_allowed", 0), + new RecordExpectation("wheelchair_accessible", 0) + } + ) + }; } diff --git a/src/test/java/com/conveyal/gtfs/TestUtils.java b/src/test/java/com/conveyal/gtfs/TestUtils.java index 441f079e0..7c67ad07d 100644 --- a/src/test/java/com/conveyal/gtfs/TestUtils.java +++ b/src/test/java/com/conveyal/gtfs/TestUtils.java @@ -1,12 +1,12 @@ package com.conveyal.gtfs; +import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.sql.Connection; @@ -104,27 +104,38 @@ public static String getResourceFileName(String fileName) { /** * Zip files in a folder into a temporary zip file */ - public static String zipFolderFiles(String folderName) throws IOException { + public static String zipFolderFiles(String folderName, boolean isRelativeToResourcesPath) throws IOException { // create temporary zip file File tempFile = File.createTempFile("temp-gtfs-zip-", ".zip"); tempFile.deleteOnExit(); String tempFilePath = tempFile.getAbsolutePath(); - compressZipfile(TestUtils.getResourceFileName(folderName), tempFilePath); + // If folder name is relative to resources path, get full path. Otherwise, it is assumed to be an absolute path. + String folderPath = isRelativeToResourcesPath ? 
getResourceFileName(folderName) : folderName; + // Do not nest files under a subdirectory if directly zipping a folder in src/main/resources + compressZipfile(folderPath, tempFilePath, !isRelativeToResourcesPath); return tempFilePath; } - private static void compressZipfile(String sourceDir, String outputFile) throws IOException, FileNotFoundException { + private static void compressZipfile(String sourceDir, String outputFile, boolean nestDirectory) throws IOException { ZipOutputStream zipFile = new ZipOutputStream(new FileOutputStream(outputFile)); - compressDirectoryToZipfile(sourceDir, sourceDir, zipFile); + compressDirectoryToZipfile(sourceDir, sourceDir, zipFile, nestDirectory); IOUtils.closeQuietly(zipFile); } - private static void compressDirectoryToZipfile(String rootDir, String sourceDir, ZipOutputStream out) throws IOException, FileNotFoundException { + /** + * Convenience method for zipping a directory. + * @param nestDirectory whether nested folders should be preserved as subdirectories + */ + private static void compressDirectoryToZipfile(String rootDir, String sourceDir, ZipOutputStream out, boolean nestDirectory) throws IOException { for (File file : new File(sourceDir).listFiles()) { if (file.isDirectory()) { - compressDirectoryToZipfile(rootDir, sourceDir + File.separator + file.getName(), out); + compressDirectoryToZipfile(rootDir, fileNameWithDir(sourceDir, file.getName()), out, nestDirectory); } else { - ZipEntry entry = new ZipEntry(sourceDir.replace(rootDir, "") + file.getName()); + String folderName = sourceDir.replace(rootDir, ""); + String zipEntryName = nestDirectory + ? 
fileNameWithDir(folderName, file.getName()) + : String.join("", folderName, file.getName()); + ZipEntry entry = new ZipEntry(zipEntryName); out.putNextEntry(entry); FileInputStream in = new FileInputStream(file.getAbsolutePath()); @@ -133,4 +144,8 @@ private static void compressDirectoryToZipfile(String rootDir, String sourceDir, } } } + + public static String fileNameWithDir(String directory, String filename) { + return String.join(File.separator, directory, filename); + } } diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index 44a197fc6..421da273d 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -49,7 +49,7 @@ public static void setUpClass() throws SQLException, IOException { String dbConnectionUrl = String.format("jdbc:postgresql://localhost/%s", testDBName); testDataSource = createDataSource(dbConnectionUrl, null, null); // zip up test folder into temp zip file - String zipFileName = TestUtils.zipFolderFiles("fake-agency"); + String zipFileName = TestUtils.zipFolderFiles("fake-agency", true); // load feed into db FeedLoadResult feedLoadResult = load(zipFileName, testDataSource); testNamespace = feedLoadResult.uniqueIdentifier; @@ -75,74 +75,74 @@ public static void tearDownClass() { } // tests that the graphQL schema can initialize - @Test + @Test(timeout=5000) public void canInitialize() { GTFSGraphQL.initialize(testDataSource); GraphQL graphQL = GTFSGraphQL.getGraphQl(); } // tests that the root element of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFeed() throws IOException { assertThat(queryGraphQL("feed.txt"), matchesSnapshot()); } // tests that the row counts of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFeedRowCounts() throws IOException { assertThat(queryGraphQL("feedRowCounts.txt"), matchesSnapshot()); } // tests 
that the errors of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchErrors() throws IOException { assertThat(queryGraphQL("feedErrors.txt"), matchesSnapshot()); } // tests that the feed_info of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFeedInfo() throws IOException { assertThat(queryGraphQL("feedFeedInfo.txt"), matchesSnapshot()); } // tests that the patterns of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchPatterns() throws IOException { assertThat(queryGraphQL("feedPatterns.txt"), matchesSnapshot()); } // tests that the agencies of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchAgencies() throws IOException { assertThat(queryGraphQL("feedAgencies.txt"), matchesSnapshot()); } // tests that the calendars of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchCalendars() throws IOException { assertThat(queryGraphQL("feedCalendars.txt"), matchesSnapshot()); } // tests that the fares of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchFares() throws IOException { assertThat(queryGraphQL("feedFares.txt"), matchesSnapshot()); } // tests that the routes of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchRoutes() throws IOException { assertThat(queryGraphQL("feedRoutes.txt"), matchesSnapshot()); } // tests that the stops of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchStops() throws IOException { assertThat(queryGraphQL("feedStops.txt"), matchesSnapshot()); } // tests that the trips of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchTrips() throws IOException { assertThat(queryGraphQL("feedTrips.txt"), matchesSnapshot()); } @@ -150,19 +150,19 @@ public void canFetchTrips() throws IOException { // TODO: make tests for schedule_exceptions / calendar_dates // tests that the stop times of a feed can be fetched - @Test + @Test(timeout=5000) public void 
canFetchStopTimes() throws IOException { assertThat(queryGraphQL("feedStopTimes.txt"), matchesSnapshot()); } // tests that the stop times of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchServices() throws IOException { assertThat(queryGraphQL("feedServices.txt"), matchesSnapshot()); } // tests that the stop times of a feed can be fetched - @Test + @Test(timeout=5000) public void canFetchRoutesAndFilterTripsByDateAndTime() throws IOException { Map variables = new HashMap(); variables.put("namespace", testNamespace); @@ -176,16 +176,30 @@ public void canFetchRoutesAndFilterTripsByDateAndTime() throws IOException { } // tests that the limit argument applies properly to a fetcher defined with autolimit set to false - @Test + @Test(timeout=5000) public void canFetchNestedEntityWithLimit() throws IOException { assertThat(queryGraphQL("feedStopsStopTimeLimit.txt"), matchesSnapshot()); } + // tests whether a graphQL query that has superfluous and redundant nesting can find the right result + // if the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs + @Test(timeout=5000) + public void canFetchMultiNestedEntities() throws IOException { + assertThat(queryGraphQL("superNested.txt"), matchesSnapshot()); + } + // tests whether a graphQL query that has superfluous and redundant nesting can find the right result + // if the graphQL dataloader is enabled correctly, there will not be any repeating sql queries in the logs + // furthermore, some queries should have been combined together + @Test(timeout=5000) + public void canFetchMultiNestedEntitiesWithoutLimits() throws IOException { + assertThat(queryGraphQL("superNestedNoLimits.txt"), matchesSnapshot()); + } + /** * attempt to fetch more than one record with SQL injection as inputs * the graphql library should properly escape the string and return 0 results for stops */ - @Test + @Test(timeout=5000) public void canSanitizeSQLInjectionSentAsInput() throws 
IOException { Map variables = new HashMap(); variables.put("namespace", testInjectionNamespace); @@ -204,7 +218,7 @@ public void canSanitizeSQLInjectionSentAsInput() throws IOException { * attempt run a graphql query when one of the pieces of data contains a SQL injection * the graphql library should properly escape the string and complete the queries */ - @Test + @Test(timeout=5000) public void canSanitizeSQLInjectionSentAsKeyValue() throws IOException, SQLException { // manually update the route_id key in routes and patterns String injection = "'' OR 1=1; Select ''99"; diff --git a/src/test/resources/graphql/superNested.txt b/src/test/resources/graphql/superNested.txt new file mode 100644 index 000000000..3d2ea056b --- /dev/null +++ b/src/test/resources/graphql/superNested.txt @@ -0,0 +1,23 @@ +query ($namespace: String) { + feed(namespace: $namespace) { + feed_version + routes { + route_id + stops { + routes { + route_id + stops { + routes { + route_id + stops { + stop_id + } + } + stop_id + } + } + stop_id + } + } + } +} \ No newline at end of file diff --git a/src/test/resources/graphql/superNestedNoLimits.txt b/src/test/resources/graphql/superNestedNoLimits.txt new file mode 100644 index 000000000..71902d06e --- /dev/null +++ b/src/test/resources/graphql/superNestedNoLimits.txt @@ -0,0 +1,23 @@ + query ($namespace: String) { + feed(namespace: $namespace) { + feed_version + routes(limit: -1) { + route_id + stops(limit: -1) { + routes(limit: -1) { + route_id + stops(limit: -1) { + routes(limit: -1) { + route_id + stops(limit: -1) { + stop_id + } + } + stop_id + } + } + stop_id + } + } + } + } \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json index c876d26c3..274b5b3f3 100644 --- a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json +++ 
b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchErrors.json @@ -18,11 +18,19 @@ "error_type" : "ROUTE_LONG_NAME_CONTAINS_SHORT_NAME", "line_number" : 2 }, { - "bad_value" : "20170916", + "bad_value" : null, "entity_id" : null, "entity_sequence" : null, "entity_type" : null, "error_id" : 2, + "error_type" : "FEED_TRAVEL_TIMES_ROUNDED", + "line_number" : null + }, { + "bad_value" : "20170916", + "entity_id" : null, + "entity_sequence" : null, + "entity_type" : null, + "error_id" : 3, "error_type" : "DATE_NO_SERVICE", "line_number" : null } ], diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json index 3e02c3026..dcca06a39 100644 --- a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchFeedRowCounts.json @@ -6,7 +6,7 @@ "agency" : 1, "calendar" : 1, "calendar_dates" : 1, - "errors" : 3, + "errors" : 4, "routes" : 1, "stop_times" : 4, "stops" : 2, diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json new file mode 100644 index 000000000..97f3b64aa --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntities.json @@ -0,0 +1,63 @@ +{ + "data" : { + "feed" : { + "feed_version" : "1.0", + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } 
] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ] + } + } +} \ No newline at end of file diff --git a/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json new file mode 100644 index 000000000..97f3b64aa --- /dev/null +++ b/src/test/resources/snapshots/com/conveyal/gtfs/graphql/GTFSGraphQLTest/canFetchMultiNestedEntitiesWithoutLimits.json @@ -0,0 +1,63 @@ +{ + "data" : { + "feed" : { + "feed_version" : "1.0", + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "4u6g" + }, { + "routes" : [ { + "route_id" : "1", + "stops" : [ { + "stop_id" : "4u6g" + }, { + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ], + "stop_id" : "johv" + } ] + } ] + } + } +} \ No newline at end of file