diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
new file mode 100644
index 000000000..a3ebe75cb
--- /dev/null
+++ b/.github/workflows/build-docker.yml
@@ -0,0 +1,43 @@
+name: Create and publish a Docker image
+# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images
+
+on:
+  push:
+    branches: ['dev', 'master', 'dev-flex', 'mtc-deploy']
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Log in to the Container registry
+        # Maintain specific commit hash for stability
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 3054ca7d5..494aeccb1 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -28,11 +28,11 @@ jobs:
         uses: actions/setup-java@v1
         with:
           java-version: 1.8
-      # Install node 14 for running e2e tests (and for maven-semantic-release).
-      - name: Use Node.js 18.x
+      # Install node for running e2e tests (and for maven-semantic-release).
+      - name: Use Node.js 20.x
         uses: actions/setup-node@v1
         with:
-          node-version: 18.x
+          node-version: 20.x
       - name: Start MongoDB
         uses: supercharge/mongodb-github-action@1.3.0
         with:
@@ -97,6 +97,10 @@ jobs:
       # Run maven-semantic-release to potentially create a new release of datatools-server. The flag --skip-maven-deploy is
       # used to avoid deploying to maven central. So essentially, this just creates a release with a changelog on github.
+      - name: Use Node.js 20.x
+        uses: actions/setup-node@v1
+        with:
+          node-version: 20.x
       - name: Run maven-semantic-release
         env:
           GH_TOKEN: ${{ secrets.GH_TOKEN }}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..1291686be
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+# syntax=docker/dockerfile:1
+FROM maven:3.8.7-openjdk-18-slim
+
+COPY . /datatools
+
+# Build jar
+WORKDIR /datatools
+RUN mvn package -DskipTests
+RUN cp target/dt*.jar /datatools/
+RUN mv dt*.jar datatools-server.jar
+
+RUN mkdir -p /var/datatools_gtfs/gtfsplus
+RUN mkdir -p /var/gtfs/manager/gtfs/gtfsplus
+
+# Launch server
+# This relies on a configuration volume and an AWS credentials volume being present. See `docker-compose.yml`, or the example below.
+# Try: docker run --publish 4000:4000 -v ~/config/:/config datatools-latest
+CMD ["java", "-XX:MaxRAMPercentage=95", "-jar", "datatools-server.jar", "/config/env.yml", "/config/server.yml"]
+EXPOSE 4000
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..cc847068d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.8'
+services:
+  datatools-server:
+    build: ./
+    ports:
+      - "4000:4000"
+    volumes:
+      - type: bind
+        source: ./configurations/default/
+        target: /config
+      - type: bind
+        source: ~/.aws
+        target: /root/.aws
+    depends_on:
+      - mongo
+      - postgres
+  mongo:
+    image: mongo
+    restart: always
+  postgres:
+    environment:
+      POSTGRES_HOST_AUTH_METHOD: trust
+      POSTGRES_USER: root
+      POSTGRES_DB: dmtest
+    image: postgres
+    restart: always
\ No newline at end of file
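The `CMD` above starts the server with `-XX:MaxRAMPercentage=95`, which tells the JVM to size its heap from the container's memory limit instead of a fixed `-Xmx`. A quick way to confirm what limit the JVM actually picked up is a one-off check like the sketch below (the class name and output format are illustrative, not part of this change):

```java
/**
 * Minimal heap-ceiling check (illustrative only). Compile it into the image and run, e.g.:
 *   docker run --rm --memory=2g <image> java -XX:MaxRAMPercentage=95 HeapCheck
 */
public class HeapCheck {
    public static void main(String[] args) {
        long maxBytes = Runtime.getRuntime().maxMemory(); // heap ceiling the JVM computed
        System.out.printf("Max heap: %d MiB%n", maxBytes / (1024 * 1024));
    }
}
```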
diff --git a/pom.xml b/pom.xml
index f9b95a751..330baf572 100644
--- a/pom.xml
+++ b/pom.xml
@@ -259,10 +259,10 @@
             AWS S3 SDK - putting/getting objects into/out of S3. -->
         <dependency>
-            <groupId>com.github.conveyal</groupId>
+            <groupId>com.github.ibi-group</groupId>
             <artifactId>gtfs-lib</artifactId>
-            <version>bdb76ee</version>
+            <version>mtc-temp-merge-hack-SNAPSHOT</version>
         </dependency>
diff --git a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
index c0b76d959..55504120c 100644
--- a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
+++ b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
@@ -21,8 +21,10 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Enumeration;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
@@ -122,6 +124,13 @@ private static void validateTable(
         GTFSFeed gtfsFeed
     ) throws IOException {
         String tableId = specTable.get("id").asText();
+        boolean tableIsDirections = tableId.equals("directions");
+
+        Set<String> gtfsRoutes = new HashSet<>();
+        if (tableIsDirections) {
+            // Copy the gtfs routes into a set so that we can "check them off" (remove them).
+            gtfsRoutes.addAll(gtfsFeed.routes.keySet());
+        }
 
         // Read in table data from input stream.
         CsvReader csvReader = new CsvReader(inputStreamToValidate, ',', StandardCharsets.UTF_8);
@@ -167,15 +176,20 @@
             // Validate each value in row. Note: we iterate over the fields and not values because a row may be missing
             // columns, but we still want to validate that missing value (e.g., if it is missing a required field).
             for (int f = 0; f < fieldsFound.length; f++) {
+                JsonNode specField = fieldsFound[f];
                 // If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception.
                 String val = f < recordColumnCount ? rowValues[f] : null;
-                validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, fieldsFound[f], gtfsFeed);
+                validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, specField, gtfsFeed, gtfsRoutes, tableIsDirections);
             }
         }
         rowIndex++;
     }
     csvReader.close();
 
+        if (tableIsDirections && !gtfsRoutes.isEmpty()) {
+            // After we're done validating all the table values, check if every route was checked off in directions.txt.
+            issues.add(new ValidationIssue(tableId, null, -1, "Directions file doesn't define directions for all routes listed in routes.txt"));
+        }
         // Add issues for wrong number of columns and for empty rows after processing all rows.
         // Note: We considered adding an issue for each row, but opted for the single error approach because there's no
         // concept of a row-level issue in the UI right now. So we would potentially need to add that to the UI
@@ -205,7 +219,9 @@ private static void validateTableValue(
         String value,
         JsonNode[] specFieldsFound,
         JsonNode specField,
-        GTFSFeed gtfsFeed
+        GTFSFeed gtfsFeed,
+        Set<String> gtfsRoutes,
+        boolean tableIsDirections
     ) {
         if (specField == null) return;
         String fieldName = specField.get("name").asText();
@@ -300,6 +316,8 @@ private static void validateTableValue(
                 break;
         }
+
+        // "Check off" the route_id in directions.txt from the list to verify every route id has a direction.
+        if (tableIsDirections && fieldName.equals("route_id")) gtfsRoutes.remove(value);
     }
 
     /** Construct missing ID text for validation issue description. */
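The directions check above uses a simple set-difference ("check off") pattern: copy every `route_id` from `routes.txt` into a `HashSet`, remove each `route_id` encountered while validating `directions.txt` rows, and flag the table if anything is left over. A condensed sketch of the idea, with hypothetical names standing in for the validator's types:

```java
import java.util.HashSet;
import java.util.Set;

/** Condensed sketch of the "check off" pattern above; names are illustrative. */
class DirectionsCoverageCheck {
    /** Returns the route IDs that never appear in directions.txt. */
    static Set<String> findRoutesWithoutDirections(Set<String> routeIds, Iterable<String> directionRouteIds) {
        Set<String> unseen = new HashSet<>(routeIds); // copy, so removals don't touch the feed itself
        for (String routeId : directionRouteIds) {
            unseen.remove(routeId); // "check off" each route referenced by a direction row
        }
        return unseen; // non-empty means at least one route lacks a direction entry
    }
}
```

Because removal from a `HashSet` is constant time, the whole check stays linear in the number of direction rows.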
diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java b/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java
index 89ac448a3..a455e5ce1 100644
--- a/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java
+++ b/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java
@@ -384,6 +384,7 @@ static Map<String, FeedVersion> getLatestVersionsSentForPublishing(Collection<FeedSource> feedSources)
+            .orElse(new ArrayList<>())
             .stream()
             .collect(Collectors.toMap(v -> v.feedSourceId, Function.identity()));
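The `FeedUpdater` change guards the map-building pipeline against an absent version list: with an empty-list fallback in place, the `toMap` collector never runs on `null`. A self-contained sketch of that pattern, with a stand-in `FeedVersion` class (the real method signature differs):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;

/** Sketch of the null-safe collect; FeedVersion here is a stand-in for the real model class. */
class LatestVersionsSketch {
    static class FeedVersion { String feedSourceId; }

    static Map<String, FeedVersion> byFeedSource(List<FeedVersion> versionsOrNull) {
        return Optional.ofNullable(versionsOrNull)
            .orElse(new ArrayList<>()) // fall back to an empty list instead of throwing an NPE
            .stream()
            .collect(Collectors.toMap(v -> v.feedSourceId, Function.identity()));
    }
}
```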
diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java
index 978e50162..904d9a616 100644
--- a/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java
+++ b/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java
@@ -97,7 +97,10 @@ public void jobLogic() {
             // Run transform job in line so we can monitor the error status before load/validate begins.
             zipTransform.run();
             // Short circuit the feed load/validate if a pre-load transform fails.
-            if (zipTransform.status.error) return;
+            if (zipTransform.status.error) {
+                status.fail("Feed transformation failed, see details below.");
+                return;
+            }
         }
         // Assign transform result from zip target.
         feedVersion.feedTransformResult = zipTarget.feedTransformResult;
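Previously a failed pre-load transform made `jobLogic` return silently, so the parent job could appear to complete even though nothing was loaded; the change above marks the parent status as failed before short-circuiting. The shape of that control flow, reduced to a sketch with hypothetical `Status` and job types:

```java
/** Reduced sketch of the short-circuit; Status and the job class are hypothetical stand-ins. */
class Status {
    boolean error;
    String message;
    void fail(String message) { this.error = true; this.message = message; }
}

class ParentJobSketch {
    final Status status = new Status();

    void jobLogic(Status transformStatus) {
        if (transformStatus.error) {
            // Propagate the failure so the parent job is not reported as successful.
            status.fail("Feed transformation failed, see details below.");
            return;
        }
        // ...continue with feed load/validate...
    }
}
```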
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
index 7e9cd390d..118b14392 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
@@ -15,6 +15,7 @@
 import org.supercsv.prefs.CsvPreference;
 
 import java.io.ByteArrayInputStream;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
@@ -27,6 +28,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.Collectors;
+import java.util.zip.ZipException;
 import java.util.zip.ZipFile;
 
 import static com.conveyal.datatools.manager.DataManager.getConfigProperty;
@@ -143,7 +145,7 @@ private void initializeCapitalizeSubstitutions() {
     public void validateParameters(MonitorableJob.Status status) {
         // fieldName must not be null
         if (fieldName == null) {
-            status.fail("Field name must not be null");
+            status.fail("'Normalize Field' transformation failed because the field name parameter is not set.");
             return;
         }
@@ -193,68 +195,101 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
             Files.copy(originalZipPath, tempZipPath, StandardCopyOption.REPLACE_EXISTING);
 
             Table gtfsTable = GtfsUtils.getGtfsTable(table);
+            if (gtfsTable == null) {
+                status.fail(String.format("Unsupported GTFS file '%s'", tableName));
+                return;
+            }
             CsvReader csvReader = gtfsTable.getCsvReader(new ZipFile(tempZipPath.toAbsolutePath().toString()), null);
+            if (csvReader == null) {
+                status.fail(String.format("'Normalize Field' failed because file '%s' was not found in the GTFS archive", tableName));
+                return;
+            }
             final String[] headers = csvReader.getHeaders();
             Field[] fieldsFoundInZip = gtfsTable.getFieldsFromFieldHeaders(headers, null);
             int transformFieldIndex = getFieldIndex(fieldsFoundInZip, fieldName);
+            if (transformFieldIndex == -1) {
+                status.fail(String.format("'Normalize Field' failed because field '%s' was not found in file '%s' in the GTFS archive", fieldName, tableName));
+                return;
+            }
 
-            int modifiedRowCount = 0;
-
-            // Write headers and processed CSV rows.
-            writer.write(headers);
-            while (csvReader.readRecord()) {
-                String originalValue = csvReader.get(transformFieldIndex);
-                String transformedValue = originalValue;
-
-                // Convert to title case, if requested.
-                if (capitalize) {
-                    if (capitalizationStyle == CapitalizationStyle.TITLE_CASE) {
-                        transformedValue = convertToTitleCase(transformedValue);
-                    }
-                    // TODO: Implement other capitalization styles.
-                }
-
-                // Perform substitutions if any.
-                transformedValue = performSubstitutions(transformedValue);
-
-                // Re-assemble the CSV line and place in buffer.
-                String[] csvValues = csvReader.getValues();
-                csvValues[transformFieldIndex] = transformedValue;
-
-                // Write line to table (plus new line char).
-                writer.write(csvValues);
+            int modifiedRowCount = generateCsvContent(writer, headers, csvReader, transformFieldIndex);
 
-                // Count number of CSV rows changed.
-                if (!originalValue.equals(transformedValue)) {
-                    modifiedRowCount++;
-                }
-            } // End of iteration over each row.
-            csvReader.close();
-            writer.flush();
-
-            // Copy csv input stream into the zip file, replacing the existing file.
-            try (
-                // Modify target zip file that we just read.
-                FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, null);
-                // Stream for file copy operation.
-                InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
-            ) {
-                Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
-                Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
-                zipTarget.feedTransformResult.tableTransformResults.add(
-                    new TableTransformResult(tableName, 0, modifiedRowCount, 0)
-                );
-            }
+            writeCsvContent(zipTarget, tempZipPath, stringWriter, tableName, modifiedRowCount);
 
             // Replace original zip file with temporary working zip file.
             // (This should also trigger a system IO update event, so subsequent IO calls pick up the correct file.)
             Files.move(tempZipPath, originalZipPath, StandardCopyOption.REPLACE_EXISTING);
             LOG.info("Field normalization transformation successful, {} row(s) changed.", modifiedRowCount);
+        } catch (ZipException ze) {
+            status.fail(
+                String.format("'Normalize Field' failed because the GTFS archive is corrupted (%s).", ze.getMessage()),
+                ze
+            );
         } catch (Exception e) {
             status.fail("Unknown error encountered while transforming zip file", e);
         }
     }
 
+    /** Write csv input stream into the zip file, replacing the existing file. */
+    private void writeCsvContent(
+        FeedTransformZipTarget zipTarget,
+        Path tempZipPath,
+        StringWriter stringWriter,
+        String tableName,
+        int modifiedRowCount
+    ) throws IOException {
+        try (
+            // Modify target zip file that we just read.
+            FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
+            // Stream for file copy operation.
+            InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
+        ) {
+            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
+            Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
+            zipTarget.feedTransformResult.tableTransformResults.add(
+                new TableTransformResult(tableName, 0, modifiedRowCount, 0)
+            );
+        }
+    }
+
+    /** Generates content for the GTFS table; returns the number of rows modified. */
+    private int generateCsvContent(CsvListWriter writer, String[] headers, CsvReader csvReader, int transformFieldIndex) throws IOException {
+        int modifiedRowCount = 0;
+
+        // Write headers and processed CSV rows.
+        writer.write(headers);
+        while (csvReader.readRecord()) {
+            String originalValue = csvReader.get(transformFieldIndex);
+            String transformedValue = originalValue;
+
+            // Convert to title case, if requested.
+            if (capitalize) {
+                if (capitalizationStyle == CapitalizationStyle.TITLE_CASE) {
+                    transformedValue = convertToTitleCase(transformedValue);
+                }
+                // TODO: Implement other capitalization styles.
+            }
+
+            // Perform substitutions if any.
+            transformedValue = performSubstitutions(transformedValue);
+
+            // Re-assemble the CSV line and place in buffer.
+            String[] csvValues = csvReader.getValues();
+            csvValues[transformFieldIndex] = transformedValue;
+
+            // Write line to table (plus new line char).
+            writer.write(csvValues);
+
+            // Count number of CSV rows changed.
+            if (!originalValue.equals(transformedValue)) {
+                modifiedRowCount++;
+            }
+        } // End of iteration over each row.
+        csvReader.close();
+        writer.flush();
+        return modifiedRowCount;
+    }
+
     /**
      * Converts the provided string to Title Case, accommodating capitalization exceptions
      * and separator characters that may immediately precede
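The extracted `generateCsvContent` keeps the original row loop: for each record it optionally title-cases the configured field, applies substitutions, and counts rows that actually changed. The real `convertToTitleCase` honors a configurable exception list and separator characters; the sketch below shows only the core idea, with a hard-coded exception set:

```java
import java.util.Locale;
import java.util.Set;

/** Core of title-casing with capitalization exceptions; the exception list here is illustrative. */
class TitleCaseSketch {
    private static final Set<String> EXCEPTIONS = Set.of("of", "to", "via");

    static String toTitleCase(String input) {
        StringBuilder out = new StringBuilder();
        for (String word : input.toLowerCase(Locale.ROOT).split(" ", -1)) {
            if (out.length() > 0) out.append(' ');
            if (word.isEmpty() || EXCEPTIONS.contains(word)) {
                out.append(word); // leave exception words (and empty tokens) untouched
            } else {
                out.append(Character.toUpperCase(word.charAt(0))).append(word.substring(1));
            }
        }
        return out.toString();
    }
}
```

For example, `toTitleCase("MAIN ST of SMITHVILLE")` would yield `"Main St of Smithville"` under this exception list.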
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
index 5673d0c05..2ac87e569 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
@@ -42,7 +42,7 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
         String tableName = table + ".txt";
         // Run the replace transformation
         Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
-        try( FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, null) ){
+        try( FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null) ){
             // Convert csv data to input stream.
             InputStream inputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
             Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java
index e78d21f72..53963fb16 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java
@@ -45,12 +45,12 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
         String tableName = table + ".txt";
         // Run the replace transformation
         Path sourceZipPath = Paths.get(sourceVersion.retrieveGtfsFile().getAbsolutePath());
-        try (FileSystem sourceZipFs = FileSystems.newFileSystem(sourceZipPath, null)) {
+        try (FileSystem sourceZipFs = FileSystems.newFileSystem(sourceZipPath, (ClassLoader) null)) {
             // If the source txt file does not exist, NoSuchFileException will be thrown and caught below.
             Path sourceTxtFilePath = getTablePathInZip(tableName, sourceZipFs);
             Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
             LOG.info("Replacing file {} in zip file {} with source {}", tableName, targetZipPath.getFileName(), sourceVersion.id);
-            try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, null)) {
+            try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null)) {
                 Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
                 // Set transform type according to whether target file exists.
                 TransformType type = Files.exists(targetTxtFilePath)
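The `(ClassLoader) null` casts are needed because JDK 13 added `FileSystems.newFileSystem(Path, Map)`, which makes a bare `newFileSystem(path, null)` call ambiguous between the `Map` and `ClassLoader` overloads; the cast pins the original overload. In isolation, the zip-entry replacement these transformations perform looks roughly like this (entry name and content are placeholders):

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

/** Rough sketch of replacing one entry in a GTFS zip via the zip FileSystem provider. */
class ZipEntryReplaceSketch {
    static void replaceEntry(Path zipPath, String entryName, String newContent) throws IOException {
        // The (ClassLoader) cast disambiguates from the JDK 13+ newFileSystem(Path, Map) overload.
        try (
            FileSystem zipFs = FileSystems.newFileSystem(zipPath, (ClassLoader) null);
            InputStream in = new ByteArrayInputStream(newContent.getBytes(StandardCharsets.UTF_8))
        ) {
            Files.copy(in, zipFs.getPath(entryName), StandardCopyOption.REPLACE_EXISTING);
        }
    }
}
```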
diff --git a/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java b/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java
index ac24c29bb..b88813480 100644
--- a/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java
+++ b/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java
@@ -73,7 +73,7 @@ void canValidateCleanGtfsPlus() throws Exception {
         LOG.info("Validating BART GTFS+");
         GtfsPlusValidation validation = GtfsPlusValidation.validate(bartVersion1.id);
-        // Expect issues to be zero.
-        assertThat("Issues count for clean BART feed is zero", validation.issues.size(), equalTo(0));
+        // Expect one issue (incomplete directions.txt).
+        assertThat("Clean BART feed with incomplete directions.txt results in one issue.", validation.issues.size(), equalTo(1));
     }
 
     @Test
@@ -82,8 +82,8 @@ void canValidateGtfsPlusWithQuotedValues() throws Exception {
         GtfsPlusValidation validation = GtfsPlusValidation.validate(bartVersion1WithQuotedValues.id);
-        // Expect issues to be zero.
+        // Expect one issue (as above).
         assertThat(
-            "Issues count for clean BART feed (quoted values) is zero",
-            validation.issues.size(), equalTo(0)
+            "Issues count for clean BART feed (quoted values) is equal to 1 (as above)",
+            validation.issues.size(), equalTo(1)
         );
     }
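Both fixture feeds now produce exactly one issue because their bundled `directions.txt` does not cover every route in `routes.txt`. The new completeness rule can also be exercised in isolation against the hypothetical helper sketched after the `GtfsPlusValidation` diff above:

```java
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;

import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.Test;

/** Sketch: unit test for the check-off helper (hypothetical class from the earlier sketch). */
class DirectionsCoverageCheckTest {
    @Test
    void flagsRoutesMissingFromDirections() {
        Set<String> routeIds = Set.of("route-1", "route-2");
        List<String> directionRouteIds = List.of("route-1");
        assertThat(
            DirectionsCoverageCheck.findRoutesWithoutDirections(routeIds, directionRouteIds),
            containsInAnyOrder("route-2")
        );
    }
}
```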