From 16c5b647b221e01d284cc959931117d67b887ed4 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Thu, 28 Sep 2023 10:25:24 -0400
Subject: [PATCH 01/22] feat(JdbcTableWriter): add directions validation

---
 .../manager/gtfsplus/GtfsPlusValidation.java  | 22 +++++++++++++++++--
 .../gtfsplus/GtfsPlusValidationTest.java      |  6 ++---
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
index c0b76d959..d80b733f4 100644
--- a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
+++ b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
@@ -21,8 +21,10 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Enumeration;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
@@ -122,6 +124,13 @@ private static void validateTable(
         GTFSFeed gtfsFeed
     ) throws IOException {
         String tableId = specTable.get("id").asText();
+        boolean tableIsDirections = tableId.equals("directions");
+
+        Set<String> gtfsRoutes = new HashSet<>();
+        if (tableIsDirections) {
+            // Copy the gtfs routes into a map we can "check them off" in (remove them). Stream is required in order to copy keys.
+            gtfsRoutes.addAll(gtfsFeed.routes.keySet());
+        }
 
         // Read in table data from input stream.
         CsvReader csvReader = new CsvReader(inputStreamToValidate, ',', StandardCharsets.UTF_8);
@@ -167,15 +176,20 @@ private static void validateTable(
             // Validate each value in row. Note: we iterate over the fields and not values because a row may be missing
             // columns, but we still want to validate that missing value (e.g., if it is missing a required field).
             for (int f = 0; f < fieldsFound.length; f++) {
+                JsonNode specField = fieldsFound[f];
                 // If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception.
                 String val = f < recordColumnCount ? rowValues[f] : null;
-                validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, fieldsFound[f], gtfsFeed);
+                validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, specField, gtfsFeed, gtfsRoutes, tableIsDirections);
             }
         }
         rowIndex++;
     }
     csvReader.close();
+    if (tableIsDirections && !gtfsRoutes.isEmpty()) {
+        // After we're done validating all the table values, check if every route was checked off in directions.txt
+        issues.add(new ValidationIssue(tableId, "route_id", -1, "Directions table does not define direction names for all routes."));
+    }
 
     // Add issues for wrong number of columns and for empty rows after processing all rows.
     // Note: We considered adding an issue for each row, but opted for the single error approach because there's no
     // concept of a row-level issue in the UI right now. So we would potentially need to add that to the UI
@@ -205,7 +219,9 @@ private static void validateTableValue(
         String value,
         JsonNode[] specFieldsFound,
         JsonNode specField,
-        GTFSFeed gtfsFeed
+        GTFSFeed gtfsFeed,
+        Set<String> gtfsRoutes,
+        boolean tableIsDirections
     ) {
         if (specField == null) return;
         String fieldName = specField.get("name").asText();
@@ -300,6 +316,8 @@ private static void validateTableValue(
                 break;
         }
+        // "Check off" the route_id in directions.txt from the list to verify every route id has a direction
+        if (tableIsDirections && fieldName.equals("route_id")) gtfsRoutes.remove(value);
     }
 
     /** Construct missing ID text for validation issue description. */
diff --git a/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java b/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java
index ac24c29bb..b88813480 100644
--- a/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java
+++ b/src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java
@@ -73,7 +73,7 @@ void canValidateCleanGtfsPlus() throws Exception {
         LOG.info("Validation BART GTFS+");
         GtfsPlusValidation validation = GtfsPlusValidation.validate(bartVersion1.id);
         // Expect issues to be zero.
-        assertThat("Issues count for clean BART feed is zero", validation.issues.size(), equalTo(0));
+        assertThat("Clean BART feed and incomplete directions.txt results in one issue.", validation.issues.size(), equalTo(1));
     }
 
     @Test
@@ -82,8 +82,8 @@ void canValidateGtfsPlusWithQuotedValues() throws Exception {
         GtfsPlusValidation validation = GtfsPlusValidation.validate(bartVersion1WithQuotedValues.id);
         // Expect issues to be zero.
         assertThat(
-            "Issues count for clean BART feed (quoted values) is zero",
-            validation.issues.size(), equalTo(0)
+            "Issues count for clean BART feed (quoted values) is equal to 1 (as above)",
+            validation.issues.size(), equalTo(1)
         );
     }

From caed604efac66fafb62d6984b421b19e3213afe7 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Thu, 28 Sep 2023 10:32:40 -0400
Subject: [PATCH 02/22] refactor(GtfsPlusValidation): Update wording

---
 .../conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
index d80b733f4..c71f5d2f2 100644
--- a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
+++ b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
@@ -128,7 +128,7 @@ private static void validateTable(
 
         Set<String> gtfsRoutes = new HashSet<>();
         if (tableIsDirections) {
-            // Copy the gtfs routes into a map we can "check them off" in (remove them). Stream is required in order to copy keys.
+            // Copy the gtfs routes into a set so that we can "check them off" (remove them).
             gtfsRoutes.addAll(gtfsFeed.routes.keySet());
         }

From 01e8aa53e97d971def08b0e3c7bbaf4d1a1b1365 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Thu, 12 Oct 2023 15:28:27 -0400
Subject: [PATCH 03/22] feat(Directions Validation): update validation language

---
 .../conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
index c71f5d2f2..d1ed60087 100644
--- a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
+++ b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
@@ -188,7 +188,7 @@ private static void validateTable(
 
         if (tableIsDirections && !gtfsRoutes.isEmpty()) {
             // After we're done validating all the table values, check if every route was checked off in directions.txt
-            issues.add(new ValidationIssue(tableId, "route_id", -1, "Directions table does not define direction names for all routes."));
+            issues.add(new ValidationIssue(tableId, null, -1, "Directions file doesn't define directions for all routes listed in routes file"));
         }
         // Add issues for wrong number of columns and for empty rows after processing all rows.

From 68cec40c0258cdfbe46521a5aa79f5af8425f5df Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Fri, 13 Oct 2023 11:35:47 -0400
Subject: [PATCH 04/22] refactor(Directions Validation): update validation language

---
 .../conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
index d1ed60087..55504120c 100644
--- a/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
+++ b/src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java
@@ -188,7 +188,7 @@ private static void validateTable(
 
         if (tableIsDirections && !gtfsRoutes.isEmpty()) {
             // After we're done validating all the table values, check if every route was checked off in directions.txt
-            issues.add(new ValidationIssue(tableId, null, -1, "Directions file doesn't define directions for all routes listed in routes file"));
+            issues.add(new ValidationIssue(tableId, null, -1, "Directions file doesn't define directions for all routes listed in routes.txt"));
         }
         // Add issues for wrong number of columns and for empty rows after processing all rows.
        // Note: We considered adding an issue for each row, but opted for the single error approach because there's no

From f465c893f4fd317203b50b0c179e5228b967deb8 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Fri, 13 Oct 2023 16:42:44 -0400
Subject: [PATCH 05/22] fix(FeedUpdater): allow mongo query to use disk space

---
 .../java/com/conveyal/datatools/manager/jobs/FeedUpdater.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java b/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java
index 89ac448a3..a455e5ce1 100644
--- a/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java
+++ b/src/main/java/com/conveyal/datatools/manager/jobs/FeedUpdater.java
@@ -384,6 +384,7 @@ static Map<String, FeedVersion> getLatestVersionsSentForPublishing(Collection<
+            .allowDiskUse(true)
             .into(new ArrayList<>())
             .stream()
             .collect(Collectors.toMap(v -> v.feedSourceId, Function.identity()));

From 9f37b66ad3b1eebc899e56504995cea723007b84 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Wed, 15 Nov 2023 21:44:30 -0500
Subject: [PATCH 06/22] feat(build-docker): add additional branch to docker build

---
 .github/workflows/build-docker.yml | 42 ++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 .github/workflows/build-docker.yml

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
new file mode 100644
index 000000000..a1c19b63f
--- /dev/null
+++ b/.github/workflows/build-docker.yml
@@ -0,0 +1,42 @@
+name: Create and publish a Docker image
+# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images
+
+on:
+  push:
+    branches: ['dev', 'master', 'dev-flex', 'mtc-deploy']
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file

From 64750413ab9be3d6c7e1e7ad44fd107164a312a2 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Thu, 16 Nov 2023 10:02:59 -0500
Subject: [PATCH 07/22] refactor(mtc docker): add Dockerfile, docker compose example

---
 Dockerfile         | 18 ++++++++++++++++++
 docker-compose.yml | 26 ++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..a64eddcd4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,18 @@
+# syntax=docker/dockerfile:1
+FROM maven:3.8.7-openjdk-18-slim
+
+COPY . /datatools
+
+# Build jar
+WORKDIR /datatools
+RUN mvn package -DskipTests
+RUN cp target/dt*.jar /datatools/
+RUN mv dt*.jar datatools-server.jar
+
+RUN mkdir -p /var/datatools_gtfs/gtfsplus
+
+# Launch server
+# This relies on a configuration volume and aws volume being present. See `docker-compose.yml`, or the example below
+# Try: docker run --publish 4000:4000 -v ~/config/:/config datatools-latest
+CMD ["java", "-XX:MaxRAMPercentage=95", "-jar", "datatools-server.jar", "/config/env.yml", "/config/server.yml"]
+EXPOSE 4000
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..cc847068d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.8'
+services:
+  datatools-server:
+    build: ./
+    ports:
+      - "4000:4000"
+    volumes:
+      - type: bind
+        source: ./configurations/default/
+        target: /config
+      - type: bind
+        source: ~/.aws
+        target: /root/.aws
+    depends_on:
+      - mongo
+      - postgres
+  mongo:
+    image: mongo
+    restart: always
+  postgres:
+    environment:
+      POSTGRES_HOST_AUTH_METHOD: trust
+      POSTGRES_USER: root
+      POSTGRES_DB: dmtest
+    image: postgres
+    restart: always
\ No newline at end of file

From 02f15ccd6038e248e0b271014b8d889b6109f172 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Thu, 16 Nov 2023 19:02:58 -0500
Subject: [PATCH 08/22] refactor(mtc docker): add comment

---
 .github/workflows/build-docker.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
index a1c19b63f..a3ebe75cb 100644
--- a/.github/workflows/build-docker.yml
+++ b/.github/workflows/build-docker.yml
@@ -21,6 +21,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v3
 
       - name: Log in to the Container registry
+        # Maintain specific commit hash for stability
         uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
         with:
           registry: ${{ env.REGISTRY }}

From 562de6a737d0d5db76feac02b31559d6b8b28294 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Fri, 24 Nov 2023 11:44:24 -0500
Subject: [PATCH 09/22] fix(compilation): fix compile error with transformations

---
 .../models/transform/NormalizeFieldTransformation.java          | 8 ++++----
 .../transform/ReplaceFileFromStringTransformation.java          | 2 +-
 .../transform/ReplaceFileFromVersionTransformation.java         | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
index 7e9cd390d..d9a4c146f 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
@@ -234,10 +234,10 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
 
         // Copy csv input stream into the zip file, replacing the existing file.
         try (
-            // Modify target zip file that we just read.
-            FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, null);
-            // Stream for file copy operation.
-            InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
+                // Modify target zip file that we just read.
+                FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
+                // Stream for file copy operation.
+                InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
         ) {
             Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
             Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
index 5673d0c05..2ac87e569 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
@@ -42,7 +42,7 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
         String tableName = table + ".txt";
         // Run the replace transformation
         Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
-        try( FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, null) ){
+        try( FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null) ){
             // Convert csv data to input stream.
             InputStream inputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
             Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java
index e78d21f72..53963fb16 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromVersionTransformation.java
@@ -45,12 +45,12 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
         String tableName = table + ".txt";
         // Run the replace transformation
         Path sourceZipPath = Paths.get(sourceVersion.retrieveGtfsFile().getAbsolutePath());
-        try (FileSystem sourceZipFs = FileSystems.newFileSystem(sourceZipPath, null)) {
+        try (FileSystem sourceZipFs = FileSystems.newFileSystem(sourceZipPath, (ClassLoader) null)) {
             // If the source txt file does not exist, NoSuchFileException will be thrown and caught below.
             Path sourceTxtFilePath = getTablePathInZip(tableName, sourceZipFs);
             Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
             LOG.info("Replacing file {} in zip file {} with source {}", tableName, targetZipPath.getFileName(), sourceVersion.id);
-            try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, null)) {
+            try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null)) {
                 Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
                 // Set transform type according to whether target file exists.
                 TransformType type = Files.exists(targetTxtFilePath)

From 8de4e3d857f0ce951a790179c10f2f097a290c5d Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Fri, 24 Nov 2023 13:36:37 -0500
Subject: [PATCH 10/22] refactor(NormalizeFieldTransformation): fix formatting

---
 .../models/transform/NormalizeFieldTransformation.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
index d9a4c146f..783828c6c 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
@@ -234,10 +234,10 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
 
         // Copy csv input stream into the zip file, replacing the existing file.
         try (
-                // Modify target zip file that we just read.
-                FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
-                // Stream for file copy operation.
-                InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
+            // Modify target zip file that we just read.
+            FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
+            // Stream for file copy operation.
+            InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
         ) {
             Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
             Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);

From 2b2bef955d6586c7146d963619158ac6a06c290c Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Fri, 1 Dec 2023 15:41:42 -0500
Subject: [PATCH 11/22] refactor(Dockerfile): make gtfsplus dir (pre-approved)

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index a64eddcd4..1291686be 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,6 +10,7 @@ RUN cp target/dt*.jar /datatools/
 RUN mv dt*.jar datatools-server.jar
 
 RUN mkdir -p /var/datatools_gtfs/gtfsplus
+RUN mkdir -p /var/gtfs/manager/gtfs/gtfsplus
 
 # Launch server
 # This relies on a configuration volume and aws volume being present. See `docker-compose.yml`, or the example below

From 8dccaf75e2b2ad9abe5e1b8152972a2d38bc2fae Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Tue, 4 Jun 2024 19:11:45 -0400
Subject: [PATCH 12/22] fix(ProcessSingleFeedJob): Fail job if transform job failed.

---
 .../datatools/manager/jobs/ProcessSingleFeedJob.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java b/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java
index 978e50162..904d9a616 100644
--- a/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java
+++ b/src/main/java/com/conveyal/datatools/manager/jobs/ProcessSingleFeedJob.java
@@ -97,7 +97,10 @@ public void jobLogic() {
             // Run transform job in line so we can monitor the error status before load/validate begins.
             zipTransform.run();
             // Short circuit the feed load/validate if a pre-load transform fails.
-            if (zipTransform.status.error) return;
+            if (zipTransform.status.error) {
+                status.fail("Feed transformation failed, see details below.");
+                return;
+            }
         }
         // Assign transform result from zip target.
         feedVersion.feedTransformResult = zipTarget.feedTransformResult;

From 541ebdfb533765274b818faf62ce17b3a9210478 Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Wed, 5 Jun 2024 18:02:09 -0400
Subject: [PATCH 13/22] refactor(NormalizeFieldTr...): Create messages for specific conditions. Extract some methods.

---
 .../NormalizeFieldTransformation.java | 117 +++++++++++-------
 1 file changed, 70 insertions(+), 47 deletions(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
index 783828c6c..ee705cf9c 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
@@ -15,6 +15,7 @@
 import org.supercsv.prefs.CsvPreference;
 
 import java.io.ByteArrayInputStream;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
@@ -143,7 +144,7 @@ private void initializeCapitalizeSubstitutions() {
     public void validateParameters(MonitorableJob.Status status) {
         // fieldName must not be null
         if (fieldName == null) {
-            status.fail("Field name must not be null");
+            status.fail("'Normalize Field' Transformation failed because the field name parameter is not set.");
             return;
         }
 
@@ -193,58 +194,26 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
         Files.copy(originalZipPath, tempZipPath, StandardCopyOption.REPLACE_EXISTING);
 
         Table gtfsTable = GtfsUtils.getGtfsTable(table);
+        if (gtfsTable == null) {
+            status.fail(String.format("Unsupported GTFS file '%s'", tableName));
+            return;
+        }
         CsvReader csvReader = gtfsTable.getCsvReader(new ZipFile(tempZipPath.toAbsolutePath().toString()), null);
+        if (csvReader == null) {
+            status.fail(String.format("'Normalize Field' failed because file '%s' was not found in the GTFS archive", tableName));
+            return;
+        }
         final String[] headers = csvReader.getHeaders();
         Field[] fieldsFoundInZip = gtfsTable.getFieldsFromFieldHeaders(headers, null);
         int transformFieldIndex = getFieldIndex(fieldsFoundInZip, fieldName);
+        if (transformFieldIndex == -1) {
+            status.fail(String.format("'Normalize Field' failed because field '%s' was not found in file '%s' in the GTFS archive", fieldName, tableName));
+            return;
+        }
 
-        int modifiedRowCount = 0;
-
-        // Write headers and processed CSV rows.
-        writer.write(headers);
-        while (csvReader.readRecord()) {
-            String originalValue = csvReader.get(transformFieldIndex);
-            String transformedValue = originalValue;
-
-            // Convert to title case, if requested.
-            if (capitalize) {
-                if (capitalizationStyle == CapitalizationStyle.TITLE_CASE) {
-                    transformedValue = convertToTitleCase(transformedValue);
-                }
-                // TODO: Implement other capitalization styles.
-            }
-
-            // Perform substitutions if any.
-            transformedValue = performSubstitutions(transformedValue);
-
-            // Re-assemble the CSV line and place in buffer.
-            String[] csvValues = csvReader.getValues();
-            csvValues[transformFieldIndex] = transformedValue;
-
-            // Write line to table (plus new line char).
-            writer.write(csvValues);
-
-            // Count number of CSV rows changed.
-            if (!originalValue.equals(transformedValue)) {
-                modifiedRowCount++;
-            }
-        } // End of iteration over each row.
-        csvReader.close();
-        writer.flush();
-
-        // Copy csv input stream into the zip file, replacing the existing file.
-        try (
-            // Modify target zip file that we just read.
-            FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
-            // Stream for file copy operation.
-            InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
-        ) {
-            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
-            Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
-            zipTarget.feedTransformResult.tableTransformResults.add(
-                new TableTransformResult(tableName, 0, modifiedRowCount, 0)
-            );
-        }
+        int modifiedRowCount = generateCsvContent(writer, headers, csvReader, transformFieldIndex);
+
+        writeCsvContent(zipTarget, tempZipPath, stringWriter, tableName, modifiedRowCount);
 
         // Replace original zip file with temporary working zip file.
         // (This should also trigger a system IO update event, so subsequent IO calls pick up the correct file.
@@ -255,6 +224,60 @@
     }
 
+    /** Write csv input stream into the zip file, replacing the existing file. */
+    private void writeCsvContent(FeedTransformZipTarget zipTarget, Path tempZipPath, StringWriter stringWriter, String tableName, int modifiedRowCount) throws IOException {
+        try (
+            // Modify target zip file that we just read.
+            FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
+            // Stream for file copy operation.
+            InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
+        ) {
+            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
+            Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
+            zipTarget.feedTransformResult.tableTransformResults.add(
+                new TableTransformResult(tableName, 0, modifiedRowCount, 0)
+            );
+        }
+    }
+
+    /** Generates content for the GTFS table, returns the number of rows modified. */
+    private int generateCsvContent(CsvListWriter writer, String[] headers, CsvReader csvReader, int transformFieldIndex) throws IOException {
+        int modifiedRowCount = 0;
+
+        // Write headers and processed CSV rows.
+        writer.write(headers);
+        while (csvReader.readRecord()) {
+            String originalValue = csvReader.get(transformFieldIndex);
+            String transformedValue = originalValue;
+
+            // Convert to title case, if requested.
+            if (capitalize) {
+                if (capitalizationStyle == CapitalizationStyle.TITLE_CASE) {
+                    transformedValue = convertToTitleCase(transformedValue);
+                }
+                // TODO: Implement other capitalization styles.
+            }
+
+            // Perform substitutions if any.
+            transformedValue = performSubstitutions(transformedValue);
+
+            // Re-assemble the CSV line and place in buffer.
+            String[] csvValues = csvReader.getValues();
+            csvValues[transformFieldIndex] = transformedValue;
+
+            // Write line to table (plus new line char).
+            writer.write(csvValues);
+
+            // Count number of CSV rows changed.
+            if (!originalValue.equals(transformedValue)) {
+                modifiedRowCount++;
+            }
+        } // End of iteration over each row.
+        csvReader.close();
+        writer.flush();
+        return modifiedRowCount;
+    }
+
     /**
      * Converts the provided string to Title Case, accommodating for capitalization exceptions
      * and separator characters that may be immediately precede

From 6478799f589cb890609e632b3fbee4d4cb3fd89c Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Thu, 6 Jun 2024 09:13:43 -0400
Subject: [PATCH 14/22] chore(gh/maven): Update CI node to 20.

---
 .github/workflows/maven.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 3054ca7d5..3c88a3e2a 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -28,11 +28,11 @@ jobs:
         uses: actions/setup-java@v1
         with:
           java-version: 1.8
-      # Install node 14 for running e2e tests (and for maven-semantic-release).
-      - name: Use Node.js 18.x
+      # Install node for running e2e tests (and for maven-semantic-release).
+      - name: Use Node.js 20.x
         uses: actions/setup-node@v1
         with:
-          node-version: 18.x
+          node-version: 20.x
       - name: Start MongoDB
         uses: supercharge/mongodb-github-action@1.3.0
         with:

From ab28543fe8fc68b6d5f9e9918c3125e4afcacef5 Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Thu, 6 Jun 2024 09:41:51 -0400
Subject: [PATCH 15/22] chore(gh/maven): Force CI node to 20.14

---
 .github/workflows/maven.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 3c88a3e2a..4364c8578 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -32,7 +32,7 @@ jobs:
       - name: Use Node.js 20.x
         uses: actions/setup-node@v1
         with:
-          node-version: 20.x
+          node-version: 20.14
       - name: Start MongoDB
         uses: supercharge/mongodb-github-action@1.3.0
         with:

From d3af102128003669e0e558fb9c49b3b960b21c15 Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Thu, 6 Jun 2024 09:50:24 -0400
Subject: [PATCH 16/22] Revert "chore(gh/maven): Force CI node to 20.14"

This reverts commit ab28543fe8fc68b6d5f9e9918c3125e4afcacef5.

---
 .github/workflows/maven.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 4364c8578..3c88a3e2a 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -32,7 +32,7 @@ jobs:
       - name: Use Node.js 20.x
         uses: actions/setup-node@v1
         with:
-          node-version: 20.14
+          node-version: 20.x
       - name: Start MongoDB
         uses: supercharge/mongodb-github-action@1.3.0
         with:

From f384d6de51919b84922e452fb3efde15aded0b92 Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Thu, 6 Jun 2024 10:28:08 -0400
Subject: [PATCH 17/22] chore(gh/maven): Add node 20 setup before running semantic-release.

---
 .github/workflows/maven.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 3c88a3e2a..494aeccb1 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -97,6 +97,10 @@ jobs:
       # Run maven-semantic-release to potentially create a new release of datatools-server. The flag --skip-maven-deploy is
       # used to avoid deploying to maven central. So essentially, this just creates a release with a changelog on github.
+      - name: Use Node.js 20.x
+        uses: actions/setup-node@v1
+        with:
+          node-version: 20.x
       - name: Run maven-semantic-release
         env:
           GH_TOKEN: ${{ secrets.GH_TOKEN }}

From 9826e978ce0904580ebf5e8dc485a8b0d3226d49 Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Thu, 6 Jun 2024 16:04:57 -0400
Subject: [PATCH 18/22] fix(NormalizeFieldTr...): Handle ZIP exceptions with specific error message.

---
 .../models/transform/NormalizeFieldTransformation.java | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
index ee705cf9c..6175488d3 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
@@ -28,6 +28,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.stream.Collectors;
+import java.util.zip.ZipException;
 import java.util.zip.ZipFile;
 
 import static com.conveyal.datatools.manager.DataManager.getConfigProperty;
@@ -219,6 +220,11 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
         // (This should also trigger a system IO update event, so subsequent IO calls pick up the correct file.
         Files.move(tempZipPath, originalZipPath, StandardCopyOption.REPLACE_EXISTING);
         LOG.info("Field normalization transformation successful, {} row(s) changed.", modifiedRowCount);
+    } catch (ZipException ze) {
+        status.fail(
+            String.format("'Normalize Field' failed because the GTFS archive is corrupted (%s).", ze.getMessage()),
+            ze
+        );
     } catch (Exception e) {
         status.fail("Unknown error encountered while transforming zip file", e);
     }

From c50f9a66d66d4304addbbf0b59c3b154cbb732f5 Mon Sep 17 00:00:00 2001
From: binh-dam-ibigroup <56846598+binh-dam-ibigroup@users.noreply.github.com>
Date: Fri, 7 Jun 2024 13:36:49 -0400
Subject: [PATCH 19/22] style(NormalizeFieldTr...): Wrap writeCsvContent declaration.

---
 .../models/transform/NormalizeFieldTransformation.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
index 6175488d3..118b14392 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/NormalizeFieldTransformation.java
@@ -231,7 +231,13 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
     }
 
     /** Write csv input stream into the zip file, replacing the existing file. */
-    private void writeCsvContent(FeedTransformZipTarget zipTarget, Path tempZipPath, StringWriter stringWriter, String tableName, int modifiedRowCount) throws IOException {
+    private void writeCsvContent(
+        FeedTransformZipTarget zipTarget,
+        Path tempZipPath,
+        StringWriter stringWriter,
+        String tableName,
+        int modifiedRowCount
+    ) throws IOException {
         try (
             // Modify target zip file that we just read.
             FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
             // Stream for file copy operation.
             InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
         ) {
             Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
             Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
             zipTarget.feedTransformResult.tableTransformResults.add(
                 new TableTransformResult(tableName, 0, modifiedRowCount, 0)
             );
         }

From 3728283ef3b8f42ec65150f7769d9ed9922c0021 Mon Sep 17 00:00:00 2001
From: josh-willis-arcadis <168561922+josh-willis-arcadis@users.noreply.github.com>
Date: Wed, 20 Nov 2024 16:19:11 -0600
Subject: [PATCH 20/22] refactor(pom.xml): update gtfs lib version and group id to ibi-group repo

---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index f9b95a751..d1be81a91 100644
--- a/pom.xml
+++ b/pom.xml
@@ -259,10 +259,10 @@
       - AWS S3 SDK - putting/getting objects into/out of S3. -->
     <dependency>
-      <groupId>com.github.conveyal</groupId>
+      <groupId>com.github.ibi-group</groupId>
       <artifactId>gtfs-lib</artifactId>
-      <version>bdb76ee</version>
+      <version>9fbdaa9c97acaccdd793e5f7e53da0243290f9b7</version>
     </dependency>

From 4fefa1633afbf293437f48b17f5c7114c7ad1cec Mon Sep 17 00:00:00 2001
From: miles-grant-ibi
Date: Tue, 26 Nov 2024 14:32:19 -0500
Subject: [PATCH 21/22] chore(deps): update gtfs-lib

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index d1be81a91..e9d79f776 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,7 +262,7 @@
     <dependency>
       <groupId>com.github.ibi-group</groupId>
       <artifactId>gtfs-lib</artifactId>
-      <version>9fbdaa9c97acaccdd793e5f7e53da0243290f9b7</version>
+      <version>29827a0441a5d0c7e50583a63e0df12b8216b7bd</version>
     </dependency>

From b04386e718320d83527e4fb5baee9092bc27d957 Mon Sep 17 00:00:00 2001
From: miles-grant-ibi
Date: Tue, 26 Nov 2024 14:55:50 -0500
Subject: [PATCH 22/22] chore(deps): Switch to branch based version definition

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index e9d79f776..330baf572 100644
--- a/pom.xml
+++ b/pom.xml
@@ -262,7 +262,7 @@
     <dependency>
      <groupId>com.github.ibi-group</groupId>
      <artifactId>gtfs-lib</artifactId>
-      <version>29827a0441a5d0c7e50583a63e0df12b8216b7bd</version>
+      <version>mtc-temp-merge-hack-SNAPSHOT</version>
    </dependency>
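
Appendix: two illustrative sketches for the patches above. Both are hypothetical, self-contained examples written for this series, not code from the repository.

The (ClassLoader) null casts that PATCH 09 introduces (and PATCHES 10, 13, and 19 carry forward) work around a JDK overload ambiguity: FileSystems.newFileSystem(Path, ClassLoader) has existed since Java 7, but JDK 13 added newFileSystem(Path, Map<String, ?>), so a bare null argument no longer compiles on newer JDKs. A minimal sketch of the idiom; the example-gtfs.zip path and the ZipFileSystemSketch class name are assumptions for illustration:

    import java.io.IOException;
    import java.nio.file.FileSystem;
    import java.nio.file.FileSystems;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class ZipFileSystemSketch {
        public static void main(String[] args) throws IOException {
            Path zipPath = Paths.get("example-gtfs.zip"); // hypothetical archive path
            // Casting null to ClassLoader pins the (Path, ClassLoader) overload that
            // pre-JDK-13 code resolved implicitly, so the call compiles on JDK 13+.
            try (FileSystem zipFs = FileSystems.newFileSystem(zipPath, (ClassLoader) null)) {
                Path routes = zipFs.getPath("routes.txt");
                System.out.println("routes.txt present: " + Files.exists(routes));
            }
        }
    }

PATCHES 01-04 validate the GTFS+ directions table by copying every route_id from routes.txt into a set and "checking off" (removing) each route_id encountered while scanning directions.txt; any IDs left over produce a single summary validation issue. The same pattern in isolation, with hypothetical route IDs (DirectionsCheckOffSketch is an illustrative name, not repository code):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class DirectionsCheckOffSketch {
        public static void main(String[] args) {
            // Route IDs defined in routes.txt (hypothetical values).
            Set<String> gtfsRoutes = new HashSet<>(Arrays.asList("RED", "BLUE", "GREEN"));
            // route_id values seen while scanning directions.txt rows.
            List<String> directionsRouteIds = Arrays.asList("RED", "BLUE");
            // "Check off" (remove) each route that directions.txt covers.
            for (String routeId : directionsRouteIds) {
                gtfsRoutes.remove(routeId);
            }
            // Leftover routes have no direction names defined, so emit one summary issue.
            if (!gtfsRoutes.isEmpty()) {
                System.out.println("Directions file doesn't define directions for routes: " + gtfsRoutes);
            }
        }
    }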