Fold MTC deploy back to dev #589

Draft: wants to merge 30 commits into base: dev
Commits (30)
7d0e61b
Merge pull request #528 from ibi-group/mtc-delete-publishedversion
binh-dam-ibigroup May 5, 2023
16c5b64
feat(JdbcTableWriter): add directions validation
philip-cline Sep 28, 2023
caed604
refactor(GtfsPlusValidation): Update wording
philip-cline Sep 28, 2023
8f77fc3
Merge pull request #562 from ibi-group/add-mtc-directions-validation
philip-cline Oct 2, 2023
01e8aa5
feat(Directions Validation): update validation language
philip-cline Oct 12, 2023
68cec40
refactor(Directions Validation): update validation language
philip-cline Oct 13, 2023
2e4ad4f
Merge pull request #568 from ibi-group/update-directions-validation
philip-cline Oct 13, 2023
f465c89
fix(FeedUpdater): allow mongo query to use disk space
philip-cline Oct 13, 2023
9027b28
Merge pull request #570 from ibi-group/allow-disk-space-for-mongo-query
philip-cline Oct 16, 2023
9f37b66
feat(build-docker): add additional branch to docker build
philip-cline Nov 16, 2023
6475041
refactor(mtc docker): add Dockerfile, docker compose example
philip-cline Nov 16, 2023
02f15cc
refactor(mtc docker): add comment
philip-cline Nov 17, 2023
5a4cb5c
Merge pull request #576 from ibi-group/mtc-add-docker-build-branch
philip-cline Nov 17, 2023
562de6a
fix(compilation): fix compile error with transformations
philip-cline Nov 24, 2023
8de4e3d
refactor(NormalizeFieldTransformation): fix formatting
philip-cline Nov 24, 2023
86549b4
Merge pull request #578 from ibi-group/fix-transformation-compile-err…
philip-cline Nov 24, 2023
2b2bef9
refactor(Dockerfile): make gtfsplus dir (pre-approved)
philip-cline Dec 1, 2023
8dccaf7
fix(ProcessSingleFeedJob): Fail job if transform job failed.
binh-dam-ibigroup Jun 4, 2024
541ebdf
refactor(NormalizeFieldTr...): Create messages for specific condition…
binh-dam-ibigroup Jun 5, 2024
6478799
chore(gh/maven): Update CI node to 20.
binh-dam-ibigroup Jun 6, 2024
ab28543
chore(gh/maven): Force CI node to 20.14
binh-dam-ibigroup Jun 6, 2024
d3af102
Revert "chore(gh/maven): Force CI node to 20.14"
binh-dam-ibigroup Jun 6, 2024
f384d6d
chore(gh/maven): Add node 20 setup before running semantic-release.
binh-dam-ibigroup Jun 6, 2024
9826e97
fix(NormalizeFieldTr...): Handle ZIP exceptions with specific error m…
binh-dam-ibigroup Jun 6, 2024
c50f9a6
style(NormalizeFieldTr...): Wrap writeCsvContent declaration.
binh-dam-ibigroup Jun 7, 2024
4528783
Merge pull request #598 from ibi-group/mtc-improve-transform-err-msgs
binh-dam-ibigroup Jun 21, 2024
3728283
refactor(pom.xml): update gtfs lib version and group id to ibi-group …
josh-willis-arcadis Nov 20, 2024
4fefa16
chore(deps): update gtfs-lib
miles-grant-ibigroup Nov 26, 2024
b04386e
chore(deps): Switch to branch based version definition
miles-grant-ibigroup Nov 26, 2024
1f13778
Merge pull request #602 from ibi-group/mtc-duplicate-arrival-time
josh-willis-arcadis Dec 2, 2024
43 changes: 43 additions & 0 deletions .github/workflows/build-docker.yml
@@ -0,0 +1,43 @@
name: Create and publish a Docker image
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images

on:
  push:
    branches: ['dev', 'master', 'dev-flex', 'mtc-deploy']

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Log in to the Container registry
        # Maintain specific commit hash for stability
        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      - name: Build and push Docker image
        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
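
Once this workflow runs on one of the listed branches, the image is published to GitHub Container Registry under the repository name. A minimal usage sketch, assuming the repository is ibi-group/datatools-server and that docker/metadata-action produced a branch-named tag such as dev (both the image name and the tag are assumptions, not shown in this diff):

# Pull the image published by the workflow above (image name and tag are assumed).
docker pull ghcr.io/ibi-group/datatools-server:dev
# Run it with a mounted configuration directory, mirroring the Dockerfile's CMD below.
docker run --publish 4000:4000 -v ~/config/:/config ghcr.io/ibi-group/datatools-server:dev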
10 changes: 7 additions & 3 deletions .github/workflows/maven.yml
@@ -28,11 +28,11 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: 1.8
# Install node 14 for running e2e tests (and for maven-semantic-release).
- name: Use Node.js 18.x
# Install node for running e2e tests (and for maven-semantic-release).
- name: Use Node.js 20.x
uses: actions/setup-node@v1
with:
node-version: 18.x
node-version: 20.x
- name: Start MongoDB
uses: supercharge/mongodb-github-action@…
with:
@@ -97,6 +97,10 @@ jobs:

# Run maven-semantic-release to potentially create a new release of datatools-server. The flag --skip-maven-deploy is
# used to avoid deploying to maven central. So essentially, this just creates a release with a changelog on github.
- name: Use Node.js 20.x
uses: actions/setup-node@v1
with:
node-version: 20.x
- name: Run maven-semantic-release
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}
19 changes: 19 additions & 0 deletions Dockerfile
@@ -0,0 +1,19 @@
# syntax=docker/dockerfile:1
FROM maven:3.8.7-openjdk-18-slim

COPY . /datatools

# Build jar
WORKDIR /datatools
RUN mvn package -DskipTests
RUN cp target/dt*.jar /datatools/
RUN mv dt*.jar datatools-server.jar

RUN mkdir -p /var/datatools_gtfs/gtfsplus
RUN mkdir -p /var/gtfs/manager/gtfs/gtfsplus

# Launch server
# This relies on a configuration volume and aws volume being present. See `docker-compose.yml`, or the example below
# Try: docker run --publish 4000:4000 -v ~/config/:/config datatools-latest
CMD ["java", "-XX:MaxRAMPercentage=95", "-jar", "datatools-server.jar", "/config/env.yml", "/config/server.yml"]
EXPOSE 4000
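
For local use outside the Compose setup, a minimal build-and-run sketch based on the comment above, assuming the mounted directory contains the env.yml and server.yml referenced by the CMD:

# Build the image from the repository root.
docker build -t datatools-latest .
# Run it, mounting a directory that holds env.yml and server.yml.
docker run --publish 4000:4000 -v ~/config/:/config datatools-latest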
26 changes: 26 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,26 @@
version: '3.8'
services:
  datatools-server:
    build: ./
    ports:
      - "4000:4000"
    volumes:
      - type: bind
        source: ./configurations/default/
        target: /config
      - type: bind
        source: ~/.aws
        target: /root/.aws
    depends_on:
      - mongo
      - postgres
  mongo:
    image: mongo
    restart: always
  postgres:
    environment:
      POSTGRES_HOST_AUTH_METHOD: trust
      POSTGRES_USER: root
      POSTGRES_DB: dmtest
    image: postgres
    restart: always
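
To bring up the full stack defined above (datatools-server plus MongoDB and PostgreSQL), a minimal sketch, assuming ./configurations/default/ contains env.yml and server.yml and that ~/.aws holds usable AWS credentials:

# Build the datatools-server image and start all three services.
docker compose up --build
# Or run detached and follow the server logs.
docker compose up --build -d
docker compose logs -f datatools-server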
4 changes: 2 additions & 2 deletions pom.xml
@@ -259,10 +259,10 @@
- AWS S3 SDK - putting/getting objects into/out of S3.
-->
<dependency>
<groupId>com.github.conveyal</groupId>
<groupId>com.github.ibi-group</groupId>
<artifactId>gtfs-lib</artifactId>
<!-- Latest dev build on jitpack.io -->
<version>bdb76ee</version>
<version>mtc-temp-merge-hack-SNAPSHOT</version>
<!-- Exclusions added in order to silence SLF4J warnings about multiple bindings:
http://www.slf4j.org/codes.html#multiple_bindings
-->
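The new version string uses JitPack's branch-SNAPSHOT convention: mtc-temp-merge-hack-SNAPSHOT resolves to the latest commit on the mtc-temp-merge-hack branch of the ibi-group/gtfs-lib fork each time Maven resolves the dependency, instead of pinning a single commit hash as the previous bdb76ee version did. This only works if the JitPack repository is declared in the pom; a minimal sketch of that declaration, presumably already present elsewhere in this file and therefore not part of the diff:

<repositories>
  <repository>
    <id>jitpack.io</id>
    <url>https://jitpack.io</url>
  </repository>
</repositories>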
GtfsPlusValidation.java
@@ -21,8 +21,10 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

@@ -122,6 +124,13 @@ private static void validateTable(
GTFSFeed gtfsFeed
) throws IOException {
String tableId = specTable.get("id").asText();
boolean tableIsDirections = tableId.equals("directions");

Set<String> gtfsRoutes = new HashSet<>();
if (tableIsDirections) {
// Copy the gtfs routes into a set so that we can "check them off" (remove them).
gtfsRoutes.addAll(gtfsFeed.routes.keySet());
}

// Read in table data from input stream.
CsvReader csvReader = new CsvReader(inputStreamToValidate, ',', StandardCharsets.UTF_8);
@@ -167,15 +176,20 @@ private static void validateTable(
// Validate each value in row. Note: we iterate over the fields and not values because a row may be missing
// columns, but we still want to validate that missing value (e.g., if it is missing a required field).
for (int f = 0; f < fieldsFound.length; f++) {
JsonNode specField = fieldsFound[f];
// If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception.
String val = f < recordColumnCount ? rowValues[f] : null;
validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, fieldsFound[f], gtfsFeed);
validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, specField, gtfsFeed, gtfsRoutes, tableIsDirections);
}
}
rowIndex++;
}
csvReader.close();

if (tableIsDirections && !gtfsRoutes.isEmpty()) {
// After we're done validating all the table values, check if every route was checked off in directions.txt
issues.add(new ValidationIssue(tableId, null, -1, "Directions file doesn't define directions for all routes listed in routes.txt"));
}
// Add issues for wrong number of columns and for empty rows after processing all rows.
// Note: We considered adding an issue for each row, but opted for the single error approach because there's no
// concept of a row-level issue in the UI right now. So we would potentially need to add that to the UI
Expand Down Expand Up @@ -205,7 +219,9 @@ private static void validateTableValue(
String value,
JsonNode[] specFieldsFound,
JsonNode specField,
GTFSFeed gtfsFeed
GTFSFeed gtfsFeed,
Set<String> gtfsRoutes,
boolean tableIsDirections
) {
if (specField == null) return;
String fieldName = specField.get("name").asText();
@@ -300,6 +316,8 @@ private static void validateTableValue(
break;
}

// "Check off" the route_id in directions.txt from the list to verify every route id has a direction
if (tableIsDirections && fieldName.equals("route_id")) gtfsRoutes.remove(value);
}

/** Construct missing ID text for validation issue description. */
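Taken together, the GtfsPlusValidation changes above implement a set-difference check: seed a set with every route_id from routes.txt, remove each route_id encountered while validating directions.txt, and flag whatever is left. A condensed, self-contained sketch of the same idea (class and method names are illustrative, not the actual datatools-server API):

import java.util.HashSet;
import java.util.List;
import java.util.Set;

class DirectionsCoverageCheck {
    /** Returns the route_ids from routes.txt that never appear in directions.txt. */
    static Set<String> findRoutesWithoutDirections(Set<String> routeIds, List<String> directionsRouteIds) {
        Set<String> remaining = new HashSet<>(routeIds);  // copy so the caller's set is untouched
        directionsRouteIds.forEach(remaining::remove);    // "check off" each route referenced in directions.txt
        return remaining;                                 // anything left over has no direction defined
    }
}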
FeedUpdater.java
@@ -384,6 +384,7 @@ static Map<String, FeedVersion> getLatestVersionsSentForPublishing(Collection<Fe
return Persistence.feedVersions
.getMongoCollection()
.aggregate(stages)
.allowDiskUse(true)
.into(new ArrayList<>())
.stream()
.collect(Collectors.toMap(v -> v.feedSourceId, Function.identity()));
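
The single added line lets MongoDB spill aggregation stages to disk instead of failing once a stage exceeds the in-memory limit (100 MB per stage by default). In the Java driver the option is set on the AggregateIterable before results are consumed; a generic sketch under those assumptions (not the actual FeedUpdater code):

import com.mongodb.client.AggregateIterable;
import com.mongodb.client.MongoCollection;
import org.bson.Document;
import org.bson.conversions.Bson;

import java.util.ArrayList;
import java.util.List;

class AggregationWithDiskUse {
    static List<Document> run(MongoCollection<Document> collection, List<Bson> stages) {
        AggregateIterable<Document> results = collection
            .aggregate(stages)
            .allowDiskUse(true); // permit large sort/group stages to use temporary files on disk
        return results.into(new ArrayList<>());
    }
}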
ProcessSingleFeedJob.java
@@ -97,7 +97,10 @@ public void jobLogic() {
// Run transform job in line so we can monitor the error status before load/validate begins.
zipTransform.run();
// Short circuit the feed load/validate if a pre-load transform fails.
if (zipTransform.status.error) return;
if (zipTransform.status.error) {
status.fail("Feed transformation failed, see details below.");
return;
}
}
// Assign transform result from zip target.
feedVersion.feedTransformResult = zipTarget.feedTransformResult;
NormalizeFieldTransformation.java
@@ -15,6 +15,7 @@
import org.supercsv.prefs.CsvPreference;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
@@ -27,6 +28,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

import static com.conveyal.datatools.manager.DataManager.getConfigProperty;
@@ -143,7 +145,7 @@ private void initializeCapitalizeSubstitutions() {
public void validateParameters(MonitorableJob.Status status) {
// fieldName must not be null
if (fieldName == null) {
status.fail("Field name must not be null");
status.fail("'Normalize Field' Transformation failed because the field name parameter is not set.");
return;
}

@@ -193,68 +195,101 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
Files.copy(originalZipPath, tempZipPath, StandardCopyOption.REPLACE_EXISTING);

Table gtfsTable = GtfsUtils.getGtfsTable(table);
if (gtfsTable == null) {
status.fail(String.format("Unsupported GTFS file '%s'", tableName));
return;
}
CsvReader csvReader = gtfsTable.getCsvReader(new ZipFile(tempZipPath.toAbsolutePath().toString()), null);
if (csvReader == null) {
status.fail(String.format("'Normalize Field' failed because file '%s' was not found in the GTFS archive", tableName));
return;
}
final String[] headers = csvReader.getHeaders();
Field[] fieldsFoundInZip = gtfsTable.getFieldsFromFieldHeaders(headers, null);
int transformFieldIndex = getFieldIndex(fieldsFoundInZip, fieldName);
if (transformFieldIndex == -1) {
status.fail(String.format("'Normalize Field' failed because field '%s' was not found in file '%s' in the GTFS archive", fieldName, tableName));
return;
}

int modifiedRowCount = 0;

// Write headers and processed CSV rows.
writer.write(headers);
while (csvReader.readRecord()) {
String originalValue = csvReader.get(transformFieldIndex);
String transformedValue = originalValue;

// Convert to title case, if requested.
if (capitalize) {
if (capitalizationStyle == CapitalizationStyle.TITLE_CASE) {
transformedValue = convertToTitleCase(transformedValue);
}
// TODO: Implement other capitalization styles.
}

// Perform substitutions if any.
transformedValue = performSubstitutions(transformedValue);

// Re-assemble the CSV line and place in buffer.
String[] csvValues = csvReader.getValues();
csvValues[transformFieldIndex] = transformedValue;

// Write line to table (plus new line char).
writer.write(csvValues);
int modifiedRowCount = generateCsvContent(writer, headers, csvReader, transformFieldIndex);

// Count number of CSV rows changed.
if (!originalValue.equals(transformedValue)) {
modifiedRowCount++;
}
} // End of iteration over each row.
csvReader.close();
writer.flush();

// Copy csv input stream into the zip file, replacing the existing file.
try (
// Modify target zip file that we just read.
FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, null);
// Stream for file copy operation.
InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
) {
Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
zipTarget.feedTransformResult.tableTransformResults.add(
new TableTransformResult(tableName, 0, modifiedRowCount, 0)
);
}
writeCsvContent(zipTarget, tempZipPath, stringWriter, tableName, modifiedRowCount);

// Replace original zip file with temporary working zip file.
// (This should also trigger a system IO update event, so subsequent IO calls pick up the correct file.)
Files.move(tempZipPath, originalZipPath, StandardCopyOption.REPLACE_EXISTING);
LOG.info("Field normalization transformation successful, {} row(s) changed.", modifiedRowCount);
} catch (ZipException ze) {
status.fail(
String.format("'Normalize Field' failed because the GTFS archive is corrupted (%s).", ze.getMessage()),
ze
);
} catch (Exception e) {
status.fail("Unknown error encountered while transforming zip file", e);
}
}

/** Write csv input stream into the zip file, replacing the existing file. */
private void writeCsvContent(
FeedTransformZipTarget zipTarget,
Path tempZipPath,
StringWriter stringWriter,
String tableName,
int modifiedRowCount
) throws IOException {
try (
// Modify target zip file that we just read.
FileSystem targetZipFs = FileSystems.newFileSystem(tempZipPath, (ClassLoader) null);
// Stream for file copy operation.
InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes(StandardCharsets.UTF_8))
) {
Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
zipTarget.feedTransformResult.tableTransformResults.add(
new TableTransformResult(tableName, 0, modifiedRowCount, 0)
);
}
}

/** Generates content for the GTFS table, returns the number of rows modified. */
private int generateCsvContent(CsvListWriter writer, String[] headers, CsvReader csvReader, int transformFieldIndex) throws IOException {
int modifiedRowCount = 0;

// Write headers and processed CSV rows.
writer.write(headers);
while (csvReader.readRecord()) {
String originalValue = csvReader.get(transformFieldIndex);
String transformedValue = originalValue;

// Convert to title case, if requested.
if (capitalize) {
if (capitalizationStyle == CapitalizationStyle.TITLE_CASE) {
transformedValue = convertToTitleCase(transformedValue);
}
// TODO: Implement other capitalization styles.
}

// Perform substitutions if any.
transformedValue = performSubstitutions(transformedValue);

// Re-assemble the CSV line and place in buffer.
String[] csvValues = csvReader.getValues();
csvValues[transformFieldIndex] = transformedValue;

// Write line to table (plus new line char).
writer.write(csvValues);

// Count number of CSV rows changed.
if (!originalValue.equals(transformedValue)) {
modifiedRowCount++;
}
} // End of iteration over each row.
csvReader.close();
writer.flush();
return modifiedRowCount;
}

/**
* Converts the provided string to Title Case, accommodating for capitalization exceptions
and separator characters that may immediately precede
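
The truncated method above performs the transformation's Title Case conversion. A rough sketch of the general idea only; the real implementation and its configurable capitalization exceptions are not shown in this diff, and the separator characters chosen here are illustrative:

class TitleCaseSketch {
    /**
     * Capitalize the first letter after each separator and lower-case everything else.
     * The actual transformation also honors a list of capitalization exceptions,
     * which this sketch deliberately omits.
     */
    static String toTitleCase(String input) {
        if (input == null || input.isEmpty()) return input;
        StringBuilder out = new StringBuilder(input.length());
        boolean startOfWord = true;
        for (char c : input.toCharArray()) {
            out.append(startOfWord ? Character.toUpperCase(c) : Character.toLowerCase(c));
            // Spaces, slashes, and hyphens restart capitalization for the next character.
            startOfWord = (c == ' ' || c == '/' || c == '-');
        }
        return out.toString();
    }
}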