Skip to content

Commit

Permalink
Merge pull request #595 from ibi-group/fix-append-transform-edge-cases
Browse files Browse the repository at this point in the history
Fix Append Transformation Edge Cases
  • Loading branch information
miles-grant-ibigroup authored May 15, 2024
2 parents ef7670f + bb5b2da commit 18dbbc3
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import com.conveyal.datatools.manager.models.TableTransformResult;
import com.conveyal.datatools.manager.models.TransformType;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -49,20 +51,40 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);

final File tempFile = File.createTempFile(tableName + "-temp", ".txt");
final File tempFileWithStrippedNewlines = File.createTempFile(tableName + "-temp-no-newlines", ".txt");
Files.copy(targetTxtFilePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);

// Append CSV data into the target file in the temporary copy of file
try (OutputStream os = new FileOutputStream(tempFile, true)) {
// Append a newline in case our data doesn't include one
// Having an extra newline is not a problem!
os.write(newLineStream.readAllBytes());
os.write(inputStream.readAllBytes());
os.flush();

} catch (Exception e) {
status.fail("Failed to write to target file", e);
}


// Re-write file without extra line breaks
try (
OutputStream noNewlineOs = new FileOutputStream(tempFileWithStrippedNewlines, false);
FileReader fr = new FileReader(tempFile);
BufferedReader br = new BufferedReader(fr);
) {
String line;
while ((line = br.readLine()) != null) {
if (line.matches("\n") || line.isEmpty()) {
continue;
}

noNewlineOs.write(line.getBytes());
noNewlineOs.write("\n".getBytes());
}
noNewlineOs.flush();
}

// Copy modified file into zip
Files.copy(tempFile.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
Files.copy(tempFileWithStrippedNewlines.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);

final int NEW_LINE_CHARACTER_CODE = 10;
int lineCount = (int) csvData.chars().filter(c -> c == NEW_LINE_CHARACTER_CODE).count();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,71 @@ void canAppendToStops() throws SQLException, IOException {
1
);
}


/**
 * Checks that an append transformation on the "stops" table still produces the expected
 * stop rows when the CSV data to append begins with a newline character.
 */
@Test
void canAppendToStopsWithLeadingNewlineInData() throws SQLException, IOException {
    // Build the source version from the fixture folder named for its trailing newlines.
    sourceVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-and-trailing-newlines")
    );

    // Attach an append rule for "stops" (three rows, leading newline) and persist the feed source.
    feedSource.transformRules.add(
        new FeedTransformRules(
            AppendToFileTransformation.create(generateStopRowWithLeadingNewline(), "stops")
        )
    );
    Persistence.feedSources.replace(feedSource.id, feedSource);

    // Creating the next version for the same feed source yields the version the assertions inspect.
    targetVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-dates")
    );

    LOG.info("Checking assertions.");

    // Five base rows plus the three appended ones; the base count is a magic number that
    // should match the row count of the stops.txt being transformed.
    final int expectedStopRows = 5 + 3;
    assertEquals(
        expectedStopRows,
        targetVersion.feedLoadResult.stops.rowCount,
        "stops.txt row count should equal input csv data # of rows + 3 extra rows"
    );

    // Exactly one record with the appended stop id "new" must exist in the loaded namespace.
    final String appendedStopCountSql = String.format(
        "SELECT count(*) FROM %s.stops WHERE stop_id = '%s'",
        targetVersion.namespace,
        "new"
    );
    assertThatSqlCountQueryYieldsExpectedCount(appendedStopCountSql, 1);
}
/**
 * Checks that an append transformation on the "stops" table still produces the expected
 * stop rows when the CSV data to append ends with a newline character.
 */
@Test
void canAppendToStopsWithTrailingNewlineInData() throws SQLException, IOException {
    // Build the source version from the fixture folder named for its trailing newlines.
    sourceVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-and-trailing-newlines")
    );

    // Attach an append rule for "stops" (three rows, trailing newline) and persist the feed source.
    feedSource.transformRules.add(
        new FeedTransformRules(
            AppendToFileTransformation.create(generateStopRowWithTrailingNewline(), "stops")
        )
    );
    Persistence.feedSources.replace(feedSource.id, feedSource);

    // Creating the next version for the same feed source yields the version the assertions inspect.
    targetVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-dates")
    );

    LOG.info("Checking assertions.");

    // Five base rows plus the three appended ones; the base count is a magic number that
    // should match the row count of the stops.txt being transformed.
    final int expectedStopRows = 5 + 3;
    assertEquals(
        expectedStopRows,
        targetVersion.feedLoadResult.stops.rowCount,
        "stops.txt row count should equal input csv data # of rows + 3 extra rows"
    );

    // Exactly one record with the appended stop id "new" must exist in the loaded namespace.
    final String appendedStopCountSql = String.format(
        "SELECT count(*) FROM %s.stops WHERE stop_id = '%s'",
        targetVersion.namespace,
        "new"
    );
    assertThatSqlCountQueryYieldsExpectedCount(appendedStopCountSql, 1);
}

@Test
void canReplaceFeedInfo() throws SQLException, IOException {
// Generate random UUID for feedId, which gets placed into the csv data.
Expand Down Expand Up @@ -311,6 +376,16 @@ private static String generateStopRow() {
"\nnew2,new2,appended stop,,37,-122,,,0,123,," +
"\nnew,new,appended stop,,37.06668,-122.07781,,,0,123,,";
}
/**
 * Builds CSV data for three stop rows (ids "new3", "new2", "new") in which every row,
 * including the first, is preceded by a newline. The result has no trailing newline.
 */
private static String generateStopRowWithLeadingNewline() {
    final String[] stopRows = {
        "new3,new3,appended stop,,37,-122,,,0,123,,",
        "new2,new2,appended stop,,37,-122,,,0,123,,",
        "new,new,appended stop,,37.06668,-122.07781,,,0,123,,"
    };
    final StringBuilder csv = new StringBuilder();
    for (String stopRow : stopRows) {
        // The line break goes before each row, which is what produces the leading newline.
        csv.append('\n').append(stopRow);
    }
    return csv.toString();
}
/**
 * Builds CSV data for three stop rows (ids "new3", "new2", "new") separated by newlines,
 * with no leading newline and a single newline after the last row.
 */
private static String generateStopRowWithTrailingNewline() {
    final String[] stopRows = {
        "new3,new3,appended stop,,37,-122,,,0,123,,",
        "new2,new2,appended stop,,37,-122,,,0,123,,",
        "new,new,appended stop,,37.06668,-122.07781,,,0,123,,"
    };
    // Join the rows with line breaks, then terminate the final row with one more.
    return String.join("\n", stopRows) + "\n";
}

private static String generateCustomCsvData() {
return "custom_column1,custom_column2,custom_column3"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url
1,Fake Transit,,,,,America/Los_Angeles,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
common_id,1,1,1,1,1,1,1,20170918,20170920
only_calendar_id,1,1,1,1,1,1,1,20170921,20170922
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
feed_id,feed_publisher_name,feed_publisher_url,feed_lang,feed_version
fake_transit,Conveyal,http://www.conveyal.com,en,1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url
1,1,1,Route 1,,3,,7CE6E7,FFFFFF,
1,2,2,Route 2,,3,,7CE6E7,FFFFFF,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
stop_id,accessibility_id,cardinal_direction,relative_position,stop_city
4u6g,0,SE,FS,Scotts Valley
johv,0,SE,FS,Scotts Valley
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
only-calendar-trip1,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000,
only-calendar-trip1,07:01:00,07:01:00,johv,2,,0,0,341.4491961,
only-calendar-trip2,07:00:00,07:00:00,johv,1,,0,0,0.0000000,
only-calendar-trip2,07:01:00,07:01:00,4u6g,2,,0,0,341.4491961,
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
4u6g,4u6g,Butler Ln,,37.0612132,-122.0074332,,,0,,,
johv,johv,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,,
123,,Parent Station,,37.0666,-122.0777,,,1,,,

1234,1234,Child Stop,,37.06662,-122.07772,,,0,123,,
1234567,1234567,Unused stop,,37.06668,-122.07781,,,0,123,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id
1,only-calendar-trip1,,,0,,,0,0,common_id
2,only-calendar-trip2,,,0,,,0,0,common_id

0 comments on commit 18dbbc3

Please sign in to comment.