diff --git a/src/main/java/com/conveyal/gtfs/loader/Field.java b/src/main/java/com/conveyal/gtfs/loader/Field.java index ebc99b0df..2002843a0 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Field.java +++ b/src/main/java/com/conveyal/gtfs/loader/Field.java @@ -22,12 +22,12 @@ public abstract class Field { public final String name; - final Requirement requirement; + public final Requirement requirement; /** * Indicates that this field acts as a foreign key to this referenced table. This is used when checking referential * integrity when loading a feed. * */ - Table referenceTable = null; + public Table referenceTable = null; private boolean shouldBeIndexed; private boolean emptyValuePermitted; @@ -51,6 +51,16 @@ public void setNull(PreparedStatement preparedStatement, int oneBasedIndex) thro preparedStatement.setNull(oneBasedIndex, getSqlType().getVendorTypeNumber()); } + /** + * Finds the index of the field given a string name. + * @return the index of the field or -1 if no match is found + */ + public static int getFieldIndex (Field[] fields, String name) { + // Linear search, assuming a small number of fields per table. + for (int i = 0; i < fields.length; i++) if (fields[i].name.equals(name)) return i; + return -1; + } + public abstract SQLType getSqlType (); // Overridden to create exception for "double precision", since its enum value is just called DOUBLE, and ARRAY types, diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index 0a536b5ce..bf2755b76 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -86,7 +86,7 @@ public class JdbcGtfsLoader { private SQLErrorStorage errorStorage; // Contains references to unique entity IDs during load stage used for referential integrity check. 
- private ReferenceTracker referenceTracker; + private ReferenceTracker referenceTracker = new ReferenceTracker(); public JdbcGtfsLoader(String gtfsFilePath, DataSource dataSource) { this.gtfsFilePath = gtfsFilePath; @@ -130,9 +130,9 @@ public FeedLoadResult loadTables () { result.filename = gtfsFilePath; result.uniqueIdentifier = tablePrefix; - //The order of the following four lines should not be changed because the schema needs to be in place - //before the error storage can be constructed, which in turn needs to exist in case any errors are - //encountered during the loading process. + // The order of the following four lines should not be changed because the schema needs to be in place + // before the error storage can be constructed, which in turn needs to exist in case any errors are + // encountered during the loading process. { createSchema(connection, tablePrefix); //the SQLErrorStorage constructor expects the tablePrefix to contain the dot separator. @@ -143,7 +143,6 @@ public FeedLoadResult loadTables () { // This allows everything to work even when there's no prefix. this.tablePrefix += "."; } - this.referenceTracker = new ReferenceTracker(errorStorage); // Load each table in turn, saving some summary information about what happened during each table load result.agency = load(Table.AGENCY); result.calendar = load(Table.CALENDAR); @@ -210,7 +209,7 @@ private void registerFeed (File gtfsFile) { // FIXME is this extra CSV reader used anymore? Check comment below. // First, inspect feed_info.txt to extract the ID and version. // We could get this with SQL after loading, but feed_info, feed_id and feed_version are all optional. - CsvReader csvReader = getCsvReader(Table.FEED_INFO); + CsvReader csvReader = Table.FEED_INFO.getCsvReader(zip, errorStorage); String feedId = "", feedVersion = ""; if (csvReader != null) { // feed_info.txt has been found and opened. 
@@ -256,43 +255,6 @@ private void registerFeed (File gtfsFile) { } } - /** - * In GTFS feeds, all files are supposed to be in the root of the zip file, but feed producers often put them - * in a subdirectory. This function will search subdirectories if the entry is not found in the root. - * It records an error if the entry is in a subdirectory. - * It then creates a CSV reader for that table if it's found. - */ - private CsvReader getCsvReader (Table table) { - final String tableFileName = table.name + ".txt"; - ZipEntry entry = zip.getEntry(tableFileName); - if (entry == null) { - // Table was not found, check if it is in a subdirectory. - Enumeration entries = zip.entries(); - while (entries.hasMoreElements()) { - ZipEntry e = entries.nextElement(); - if (e.getName().endsWith(tableFileName)) { - entry = e; - errorStorage.storeError(NewGTFSError.forTable(table, TABLE_IN_SUBDIRECTORY)); - break; - } - } - } - if (entry == null) return null; - try { - InputStream zipInputStream = zip.getInputStream(entry); - // Skip any byte order mark that may be present. Files must be UTF-8, - // but the GTFS spec says that "files that include the UTF byte order mark are acceptable". - InputStream bomInputStream = new BOMInputStream(zipInputStream); - CsvReader csvReader = new CsvReader(bomInputStream, ',', Charset.forName("UTF8")); - csvReader.readHeaders(); - return csvReader; - } catch (IOException e) { - LOG.error("Exception while opening zip entry: {}", e); - e.printStackTrace(); - return null; - } - } - /** * This wraps the main internal table loader method to catch exceptions and figure out how many errors happened. */ @@ -339,7 +301,7 @@ private int getTableSize(Table table) { * @return number of rows that were loaded. 
*/ private int loadInternal (Table table) throws Exception { - CsvReader csvReader = getCsvReader(table); + CsvReader csvReader = table.getCsvReader(zip, errorStorage); if (csvReader == null) { LOG.info(String.format("file %s.txt not found in gtfs zipfile", table.name)); // This GTFS table could not be opened in the zip, even in a subdirectory. @@ -353,25 +315,8 @@ private int loadInternal (Table table) throws Exception { // TODO Strip out line returns, tabs in field contents. // By default the CSV reader trims leading and trailing whitespace in fields. // Build up a list of fields in the same order they appear in this GTFS CSV file. - int headerCount = csvReader.getHeaderCount(); - Field[] fields = new Field[headerCount]; - Set fieldsSeen = new HashSet<>(); - String keyField = table.getKeyFieldName(); - int keyFieldIndex = -1; - for (int h = 0; h < headerCount; h++) { - String header = sanitize(csvReader.getHeader(h)); - if (fieldsSeen.contains(header) || "id".equals(header)) { - // FIXME: add separate error for tables containing ID field. - errorStorage.storeError(NewGTFSError.forTable(table, DUPLICATE_HEADER).setBadValue(header)); - fields[h] = null; - } else { - fields[h] = table.getFieldForName(header); - fieldsSeen.add(header); - if (keyField.equals(header)) { - keyFieldIndex = h; - } - } - } + Field[] fields = table.getFieldsFromFieldHeaders(csvReader.getHeaders(), errorStorage); + int keyFieldIndex = table.getKeyFieldIndex(fields); // Create separate fields array with filtered list that does not include null values (for duplicate headers or // ID field). This is solely used to construct the table and array of values to load. Field[] cleanFields = Arrays.stream(fields).filter(field -> field != null).toArray(Field[]::new); @@ -439,7 +384,8 @@ private int loadInternal (Table table) throws Exception { // CSV reader get on an empty field will be an empty string literal. 
String string = csvReader.get(f); // Use spec table to check that references are valid and IDs are unique. - table.checkReferencesAndUniqueness(keyValue, lineNumber, field, string, referenceTracker); + Set errors = table.checkReferencesAndUniqueness(keyValue, lineNumber, field, string, referenceTracker); + errorStorage.storeErrors(errors); // Add value for entry into table setValueForField(table, columnIndex, lineNumber, field, string, postgresText, transformedStrings); // Increment column index. @@ -521,10 +467,10 @@ public void setValueForField(Table table, int fieldIndex, int lineNumber, Field // The Field objects throw exceptions to avoid passing the line number, table name etc. into them. try { // FIXME we need to set the transformed string element even when an error occurs. - // This means the validation and insertion step need to happen separately. - // or the errors should not be signaled with exceptions. - // Also, we should probably not be converting any GTFS field values. - // We should be saving it as-is in the database and converting upon load into our model objects. + // This means the validation and insertion step need to happen separately. + // or the errors should not be signaled with exceptions. + // Also, we should probably not be converting any GTFS field values. + // We should be saving it as-is in the database and converting upon load into our model objects. 
if (postgresText) transformedStrings[fieldIndex + 1] = field.validateAndConvert(string); else field.setParameter(insertStatement, fieldIndex + 2, string); } catch (StorageException ex) { @@ -562,24 +508,12 @@ private void setFieldToNull(boolean postgresText, String[] transformedStrings, i * * TODO add a test including SQL injection text (quote and semicolon) */ - public String sanitize (String string) throws SQLException { + public static String sanitize (String string, SQLErrorStorage errorStorage) { String clean = string.replaceAll("[^\\p{Alnum}_]", ""); if (!clean.equals(string)) { LOG.warn("SQL identifier '{}' was sanitized to '{}'", string, clean); - if (errorStorage != null) { - errorStorage.storeError(NewGTFSError.forFeed(COLUMN_NAME_UNSAFE, string)); - } + if (errorStorage != null) errorStorage.storeError(NewGTFSError.forFeed(COLUMN_NAME_UNSAFE, string)); } return clean; } - - public class ReferenceTracker { - public final Set transitIds = new HashSet<>(); - public final Set transitIdsWithSequence = new HashSet<>(); - public final SQLErrorStorage errorStorage; - - public ReferenceTracker(SQLErrorStorage errorStorage) { - this.errorStorage = errorStorage; - } - } } diff --git a/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java new file mode 100644 index 000000000..7f98c1ea8 --- /dev/null +++ b/src/main/java/com/conveyal/gtfs/loader/ReferenceTracker.java @@ -0,0 +1,14 @@ +package com.conveyal.gtfs.loader; + +import java.util.HashSet; +import java.util.Set; + +/** + * This class is used during feed loads to track the unique keys that are encountered in a GTFS feed. It has two sets of + * strings that it tracks, one for single field keys (e.g., route_id or stop_id) and one for keys that are compound, + * usually made up of a string ID with a sequence field (e.g., trip_id + stop_sequence for tracking unique stop times). 
+ */ +public class ReferenceTracker { + public final Set transitIds = new HashSet<>(); + public final Set transitIdsWithSequence = new HashSet<>(); +} diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index e5ecf86ec..e068cebdf 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -1,6 +1,7 @@ package com.conveyal.gtfs.loader; import com.conveyal.gtfs.error.NewGTFSError; +import com.conveyal.gtfs.error.SQLErrorStorage; import com.conveyal.gtfs.model.Agency; import com.conveyal.gtfs.model.Calendar; import com.conveyal.gtfs.model.CalendarDate; @@ -19,9 +20,14 @@ import com.conveyal.gtfs.model.Transfer; import com.conveyal.gtfs.model.Trip; import com.conveyal.gtfs.storage.StorageException; +import com.csvreader.CsvReader; +import org.apache.commons.io.input.BOMInputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -31,12 +37,19 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Enumeration; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_HEADER; import static com.conveyal.gtfs.error.NewGTFSErrorType.DUPLICATE_ID; import static com.conveyal.gtfs.error.NewGTFSErrorType.REFERENTIAL_INTEGRITY; +import static com.conveyal.gtfs.error.NewGTFSErrorType.TABLE_IN_SUBDIRECTORY; +import static com.conveyal.gtfs.loader.JdbcGtfsLoader.sanitize; import static com.conveyal.gtfs.loader.Requirement.EDITOR; import static com.conveyal.gtfs.loader.Requirement.EXTENSION; import static 
com.conveyal.gtfs.loader.Requirement.OPTIONAL; @@ -70,6 +83,11 @@ public class Table { private boolean usePrimaryKey = false; /** Indicates whether the table has unique key field. */ private boolean hasUniqueKeyField = true; + /** + * Indicates whether the table has a compound key that must be used in conjunction with the key field to determine + * table uniqueness (e.g., transfers#to_stop_id). + * */ + private boolean compoundKey; public Table (String name, Class entityClass, Requirement required, Field... fields) { // TODO: verify table name is OK for use in constructing dynamic SQL queries @@ -118,7 +136,7 @@ public Table (String name, Class entityClass, Requirement requ ); public static final Table CALENDAR_DATES = new Table("calendar_dates", CalendarDate.class, OPTIONAL, - new StringField("service_id", REQUIRED), + new StringField("service_id", REQUIRED).isReferenceTo(CALENDAR), new DateField("date", REQUIRED), new IntegerField("exception_type", REQUIRED, 1, 2) ).keyFieldIsNotUnique(); @@ -241,7 +259,8 @@ public Table (String name, Class entityClass, Requirement requ new StringField("transfer_type", REQUIRED), new StringField("min_transfer_time", OPTIONAL)) .addPrimaryKey() - .keyFieldIsNotUnique(); + .keyFieldIsNotUnique() + .hasCompoundKey(); public static final Table TRIPS = new Table("trips", Trip.class, REQUIRED, new StringField("trip_id", REQUIRED), @@ -321,13 +340,20 @@ public Table restrictDelete () { } /** - * Fluent method to de-set the + * Fluent method to de-set the hasUniqueKeyField flag for tables in which the first field should not be considered a + * primary key. */ - private Table keyFieldIsNotUnique() { + public Table keyFieldIsNotUnique() { this.hasUniqueKeyField = false; return this; } + /** Fluent method to set whether the table has a compound key, e.g., transfers#to_stop_id. 
*/ + private Table hasCompoundKey() { + this.compoundKey = true; + return this; + } + /** * Fluent method that indicates that the integer ID field should be made a primary key. This should generally only * be used for tables that would ever need to be queried on the unique integer ID (which represents row number for @@ -458,6 +484,43 @@ public String generateInsertSql (String namespace, boolean setDefaultId) { return String.format("insert into %s (id, %s) values (%s, %s)", tableName, joinedFieldNames, idValue, questionMarks); } + /** + * In GTFS feeds, all files are supposed to be in the root of the zip file, but feed producers often put them + * in a subdirectory. This function will search subdirectories if the entry is not found in the root. + * It records an error if the entry is in a subdirectory (as long as errorStorage is not null). + * It then creates a CSV reader for that table if it's found. + */ + public CsvReader getCsvReader(ZipFile zipFile, SQLErrorStorage sqlErrorStorage) { + final String tableFileName = this.name + ".txt"; + ZipEntry entry = zipFile.getEntry(tableFileName); + if (entry == null) { + // Table was not found, check if it is in a subdirectory. + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry e = entries.nextElement(); + if (e.getName().endsWith(tableFileName)) { + entry = e; + if (sqlErrorStorage != null) sqlErrorStorage.storeError(NewGTFSError.forTable(this, TABLE_IN_SUBDIRECTORY)); + break; + } + } + } + if (entry == null) return null; + try { + InputStream zipInputStream = zipFile.getInputStream(entry); + // Skip any byte order mark that may be present. Files must be UTF-8, + // but the GTFS spec says that "files that include the UTF byte order mark are acceptable". 
+ InputStream bomInputStream = new BOMInputStream(zipInputStream); + CsvReader csvReader = new CsvReader(bomInputStream, ',', Charset.forName("UTF8")); + csvReader.readHeaders(); + return csvReader; + } catch (IOException e) { + LOG.error("Exception while opening zip entry: {}", e); + e.printStackTrace(); + return null; + } + } + /** * Join a list of fields with a comma + space separator. */ @@ -646,13 +709,14 @@ public String getKeyFieldName () { } /** - * Returns field name that defines order for grouped entities. WARNING: this MUST be called on a spec table (i.e., - * one of the constant tables defined in this class). Otherwise, it could return null even if the table has an order - * field defined. + * Returns field name that defines order for grouped entities or that defines the compound key field (e.g., + * transfers#to_stop_id). WARNING: this field must be in the 1st position (base zero) of the fields array; hence, + * this MUST be called on a spec table (i.e., one of the constant tables defined in this class). Otherwise, it could + * return null even if the table has an order field defined. */ public String getOrderFieldName () { String name = fields[1].name; - if (name.contains("_sequence")) return name; + if (name.contains("_sequence") || compoundKey) return name; else return null; } @@ -673,13 +737,11 @@ public Class getEntityClass() { /** - * Finds the index of the field given a string name. + * Finds the index of the field for this table given a string name. * @return the index of the field or -1 if no match is found */ public int getFieldIndex (String name) { - // Linear search, assuming a small number of fields per table. 
- for (int i = 0; i < fields.length; i++) if (fields[i].name.equals(name)) return i; - return -1; + return Field.getFieldIndex(fields, name); } /** @@ -693,6 +755,14 @@ public boolean isRequired () { return required == REQUIRED; } + /** + * Checks whether the table is part of the GTFS specification, i.e., it is not an internal table used for the editor + * (e.g., Patterns or PatternStops). + */ + public boolean isSpecTable() { + return required == REQUIRED || required == OPTIONAL; + } + /** * Create indexes for table using shouldBeIndexed(), key field, and/or sequence field. WARNING: this MUST be called * on a spec table (i.e., one of the constant tables defined in this class). Otherwise, the getIndexFields method @@ -870,75 +940,128 @@ public Table getParentTable() { return parentTable; } + /** + * During table load, checks the uniqueness of the entity ID and that references are valid. NOTE: This method + * defaults the key field and order field names to this table's values. + * @param keyValue key value for the record being checked + * @param lineNumber line number of the record being checked + * @param field field currently being checked + * @param value value that corresponds to field + * @param referenceTracker reference tracker in which to store references + * @return any duplicate or bad reference errors. + */ + public Set checkReferencesAndUniqueness(String keyValue, int lineNumber, Field field, String value, ReferenceTracker referenceTracker) { + return checkReferencesAndUniqueness(keyValue, lineNumber, field, value, referenceTracker, getKeyFieldName(), getOrderFieldName()); + } + /** * During table load, checks the uniqueness of the entity ID and that references are valid. These references are - * stored in the provided reference tracker. Any non-unique IDs or invalid references will store an error. + * stored in the provided reference tracker. Any non-unique IDs or invalid references are returned as errors. 
NOTE: + * this instance of checkReferencesAndUniqueness allows for arbitrarily setting the keyField and orderField, which + * is helpful for checking uniqueness of fields that are not the standard primary key (e.g., route_short_name). */ - public void checkReferencesAndUniqueness(String keyValue, int lineNumber, Field field, String string, JdbcGtfsLoader.ReferenceTracker referenceTracker) { + public Set checkReferencesAndUniqueness(String keyValue, int lineNumber, Field field, String value, ReferenceTracker referenceTracker, String keyField, String orderField) { + Set errors = new HashSet<>(); // Store field-scoped transit ID for referential integrity check. (Note, entity scoping doesn't work here because // we need to cross-check multiple entity types for valid references, e.g., stop times and trips both share trip // id.) - String keyField = getKeyFieldName(); - String orderField = getOrderFieldName(); - // If table has no unique key field (e.g., calendar_dates or transfers), there is no need to check for - // duplicates. If it has an order field, that order field should supersede the key field as the "unique" field. - String uniqueKeyField = !hasUniqueKeyField ? null : orderField != null ? orderField : keyField; + // If table has an order field, that order field should supersede the key field as the "unique" field. In other + // words, if it has an order field, the unique key is actually compound -- made up of the keyField + orderField. + String uniqueKeyField = orderField != null + ? orderField + // If table has no unique key field (e.g., calendar_dates or transfers), there is no need to check for + // duplicates. + : !hasUniqueKeyField + ? null + : keyField; String transitId = String.join(":", keyField, keyValue); // If the field is optional and there is no value present, skip check. - if (!field.isRequired() && "".equals(string)) return; + if (!field.isRequired() && "".equals(value)) return Collections.EMPTY_SET; // First, handle referential integrity check. 
boolean isOrderField = field.name.equals(orderField); if (field.isForeignReference()) { - // Check referential integrity if applicable + // Check referential integrity if the field is a foreign reference. Note: the reference table must be loaded + // before the table/value being currently checked. String referenceField = field.referenceTable.getKeyFieldName(); - String referenceTransitId = String.join(":", referenceField, string); + String referenceTransitId = String.join(":", referenceField, value); if (!referenceTracker.transitIds.contains(referenceTransitId)) { // If the reference tracker does not contain NewGTFSError referentialIntegrityError = NewGTFSError.forLine( this, lineNumber, REFERENTIAL_INTEGRITY, referenceTransitId) .setEntityId(keyValue); - if (isOrderField) { - // If the field is an order field, set the sequence for the new error. - referentialIntegrityError.setSequence(string); - } - referenceTracker.errorStorage.storeError(referentialIntegrityError); + // If the field is an order field, set the sequence for the new error. + if (isOrderField) referentialIntegrityError.setSequence(value); + errors.add(referentialIntegrityError); } } - + // Next, handle duplicate ID check. + // In most cases there is no need to check for duplicate IDs if the field is a foreign reference. However, + // transfers#to_stop_id is defined as an order field, so we need to check that this field (which is both a + // foreign ref and order field) is dataset unique in conjunction with the key field. // These hold references to the set of IDs to check for duplicates and the ID to check. These depend on // whether an order field is part of the "unique ID." Set listOfUniqueIds = referenceTracker.transitIds; String uniqueId = transitId; - // There is no need to check for duplicate IDs if the field is a foreign reference. - if (field.isForeignReference()) return; - // Next, check that the ID is table-unique. + // Next, check that the ID is table-unique. 
For example, the trip_id field is table unique in trips.txt and + // the stop_sequence field (joined with trip_id) is table unique in stop_times.txt. if (field.name.equals(uniqueKeyField)) { // Check for duplicate IDs and store entity-scoped IDs for referential integrity check if (isOrderField) { // Check duplicate reference in set of field-scoped id:sequence (e.g., stop_sequence:12345:2) // This should not be scoped by key field because there may be conflicts (e.g., with trip_id="12345:2" listOfUniqueIds = referenceTracker.transitIdsWithSequence; - uniqueId = String.join(":", field.name, keyValue, string); + uniqueId = String.join(":", field.name, keyValue, value); } // Add ID and check duplicate reference in entity-scoped IDs (e.g., stop_id:12345) boolean valueAlreadyExists = !listOfUniqueIds.add(uniqueId); if (valueAlreadyExists) { // If the value is a duplicate, add an error. NewGTFSError duplicateIdError = NewGTFSError.forLine(this, lineNumber, DUPLICATE_ID, uniqueId) - .setEntityId(keyValue); - if (isOrderField) { - duplicateIdError.setSequence(string); - } - referenceTracker.errorStorage.storeError(duplicateIdError); + .setEntityId(keyValue); + if (isOrderField) duplicateIdError.setSequence(value); + errors.add(duplicateIdError); + } + } else if (field.name.equals(keyField)) { + // We arrive here if the field is not a foreign reference and not the unique key field on the table (e.g., + // shape_pt_sequence), but is still a key on the table. For example, this is where we add shape_id from + // the shapes table, so that when we check the referential integrity of trips#shape_id, we know that the + // shape_id exists in the shapes table. It also handles tracking calendar_dates#service_id values. + referenceTracker.transitIds.add(uniqueId); + } + return errors; + } + + /** + * For an array of field headers, returns the matching set of {@link Field}s for a {@link Table}. 
If errorStorage is + * not null, errors related to unexpected or duplicate header names will be stored. + */ + public Field[] getFieldsFromFieldHeaders(String[] headers, SQLErrorStorage errorStorage) { + Field[] fields = new Field[headers.length]; + Set fieldsSeen = new HashSet<>(); + for (int h = 0; h < headers.length; h++) { + String header = sanitize(headers[h], errorStorage); + if (fieldsSeen.contains(header) || "id".equals(header)) { + // FIXME: add separate error for tables containing ID field. + if (errorStorage != null) errorStorage.storeError(NewGTFSError.forTable(this, DUPLICATE_HEADER).setBadValue(header)); + fields[h] = null; + } else { + fields[h] = getFieldForName(header); + fieldsSeen.add(header); } - } else { - // If the field is not the table unique key field, skip the duplicate ID check and simply add the ID to the - // list of unique IDs. - listOfUniqueIds.add(uniqueId); } + return fields; + } + + /** + * Returns the index of the key field within the array of fields provided for a given table. + * @param fields array of fields (intended to be derived from the headers of a csv text file) + */ + public int getKeyFieldIndex(Field[] fields) { + String keyField = getKeyFieldName(); + return Field.getFieldIndex(fields, keyField); } }