Merge pull request #1113 from ie3-institute/ms/#1112-confusing-error-message-on-deprecated-input-data

Improving error message when using the outdated csv format
sebastian-peter authored Nov 26, 2024
2 parents 4abf08b + 921a51e commit 183c129
Showing 14 changed files with 1,248 additions and 1,209 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Storage minimum level parameter removed from cylindrical thermal storage [#1123](https://github.com/ie3-institute/PowerSystemDataModel/issues/1123)
- Converted eval-rst to myst syntax in ReadTheDocs, fixed line wrapping and widths [#1137](https://github.com/ie3-institute/PowerSystemDataModel/issues/1137)
- Improving usage of streams on sql fetches [#827](https://github.com/ie3-institute/PowerSystemDataModel/issues/827)
- Improving error message when using the outdated csv format [#1112](https://github.com/ie3-institute/PowerSystemDataModel/issues/1112)


## [5.1.0] - 2024-06-24

84 changes: 47 additions & 37 deletions src/main/java/edu/ie3/datamodel/io/source/csv/CsvDataSource.java
@@ -161,41 +161,44 @@ protected Set<Path> getIndividualTimeSeriesFilePaths() {
* occurred
*/
protected Map<String, String> buildFieldsToAttributes(
final String csvRow, final String[] headline) {
final String csvRow, final String[] headline) throws SourceException {

TreeMap<String, String> insensitiveFieldsToAttributes =
new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

try {
String[] fieldVals = parseCsvRow(csvRow, csvSep);
insensitiveFieldsToAttributes.putAll(
IntStream.range(0, fieldVals.length)
.boxed()
.collect(
Collectors.toMap(
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));
String[] fieldVals = parseCsvRow(csvRow, csvSep);
insensitiveFieldsToAttributes.putAll(
IntStream.range(0, Math.min(fieldVals.length, headline.length))
.boxed()
.collect(
Collectors.toMap(
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));

if (insensitiveFieldsToAttributes.size() != headline.length) {
Set<String> fieldsToAttributesKeySet = insensitiveFieldsToAttributes.keySet();
insensitiveFieldsToAttributes = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
throw new SourceException(
"The size of the headline does not fit to the size of the resulting fields to attributes mapping.\nHeadline: "
+ String.join(", ", headline)
+ "\nResultingMap: "
+ String.join(", ", fieldsToAttributesKeySet)
+ "\nCsvRow: "
+ csvRow.trim()
+ ".\nIs the csv separator in the file matching the separator provided in the constructor ('"
+ csvSep
+ "') and does the number of columns match the number of headline fields?");
}
} catch (Exception e) {
log.error(
"Cannot build fields to attributes map for row '{}' with headline '{}'.\nException: {}",
csvRow.trim(),
String.join(",", headline),
e);
if (fieldVals.length != headline.length) {
throw new SourceException(
"The size of the headline ("
+ headline.length
+ ") does not fit to the size of the attribute fields ("
+ fieldVals.length
+ ").\nHeadline: "
+ String.join(", ", headline)
+ "\nRow: "
+ csvRow.trim()
+ ".\nPlease check:"
+ "\n - is the csv separator in the file matching the separator provided in the constructor ('"
+ csvSep
+ "')"
+ "\n - does the number of columns match the number of headline fields "
+ "\n - are you using a valid RFC 4180 formatted csv row?");
}

if (insensitiveFieldsToAttributes.size() != fieldVals.length) {
throw new SourceException(
"There might be duplicate headline elements.\nHeadline: "
+ String.join(", ", headline)
+ ".\nPlease keep in mind that headlines are case-insensitive and underscores from snake case are ignored.");
}

return insensitiveFieldsToAttributes;
}
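Net effect of the hunk above: a malformed row no longer logs an error and yields an empty map but fails fast with a checked SourceException. A minimal sketch of the new failure mode (the probe method, sample row, and source instance are illustrative, not part of this PR; buildFieldsToAttributes is protected, so the sketch assumes same-package access):

    import edu.ie3.datamodel.exceptions.SourceException;
    import java.util.Map;

    // Hypothetical probe in edu.ie3.datamodel.io.source.csv; 'source' is any
    // CsvDataSource configured with ',' as separator.
    static void probe(CsvDataSource source) {
      String[] headline = {"uuid", "id"};
      // Wrong separator: ';' instead of the configured ',' parses as one field.
      String row = "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8;some_id";
      try {
        Map<String, String> fields = source.buildFieldsToAttributes(row, headline);
      } catch (SourceException e) {
        // Message starts with: "The size of the headline (2) does not fit to
        // the size of the attribute fields (1)." followed by the checklist above.
        System.err.println(e.getMessage());
      }
    }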

@@ -252,7 +255,7 @@ Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsToAttribu
// is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
return Success.of(csvRowFieldValueMapping(reader, headline).parallelStream());
return csvRowFieldValueMapping(reader, headline);
} catch (FileNotFoundException e) {
if (allowFileNotExisting) {
log.warn("Unable to find file '{}': {}", filePath, e.getMessage());
@@ -282,13 +285,20 @@ private Try<Path, SourceException> getFilePath(Class<? extends Entity> entityCla
* @param headline of the file
* @return a list of mapping
*/
protected List<Map<String, String>> csvRowFieldValueMapping(
protected Try<Stream<Map<String, String>>, SourceException> csvRowFieldValueMapping(
BufferedReader reader, String[] headline) {
return reader
.lines()
.parallel()
.map(csvRow -> buildFieldsToAttributes(csvRow, headline))
.filter(map -> !map.isEmpty())
.toList();
return Try.scanStream(
reader
.lines()
.parallel()
.map(
csvRow ->
Try.of(
() -> buildFieldsToAttributes(csvRow, headline),
SourceException.class)),
"Map<String, String>")
.transform(
stream -> stream.filter(map -> !map.isEmpty()),
e -> new SourceException("Parsing csv row failed.", e));
}
}
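The refactor above changes csvRowFieldValueMapping from eagerly collecting a List to returning a Try-wrapped parallel stream: each row is parsed inside Try.of, and Try.scanStream folds any per-row SourceException into a single failure, which the call sites below simply pass through. A hedged consumption sketch (getOrThrow() is an assumption about the Try utility and does not appear in this diff; unwrap via Success/Failure instead if it is unavailable):

    // Sketch only: 'source', 'reader' and 'headline' are set up elsewhere.
    Try<Stream<Map<String, String>>, SourceException> result =
        source.csvRowFieldValueMapping(reader, headline);
    Stream<Map<String, String>> rows = result.getOrThrow(); // assumed accessor
    rows.forEach(fieldMap -> System.out.println(fieldMap.get("uuid")));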
@@ -179,7 +179,7 @@ public List<CoordinateDistance> findCornerPoints(
}

public int getCoordinateCount() {
return idToCoordinate.keySet().size();
return idToCoordinate.size();
}

private Collection<Point> getCoordinatesInBoundingBox(
@@ -209,7 +209,7 @@ private Collection<Point> getCoordinatesInBoundingBox(
// is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
return Success.of(dataSource.csvRowFieldValueMapping(reader, headline).parallelStream());
return dataSource.csvRowFieldValueMapping(reader, headline);
} catch (IOException e) {
return Failure.of(
new SourceException("Cannot read the file for coordinate id to coordinate mapping.", e));
@@ -240,7 +240,7 @@ private Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsT
// is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
return Success.of(dataSource.csvRowFieldValueMapping(reader, headline).parallelStream());
return dataSource.csvRowFieldValueMapping(reader, headline);
} catch (IOException e) {
return Failure.of(
new SourceException(
@@ -5,6 +5,7 @@
*/
package edu.ie3.datamodel.io.source.csv

import edu.ie3.datamodel.exceptions.SourceException
import edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation
import edu.ie3.datamodel.io.naming.FileNamingStrategy
import edu.ie3.datamodel.io.naming.timeseries.ColumnScheme
@@ -243,7 +244,6 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
]
}


def "A CsvDataSource should build a valid fields to attributes map with valid data and empty value fields as expected"() {
given:
def validHeadline = [
@@ -275,7 +275,7 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
]
}

def "A CsvDataSource should be able to handle several errors when the csvRow is invalid or cannot be processed"() {
def "A CsvDataSource should throw an exception if the headline and CSV row have different sizes"() {
given:
def validHeadline = [
"uuid",
@@ -288,14 +288,42 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
"s_rated"
] as String[]

expect:
dummyCsvSource.buildFieldsToAttributes(invalidCsvRow, validHeadline) == [:]
when:
dummyCsvSource.buildFieldsToAttributes(invalidCsvRow, validHeadline)

then:
def exception = thrown(SourceException)
exception.getMessage().startsWith("The size of the headline (8) does not fit to the size of the attribute fields")

where:
invalidCsvRow || explanation
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8;25.0;100.0;0.95;98.0;test_bmTypeInput;50.0;25.0" || "wrong separator"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput" || "too less columns"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput,,,," || "too much columns"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput" || "too little columns"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput,,,," || "too many columns"
}


def "A CsvDataSource should throw an exception if there are duplicate headlines"() {
given:
def invalidHeadline = [
"uuid",
"active_power_gradient",
"Active_Power_Gradient",
"capex",
"cosphi_rated",
"eta_conv",
"id",
"opex",
"s_rated",
] as String[]
def validCsvRow = "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,25.0,100.0,0.95,98.0,test_bmTypeInput,50.0,25.0"

when:
dummyCsvSource.buildFieldsToAttributes(validCsvRow, invalidHeadline)

then:
def exception = thrown(SourceException)
exception.getMessage().startsWith("There might be duplicate headline elements.")
}
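Why the size comparison catches this: snakeCaseToCamelCase maps both headline variants to strings differing only in case, and the CASE_INSENSITIVE_ORDER map collapses them into one entry, so the map ends up smaller than the field array. A sketch of the mechanics (the import location and normalized keys are inferred, not shown in this diff):

    import edu.ie3.util.StringUtils; // assumed location of snakeCaseToCamelCase
    import java.util.TreeMap;

    TreeMap<String, String> map = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
    map.put(StringUtils.snakeCaseToCamelCase("active_power_gradient"), "25.0");
    map.put(StringUtils.snakeCaseToCamelCase("Active_Power_Gradient"), "25.0");
    // Both keys compare equal case-insensitively, so only one entry survives:
    assert map.size() == 1; // < fieldVals.length (2) -> SourceException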

def "The CsvDataSource is able to provide correct paths to time series files"() {
@@ -1,2 +1,2 @@
"uuid","cost_controlled","feed_in_tariff","id","market_reaction","node","operates_from","operates_until","operator","q_characteristics","type"
a3b7576b-cac7-4350-90ff-06316cdca192,true,51.0,BM_Test,true,f5839ade-5968-4879-a824-90b5fb3552cd,,,,cosPhiFixed:{(0.00,1.00)},2fdca5f1-c11b-4169-a695-4c98f0e0a84a
a3b7576b-cac7-4350-90ff-06316cdca192,true,51.0,BM_Test,true,f5839ade-5968-4879-a824-90b5fb3552cd,,,,"cosPhiFixed:{(0.00,1.00)}",2fdca5f1-c11b-4169-a695-4c98f0e0a84a
@@ -1,3 +1,3 @@
"uuid","cos_phi_rated","id","node","operates_from","operates_until","operator","q_characteristics","charging_points","type","location_type","v2gSupport"
06a14909-366e-4e94-a593-1016e1455b30,0.9,test_evcs_1,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,cosPhiFixed:{(0.00,1.0)},4,ChargingStationType1,HOME,false
104acdaa-5dc5-4197-aed2-2fddb3c4f237,0.9,test_evcs_2,ed4697fd-016c-40c2-a66b-e793878dadea,,,,cosPhiFixed:{(0.00,1.0)},4,ChargingStationType1,HOME,false
06a14909-366e-4e94-a593-1016e1455b30,0.9,test_evcs_1,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,"cosPhiFixed:{(0.00,1.0)}",4,ChargingStationType1,HOME,false
104acdaa-5dc5-4197-aed2-2fddb3c4f237,0.9,test_evcs_2,ed4697fd-016c-40c2-a66b-e793878dadea,,,,"cosPhiFixed:{(0.00,1.0)}",4,ChargingStationType1,HOME,false
@@ -1,2 +1,2 @@
"uuid","cos_phi_rated","id","node","operates_from","operates_until","operator","q_characteristics","s_rated"
9abe950d-362e-4efe-b686-500f84d8f368,0.9,test_feed_in,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,cosPhiFixed:{(0.00,0.95)},200.0
9abe950d-362e-4efe-b686-500f84d8f368,0.9,test_feed_in,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,"cosPhiFixed:{(0.00,0.95)}",200.0
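The three fixture updates above share one cause: the q_characteristics value contains the csv separator, and the stricter size check now counts an unquoted characteristic as extra columns. Per RFC 4180, fields containing the delimiter must be enclosed in double quotes:

    cosPhiFixed:{(0.00,1.00)}     -> unquoted, splits into two fields
    "cosPhiFixed:{(0.00,1.00)}"   -> quoted, parses as a single field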