Merge pull request #1113 from ie3-institute/ms/#1112-confusing-error-message-on-deprecated-input-data

Improving error message when using the outdated csv format
sebastian-peter authored Nov 26, 2024
2 parents 4abf08b + 921a51e commit 183c129
Showing 14 changed files with 1,248 additions and 1,209 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Storage minimum level parameter removed from cylindrical thermal storage [#1123](https://github.com/ie3-institute/PowerSystemDataModel/issues/1123)
- Converted eval-rst to myst syntax in ReadTheDocs, fixed line wrapping and widths [#1137](https://github.com/ie3-institute/PowerSystemDataModel/issues/1137)
- Improving usage of streams on sql fetches [#827](https://github.com/ie3-institute/PowerSystemDataModel/issues/827)
- Improving error message when using the outdated csv format [#1112](https://github.com/ie3-institute/PowerSystemDataModel/issues/1112)


## [5.1.0] - 2024-06-24

84 changes: 47 additions & 37 deletions src/main/java/edu/ie3/datamodel/io/source/csv/CsvDataSource.java
@@ -161,41 +161,44 @@ protected Set<Path> getIndividualTimeSeriesFilePaths() {
* occurred
*/
protected Map<String, String> buildFieldsToAttributes(
final String csvRow, final String[] headline) {
final String csvRow, final String[] headline) throws SourceException {

TreeMap<String, String> insensitiveFieldsToAttributes =
new TreeMap<>(String.CASE_INSENSITIVE_ORDER);

try {
String[] fieldVals = parseCsvRow(csvRow, csvSep);
insensitiveFieldsToAttributes.putAll(
IntStream.range(0, fieldVals.length)
.boxed()
.collect(
Collectors.toMap(
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));
String[] fieldVals = parseCsvRow(csvRow, csvSep);
insensitiveFieldsToAttributes.putAll(
IntStream.range(0, Math.min(fieldVals.length, headline.length))
.boxed()
.collect(
Collectors.toMap(
k -> StringUtils.snakeCaseToCamelCase(headline[k]), v -> fieldVals[v])));

if (insensitiveFieldsToAttributes.size() != headline.length) {
Set<String> fieldsToAttributesKeySet = insensitiveFieldsToAttributes.keySet();
insensitiveFieldsToAttributes = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
throw new SourceException(
"The size of the headline does not fit to the size of the resulting fields to attributes mapping.\nHeadline: "
+ String.join(", ", headline)
+ "\nResultingMap: "
+ String.join(", ", fieldsToAttributesKeySet)
+ "\nCsvRow: "
+ csvRow.trim()
+ ".\nIs the csv separator in the file matching the separator provided in the constructor ('"
+ csvSep
+ "') and does the number of columns match the number of headline fields?");
}
} catch (Exception e) {
log.error(
"Cannot build fields to attributes map for row '{}' with headline '{}'.\nException: {}",
csvRow.trim(),
String.join(",", headline),
e);
if (fieldVals.length != headline.length) {
throw new SourceException(
"The size of the headline ("
+ headline.length
+ ") does not fit to the size of the attribute fields ("
+ fieldVals.length
+ ").\nHeadline: "
+ String.join(", ", headline)
+ "\nRow: "
+ csvRow.trim()
+ ".\nPlease check:"
+ "\n - is the csv separator in the file matching the separator provided in the constructor ('"
+ csvSep
+ "')"
+ "\n - does the number of columns match the number of headline fields "
+ "\n - are you using a valid RFC 4180 formatted csv row?");
}

if (insensitiveFieldsToAttributes.size() != fieldVals.length) {
throw new SourceException(
"There might be duplicate headline elements.\nHeadline: "
+ String.join(", ", headline)
+ ".\nPlease keep in mind that headlines are case-insensitive and underscores from snake case are ignored.");
}

return insensitiveFieldsToAttributes;
}
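Net effect of the hunk above: a malformed row no longer logs an error and yields an empty map but fails fast with a checked SourceException. A minimal sketch of the new failure mode (the probe method, sample row, and source instance are illustrative, not part of this PR; buildFieldsToAttributes is protected, so the sketch assumes same-package access):

    import edu.ie3.datamodel.exceptions.SourceException;
    import java.util.Map;

    // Hypothetical probe in edu.ie3.datamodel.io.source.csv; 'source' is any
    // CsvDataSource configured with ',' as separator.
    static void probe(CsvDataSource source) {
      String[] headline = {"uuid", "id"};
      // Wrong separator: ';' instead of the configured ',' parses as one field.
      String row = "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8;some_id";
      try {
        Map<String, String> fields = source.buildFieldsToAttributes(row, headline);
      } catch (SourceException e) {
        // Message starts with: "The size of the headline (2) does not fit to
        // the size of the attribute fields (1)." followed by the checklist above.
        System.err.println(e.getMessage());
      }
    }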

@@ -252,7 +255,7 @@ Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsToAttribu
// is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
return Success.of(csvRowFieldValueMapping(reader, headline).parallelStream());
return csvRowFieldValueMapping(reader, headline);
} catch (FileNotFoundException e) {
if (allowFileNotExisting) {
log.warn("Unable to find file '{}': {}", filePath, e.getMessage());
@@ -282,13 +285,20 @@ private Try<Path, SourceException> getFilePath(Class<? extends Entity> entityCla
* @param headline of the file
* @return a list of mapping
*/
protected List<Map<String, String>> csvRowFieldValueMapping(
protected Try<Stream<Map<String, String>>, SourceException> csvRowFieldValueMapping(
BufferedReader reader, String[] headline) {
return reader
.lines()
.parallel()
.map(csvRow -> buildFieldsToAttributes(csvRow, headline))
.filter(map -> !map.isEmpty())
.toList();
return Try.scanStream(
reader
.lines()
.parallel()
.map(
csvRow ->
Try.of(
() -> buildFieldsToAttributes(csvRow, headline),
SourceException.class)),
"Map<String, String>")
.transform(
stream -> stream.filter(map -> !map.isEmpty()),
e -> new SourceException("Parsing csv row failed.", e));
}
}
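The refactor above changes csvRowFieldValueMapping from eagerly collecting a List to returning a Try-wrapped parallel stream: each row is parsed inside Try.of, and Try.scanStream folds any per-row SourceException into a single failure, which the call sites below simply pass through. A hedged consumption sketch (getOrThrow() is an assumption about the Try utility and does not appear in this diff; unwrap via Success/Failure instead if it is unavailable):

    // Sketch only: 'source', 'reader' and 'headline' are set up elsewhere.
    Try<Stream<Map<String, String>>, SourceException> result =
        source.csvRowFieldValueMapping(reader, headline);
    Stream<Map<String, String>> rows = result.getOrThrow(); // assumed accessor
    rows.forEach(fieldMap -> System.out.println(fieldMap.get("uuid")));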
@@ -179,7 +179,7 @@ public List<CoordinateDistance> findCornerPoints(
}

public int getCoordinateCount() {
return idToCoordinate.keySet().size();
return idToCoordinate.size();
}

private Collection<Point> getCoordinatesInBoundingBox(
@@ -209,7 +209,7 @@ private Collection<Point> getCoordinatesInBoundingBox(
// is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
return Success.of(dataSource.csvRowFieldValueMapping(reader, headline).parallelStream());
return dataSource.csvRowFieldValueMapping(reader, headline);
} catch (IOException e) {
return Failure.of(
new SourceException("Cannot read the file for coordinate id to coordinate mapping.", e));
@@ -240,7 +240,7 @@ private Try<Stream<Map<String, String>>, SourceException> buildStreamWithFieldsT
// is wanted to avoid a lock on the file), but this causes a closing of the stream as well.
// As we still want to consume the data at other places, we start a new stream instead of
// returning the original one
return Success.of(dataSource.csvRowFieldValueMapping(reader, headline).parallelStream());
return dataSource.csvRowFieldValueMapping(reader, headline);
} catch (IOException e) {
return Failure.of(
new SourceException(
@@ -5,6 +5,7 @@
*/
package edu.ie3.datamodel.io.source.csv

import edu.ie3.datamodel.exceptions.SourceException
import edu.ie3.datamodel.io.csv.CsvIndividualTimeSeriesMetaInformation
import edu.ie3.datamodel.io.naming.FileNamingStrategy
import edu.ie3.datamodel.io.naming.timeseries.ColumnScheme
@@ -243,7 +244,6 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
]
}


def "A CsvDataSource should build a valid fields to attributes map with valid data and empty value fields as expected"() {
given:
def validHeadline = [
@@ -275,7 +275,7 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
]
}

def "A CsvDataSource should be able to handle several errors when the csvRow is invalid or cannot be processed"() {
def "A CsvDataSource should throw an exception if the headline and CSV row have different sizes"() {
given:
def validHeadline = [
"uuid",
@@ -288,14 +288,42 @@ class CsvDataSourceTest extends Specification implements CsvTestDataMeta {
"s_rated"
] as String[]

expect:
dummyCsvSource.buildFieldsToAttributes(invalidCsvRow, validHeadline) == [:]
when:
dummyCsvSource.buildFieldsToAttributes(invalidCsvRow, validHeadline)

then:
def exception = thrown(SourceException)
exception.getMessage().startsWith("The size of the headline (8) does not fit to the size of the attribute fields")

where:
invalidCsvRow || explanation
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8;25.0;100.0;0.95;98.0;test_bmTypeInput;50.0;25.0" || "wrong separator"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput" || "too less columns"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput,,,," || "too much columns"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput" || "too little columns"
"5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,100.0,0.95,98.0,test_bmTypeInput,,,," || "too many columns"
}


def "A CsvDataSource should throw an exception if there are duplicate headlines"() {
given:
def invalidHeadline = [
"uuid",
"active_power_gradient",
"Active_Power_Gradient",
"capex",
"cosphi_rated",
"eta_conv",
"id",
"opex",
"s_rated",
] as String[]
def validCsvRow = "5ebd8f7e-dedb-4017-bb86-6373c4b68eb8,25.0,25.0,100.0,0.95,98.0,test_bmTypeInput,50.0,25.0"

when:
dummyCsvSource.buildFieldsToAttributes(validCsvRow, invalidHeadline)

then:
def exception = thrown(SourceException)
exception.getMessage().startsWith("There might be duplicate headline elements.")
}
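Why the size comparison catches this: snakeCaseToCamelCase maps both headline variants to strings differing only in case, and the CASE_INSENSITIVE_ORDER map collapses them into one entry, so the map ends up smaller than the field array. A sketch of the mechanics (the import location and normalized keys are inferred, not shown in this diff):

    import edu.ie3.util.StringUtils; // assumed location of snakeCaseToCamelCase
    import java.util.TreeMap;

    TreeMap<String, String> map = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
    map.put(StringUtils.snakeCaseToCamelCase("active_power_gradient"), "25.0");
    map.put(StringUtils.snakeCaseToCamelCase("Active_Power_Gradient"), "25.0");
    // Both keys compare equal case-insensitively, so only one entry survives:
    assert map.size() == 1; // < fieldVals.length (2) -> SourceException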

def "The CsvDataSource is able to provide correct paths to time series files"() {
@@ -1,2 +1,2 @@
"uuid","cost_controlled","feed_in_tariff","id","market_reaction","node","operates_from","operates_until","operator","q_characteristics","type"
a3b7576b-cac7-4350-90ff-06316cdca192,true,51.0,BM_Test,true,f5839ade-5968-4879-a824-90b5fb3552cd,,,,cosPhiFixed:{(0.00,1.00)},2fdca5f1-c11b-4169-a695-4c98f0e0a84a
a3b7576b-cac7-4350-90ff-06316cdca192,true,51.0,BM_Test,true,f5839ade-5968-4879-a824-90b5fb3552cd,,,,"cosPhiFixed:{(0.00,1.00)}",2fdca5f1-c11b-4169-a695-4c98f0e0a84a
@@ -1,3 +1,3 @@
"uuid","cos_phi_rated","id","node","operates_from","operates_until","operator","q_characteristics","charging_points","type","location_type","v2gSupport"
06a14909-366e-4e94-a593-1016e1455b30,0.9,test_evcs_1,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,cosPhiFixed:{(0.00,1.0)},4,ChargingStationType1,HOME,false
104acdaa-5dc5-4197-aed2-2fddb3c4f237,0.9,test_evcs_2,ed4697fd-016c-40c2-a66b-e793878dadea,,,,cosPhiFixed:{(0.00,1.0)},4,ChargingStationType1,HOME,false
06a14909-366e-4e94-a593-1016e1455b30,0.9,test_evcs_1,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,"cosPhiFixed:{(0.00,1.0)}",4,ChargingStationType1,HOME,false
104acdaa-5dc5-4197-aed2-2fddb3c4f237,0.9,test_evcs_2,ed4697fd-016c-40c2-a66b-e793878dadea,,,,"cosPhiFixed:{(0.00,1.0)}",4,ChargingStationType1,HOME,false
@@ -1,2 +1,2 @@
"uuid","cos_phi_rated","id","node","operates_from","operates_until","operator","q_characteristics","s_rated"
9abe950d-362e-4efe-b686-500f84d8f368,0.9,test_feed_in,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,cosPhiFixed:{(0.00,0.95)},200.0
9abe950d-362e-4efe-b686-500f84d8f368,0.9,test_feed_in,5f1c776c-6935-40f7-ba9e-60646e08992b,,,,"cosPhiFixed:{(0.00,0.95)}",200.0
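The three fixture updates above share one cause: the q_characteristics value contains the csv separator, and the stricter size check now counts an unquoted characteristic as extra columns. Per RFC 4180, fields containing the delimiter must be enclosed in double quotes:

    cosPhiFixed:{(0.00,1.00)}     -> unquoted, splits into two fields
    "cosPhiFixed:{(0.00,1.00)}"   -> quoted, parses as a single field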