Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix escaping by using RFC compliant parser #565

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion metafacture-csv/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ description = 'Modules for processing comma-separated values'

dependencies {
api project(':metafacture-framework')
// opencsv supplies the CSV reader and the RFC 4180 parser classes imported by CsvDecoder
implementation 'com.opencsv:opencsv:3.10'
implementation 'com.opencsv:opencsv:5.9'
testImplementation "junit:junit:${versions.junit}"
testImplementation "org.mockito:mockito-core:${versions.mockito}"
}
26 changes: 21 additions & 5 deletions metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
* Copyright 2013-2024 Deutsche Nationalbibliothek and hbz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,10 @@
import org.metafacture.framework.helpers.DefaultObjectPipe;

import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.RFC4180Parser;
import com.opencsv.RFC4180ParserBuilder;
import com.opencsv.exceptions.CsvException;

import java.io.IOException;
import java.io.StringReader;
Expand All @@ -48,6 +52,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
private String[] header = new String[0];
private int count;
private boolean hasHeader;
private RFC4180Parser parser;

/**
* Creates an instance of {@link CsvDecoder} with a given separator.
Expand All @@ -56,6 +61,7 @@ public final class CsvDecoder extends DefaultObjectPipe<String, StreamReceiver>
*/
public CsvDecoder(final String separator) {
    // Only the first character is used; the char constructor stores it
    // and builds the parser.
    this(separator.charAt(0));
}

/**
Expand All @@ -65,13 +71,21 @@ public CsvDecoder(final String separator) {
*/
public CsvDecoder(final char separator) {
this.separator = separator;
// The parser is built from the separator field, so the field must be assigned first.
initializeCsvParser();
}

/**
* Creates an instance of {@link CsvDecoder}. The default separator is
* {@value #DEFAULT_SEP}.
*/
public CsvDecoder() {
// No separator argument: the parser is built from the separator field's current value.
initializeCsvParser();
}

/**
 * (Re)creates the RFC 4180 parser from the current value of the
 * separator field and stores it in the parser field.
 */
private void initializeCsvParser() {
    final RFC4180ParserBuilder parserBuilder =
            new RFC4180ParserBuilder().withSeparator(separator);
    parser = parserBuilder.build();
}

@Override
Expand Down Expand Up @@ -105,18 +119,19 @@ else if (parts.length == header.length) {
}
}

private String[] parseCsv(final String string) {
private String[] parseCsv(final String csv) {
String[] parts = new String[0];
try {
final CSVReader reader = new CSVReader(new StringReader(string),
separator);
final CSVReader reader = new CSVReaderBuilder(new StringReader(csv))
.withCSVParser(parser)
.build();
final List<String[]> lines = reader.readAll();
if (lines.size() > 0) {
parts = lines.get(0);
}
reader.close();
}
catch (final IOException e) {
catch (final IOException | CsvException e) {
e.printStackTrace();
}
return parts;
Expand All @@ -139,5 +154,6 @@ public void setHasHeader(final boolean hasHeader) {
*/
public void setSeparator(final String separator) {
    final char firstCharacter = separator.charAt(0);
    this.separator = firstCharacter;
    // The existing parser was built with the previous separator; rebuild it.
    initializeCsvParser();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,22 @@ public void testTabSeparated() {
ordered.verify(receiver).endRecord();
}

/**
 * Checks that backslashes inside quoted fields are passed through as
 * literal characters instead of being interpreted as escape characters
 * (issue 496).
 *
 * Raw input, where (tab) is a real tab character and every backslash is
 * a literal backslash:
 * In: "a","b(tab)","c\t","\","\cd\"
 * Out: a, b(tab), c\t, \, \cd\
 */
@Test
public void issue496_escaping() {
// Without a header row the literals are named by column index ("0", "1", ...).
decoder.setHasHeader(false);
decoder.process("\"a\",\"b\t\",\"c\\t\",\"\\\",\"\\cd\\\"");
final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("1");
ordered.verify(receiver).literal("0", "a");
// Real tab character is preserved
ordered.verify(receiver).literal("1", "b\t");
// Backslash followed by 't' stays two characters
ordered.verify(receiver).literal("2", "c\\t");
// A lone backslash does not escape the closing quote
ordered.verify(receiver).literal("3", "\\");
ordered.verify(receiver).literal("4", "\\cd\\");
ordered.verify(receiver).endRecord();
}

}
Loading