From 135fbf1a8b393df21bc4fe77f919c2174753feb4 Mon Sep 17 00:00:00 2001 From: "Yifei.Hu" Date: Sat, 27 Apr 2024 00:02:25 +0800 Subject: [PATCH 1/5] Fix issue with duplicate column name --- .DS_Store | Bin 0 -> 6148 bytes .../contents.xcworkspacedata | 7 +++ SwiftCSV/Extension.swift | 13 ++++ SwiftCSV/Parser.swift | 6 ++ SwiftCSV/ParsingState.swift | 14 ++++- .../DuplicateColumnNameHandlingTests.swift | 59 ++++++++++++++++++ 6 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 .DS_Store create mode 100644 .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata create mode 100644 SwiftCSV/Extension.swift create mode 100644 SwiftCSVTests/DuplicateColumnNameHandlingTests.swift diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0cbf169dc91a58de90bd873a70783249daa12d72 GIT binary patch literal 6148 zcmeHKOG*SW5Pjv$qQTAJ%4JR<%w8~#Egqo3wam2t8M+Zgm%a2DUdAhU4A0=JRFrA# z#o$6jsvvolq+XJK&`An_%pU3~Fac1oD2h4*rrm=>TRC`4lr+aZj!@wgdw3d{=r4BZ z-gmg?M_l6q>-%4zL5*{?xTANoyqs2xX4R&Tkp0_Dd8?)@=gVf!_~PL0`DFC|dbl~a z{xP?`THo&rmvnOlTme_W6>tTT0=%=;ifu!$T>)3X75Gv>&xgRG7&CT;>F8jUQUIbp zqm$9sTtYb^W6anYa)c&EB^p(7iXld4f5>q$V`pe|NKQT^ul$+3NLroqhZGKphF-e@ zu0UUbku@iJ{~z<08BFr~6mPi#uE0N2K*nWN&iGOJZvFOode;V)GZr(rsq?>z$8 i&~s!r4cd56pK&o`XQ;Dif1(rpA&>;|$`$wp1wH^84?VyD literal 0 HcmV?d00001 diff --git a/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000..919434a --- /dev/null +++ b/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/SwiftCSV/Extension.swift b/SwiftCSV/Extension.swift new file mode 100644 index 0000000..aa2049c --- /dev/null +++ b/SwiftCSV/Extension.swift @@ -0,0 +1,13 @@ +// +// Extension.swift +// +// +// Created by 胡逸飞 on 2024/4/26. +// + +extension Array where Element: Hashable { + func duplicates() -> [Element] { + let counts = self.reduce(into: [:]) { counts, element in counts[element, default: 0] += 1 } + return counts.filter { $0.value > 1 }.map { $0.key } + } +} diff --git a/SwiftCSV/Parser.swift b/SwiftCSV/Parser.swift index ac329ff..1406da4 100644 --- a/SwiftCSV/Parser.swift +++ b/SwiftCSV/Parser.swift @@ -126,6 +126,12 @@ enum Parser { static func enumerateAsDict(header: [String], content: String, delimiter: CSVDelimiter, rowLimit: Int? = nil, block: @escaping ([String : String]) -> ()) throws { let enumeratedHeader = header.enumerated() + + // Check for duplicate column names + let duplicateColumns = header.duplicates() + if !duplicateColumns.isEmpty { + throw CSVParseError.generic(message: "Duplicate column names found: \(duplicateColumns.joined(separator: ", "))") + } // Start after the header try enumerateAsArray(text: content, delimiter: delimiter, startAt: 1, rowLimit: rowLimit) { fields in diff --git a/SwiftCSV/ParsingState.swift b/SwiftCSV/ParsingState.swift index ed37ce0..068498d 100644 --- a/SwiftCSV/ParsingState.swift +++ b/SwiftCSV/ParsingState.swift @@ -6,11 +6,21 @@ // Copyright © 2016 Naoto Kaneko. All rights reserved. // -public enum CSVParseError: Error { +public enum CSVParseError: Error, Equatable { case generic(message: String) case quotation(message: String) + + public static func == (lhs: CSVParseError, rhs: CSVParseError) -> Bool { + switch (lhs, rhs) { + case (.generic(let message1), .generic(let message2)): + return message1 == message2 + case (.quotation(let message1), .quotation(let message2)): + return message1 == message2 + default: + return false + } + } } - /// State machine of parsing CSV contents character by character. struct ParsingState { diff --git a/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift b/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift new file mode 100644 index 0000000..adf622f --- /dev/null +++ b/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift @@ -0,0 +1,59 @@ +// +// DuplicateColumnNameHandlingTests.swift +// +// +// Created by 胡逸飞 on 2024/4/27. +// + +import Foundation +import XCTest +@testable import SwiftCSV + +class DuplicateColumnNameHandlingTests: XCTestCase { + + func testErrorOnDuplicateColumnNames() throws { + let csvString = """ + id,name,age,name + 1,John,23,John Doe + 2,Jane,25,Jane Doe + """ + + XCTAssertThrowsError(try CSV(string: csvString)) { error in + XCTAssertEqual(error as? CSVParseError, CSVParseError.generic(message: "Duplicate column names found: name")) + } + } + + func testNoDuplicateColumnNames() throws { + let csvString = """ + id,name,age + 1,John,23 + 2,Jane,25 + """ + + let csvError = try CSV(string: csvString) + let csvRandom = try CSV(string: csvString) + + XCTAssertEqual(csvError.header, ["id", "name", "age"]) + XCTAssertEqual(csvRandom.header, ["id", "name", "age"]) + + XCTAssertEqual(csvError.rows.count, 2) + XCTAssertEqual(csvRandom.rows.count, 2) + + XCTAssertEqual(csvError.rows[0]["id"], "1") + XCTAssertEqual(csvError.rows[0]["name"], "John") + XCTAssertEqual(csvError.rows[0]["age"], "23") + + XCTAssertEqual(csvRandom.rows[0]["id"], "1") + XCTAssertEqual(csvRandom.rows[0]["name"], "John") + XCTAssertEqual(csvRandom.rows[0]["age"], "23") + + XCTAssertEqual(csvError.rows[1]["id"], "2") + XCTAssertEqual(csvError.rows[1]["name"], "Jane") + XCTAssertEqual(csvError.rows[1]["age"], "25") + + XCTAssertEqual(csvRandom.rows[1]["id"], "2") + XCTAssertEqual(csvRandom.rows[1]["name"], "Jane") + XCTAssertEqual(csvRandom.rows[1]["age"], "25") + } + +} From 67b115ecc46c3af6e95b90a73385d90d0056796c Mon Sep 17 00:00:00 2001 From: "Yifei.Hu" Date: Mon, 15 Jul 2024 14:59:49 +0800 Subject: [PATCH 2/5] Fixed the problems and confirm the conformance --- SwiftCSV/{Extension.swift => Array+duplicates.swift} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename SwiftCSV/{Extension.swift => Array+duplicates.swift} (100%) diff --git a/SwiftCSV/Extension.swift b/SwiftCSV/Array+duplicates.swift similarity index 100% rename from SwiftCSV/Extension.swift rename to SwiftCSV/Array+duplicates.swift From d0839bf47171d4e554ab9a3eb101c82cbfbec28d Mon Sep 17 00:00:00 2001 From: "Yifei.Hu" Date: Mon, 15 Jul 2024 15:01:46 +0800 Subject: [PATCH 3/5] Fixed the problems and confirm the conformance --- SwiftCSV/Array+duplicates.swift | 2 +- SwiftCSV/Parser.swift | 2 +- SwiftCSV/ParsingState.swift | 6 +++++- SwiftCSVTests/DuplicateColumnNameHandlingTests.swift | 7 ++++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/SwiftCSV/Array+duplicates.swift b/SwiftCSV/Array+duplicates.swift index aa2049c..63bc027 100644 --- a/SwiftCSV/Array+duplicates.swift +++ b/SwiftCSV/Array+duplicates.swift @@ -1,5 +1,5 @@ // -// Extension.swift +// Array+duplicates.swift // // // Created by 胡逸飞 on 2024/4/26. diff --git a/SwiftCSV/Parser.swift b/SwiftCSV/Parser.swift index 1406da4..1bfc336 100644 --- a/SwiftCSV/Parser.swift +++ b/SwiftCSV/Parser.swift @@ -130,7 +130,7 @@ enum Parser { // Check for duplicate column names let duplicateColumns = header.duplicates() if !duplicateColumns.isEmpty { - throw CSVParseError.generic(message: "Duplicate column names found: \(duplicateColumns.joined(separator: ", "))") + throw CSVParseError.duplicateColumns(columnNames: duplicateColumns) } // Start after the header diff --git a/SwiftCSV/ParsingState.swift b/SwiftCSV/ParsingState.swift index 068498d..30e2b92 100644 --- a/SwiftCSV/ParsingState.swift +++ b/SwiftCSV/ParsingState.swift @@ -6,9 +6,11 @@ // Copyright © 2016 Naoto Kaneko. All rights reserved. // -public enum CSVParseError: Error, Equatable { +public enum CSVParseError: Error { case generic(message: String) case quotation(message: String) + case duplicateColumns(columnNames: [String]) + public static func == (lhs: CSVParseError, rhs: CSVParseError) -> Bool { switch (lhs, rhs) { @@ -16,6 +18,8 @@ public enum CSVParseError: Error, Equatable { return message1 == message2 case (.quotation(let message1), .quotation(let message2)): return message1 == message2 + case (.duplicateColumns(let columns1), .duplicateColumns(let columns2)): + return columns1 == columns2 default: return false } diff --git a/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift b/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift index adf622f..6e539ee 100644 --- a/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift +++ b/SwiftCSVTests/DuplicateColumnNameHandlingTests.swift @@ -19,7 +19,12 @@ class DuplicateColumnNameHandlingTests: XCTestCase { """ XCTAssertThrowsError(try CSV(string: csvString)) { error in - XCTAssertEqual(error as? CSVParseError, CSVParseError.generic(message: "Duplicate column names found: name")) + switch error as? CSVParseError { + case .duplicateColumns(let columnNames): + XCTAssertEqual(["name"], columnNames) + default: + XCTFail("Expected CSVParseError.duplicateColumns") + } } } From 07aab19fd9627affa23d3e7342c725244a9faf0e Mon Sep 17 00:00:00 2001 From: "Yifei.Hu" Date: Wed, 17 Jul 2024 11:40:05 +0800 Subject: [PATCH 4/5] delete unused code and update the changelog --- CHANGELOG.md | 6 ++++++ SwiftCSV/ParsingState.swift | 12 ------------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0952f49..1ded77e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,12 @@ Bugfixes: - Strip byte order mark from all input strings, not just when loading files (#128) -- @Diggory +## 0.8.2 + +Bugfixes: + +- Throw an error when encountering duplicate column names in CSV headers (#136) + ## 0.8.1 Bugfixes: diff --git a/SwiftCSV/ParsingState.swift b/SwiftCSV/ParsingState.swift index 30e2b92..74c0e95 100644 --- a/SwiftCSV/ParsingState.swift +++ b/SwiftCSV/ParsingState.swift @@ -12,18 +12,6 @@ public enum CSVParseError: Error { case duplicateColumns(columnNames: [String]) - public static func == (lhs: CSVParseError, rhs: CSVParseError) -> Bool { - switch (lhs, rhs) { - case (.generic(let message1), .generic(let message2)): - return message1 == message2 - case (.quotation(let message1), .quotation(let message2)): - return message1 == message2 - case (.duplicateColumns(let columns1), .duplicateColumns(let columns2)): - return columns1 == columns2 - default: - return false - } - } } /// State machine of parsing CSV contents character by character. struct ParsingState { From 4bd5436efb3360f230ee7103b00cd36a27dc7fdd Mon Sep 17 00:00:00 2001 From: "Yifei.Hu" Date: Wed, 17 Jul 2024 11:41:09 +0800 Subject: [PATCH 5/5] update the author info --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ded77e..65cd6bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ Bugfixes: Bugfixes: -- Throw an error when encountering duplicate column names in CSV headers (#136) +- Throw an error when encountering duplicate column names in CSV headers (#136) -- @TomorrowMC ## 0.8.1