From 464e0577d2a4b7e2584f487dbb4548e0f01e6152 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Tue, 5 Feb 2019 08:31:05 -0700 Subject: [PATCH] Fix #154. We already have the ability to read only certain columns by manually iterating CSV.File or using Tables.select, but there was another issue where this file had an invalid (empty) column name that caused an error while trying to normalize the name. --- src/filedetection.jl | 2 +- test/testfiles/test_file_issue_154.csv | 3 +++ test/testfiles/testfiles.jl | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 test/testfiles/test_file_issue_154.csv diff --git a/src/filedetection.jl b/src/filedetection.jl index 990fc1b3..2ec6ac22 100644 --- a/src/filedetection.jl +++ b/src/filedetection.jl @@ -8,7 +8,7 @@ normalizename(name::Symbol) = name function normalizename(name::String) uname = strip(Unicode.normalize(name)) id = Base.isidentifier(uname) ? uname : map(c->Base.is_id_char(c) ? c : '_', uname) - cleansed = string((!Base.is_id_start_char(id[1]) || id in RESERVED) ? "_" : "", id) + cleansed = string((isempty(id) || !Base.is_id_start_char(id[1]) || id in RESERVED) ? 
"_" : "", id) return Symbol(replace(cleansed, r"(_)\1+"=>"_")) end diff --git a/test/testfiles/test_file_issue_154.csv b/test/testfiles/test_file_issue_154.csv new file mode 100644 index 00000000..e12199b2 --- /dev/null +++ b/test/testfiles/test_file_issue_154.csv @@ -0,0 +1,3 @@ +a, b, , +0, 1, , comment +12, 5, , diff --git a/test/testfiles/testfiles.jl b/test/testfiles/testfiles.jl index 440cdef0..b26f774b 100644 --- a/test/testfiles/testfiles.jl +++ b/test/testfiles/testfiles.jl @@ -490,4 +490,10 @@ testfiles = [ NamedTuple{(:FAMILY, :PERSON, :MARKER, :RATIO), Tuple{String, String, String, Float64}}, (FAMILY = ["A", "A", "A", "A", "A", "A", "EPGP013951", "EPGP014065", "EPGP014065", "EPGP014065", "EP07", "83346_EPGP014244", "83346_EPGP014244", "83506", "87001"], PERSON = ["EP01223", "EP01227", "EP01228", "EP01228", "EP01227", "EP01228", "EPGP013952", "EPGP014066", "EPGP014065", "EPGP014068", "706", "T3011", "T3231", "T17255", "301"], MARKER = ["rs710865", "rs11249215", "rs11249215", "rs10903129", "rs621559", "rs1514175", "rs773564", "rs2794520", "rs296547", "rs296547", "rs10927875", "rs2251760", "rs2251760", "rs2475335", "rs2413583"], RATIO = [0.0214, 0.0107, 0.00253, 0.0116, 0.00842, 0.0202, 0.00955, 0.0193, 0.0135, 0.0239, 0.0157, 0.0154, 0.0154, 0.00784, 0.0112]) ), + # #154 + ("test_file_issue_154.csv", (allowmissing=:auto, normalizenames=true), + (2, 4), + NamedTuple{(:a, :b, :_, :Column4), Tuple{Int64, Int64, Missing, Union{Missing, String}}}, + (a = [0, 12], b = [1, 5], _ = Missing[missing, missing], Column4 = Union{Missing, String}[" comment ", missing]) + ), ];