Skip to content

Commit

Permalink
Fix #154. We already have the ability to only read certain columns vi…
Browse files Browse the repository at this point in the history
…a manually iterating CSV.File or using Tables.select, but there was another issue: this file had an invalid column name that caused an error while the name was being normalized. (#382)
  • Loading branch information
quinnj authored Feb 5, 2019
1 parent 1c1f406 commit da5bcbb
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/filedetection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ normalizename(name::Symbol) = name
"""
    normalizename(name::String) -> Symbol

Convert an arbitrary column name into a `Symbol` that is a valid Julia identifier.

The name is Unicode-normalized and stripped of surrounding whitespace, every character
that cannot appear in an identifier is replaced by `_`, a leading `_` is added when the
result is empty, starts with a character that cannot begin an identifier, or is listed in
`RESERVED`, and finally runs of consecutive underscores are collapsed into one.
"""
function normalizename(name::String)
    uname = strip(Unicode.normalize(name))
    id = Base.isidentifier(uname) ? uname : map(c -> Base.is_id_char(c) ? c : '_', uname)
    # `isempty` must be checked first: a blank name (e.g. a header cell holding only
    # whitespace) leaves nothing to index, and `first(id)` would throw on it.
    needsprefix = isempty(id) || !Base.is_id_start_char(first(id)) || id in RESERVED
    cleansed = string(needsprefix ? "_" : "", id)
    return Symbol(replace(cleansed, r"(_)\1+" => "_"))
end

Expand Down
3 changes: 3 additions & 0 deletions test/testfiles/test_file_issue_154.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a, b, ,
0, 1, , comment
12, 5, ,
6 changes: 6 additions & 0 deletions test/testfiles/testfiles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -490,4 +490,10 @@ testfiles = [
NamedTuple{(:FAMILY, :PERSON, :MARKER, :RATIO), Tuple{String, String, String, Float64}},
(FAMILY = ["A", "A", "A", "A", "A", "A", "EPGP013951", "EPGP014065", "EPGP014065", "EPGP014065", "EP07", "83346_EPGP014244", "83346_EPGP014244", "83506", "87001"], PERSON = ["EP01223", "EP01227", "EP01228", "EP01228", "EP01227", "EP01228", "EPGP013952", "EPGP014066", "EPGP014065", "EPGP014068", "706", "T3011", "T3231", "T17255", "301"], MARKER = ["rs710865", "rs11249215", "rs11249215", "rs10903129", "rs621559", "rs1514175", "rs773564", "rs2794520", "rs296547", "rs296547", "rs10927875", "rs2251760", "rs2251760", "rs2475335", "rs2413583"], RATIO = [0.0214, 0.0107, 0.00253, 0.0116, 0.00842, 0.0202, 0.00955, 0.0193, 0.0135, 0.0239, 0.0157, 0.0154, 0.0154, 0.00784, 0.0112])
),
# #154
("test_file_issue_154.csv", (allowmissing=:auto, normalizenames=true),
(2, 4),
NamedTuple{(:a, :b, :_, :Column4), Tuple{Int64, Int64, Missing, Union{Missing, String}}},
(a = [0, 12], b = [1, 5], _ = Missing[missing, missing], Column4 = Union{Missing, String}[" comment ", missing])
),
];

0 comments on commit da5bcbb

Please sign in to comment.