From 06c2d82d90d5b97e70565b7c114e6d5b461cb887 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 3 Nov 2024 22:09:30 +0100 Subject: [PATCH 1/4] DimVector of NamedTuple is a NamedTuple table --- src/tables.jl | 137 +++++++++++++++++++++++++++++++------------------ src/utils.jl | 8 ++- test/tables.jl | 8 +++ 3 files changed, 100 insertions(+), 53 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 626419161..2d5ab7d7b 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -5,6 +5,9 @@ Abstract supertype for dim tables """ abstract type AbstractDimTable <: Tables.AbstractColumns end +struct Columns end +struct Rows end + # Tables.jl interface for AbstractDimStack and AbstractDimArray DimTableSources = Union{AbstractDimStack,AbstractDimArray} @@ -12,12 +15,8 @@ DimTableSources = Union{AbstractDimStack,AbstractDimArray} Tables.istable(::Type{<:DimTableSources}) = true Tables.columnaccess(::Type{<:DimTableSources}) = true Tables.columns(x::DimTableSources) = DimTable(x) - -Tables.columnnames(A::AbstractDimArray) = _colnames(DimStack(A)) -Tables.columnnames(s::AbstractDimStack) = _colnames(s) - -Tables.schema(A::AbstractDimArray) = Tables.schema(DimStack(A)) -Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) +Tables.columnnames(x::DimTableSources) = _colnames(x) +Tables.schema(x::DimTableSources) = Tables.schema(DimTable(x)) @inline Tables.getcolumn(x::DimTableSources, i::Int) = Tables.getcolumn(DimTable(x), i) @inline Tables.getcolumn(x::DimTableSources, key::Symbol) = @@ -27,11 +26,14 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) @inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) = Tables.getcolumn(t, dimnum(t, dim)) -function _colnames(s::AbstractDimStack) - dimkeys = map(name, dims(s)) - # The data is always the last column/s - (dimkeys..., keys(s)...) +_colnames(s::AbstractDimStack) = (map(name, dims(s))..., keys(s)...) +function _colnames(A::AbstractDimArray) + n = Symbol(name(A)) == Symbol("") ? :value : Symbol(name(A)) + (map(name, dims(A))..., n) end +_colnames(A::AbstractDimVector{T}) where T<:NamedTuple = + (map(name, dims(A))..., _colnames(T)...) +_colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys # DimTable @@ -88,18 +90,20 @@ julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band})) 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 -julia> +julia> ``` """ -struct DimTable <: AbstractDimTable +struct DimTable{Mode} <: AbstractDimTable parent::Union{AbstractDimArray,AbstractDimStack} colnames::Vector{Symbol} dimcolumns::Vector{AbstractVector} - dimarraycolumns::Vector{AbstractVector} + dimarraycolumns::Vector end -function DimTable(s::AbstractDimStack; mergedims=nothing) +function DimTable(s::AbstractDimStack; + mergedims=nothing, +) s = isnothing(mergedims) ? s : DD.mergedims(s, mergedims) dimcolumns = collect(_dimcolumns(s)) dimarraycolumns = if hassamedims(s) @@ -108,40 +112,54 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) map(A -> vec(DimExtensionArray(A, dims(s))), layers(s)) end |> collect keys = collect(_colnames(s)) - return DimTable(s, keys, dimcolumns, dimarraycolumns) + return DimTable{Columns}(s, keys, dimcolumns, dimarraycolumns) end -function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) +function DimTable(As::Vararg{AbstractDimArray}; + layernames=nothing, + name=layernames, + mergedims=nothing, +) # Check that dims are compatible - comparedims(xs...) - + comparedims(As...) # Construct Layer Names - layernames = isnothing(layernames) ? [Symbol("layer_$i") for i in eachindex(xs)] : layernames - + layernames = isnothing(layernames) ? uniquekeys(As) : layernames # Construct dimension and array columns with DimExtensionArray - xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) - dims_ = dims(first(xs)) + As = isnothing(mergedims) ? As : map(x -> DimensionalData.mergedims(x, mergedims), As) + dims_ = dims(first(As)) dimcolumns = collect(_dimcolumns(dims_)) dimnames = collect(map(name, dims_)) - dimarraycolumns = collect(map(vec ∘ parent, xs)) + dimarraycolumns = collect(map(vec ∘ parent, As)) colnames = vcat(dimnames, layernames) # Return DimTable - return DimTable(first(xs), colnames, dimcolumns, dimarraycolumns) + return DimTable{Columns}(first(As), colnames, dimcolumns, dimarraycolumns) end -function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) - if !isnothing(layersfrom) && any(hasdim(x, layersfrom)) - d = dims(x, layersfrom) - nlayers = size(x, d) - layers = [view(x, rebuild(d, i)) for i in 1:nlayers] +function DimTable(A::AbstractDimArray; + layersfrom=nothing, + mergedims=nothing, +) + if !isnothing(layersfrom) && any(hasdim(A, layersfrom)) + d = dims(A, layersfrom) + nlayers = size(A, d) + layers = [view(A, rebuild(d, i)) for i in 1:nlayers] layernames = if iscategorical(d) Symbol.((name(d),), '_', lookup(d)) else Symbol.(("$(name(d))_$i" for i in 1:nlayers)) end - return DimTable(layers..., layernames=layernames, mergedims=mergedims) + return DimTable(layers...; layernames, mergedims) else - s = name(x) == NoName() ? DimStack((;value=x)) : DimStack(x) - return DimTable(s, mergedims=mergedims) + A = isnothing(mergedims) ? A : DD.mergedims(A, mergedims) + dimcolumns = collect(_dimcolumns(A)) + colnames = collect(_colnames(A)) + if (ndims(A) == 1) && (eltype(A) <: NamedTuple) + dimarrayrows = parent(A) + return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows) + else + dimarraycolumns = [vec(parent(A))] + @show colnames dimcolumns dimarraycolumns + return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns) + end end end @@ -156,8 +174,6 @@ function _dimcolumn(x, d::Dimension) end end - - dimcolumns(t::DimTable) = getfield(t, :dimcolumns) dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) colnames(t::DimTable) = Tuple(getfield(t, :colnames)) @@ -175,12 +191,26 @@ Tables.columnaccess(::Type{<:DimTable}) = true Tables.columns(t::DimTable) = t Tables.columnnames(c::DimTable) = colnames(c) -function Tables.schema(t::DimTable) - types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) +function Tables.schema(t::DimTable) + types = vcat([map(eltype, dimcolumns(t))...], _dimarraycolumn_eltypes(t)) Tables.Schema(colnames(t), types) end -@inline function Tables.getcolumn(t::DimTable, i::Int) +_dimarraycolumn_eltypes(t::DimTable{Columns}) = [map(eltype, dimarraycolumns(t))...] +_dimarraycolumn_eltypes(t::DimTable{Rows}) = _eltypes(eltype(dimarraycolumns(t))) +_eltypes(::Type{T}) where T<:NamedTuple = collect(T.types) + +@inline function Tables.getcolumn(t::DimTable{Rows}, i::Int) + nkeys = length(colnames(t)) + if i > length(dims(t)) + map(nt -> nt[i], dimarraycolumns(t)) + elseif i > 0 && i < nkeys + dimcolumns(t)[i] + else + throw(ArgumentError("There is no table column $i")) + end +end +@inline function Tables.getcolumn(t::DimTable{Columns}, i::Int) nkeys = length(colnames(t)) if i > length(dims(t)) dimarraycolumns(t)[i - length(dims(t))] @@ -190,12 +220,19 @@ end throw(ArgumentError("There is no table column $i")) end end - @inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) dimcolumns(t)[dimnum(t, dim)] end - -@inline function Tables.getcolumn(t::DimTable, key::Symbol) +@inline function Tables.getcolumn(t::DimTable{Rows}, key::Symbol) + key in colnames(t) || throw(ArgumentError("There is no table column $key")) + if hasdim(parent(t), key) + dimcolumns(t)[dimnum(t, key)] + else + # Function barrier + _col_from_rows(dimarraycolumns(t), key) + end +end +@inline function Tables.getcolumn(t::DimTable{Columns}, key::Symbol) keys = colnames(t) i = findfirst(==(key), keys) if isnothing(i) @@ -204,22 +241,20 @@ end return Tables.getcolumn(t, i) end end - @inline function Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where T Tables.getcolumn(t, key) end -# TableTraits.jl interface - +_col_from_rows(rows, key) = map(row -> row[key], rows) -function IteratorInterfaceExtensions.getiterator(x::DimTableSources) - return Tables.datavaluerows(Tables.dictcolumntable(x)) -end -IteratorInterfaceExtensions.isiterable(::DimTableSources) = true +# TableTraits.jl interface TableTraits.isiterabletable(::DimTableSources) = true +TableTraits.isiterabletable(::DimTable) = true -function IteratorInterfaceExtensions.getiterator(t::DimTable) - return Tables.datavaluerows(Tables.dictcolumntable(t)) -end +# IteratorInterfaceExtensions.jl interface +IteratorInterfaceExtensions.getiterator(x::DimTableSources) = + Tables.datavaluerows(Tables.dictcolumntable(x)) +IteratorInterfaceExtensions.getiterator(t::DimTable) = + Tables.datavaluerows(Tables.dictcolumntable(t)) +IteratorInterfaceExtensions.isiterable(::DimTableSources) = true IteratorInterfaceExtensions.isiterable(::DimTable) = true -TableTraits.isiterabletable(::DimTable) = true diff --git a/src/utils.jl b/src/utils.jl index fe3eb4172..814b75555 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -193,9 +193,13 @@ function uniquekeys(keys::Vector{Symbol}) end end function uniquekeys(keys::Tuple{Symbol,Vararg{Symbol}}) - ids = ntuple(x -> x, length(keys)) + ids = ntuple(identity, length(keys)) map(keys, ids) do k, id - count(k1 -> k == k1, keys) > 1 ? Symbol(:layer, id) : k + if k == Symbol("") + Symbol(:layer, id) + else + count(k1 -> k == k1, keys) > 1 ? Symbol(:layer, id) : k + end end end uniquekeys(t::Tuple) = ntuple(i -> Symbol(:layer, i), length(t)) diff --git a/test/tables.jl b/test/tables.jl index b5bd416ea..d2b01e4a2 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -154,3 +154,11 @@ end @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end + +@testset "DimTable NamedTuple" begin + da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) + t = DimTable(da) + s = Tables.schema(t) + @test s.names == (:X, :a, :b) + @test s.types == (Int, Float32, Float64) +end From d95b8a831c326f670cd8120822cc5160b1a3d9ba Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 3 Nov 2024 23:14:11 +0100 Subject: [PATCH 2/4] bugfix --- src/tables.jl | 3 +-- test/tables.jl | 7 ++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 2d5ab7d7b..a4c606ceb 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -116,7 +116,6 @@ function DimTable(s::AbstractDimStack; end function DimTable(As::Vararg{AbstractDimArray}; layernames=nothing, - name=layernames, mergedims=nothing, ) # Check that dims are compatible @@ -124,7 +123,7 @@ function DimTable(As::Vararg{AbstractDimArray}; # Construct Layer Names layernames = isnothing(layernames) ? uniquekeys(As) : layernames # Construct dimension and array columns with DimExtensionArray - As = isnothing(mergedims) ? As : map(x -> DimensionalData.mergedims(x, mergedims), As) + As = isnothing(mergedims) ? As : map(x -> DD.mergedims(x, mergedims), As) dims_ = dims(first(As)) dimcolumns = collect(_dimcolumns(dims_)) dimnames = collect(map(name, dims_)) diff --git a/test/tables.jl b/test/tables.jl index d2b01e4a2..f5ea708db 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,4 +1,9 @@ -using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames +using DimensionalData +using Test +using Tables +using IteratorInterfaceExtensions +using TableTraits +using DataFrames using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray From 4d613bd7cbe208e8ee618fb98c7bcd6d1e47e70a Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 3 Nov 2024 23:57:18 +0100 Subject: [PATCH 3/4] remove show --- src/tables.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tables.jl b/src/tables.jl index a4c606ceb..1fb75e0e7 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -156,7 +156,6 @@ function DimTable(A::AbstractDimArray; return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows) else dimarraycolumns = [vec(parent(A))] - @show colnames dimcolumns dimarraycolumns return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns) end end From 645baa117ba27b1cb81376edcad651e3d6bc9bdd Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 4 Nov 2024 00:03:46 +0100 Subject: [PATCH 4/4] fix ambiguity --- src/tables.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tables.jl b/src/tables.jl index 1fb75e0e7..a619ef7cf 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -218,7 +218,7 @@ end throw(ArgumentError("There is no table column $i")) end end -@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) +@inline function Tables.getcolumn(t::DimTable, dim::Union{Dimension,Type{<:Dimension}}) dimcolumns(t)[dimnum(t, dim)] end @inline function Tables.getcolumn(t::DimTable{Rows}, key::Symbol)