From c75746d121603556e137cc9cfc97398e0347e1e7 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Tue, 4 Apr 2023 23:28:45 +0200 Subject: [PATCH 01/24] Add initial implementation of `mergedims` --- src/DimensionalData.jl | 2 +- src/array/array.jl | 72 ++++++++++++++++++++++++++++++++++++++++++ src/stack/stack.jl | 12 +++++++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 842596977..51fe7ce31 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -64,7 +64,7 @@ export dims, refdims, metadata, name, lookup, bounds export dimnum, hasdim, hasselection, otherdims # utils -export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims! +export set, rebuild, reorder, mergedims, modify, broadcast_dims, broadcast_dims! const DD = DimensionalData diff --git a/src/array/array.jl b/src/array/array.jl index 5ad4a88cb..19466d23b 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -569,3 +569,75 @@ end # Thed default constructor is DimArray dimconstructor(dims::DimTuple) = dimconstructor(tail(dims)) dimconstructor(dims::Tuple{}) = DimArray + +""" + mergedims(old_dims => new_dim) => Dimension + +Return a dimension `new_dim` whose indices are a [`MergedLookup`](@ref) of the indices of +`old_dims`. +""" +function mergedims((old_dims, new_dim)::Pair) + dims_to_merge = dims(old_dims) + data = vec(DimPoints(dims_to_merge)) + return rebuild(basedims(new_dim), MergedLookup(data, dims_to_merge)) +end + +""" + mergedims(dims, old_dims => new_dim, others::Pair...) => dims_new + +If dimensions `old_dims`, `new_dim`, etc. are found in `dims`, then return new `dims_new` +where all dims in `old_dims` have been combined into a single dim `new_dim`. + +The returned dimension will keep only the name of `new_dim`. Its coords will be a +[`MergedLookup`](@ref) of the coords of the dims in `old_dims`. New dimensions are always +placed at the end of `dims_new`. `others` contains other dimension pairs to be merged. + +# Example + +````jldoctest +julia> ds = (X(0:0.1:0.4), Y(10:10:100), Ti([0, 3, 4])); + +julia> mergedims(ds, Ti => :time, (X, Y) => :space) +Dim{:time} MergedLookup{Tuple{Int64}} Tuple{Int64}[(0,), (3,), (4,)] Ti, +Dim{:space} MergedLookup{Tuple{Float64, Int64}} Tuple{Float64, Int64}[(0.0, 10), (0.1, 10), …, (0.3, 100), (0.4, 100)] X, Y +```` +""" +function mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...) + old_dims, new_dim = dim_pair + dims_to_merge = dims(all_dims, old_dims) + merged_dim = mergedims(dims_to_merge => new_dim) + all_dims_new = (otherdims(all_dims, dims_to_merge)..., merged_dim) + isempty(dim_pairs) && return all_dims_new + return mergedims(all_dims_new, dim_pairs...) +end + +""" + mergedims(A::AbstractDimArray, dim_pairs::Pair...) => AbstractDimArray + +Return a new array whose dimensions are the result of [`mergedims(dims(A), dim_pairs)`](@ref). +""" +function mergedims(A::AbstractDimArray, dim_pairs::Pair...) + isempty(dim_pairs) && return A + all_dims = dims(A) + dims_to_merge = map(Base.Fix1(dims, A), map(_astuple ∘ first, dim_pairs)) + dims_to_leave = otherdims(all_dims, _cat_tuples(map(_astuple, dims_to_merge)...)) + length(dims_to_leave) == ndims(A) && return A + sizes_unmerged = map(Base.Fix1(size, A), dims_to_leave) + sizes_merged = map(Base.Fix1(prod, Base.Fix1(size, A)), dims_to_merge) + dims_new = mergedims(all_dims, dim_pairs...) + Aperm = PermutedDimsArray(A, _unmergedims(dims_new, map(last, dim_pairs))) + data_merged = reshape(data(Aperm), sizes_unmerged..., sizes_merged...) + rebuild(A, data_merged, dims_new) +end + +function _unmergedims(all_dims, merged_dims) + _merged_dims = dims(all_dims, merged_dims) + unmerged_dims = map(all_dims) do d + hasdim(_merged_dims, d) || return _astuple(d) + return dims(lookup(d)) + end + return _cat_tuples(unmerged_dims...) +end +_unmergedims(all_dims, dim_pairs::Pair...) = _cat_tuples(replace(all_dims, dim_pairs...)) + +_cat_tuples(tuples...) = mapreduce(_astuple, (x, y) -> (x..., y...), tuples) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 960efbe07..84917a387 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -159,6 +159,18 @@ for func in (:index, :lookup, :metadata, :sampling, :span, :bounds, :locus, :ord @eval ($func)(s::AbstractDimStack, args...) = ($func)(dims(s), args...) end +""" + mergedims(ds::AbstractDimStack, dim_pairs::Pair...) => AbstractDimStack + +Return a new stack where `mergedims(A, dim_pairs...)` has been applied to each layer `A` of +`ds`. +""" +function mergedims(ds::AbstractDimStack, dim_pairs::Pair...) + isempty(dim_pairs) && return ds + vals = map(da -> mergedims(da, dim_pairs...), layers(ds)) + rebuild_from_arrays(ds, vals) +end + """ DimStack <: AbstractDimStack From 90587981eabff210f00fb97223147c7861b5fdfb Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Thu, 6 Apr 2023 00:34:08 +0200 Subject: [PATCH 02/24] Use values instead of layers --- src/stack/stack.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 84917a387..0324c187f 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -167,7 +167,7 @@ Return a new stack where `mergedims(A, dim_pairs...)` has been applied to each l """ function mergedims(ds::AbstractDimStack, dim_pairs::Pair...) isempty(dim_pairs) && return ds - vals = map(da -> mergedims(da, dim_pairs...), layers(ds)) + vals = map(A -> mergedims(A, dim_pairs...), values(ds)) rebuild_from_arrays(ds, vals) end From 63627dcefb6831d3193d48056028c560d8467a78 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Thu, 6 Apr 2023 00:34:24 +0200 Subject: [PATCH 03/24] Simplify base method --- src/array/array.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 19466d23b..ade742a75 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -577,9 +577,8 @@ Return a dimension `new_dim` whose indices are a [`MergedLookup`](@ref) of the i `old_dims`. """ function mergedims((old_dims, new_dim)::Pair) - dims_to_merge = dims(old_dims) - data = vec(DimPoints(dims_to_merge)) - return rebuild(basedims(new_dim), MergedLookup(data, dims_to_merge)) + data = vec(DimPoints(_astuple(old_dims))) + return rebuild(basedims(new_dim), MergedLookup(data, old_dims)) end """ From ccb0f71e04216d7caa33dbe1e3aefcc03e8b982d Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Thu, 6 Apr 2023 00:34:51 +0200 Subject: [PATCH 04/24] Perform all checks in main method --- src/array/array.jl | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index ade742a75..2b13c8766 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -601,13 +601,35 @@ Dim{:time} MergedLookup{Tuple{Int64}} Tuple{Int64}[(0,), (3,), (4,)] Ti, Dim{:space} MergedLookup{Tuple{Float64, Int64}} Tuple{Float64, Int64}[(0.0, 10), (0.1, 10), …, (0.3, 100), (0.4, 100)] X, Y ```` """ -function mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...) +function mergedims(all_dims, dim_pairs::Pair...) + # filter out dims completely missing + dim_pairs_complete = filter(dim_pairs) do (old_dims,) + dims_present = dims(all_dims, _astuple(old_dims)) + isempty(dims_present) && return false + all(hasdim(dims_present, old_dims)) || throw(ArgumentError( + "Not all dimensions $old_dims found in $(map(basetypeof, all_dims))" + )) + return true + end + isempty(dim_pairs_complete) && return all_dims + dim_pairs_concrete = map(dim_pairs_complete) do (old_dims, new_dim) + return dims(all_dims, _astuple(old_dims)) => new_dim + end + # throw error if old dim groups overlap + old_dims_tuples = map(first, dim_pairs_concrete) + if !dimsmatch(_cat_tuples(old_dims_tuples...), combinedims(old_dims_tuples...)) + throw(ArgumentError("Dimensions to be merged are not all unique")) + end + return _mergedims(all_dims, dim_pairs_concrete...) +end + +function _mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...) old_dims, new_dim = dim_pair - dims_to_merge = dims(all_dims, old_dims) + dims_to_merge = dims(all_dims, _astuple(old_dims)) merged_dim = mergedims(dims_to_merge => new_dim) all_dims_new = (otherdims(all_dims, dims_to_merge)..., merged_dim) isempty(dim_pairs) && return all_dims_new - return mergedims(all_dims_new, dim_pairs...) + return _mergedims(all_dims_new, dim_pairs...) end """ From 61a5a2ad86513ad7a7cd44b0dd13c3c8b985f780 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Thu, 6 Apr 2023 00:35:04 +0200 Subject: [PATCH 05/24] Simplify mergedims for arrays --- src/array/array.jl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 2b13c8766..abadcad66 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -640,14 +640,11 @@ Return a new array whose dimensions are the result of [`mergedims(dims(A), dim_p function mergedims(A::AbstractDimArray, dim_pairs::Pair...) isempty(dim_pairs) && return A all_dims = dims(A) - dims_to_merge = map(Base.Fix1(dims, A), map(_astuple ∘ first, dim_pairs)) - dims_to_leave = otherdims(all_dims, _cat_tuples(map(_astuple, dims_to_merge)...)) - length(dims_to_leave) == ndims(A) && return A - sizes_unmerged = map(Base.Fix1(size, A), dims_to_leave) - sizes_merged = map(Base.Fix1(prod, Base.Fix1(size, A)), dims_to_merge) dims_new = mergedims(all_dims, dim_pairs...) - Aperm = PermutedDimsArray(A, _unmergedims(dims_new, map(last, dim_pairs))) - data_merged = reshape(data(Aperm), sizes_unmerged..., sizes_merged...) + dimsmatch(all_dims, dims_new) && return A + dims_perm = _unmergedims(dims_new, map(last, dim_pairs)) + Aperm = PermutedDimsArray(A, dims_perm) + data_merged = reshape(data(Aperm), map(length, dims_new)) rebuild(A, data_merged, dims_new) end From 8c06a4393bba1758871423a0f5fee6e8137ad650 Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Thu, 6 Apr 2023 00:35:57 +0200 Subject: [PATCH 06/24] Handle case where different numbers of dims provided --- src/Dimensions/primitives.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Dimensions/primitives.jl b/src/Dimensions/primitives.jl index c6c2c1dd5..9a6afcb5f 100644 --- a/src/Dimensions/primitives.jl +++ b/src/Dimensions/primitives.jl @@ -12,6 +12,7 @@ or are at least rotations/transformations of the same type. """ @inline dimsmatch(dims, query) = dimsmatch(<:, dims, query) @inline function dimsmatch(f::Function, dims::Tuple, query::Tuple) + length(dims) == length(query) || return false all(map((d, l) -> dimsmatch(f, d, l), dims, query)) end @inline dimsmatch(f::Function, dim, query) = dimsmatch(f, typeof(dim), typeof(query)) From 5d8123f718b3abd51c1e31534a8812ab0f0a105e Mon Sep 17 00:00:00 2001 From: Seth Axen Date: Thu, 6 Apr 2023 00:38:34 +0200 Subject: [PATCH 07/24] Add mergedims to docs --- docs/src/api.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/api.md b/docs/src/api.md index 5cf88794c..e0673a21c 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -50,6 +50,7 @@ rebuild modify broadcast_dims broadcast_dims! +mergedims reorder Base.cat Base.map From af9381a31d1e701b7e04b27a71b7208ef4fae113 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 8 Sep 2023 18:57:17 -0600 Subject: [PATCH 08/24] Implemented WideDimTable --- src/tables.jl | 219 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 204 insertions(+), 15 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 036357889..c18a85fb3 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,10 +1,17 @@ -# Tables.jl interface +""" + AbstractDimTable <: Tables.AbstractColumns + +Abstract supertype for dim tables +""" +abstract type AbstractDimTable <: Tables.AbstractColumns end + +# Tables.jl interface for AbstractDimStack and AbstractDimArray DimTableSources = Union{AbstractDimStack,AbstractDimArray} Tables.istable(::Type{<:DimTableSources}) = true Tables.columnaccess(::Type{<:DimTableSources}) = true -Tables.columns(x::DimTableSources) = DimTable(x) +Tables.columns(x::DimTableSources) = WideDimTable(x) Tables.columnnames(A::AbstractDimArray) = _colnames(DimStack(A)) Tables.columnnames(s::AbstractDimStack) = _colnames(s) @@ -20,6 +27,10 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) @inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) = Tables.getcolumn(t, dimnum(t, dim)) + +# DimColumn + + """ DimColumn{T,D<:Dimension} <: AbstractVector{T} @@ -56,8 +67,6 @@ end dim(c::DimColumn) = getfield(c, :dim) dimstride(c::DimColumn) = getfield(c, :dimstride) -# Simple Array interface - Base.length(c::DimColumn) = getfield(c, :length) @inline function Base.getindex(c::DimColumn, i::Int) Base.@boundscheck checkbounds(c, i) @@ -70,6 +79,36 @@ Base.axes(c::DimColumn) = (Base.OneTo(length(c)),) Base.vec(c::DimColumn{T}) where T = [c[i] for i in eachindex(c)] Base.Array(c::DimColumn) = vec(c) + +# MergedDimColumn + + +struct MergedDimColumn{T,DS} <: AbstractVector{T} + colname::Symbol + dimcols::DS +end +function MergedDimColumn(dims::DS, name::Symbol) where DS + MergedDimColumn{Tuple{map(eltype, dims)...},DS}(name, dims) +end + +colname(c::MergedDimColumn) = getfield(c, :colname) +dimcols(c::MergedDimColumn) = getfield(c, :dimcols) + +Base.length(c::MergedDimColumn) = length(first(dimcols(c))) +@inline function Base.getindex(c::MergedDimColumn{T}, i::Int) where T + return map(x -> x[i], dimcols(c)) +end +Base.getindex(c::MergedDimColumn, ::Colon) = vec(c) +Base.getindex(c::MergedDimColumn, A::AbstractArray) = [c[i] for i in A] +Base.size(c::MergedDimColumn) = (length(c),) +Base.axes(c::MergedDimColumn) = (Base.OneTo(length(c)),) +Base.vec(c::MergedDimColumn{T}) where T = [c[i] for i in eachindex(c)] +Base.Array(c::MergedDimColumn) = vec(c) + + +# DimArrayColumn + + struct DimArrayColumn{T,A<:AbstractDimArray{T},DS,DL,L} <: AbstractVector{T} data::A dimstrides::DS @@ -89,8 +128,6 @@ Base.parent(c::DimArrayColumn) = getfield(c, :data) dimstrides(c::DimArrayColumn) = getfield(c, :dimstrides) dimlengths(c::DimArrayColumn) = getfield(c, :dimlengths) -# Simple Array interface - Base.length(c::DimArrayColumn) = getfield(c, :length) @inline function Base.getindex(c::DimArrayColumn, i::Int) Base.@boundscheck checkbounds(c, i) @@ -107,12 +144,9 @@ Base.axes(c::DimArrayColumn) = (Base.OneTo(length(c)),) Base.vec(c::DimArrayColumn{T}) where T = [c[i] for i in eachindex(c)] Base.Array(c::DimArrayColumn) = vec(c) -""" - AbstractDimTable <: Tables.AbstractColumns -Abstract supertype for dim tables -""" -abstract type AbstractDimTable <: Tables.AbstractColumns end +# DimTable + """ DimTable <: AbstractDimTable @@ -163,8 +197,6 @@ for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, end -# Tables interface - Tables.istable(::DimTable) = true Tables.columnaccess(::Type{<:DimTable}) = true Tables.columns(t::DimTable) = t @@ -207,16 +239,173 @@ function _colnames(s::AbstractDimStack) end +# WideDimTable + + +""" + WideDimTable <: AbstractDimTable + + WideDimTable(A::AbstractDimArray) + +Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray`. + +This table will have a column for the array data and columns for each +`Dimension` index, as a [`DimColumn`]. These are lazy, and generated +as required. + +Column names are converted from the dimension types using +[`DimensionalData.dim2key`](@ref). This means type `Ti` becomes the +column name `:Ti`, and `Dim{:custom}` becomes `:custom`. + +To get dimension columns, you can index with `Dimension` (`X()`) or +`Dimension` type (`X`) as well as the regular `Int` or `Symbol`. +""" +struct WideDimTable{DS} <: AbstractDimTable + colnames::Vector{Symbol} + dimcolumns::DS + dimarraycolumns::Vector{DimArrayColumn} +end + +function WideDimTable(s::AbstractDimStack; mergedims=false) + dims_ = dims(s) + dimcolumns = collect(map(d -> DimColumn(d, dims_), dims_)) + dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims_), s)) + + if mergedims + dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) + keys = vcat([:geometry], collect(_colnames(s))[length(dims_)+1:end]) + return WideDimTable(keys, [dimcol], dimarraycolumns) + else + keys = collect(_colnames(s)) + return WideDimTable(keys, dimcolumns, dimarraycolumns) + end +end + +function WideDimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=false) + # Construct DimColumns + dims_ = dims(first(xs)) + dimcolumns = map(d -> DimColumn(d, dims_), dims_) + dimnames = collect(map(dim2key, dims_)) + + # Construct DimArrayColumns + dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims_), xs)) + + # Merge DimColumns + if mergedims + colnames = vcat([:geometry], layernames) + dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) + return WideDimTable{typeof(dimcol)}(colnames, dimcol, dimarraycolumns) + else + colnames = vcat(dimnames, layernames) + return WideDimTable{typeof(dimcolumns)}(colnames, dimcolumns, dimarraycolumns) + end +end + +function WideDimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=false) + if (layersfrom <: Dimension) && (any(isa.(dims(x), layersfrom))) + nlayers = size(x, layersfrom) + layers = [(@view x[layersfrom(i)]) for i in 1:nlayers] + layernames = Symbol.(["$(dim2key(layersfrom))_$i" for i in 1:nlayers]) + return WideDimTable(layers..., layernames=layernames, mergedims=mergedims) + else + # Construct DimColumns + dims_ = dims(x) + dimcolumns = map(d -> DimColumn(d, dims_), dims_) + dimnames = collect(map(dim2key, dims_)) + + # Construct DimArrayColumn + dimarraycolumn = DimArrayColumn(x, dims_) + + # Merge DimColumns + if mergedims + colnames = vcat([:geometry], [:value]) + dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) + return WideDimTable{typeof(dimcol)}(colnames, dimcol, [dimarraycolumn]) + else + return WideDimTable{typeof(dimcolumns)}(vcat(dimnames, [:value]), dimcolumns, [dimarraycolumn]) + end + end +end + +dimcolumns(t::WideDimTable) = getfield(t, :dimcolumns) +dimarraycolumns(t::WideDimTable) = getfield(t, :dimarraycolumns) +dims(t::WideDimTable) = dims(parent(t)) + +Base.parent(t::WideDimTable) = getfield(t, :colnames) + +for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, :bounds, + :locus, :name, :label, :units) + @eval $func(t::WideDimTable, args...) = $func(parent(t), args...) + +end + +Tables.istable(::WideDimTable) = true +Tables.columnaccess(::Type{<:WideDimTable}) = true +Tables.columns(t::WideDimTable) = t +Tables.columnnames(c::WideDimTable) = parent(c) + +function Tables.schema(t::WideDimTable) + colnames = parent(t) + types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) + Tables.Schema(colnames, types) +end + +function Tables.schema(t::WideDimTable{<:MergedDimColumn}) + colnames = parent(t) + types = vcat([eltype(dimcolumns(t))], [map(eltype, dimarraycolumns(t))...]) + Tables.Schema(colnames, types) +end + +@inline function Tables.getcolumn(t::WideDimTable, key::Symbol) + keys = parent(t) + i = findfirst(==(key), keys) + n_dimcols = length(dimcolumns(t)) + if i <= n_dimcols + return dimcolumns(t)[i] + else + return dimarraycolumns(t)[i - n_dimcols] + end +end + +@inline function Tables.getcolumn(t::WideDimTable{<:MergedDimColumn}, key::Symbol) + keys = parent(t) + i = findfirst(==(key), keys) + if i == 1 + return dimcolumns(t) + else + return dimarraycolumns(t)[i - 1] + end + n_dimcols = length(dimcolumns(t)) + i = findfirst(==(key), keys) + if i <= n_dimcols + return dimcolumns(t)[i] + else + return dimarraycolumns(t)[i - n_dimcols] + end +end + +@inline function Tables.getcolumn(t::WideDimTable, ::Type{T}, i::Int, key::Symbol) where T + Tables.getcolumn(t, key) +end + + # TableTraits.jl interface + function IteratorInterfaceExtensions.getiterator(x::DimTableSources) - return Tables.datavaluerows(Tables.columntable(x)) + return Tables.datavaluerows(Tables.dictcolumntable(x)) end IteratorInterfaceExtensions.isiterable(::DimTableSources) = true TableTraits.isiterabletable(::DimTableSources) = true function IteratorInterfaceExtensions.getiterator(t::DimTable) - return Tables.datavaluerows(Tables.columntable(t)) + return Tables.datavaluerows(Tables.dictcolumntable(t)) end IteratorInterfaceExtensions.isiterable(::DimTable) = true TableTraits.isiterabletable(::DimTable) = true +function IteratorInterfaceExtensions.getiterator(t::WideDimTable) + return Tables.datavaluerows(Tables.dictcolumntable(t)) +end +IteratorInterfaceExtensions.isiterable(::WideDimTable) = true +TableTraits.isiterabletable(::WideDimTable) = true + From d90b86aaf6a5cd4130eb58a77aaa3d802e61816a Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 8 Sep 2023 20:00:31 -0600 Subject: [PATCH 09/24] Removed Dead Code --- src/DimensionalData.jl | 2 +- src/tables.jl | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 1600a41f6..c7cda2628 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -55,7 +55,7 @@ export AbstractDimArray, DimArray export AbstractDimStack, DimStack -export AbstractDimTable, DimTable +export AbstractDimTable, DimTable, WideDimTable export DimIndices, DimKeys, DimPoints diff --git a/src/tables.jl b/src/tables.jl index c18a85fb3..f788341ec 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -302,7 +302,7 @@ function WideDimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i end function WideDimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=false) - if (layersfrom <: Dimension) && (any(isa.(dims(x), layersfrom))) + if !isnothing(layersfrom) && (layersfrom <: Dimension) && (any(isa.(dims(x), layersfrom))) nlayers = size(x, layersfrom) layers = [(@view x[layersfrom(i)]) for i in 1:nlayers] layernames = Symbol.(["$(dim2key(layersfrom))_$i" for i in 1:nlayers]) @@ -375,13 +375,6 @@ end else return dimarraycolumns(t)[i - 1] end - n_dimcols = length(dimcolumns(t)) - i = findfirst(==(key), keys) - if i <= n_dimcols - return dimcolumns(t)[i] - else - return dimarraycolumns(t)[i - n_dimcols] - end end @inline function Tables.getcolumn(t::WideDimTable, ::Type{T}, i::Int, key::Symbol) where T From 14ca27122498cb0c5361f8da9436677f8ba58e9c Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sat, 9 Sep 2023 15:52:29 -0600 Subject: [PATCH 10/24] mergedims now allows selection of specific dimensions --- src/DimensionalData.jl | 3 +- src/Dimensions/primitives.jl | 1 + src/mergedims.jl | 93 +++++++++++++++ src/tables.jl | 225 +++++++++++------------------------ 4 files changed, 166 insertions(+), 156 deletions(-) create mode 100644 src/mergedims.jl diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index c7cda2628..3685f83b1 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -66,7 +66,7 @@ export dims, refdims, metadata, name, lookup, bounds export dimnum, hasdim, hasselection, otherdims # utils -export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims! +export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims const DD = DimensionalData @@ -88,6 +88,7 @@ include("stack/methods.jl") include("stack/show.jl") # Other include("dimindices.jl") +include("mergedims.jl") include("tables.jl") # Combined (easier to work on these in one file) include("plotrecipes.jl") diff --git a/src/Dimensions/primitives.jl b/src/Dimensions/primitives.jl index b3a69a4cf..098a530e5 100644 --- a/src/Dimensions/primitives.jl +++ b/src/Dimensions/primitives.jl @@ -12,6 +12,7 @@ or are at least rotations/transformations of the same type. """ @inline dimsmatch(dims, query) = dimsmatch(<:, dims, query) @inline function dimsmatch(f::Function, dims::Tuple, query::Tuple) + length(dims) == length(query) || return false all(map((d, l) -> dimsmatch(f, d, l), dims, query)) end @inline dimsmatch(f::Function, dim, query) = dimsmatch(f, typeof(dim), typeof(query)) diff --git a/src/mergedims.jl b/src/mergedims.jl new file mode 100644 index 000000000..e478ac588 --- /dev/null +++ b/src/mergedims.jl @@ -0,0 +1,93 @@ +""" + mergedims(old_dims => new_dim) => Dimension +Return a dimension `new_dim` whose indices are a [`MergedLookup`](@ref) of the indices of +`old_dims`. +""" +function mergedims((old_dims, new_dim)::Pair) + data = vec(DimPoints(_astuple(old_dims))) + return rebuild(basedims(new_dim), MergedLookup(data, old_dims)) +end + +""" + mergedims(dims, old_dims => new_dim, others::Pair...) => dims_new +If dimensions `old_dims`, `new_dim`, etc. are found in `dims`, then return new `dims_new` +where all dims in `old_dims` have been combined into a single dim `new_dim`. +The returned dimension will keep only the name of `new_dim`. Its coords will be a +[`MergedLookup`](@ref) of the coords of the dims in `old_dims`. New dimensions are always +placed at the end of `dims_new`. `others` contains other dimension pairs to be merged. +# Example +````jldoctest +julia> ds = (X(0:0.1:0.4), Y(10:10:100), Ti([0, 3, 4])); +julia> mergedims(ds, Ti => :time, (X, Y) => :space) +Dim{:time} MergedLookup{Tuple{Int64}} Tuple{Int64}[(0,), (3,), (4,)] Ti, +Dim{:space} MergedLookup{Tuple{Float64, Int64}} Tuple{Float64, Int64}[(0.0, 10), (0.1, 10), …, (0.3, 100), (0.4, 100)] X, Y +```` +""" +function mergedims(all_dims, dim_pairs::Pair...) + # filter out dims completely missing + dim_pairs_complete = filter(dim_pairs) do (old_dims,) + dims_present = dims(all_dims, _astuple(old_dims)) + isempty(dims_present) && return false + all(hasdim(dims_present, old_dims)) || throw(ArgumentError( + "Not all dimensions $old_dims found in $(map(basetypeof, all_dims))" + )) + return true + end + isempty(dim_pairs_complete) && return all_dims + dim_pairs_concrete = map(dim_pairs_complete) do (old_dims, new_dim) + return dims(all_dims, _astuple(old_dims)) => new_dim + end + # throw error if old dim groups overlap + old_dims_tuples = map(first, dim_pairs_concrete) + if !dimsmatch(_cat_tuples(old_dims_tuples...), combinedims(old_dims_tuples...)) + throw(ArgumentError("Dimensions to be merged are not all unique")) + end + return _mergedims(all_dims, dim_pairs_concrete...) +end + +""" + mergedims(A::AbstractDimArray, dim_pairs::Pair...) => AbstractDimArray +Return a new array whose dimensions are the result of [`mergedims(dims(A), dim_pairs)`](@ref). +""" +function mergedims(A::AbstractDimArray, dim_pairs::Pair...) + isempty(dim_pairs) && return A + all_dims = dims(A) + dims_new = mergedims(all_dims, dim_pairs...) + dimsmatch(all_dims, dims_new) && return A + dims_perm = _unmergedims(dims_new, map(last, dim_pairs)) + Aperm = PermutedDimsArray(A, dims_perm) + data_merged = reshape(data(Aperm), map(length, dims_new)) + rebuild(A, data_merged, dims_new) +end + +""" + mergedims(ds::AbstractDimStack, dim_pairs::Pair...) => AbstractDimStack +Return a new stack where `mergedims(A, dim_pairs...)` has been applied to each layer `A` of +`ds`. +""" +function mergedims(ds::AbstractDimStack, dim_pairs::Pair...) + isempty(dim_pairs) && return ds + vals = map(A -> mergedims(A, dim_pairs...), values(ds)) + rebuild_from_arrays(ds, vals) +end + +function _mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...) + old_dims, new_dim = dim_pair + dims_to_merge = dims(all_dims, _astuple(old_dims)) + merged_dim = mergedims(dims_to_merge => new_dim) + all_dims_new = (otherdims(all_dims, dims_to_merge)..., merged_dim) + isempty(dim_pairs) && return all_dims_new + return _mergedims(all_dims_new, dim_pairs...) +end + +function _unmergedims(all_dims, merged_dims) + _merged_dims = dims(all_dims, merged_dims) + unmerged_dims = map(all_dims) do d + hasdim(_merged_dims, d) || return _astuple(d) + return dims(lookup(d)) + end + return _cat_tuples(unmerged_dims...) +end +_unmergedims(all_dims, dim_pairs::Pair...) = _cat_tuples(replace(all_dims, dim_pairs...)) + +_cat_tuples(tuples...) = mapreduce(_astuple, (x, y) -> (x..., y...), tuples) \ No newline at end of file diff --git a/src/tables.jl b/src/tables.jl index f788341ec..282921ef5 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -11,7 +11,7 @@ DimTableSources = Union{AbstractDimStack,AbstractDimArray} Tables.istable(::Type{<:DimTableSources}) = true Tables.columnaccess(::Type{<:DimTableSources}) = true -Tables.columns(x::DimTableSources) = WideDimTable(x) +Tables.columns(x::DimTableSources) = DimTable(x) Tables.columnnames(A::AbstractDimArray) = _colnames(DimStack(A)) Tables.columnnames(s::AbstractDimStack) = _colnames(s) @@ -27,6 +27,12 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) @inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) = Tables.getcolumn(t, dimnum(t, dim)) +function _colnames(s::AbstractDimStack) + dimkeys = map(dim2key, (dims(s))) + # The data is always the last column/s + (dimkeys..., keys(s)...) +end + # DimColumn @@ -145,7 +151,7 @@ Base.vec(c::DimArrayColumn{T}) where T = [c[i] for i in eachindex(c)] Base.Array(c::DimArrayColumn) = vec(c) -# DimTable +#DimTable """ @@ -166,147 +172,42 @@ column name `:Ti`, and `Dim{:custom}` becomes `:custom`. To get dimension columns, you can index with `Dimension` (`X()`) or `Dimension` type (`X`) as well as the regular `Int` or `Symbol`. """ -struct DimTable{Keys,S,DC,DAC} <: AbstractDimTable - stack::S - dimcolumns::DC - dimarraycolumns::DAC -end -DimTable{K}(stack::S, dimcolumns::DC, strides::SD) where {K,S,DC,SD} = - DimTable{K,S,DC,SD}(stack, dimcolumns, strides) -DimTable(A::AbstractDimArray, As::AbstractDimArray...) = DimTable((A, As...)) -function DimTable(As::Tuple{AbstractDimArray,Vararg{AbstractDimArray}}...) - DimTable(DimStack(As...)) -end -function DimTable(s::AbstractDimStack) - dims_ = dims(s) - dimcolumns = map(d -> DimColumn(d, dims_), dims_) - dimarraycolumns = map(A -> DimArrayColumn(A, dims_), s) - keys = _colnames(s) - DimTable{keys}(s, dimcolumns, dimarraycolumns) -end - -dimcolumns(t::DimTable) = getfield(t, :dimcolumns) -dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) -dims(t::DimTable) = dims(parent(t)) - -Base.parent(t::DimTable) = getfield(t, :stack) - -for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, :bounds, - :locus, :name, :label, :units) - @eval $func(t::DimTable, args...) = $func(parent(t), args...) - -end - -Tables.istable(::DimTable) = true -Tables.columnaccess(::Type{<:DimTable}) = true -Tables.columns(t::DimTable) = t -Tables.columnnames(c::DimTable{Keys}) where Keys = Keys -function Tables.schema(t::DimTable{Keys}) where Keys - s = parent(t) - types = (map(eltype, dims(s))..., map(eltype, parent(s))...) - Tables.Schema(Keys, types) -end - -@inline function Tables.getcolumn(t::DimTable{Keys}, i::Int) where Keys - nkeys = length(Keys) - if i > length(dims(t)) - dimarraycolumns(t)[i - length(dims(t))] - elseif i > 0 && i < nkeys - dimcolumns(t)[i] - else - throw(ArgumentError("There is no table column $i")) - end -end -@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) - dimcolumns(t)[dimnum(t, dim)] -end -# Retrieve a column by name -@inline function Tables.getcolumn(t::DimTable{Keys}, key::Symbol) where Keys - if key in keys(dimarraycolumns(t)) - dimarraycolumns(t)[key] - else - dimcolumns(t)[dimnum(dims(t), key)] - end -end -@inline function Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where T - Tables.getcolumn(t, key) -end - -function _colnames(s::AbstractDimStack) - dimkeys = map(dim2key, (dims(s))) - # The data is always the last column/s - (dimkeys..., keys(s)...) -end - - -# WideDimTable - - -""" - WideDimTable <: AbstractDimTable - - WideDimTable(A::AbstractDimArray) - -Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray`. - -This table will have a column for the array data and columns for each -`Dimension` index, as a [`DimColumn`]. These are lazy, and generated -as required. - -Column names are converted from the dimension types using -[`DimensionalData.dim2key`](@ref). This means type `Ti` becomes the -column name `:Ti`, and `Dim{:custom}` becomes `:custom`. - -To get dimension columns, you can index with `Dimension` (`X()`) or -`Dimension` type (`X`) as well as the regular `Int` or `Symbol`. -""" -struct WideDimTable{DS} <: AbstractDimTable +struct DimTable <: AbstractDimTable colnames::Vector{Symbol} - dimcolumns::DS + dimcolumns::Vector{DimColumn} dimarraycolumns::Vector{DimArrayColumn} end -function WideDimTable(s::AbstractDimStack; mergedims=false) +function DimTable(s::AbstractDimStack; mergedims=nothing) + s = isnothing(mergedims) ? s : DimensionalData.mergedims(s, mergedims) dims_ = dims(s) dimcolumns = collect(map(d -> DimColumn(d, dims_), dims_)) - dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims_), s)) + dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims(s)), s)) - if mergedims - dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) - keys = vcat([:geometry], collect(_colnames(s))[length(dims_)+1:end]) - return WideDimTable(keys, [dimcol], dimarraycolumns) - else - keys = collect(_colnames(s)) - return WideDimTable(keys, dimcolumns, dimarraycolumns) - end + keys = collect(_colnames(s)) + return DimTable(keys, dimcolumns, dimarraycolumns) end -function WideDimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=false) +function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=nothing) # Construct DimColumns + xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) dims_ = dims(first(xs)) - dimcolumns = map(d -> DimColumn(d, dims_), dims_) + dimcolumns = collect(map(d -> DimColumn(d, dims_), dims_)) dimnames = collect(map(dim2key, dims_)) # Construct DimArrayColumns dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims_), xs)) - # Merge DimColumns - if mergedims - colnames = vcat([:geometry], layernames) - dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) - return WideDimTable{typeof(dimcol)}(colnames, dimcol, dimarraycolumns) - else - colnames = vcat(dimnames, layernames) - return WideDimTable{typeof(dimcolumns)}(colnames, dimcolumns, dimarraycolumns) - end + colnames = vcat(dimnames, layernames) + return DimTable(colnames, dimcolumns, dimarraycolumns) end -function WideDimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=false) +function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) if !isnothing(layersfrom) && (layersfrom <: Dimension) && (any(isa.(dims(x), layersfrom))) nlayers = size(x, layersfrom) layers = [(@view x[layersfrom(i)]) for i in 1:nlayers] layernames = Symbol.(["$(dim2key(layersfrom))_$i" for i in 1:nlayers]) - return WideDimTable(layers..., layernames=layernames, mergedims=mergedims) + return DimTable(layers..., layernames=layernames, mergedims=mergedims) else # Construct DimColumns dims_ = dims(x) @@ -320,43 +221,37 @@ function WideDimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=false) if mergedims colnames = vcat([:geometry], [:value]) dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) - return WideDimTable{typeof(dimcol)}(colnames, dimcol, [dimarraycolumn]) + return DimTable{typeof(dimcol)}(colnames, dimcol, [dimarraycolumn]) else - return WideDimTable{typeof(dimcolumns)}(vcat(dimnames, [:value]), dimcolumns, [dimarraycolumn]) + return DimTable{typeof(dimcolumns)}(vcat(dimnames, [:value]), dimcolumns, [dimarraycolumn]) end end end -dimcolumns(t::WideDimTable) = getfield(t, :dimcolumns) -dimarraycolumns(t::WideDimTable) = getfield(t, :dimarraycolumns) -dims(t::WideDimTable) = dims(parent(t)) +dimcolumns(t::DimTable) = getfield(t, :dimcolumns) +dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) +dims(t::DimTable) = dims(parent(t)) -Base.parent(t::WideDimTable) = getfield(t, :colnames) +Base.parent(t::DimTable) = getfield(t, :colnames) for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, :bounds, :locus, :name, :label, :units) - @eval $func(t::WideDimTable, args...) = $func(parent(t), args...) + @eval $func(t::DimTable, args...) = $func(parent(t), args...) end -Tables.istable(::WideDimTable) = true -Tables.columnaccess(::Type{<:WideDimTable}) = true -Tables.columns(t::WideDimTable) = t -Tables.columnnames(c::WideDimTable) = parent(c) +Tables.istable(::DimTable) = true +Tables.columnaccess(::Type{<:DimTable}) = true +Tables.columns(t::DimTable) = t +Tables.columnnames(c::DimTable) = parent(c) -function Tables.schema(t::WideDimTable) +function Tables.schema(t::DimTable) colnames = parent(t) types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) Tables.Schema(colnames, types) end -function Tables.schema(t::WideDimTable{<:MergedDimColumn}) - colnames = parent(t) - types = vcat([eltype(dimcolumns(t))], [map(eltype, dimarraycolumns(t))...]) - Tables.Schema(colnames, types) -end - -@inline function Tables.getcolumn(t::WideDimTable, key::Symbol) +@inline function Tables.getcolumn(t::DimTable, key::Symbol) keys = parent(t) i = findfirst(==(key), keys) n_dimcols = length(dimcolumns(t)) @@ -367,17 +262,7 @@ end end end -@inline function Tables.getcolumn(t::WideDimTable{<:MergedDimColumn}, key::Symbol) - keys = parent(t) - i = findfirst(==(key), keys) - if i == 1 - return dimcolumns(t) - else - return dimarraycolumns(t)[i - 1] - end -end - -@inline function Tables.getcolumn(t::WideDimTable, ::Type{T}, i::Int, key::Symbol) where T +@inline function Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where T Tables.getcolumn(t, key) end @@ -396,9 +281,39 @@ function IteratorInterfaceExtensions.getiterator(t::DimTable) end IteratorInterfaceExtensions.isiterable(::DimTable) = true TableTraits.isiterabletable(::DimTable) = true -function IteratorInterfaceExtensions.getiterator(t::WideDimTable) - return Tables.datavaluerows(Tables.dictcolumntable(t)) + + +function fromtable(table, dims) + @time xlookup = enumerate(dims[1]) .|> reverse |> Dict + @time ylookup = enumerate(dims[2]) .|> reverse |> Dict + + dst = zeros(Float32, size(dims)) + geoms = collect(table.geometry) + vals = collect(table.band_1) + for i in eachindex(geoms) + (x, y) = geoms[i] + dst[xlookup[x],ylookup[y]] = vals[i] + end + return dst +end + +function getindices(vals, ref) + i = 1 + n = length(ref) + indices = Int64[] + for val in vals + while (i <= n) && (val != ref[i]) + i += 1 + end + push!(indices, i) + end + return indices end -IteratorInterfaceExtensions.isiterable(::WideDimTable) = true -TableTraits.isiterabletable(::WideDimTable) = true +function fromtable(geoms, vals, dims) + @time dst = zeros(Float32, length(dims)) + @time sortedvals = sort(zip(geoms, vals), by=(reverse ∘ first)) + @time indices = DD.getindices(first.(sortedvals), dims) + @time dst[indices] .= last.(sortedvals) + return dst +end \ No newline at end of file From a3dfda7f15a36d5f07c4095ce99be64b92159fef Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sat, 9 Sep 2023 15:54:30 -0600 Subject: [PATCH 11/24] Dropped MergedDimColumn --- src/tables.jl | 64 +-------------------------------------------------- 1 file changed, 1 insertion(+), 63 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 282921ef5..3bd746865 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -86,32 +86,6 @@ Base.vec(c::DimColumn{T}) where T = [c[i] for i in eachindex(c)] Base.Array(c::DimColumn) = vec(c) -# MergedDimColumn - - -struct MergedDimColumn{T,DS} <: AbstractVector{T} - colname::Symbol - dimcols::DS -end -function MergedDimColumn(dims::DS, name::Symbol) where DS - MergedDimColumn{Tuple{map(eltype, dims)...},DS}(name, dims) -end - -colname(c::MergedDimColumn) = getfield(c, :colname) -dimcols(c::MergedDimColumn) = getfield(c, :dimcols) - -Base.length(c::MergedDimColumn) = length(first(dimcols(c))) -@inline function Base.getindex(c::MergedDimColumn{T}, i::Int) where T - return map(x -> x[i], dimcols(c)) -end -Base.getindex(c::MergedDimColumn, ::Colon) = vec(c) -Base.getindex(c::MergedDimColumn, A::AbstractArray) = [c[i] for i in A] -Base.size(c::MergedDimColumn) = (length(c),) -Base.axes(c::MergedDimColumn) = (Base.OneTo(length(c)),) -Base.vec(c::MergedDimColumn{T}) where T = [c[i] for i in eachindex(c)] -Base.Array(c::MergedDimColumn) = vec(c) - - # DimArrayColumn @@ -280,40 +254,4 @@ function IteratorInterfaceExtensions.getiterator(t::DimTable) return Tables.datavaluerows(Tables.dictcolumntable(t)) end IteratorInterfaceExtensions.isiterable(::DimTable) = true -TableTraits.isiterabletable(::DimTable) = true - - -function fromtable(table, dims) - @time xlookup = enumerate(dims[1]) .|> reverse |> Dict - @time ylookup = enumerate(dims[2]) .|> reverse |> Dict - - dst = zeros(Float32, size(dims)) - geoms = collect(table.geometry) - vals = collect(table.band_1) - for i in eachindex(geoms) - (x, y) = geoms[i] - dst[xlookup[x],ylookup[y]] = vals[i] - end - return dst -end - -function getindices(vals, ref) - i = 1 - n = length(ref) - indices = Int64[] - for val in vals - while (i <= n) && (val != ref[i]) - i += 1 - end - push!(indices, i) - end - return indices -end - -function fromtable(geoms, vals, dims) - @time dst = zeros(Float32, length(dims)) - @time sortedvals = sort(zip(geoms, vals), by=(reverse ∘ first)) - @time indices = DD.getindices(first.(sortedvals), dims) - @time dst[indices] .= last.(sortedvals) - return dst -end \ No newline at end of file +TableTraits.isiterabletable(::DimTable) = true \ No newline at end of file From 78938b329d4ed87310f982fe64a0c7e87c3abc0f Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sat, 9 Sep 2023 16:11:50 -0600 Subject: [PATCH 12/24] cleaned up code --- src/tables.jl | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 3bd746865..c26a8fbf1 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -157,7 +157,6 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) dims_ = dims(s) dimcolumns = collect(map(d -> DimColumn(d, dims_), dims_)) dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims(s)), s)) - keys = collect(_colnames(s)) return DimTable(keys, dimcolumns, dimarraycolumns) end @@ -172,38 +171,25 @@ function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") f # Construct DimArrayColumns dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims_), xs)) + # Return DimTable colnames = vcat(dimnames, layernames) return DimTable(colnames, dimcolumns, dimarraycolumns) end function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) - if !isnothing(layersfrom) && (layersfrom <: Dimension) && (any(isa.(dims(x), layersfrom))) + if !isnothing(layersfrom) && any(hasdim(x, layersfrom)) nlayers = size(x, layersfrom) layers = [(@view x[layersfrom(i)]) for i in 1:nlayers] layernames = Symbol.(["$(dim2key(layersfrom))_$i" for i in 1:nlayers]) return DimTable(layers..., layernames=layernames, mergedims=mergedims) else - # Construct DimColumns - dims_ = dims(x) - dimcolumns = map(d -> DimColumn(d, dims_), dims_) - dimnames = collect(map(dim2key, dims_)) - - # Construct DimArrayColumn - dimarraycolumn = DimArrayColumn(x, dims_) - - # Merge DimColumns - if mergedims - colnames = vcat([:geometry], [:value]) - dimcol = MergedDimColumn(Tuple(dimcolumns), :geometry) - return DimTable{typeof(dimcol)}(colnames, dimcol, [dimarraycolumn]) - else - return DimTable{typeof(dimcolumns)}(vcat(dimnames, [:value]), dimcolumns, [dimarraycolumn]) - end + return DimTable(DimStack((;value=x)), mergedims=mergedims) end end dimcolumns(t::DimTable) = getfield(t, :dimcolumns) dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) +colnames(t::DimTable) = getfield(t, :colnames) dims(t::DimTable) = dims(parent(t)) Base.parent(t::DimTable) = getfield(t, :colnames) From e02d7e183195bc3b442b3e29e7e33077e11bc1e8 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sat, 9 Sep 2023 16:16:48 -0600 Subject: [PATCH 13/24] More cleanup --- src/tables.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index c26a8fbf1..f8af34698 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -155,10 +155,10 @@ end function DimTable(s::AbstractDimStack; mergedims=nothing) s = isnothing(mergedims) ? s : DimensionalData.mergedims(s, mergedims) dims_ = dims(s) - dimcolumns = collect(map(d -> DimColumn(d, dims_), dims_)) - dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims(s)), s)) - keys = collect(_colnames(s)) - return DimTable(keys, dimcolumns, dimarraycolumns) + dimcolumns = map(d -> DimColumn(d, dims_), dims_) + dimarraycolumns = map(A -> DimArrayColumn(A, dims_), s) + keys = _colnames(s) + return DimTable(collect(keys), collect(dimcolumns), collect(dimarraycolumns)) end function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=nothing) From 7ff81cc914b937faff1a61f556185b3e4d27e5c1 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sat, 9 Sep 2023 16:19:45 -0600 Subject: [PATCH 14/24] Added comparedims when constructing a DimTable from multiple AbstractDimArrays --- src/tables.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tables.jl b/src/tables.jl index f8af34698..9f1e337fc 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -162,6 +162,9 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) end function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=nothing) + # Check that dims are compatible + comparedims(xs...) + # Construct DimColumns xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) dims_ = dims(first(xs)) From d638436eb2b3260e905c86e61a3e275545ee00d4 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sat, 9 Sep 2023 23:32:34 -0600 Subject: [PATCH 15/24] mergedims can now accept dimensions that are not present in the stack/array --- src/mergedims.jl | 6 +++++- src/tables.jl | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mergedims.jl b/src/mergedims.jl index e478ac588..8ea8000f7 100644 --- a/src/mergedims.jl +++ b/src/mergedims.jl @@ -25,6 +25,7 @@ Dim{:space} MergedLookup{Tuple{Float64, Int64}} Tuple{Float64, Int64}[(0.0, 10), """ function mergedims(all_dims, dim_pairs::Pair...) # filter out dims completely missing + dim_pairs = map(x -> _filter_dims(all_dims, first(x)) => last(x), dim_pairs) dim_pairs_complete = filter(dim_pairs) do (old_dims,) dims_present = dims(all_dims, _astuple(old_dims)) isempty(dims_present) && return false @@ -88,6 +89,9 @@ function _unmergedims(all_dims, merged_dims) end return _cat_tuples(unmerged_dims...) end + _unmergedims(all_dims, dim_pairs::Pair...) = _cat_tuples(replace(all_dims, dim_pairs...)) -_cat_tuples(tuples...) = mapreduce(_astuple, (x, y) -> (x..., y...), tuples) \ No newline at end of file +_cat_tuples(tuples...) = mapreduce(_astuple, (x, y) -> (x..., y...), tuples) + +_filter_dims(alldims, dims) = filter(dim -> hasdim(alldims, dim), dims) \ No newline at end of file diff --git a/src/tables.jl b/src/tables.jl index 9f1e337fc..341563241 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -125,7 +125,7 @@ Base.vec(c::DimArrayColumn{T}) where T = [c[i] for i in eachindex(c)] Base.Array(c::DimArrayColumn) = vec(c) -#DimTable +# DimTable """ From 4e43e11604b36d7f81352417bfbdf1603f11afb6 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sun, 10 Sep 2023 01:03:42 -0600 Subject: [PATCH 16/24] Implemented unmergedims --- src/DimensionalData.jl | 4 ++-- src/mergedims.jl | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 3685f83b1..0e765e630 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -55,7 +55,7 @@ export AbstractDimArray, DimArray export AbstractDimStack, DimStack -export AbstractDimTable, DimTable, WideDimTable +export AbstractDimTable, DimTable export DimIndices, DimKeys, DimPoints @@ -66,7 +66,7 @@ export dims, refdims, metadata, name, lookup, bounds export dimnum, hasdim, hasselection, otherdims # utils -export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims +export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims const DD = DimensionalData diff --git a/src/mergedims.jl b/src/mergedims.jl index 8ea8000f7..9860e4381 100644 --- a/src/mergedims.jl +++ b/src/mergedims.jl @@ -72,6 +72,35 @@ function mergedims(ds::AbstractDimStack, dim_pairs::Pair...) rebuild_from_arrays(ds, vals) end +""" + unmergedims(merged_dims) +Return the unmerged dimensions from a tuple of merged dimensions. However, the order of the original dimensions are not necessarily preserved. +""" +function unmergedims(merged_dims) + reduce(map(dims, merged_dims), init=Tuple([])) do acc, x + x isa Tuple ? (acc..., x...) : (acc..., x) + end +end + +""" + unmergedims(A::AbstractDimArray, original_dims, merged_dims) => AbstractDimArray +Return a new array whose dimensions are restored to their original prior to calling [`mergedims(A, dim_pairs)`](@ref). +""" +function unmergedims(A::AbstractDimArray, original_dims, merged_dims) + unmerged_dims = unmergedims(merged_dims) + reshaped = reshape(data(A), size(unmerged_dims)) + permuted = permutedims(reshaped, dimnum(unmerged_dims, original_dims)) + return DimArray(permuted, original_dims) +end + +""" + unmergedims(s::AbstractDimStack, original_dims, merged_dims) => AbstractDimStack +Return a new stack whose dimensions are restored to their original prior to calling [`mergedims(s, dim_pairs)`](@ref). +""" +function unmergedims(s::AbstractDimStack, original_dims, merged_dims) + return map(A -> unmergedims(A, original_dims, merged_dims), s) +end + function _mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...) old_dims, new_dim = dim_pair dims_to_merge = dims(all_dims, _astuple(old_dims)) From c70b3bb30a28602a21f1c8a5dcd0deb4eeda6f29 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sun, 10 Sep 2023 01:29:34 -0600 Subject: [PATCH 17/24] Removed unnecessary argument from undergedims --- src/mergedims.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mergedims.jl b/src/mergedims.jl index 9860e4381..18beed2f3 100644 --- a/src/mergedims.jl +++ b/src/mergedims.jl @@ -83,10 +83,11 @@ function unmergedims(merged_dims) end """ - unmergedims(A::AbstractDimArray, original_dims, merged_dims) => AbstractDimArray + unmergedims(A::AbstractDimArray, original_dims) => AbstractDimArray Return a new array whose dimensions are restored to their original prior to calling [`mergedims(A, dim_pairs)`](@ref). """ -function unmergedims(A::AbstractDimArray, original_dims, merged_dims) +function unmergedims(A::AbstractDimArray, original_dims) + merged_dims = dims(A) unmerged_dims = unmergedims(merged_dims) reshaped = reshape(data(A), size(unmerged_dims)) permuted = permutedims(reshaped, dimnum(unmerged_dims, original_dims)) @@ -94,11 +95,11 @@ function unmergedims(A::AbstractDimArray, original_dims, merged_dims) end """ - unmergedims(s::AbstractDimStack, original_dims, merged_dims) => AbstractDimStack + unmergedims(s::AbstractDimStack, original_dims) => AbstractDimStack Return a new stack whose dimensions are restored to their original prior to calling [`mergedims(s, dim_pairs)`](@ref). """ -function unmergedims(s::AbstractDimStack, original_dims, merged_dims) - return map(A -> unmergedims(A, original_dims, merged_dims), s) +function unmergedims(s::AbstractDimStack, original_dims) + return map(A -> unmergedims(A, original_dims), s) end function _mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...) From 8721b4614dfb5f3edc9be3ecad25beb94434255b Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sun, 10 Sep 2023 01:41:33 -0600 Subject: [PATCH 18/24] DimTable now preserves name of AbstractDimArray if present. --- src/tables.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tables.jl b/src/tables.jl index 341563241..f1ec4a6dd 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -186,7 +186,8 @@ function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) layernames = Symbol.(["$(dim2key(layersfrom))_$i" for i in 1:nlayers]) return DimTable(layers..., layernames=layernames, mergedims=mergedims) else - return DimTable(DimStack((;value=x)), mergedims=mergedims) + s = name(x) == NoName() ? DimStack((;value=x)) : DimStack(x) + return DimTable(s, mergedims=mergedims) end end From 6f54fd09e118f9b0232b1d7d4d79641b8b415105 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Mon, 11 Sep 2023 15:06:24 -0600 Subject: [PATCH 19/24] Added parent field to DimTable --- src/tables.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index f1ec4a6dd..4f1bd30d1 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -147,6 +147,7 @@ To get dimension columns, you can index with `Dimension` (`X()`) or `Dimension` type (`X`) as well as the regular `Int` or `Symbol`. """ struct DimTable <: AbstractDimTable + parent::AbstractDimArray colnames::Vector{Symbol} dimcolumns::Vector{DimColumn} dimarraycolumns::Vector{DimArrayColumn} @@ -158,7 +159,7 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) dimcolumns = map(d -> DimColumn(d, dims_), dims_) dimarraycolumns = map(A -> DimArrayColumn(A, dims_), s) keys = _colnames(s) - return DimTable(collect(keys), collect(dimcolumns), collect(dimarraycolumns)) + return DimTable(first(s), collect(keys), collect(dimcolumns), collect(dimarraycolumns)) end function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=nothing) @@ -176,7 +177,7 @@ function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") f # Return DimTable colnames = vcat(dimnames, layernames) - return DimTable(colnames, dimcolumns, dimarraycolumns) + return DimTable(first(xs), colnames, dimcolumns, dimarraycolumns) end function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) @@ -193,10 +194,9 @@ end dimcolumns(t::DimTable) = getfield(t, :dimcolumns) dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) -colnames(t::DimTable) = getfield(t, :colnames) -dims(t::DimTable) = dims(parent(t)) +colnames(t::DimTable) = Tuple(getfield(t, :colnames)) -Base.parent(t::DimTable) = getfield(t, :colnames) +Base.parent(t::DimTable) = getfield(t, :parent) for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, :bounds, :locus, :name, :label, :units) @@ -207,16 +207,15 @@ end Tables.istable(::DimTable) = true Tables.columnaccess(::Type{<:DimTable}) = true Tables.columns(t::DimTable) = t -Tables.columnnames(c::DimTable) = parent(c) +Tables.columnnames(c::DimTable) = colnames(c) function Tables.schema(t::DimTable) - colnames = parent(t) types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) - Tables.Schema(colnames, types) + Tables.Schema(colnames(t), types) end @inline function Tables.getcolumn(t::DimTable, key::Symbol) - keys = parent(t) + keys = colnames(t) i = findfirst(==(key), keys) n_dimcols = length(dimcolumns(t)) if i <= n_dimcols From 2fc44d0d9847c238e851b15e9efcc4cbcae9a335 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Mon, 11 Sep 2023 15:41:30 -0600 Subject: [PATCH 20/24] Passed test cases --- src/tables.jl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 4f1bd30d1..c9e0d701e 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -214,14 +214,28 @@ function Tables.schema(t::DimTable) Tables.Schema(colnames(t), types) end +@inline function Tables.getcolumn(t::DimTable, i::Int) + nkeys = length(colnames(t)) + if i > length(dims(t)) + dimarraycolumns(t)[i - length(dims(t))] + elseif i > 0 && i < nkeys + dimcolumns(t)[i] + else + throw(ArgumentError("There is no table column $i")) + end +end + +@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) + dimcolumns(t)[dimnum(t, dim)] +end + @inline function Tables.getcolumn(t::DimTable, key::Symbol) keys = colnames(t) i = findfirst(==(key), keys) - n_dimcols = length(dimcolumns(t)) - if i <= n_dimcols - return dimcolumns(t)[i] + if isnothing(i) + throw(ArgumentError("There is no table column $key")) else - return dimarraycolumns(t)[i - n_dimcols] + return Tables.getcolumn(t, i) end end @@ -229,7 +243,6 @@ end Tables.getcolumn(t, key) end - # TableTraits.jl interface From aff11c7f920d4038fdb1da7bdbe9dd25603da90e Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 14 Sep 2023 23:40:38 -0600 Subject: [PATCH 21/24] Added test cases and updated docs --- docs/src/reference.md | 2 ++ src/array/array.jl | 6 ++++-- src/stack/stack.jl | 9 --------- test/merged.jl | 22 ++++++++++++++++++++++ test/tables.jl | 23 +++++++++++++++++++++++ 5 files changed, 51 insertions(+), 11 deletions(-) diff --git a/docs/src/reference.md b/docs/src/reference.md index 98cf1e167..fe0d3dbda 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -50,6 +50,8 @@ rebuild modify broadcast_dims broadcast_dims! +mergedims +unmergedims reorder Base.cat Base.map diff --git a/src/array/array.jl b/src/array/array.jl index 8167d47e2..c9efcc7e0 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -627,7 +627,8 @@ end """ mergedims(A::AbstractDimArray, dim_pairs::Pair...) => AbstractDimArray -Return a new array whose dimensions are the result of [`mergedims(dims(A), dim_pairs)`](@ref). + mergedims(A::AbstractDimStack, dim_pairs::Pair...) => AbstractDimStack +Return a new array or stack whose dimensions are the result of [`mergedims(dims(A), dim_pairs)`](@ref). """ function mergedims(A::AbstractDimArray, dim_pairs::Pair...) isempty(dim_pairs) && return A @@ -652,7 +653,8 @@ end """ unmergedims(A::AbstractDimArray, original_dims) => AbstractDimArray -Return a new array whose dimensions are restored to their original prior to calling [`mergedims(A, dim_pairs)`](@ref). + unmergedims(A::AbstractDimStack, original_dims) => AbstractDimStack +Return a new array or stack whose dimensions are restored to their original prior to calling [`mergedims(A, dim_pairs)`](@ref). """ function unmergedims(A::AbstractDimArray, original_dims) merged_dims = dims(A) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 75e3a272c..815938bac 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -157,21 +157,12 @@ for func in (:index, :lookup, :metadata, :sampling, :span, :bounds, :locus, :ord @eval ($func)(s::AbstractDimStack, args...) = ($func)(dims(s), args...) end -""" - mergedims(ds::AbstractDimStack, dim_pairs::Pair...) => AbstractDimStack -Return a new stack where `mergedims(A, dim_pairs...)` has been applied to each layer `A` of -`ds`. -""" function mergedims(ds::AbstractDimStack, dim_pairs::Pair...) isempty(dim_pairs) && return ds vals = map(A -> mergedims(A, dim_pairs...), values(ds)) rebuild_from_arrays(ds, vals) end -""" - unmergedims(s::AbstractDimStack, original_dims) => AbstractDimStack -Return a new stack whose dimensions are restored to their original prior to calling [`mergedims(s, dim_pairs)`](@ref). -""" function unmergedims(s::AbstractDimStack, original_dims) return map(A -> unmergedims(A, original_dims), s) end diff --git a/test/merged.jl b/test/merged.jl index 5f00d3bb4..70ea3ac9d 100644 --- a/test/merged.jl +++ b/test/merged.jl @@ -64,3 +64,25 @@ end @test occursin("Y", sp) @test occursin("Z", sp) end + +@testset "unmerge" begin + a = DimArray(rand(32, 32, 3), (X,Y,Dim{:band})) + merged = mergedims(a, (X,Y)=>:geometry) + unmerged = unmergedims(merged, dims(a)) + + # Test Merge + @test hasdim(merged, Dim{:band}) + @test hasdim(merged, Dim{:geometry}) + @test !hasdim(merged, X) + @test !hasdim(merged, Y) + @test size(merged) == (3, 32 * 32) + + # Test Unmerge + @test hasdim(unmerged, X) + @test hasdim(unmerged, Y) + @test hasdim(unmerged, Dim{:band}) + @test !hasdim(unmerged, Dim{:geometry}) + @test dims(unmerged) == dims(a) + @test size(unmerged) == size(a) + @test all(a == unmerged) +end \ No newline at end of file diff --git a/test/tables.jl b/test/tables.jl index 9ee13be6e..651c72769 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -142,3 +142,26 @@ end @test Tables.columntable(a) == (a = [1],) @test Tables.columntable(ds) == (a = [1], b = [2]) end + +@testset "DimTable layersfrom" begin + a = DimArray(rand(32, 32, 5, 3), (X,Y,Dim{:band},Ti)) + t1 = DimTable(a) + t2 = DimTable(a, layersfrom=Dim{:band}) + @test Tables.columnnames(t1) == (:X, :Y, :band, :Ti, :value) + @test Tables.columnnames(t2) == (:X, :Y, :Ti, :band_1, :band_2, :band_3, :band_4, :band_5) + @test length(t1.X) == (32 * 32 * 5 * 3) + @test length(t2.X) == (32 * 32 * 3) +end + +@testset "DimTable mergelayers" begin + a = DimStack([DimArray(rand(32, 32, 3), (X,Y,Ti)) for _ in 1:3]) + b = DimArray(rand(32, 32, 3), (X,Y,Dim{:band})) + t1 = DimTable(a, mergedims=(:X,:Y)=>:geometry) + t2 = DimTable(a, mergedims=(:X,:Y,:Z)=>:geometry) # Merge missing dimension + t3 = DimTable(a, mergedims=(X,:Y,Ti)=>:dimensions) # Mix symbols and dimensions + t4 = DimTable(b, mergedims=(:X,:Y)=>:geometry) # Test DimArray + @test Tables.columnnames(t1) == (:Ti, :geometry, :layer1, :layer2, :layer3) + @test Tables.columnnames(t2) == (:Ti, :geometry, :layer1, :layer2, :layer3) + @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) + @test Tables.columnnames(t4) == (:band, :geometry, :value) +end \ No newline at end of file From d7240f452b77a4f63bc031537c59d4324303e4c9 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 15 Sep 2023 00:18:20 -0600 Subject: [PATCH 22/24] Updated docs for DimTable --- src/tables.jl | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index c9e0d701e..9aa84d3d8 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -131,11 +131,13 @@ Base.Array(c::DimArrayColumn) = vec(c) """ DimTable <: AbstractDimTable - DimTable(A::AbstractDimArray) + DimTable(s::AbstractDimStack; mergedims=nothing) + DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) + DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) -Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray`. +Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray` or `AbstractDimStack`. -This table will have a column for the array data and columns for each +This table will have columns for the array data and columns for each `Dimension` index, as a [`DimColumn`]. These are lazy, and generated as required. @@ -145,6 +147,22 @@ column name `:Ti`, and `Dim{:custom}` becomes `:custom`. To get dimension columns, you can index with `Dimension` (`X()`) or `Dimension` type (`X`) as well as the regular `Int` or `Symbol`. + +# Keywords +* `mergedims`: Combine two or more dimensions into a new dimension. +* `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack`. + +# Example +```jldoctest +julia> a = DimArray(rand(32,32,3), (X,Y,Dim{:band})); + +julia> DimTable(a, layersfrom=Dim{:band}, mergedims=(X,Y)=>:geometry) +DimTable with 1024 rows, 4 columns, and schema: + :geometry Tuple{Int64, Int64} + :band_1 Float64 + :band_2 Float64 + :band_3 Float64 +``` """ struct DimTable <: AbstractDimTable parent::AbstractDimArray @@ -162,10 +180,13 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) return DimTable(first(s), collect(keys), collect(dimcolumns), collect(dimarraycolumns)) end -function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=nothing) +function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) # Check that dims are compatible comparedims(xs...) + # Construct Layer Names + layernames = isnothing(layernames) ? [Symbol("layer_$i") for i in eachindex(xs)] : layernames + # Construct DimColumns xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) dims_ = dims(first(xs)) From 1ecaee9e7c0fefaa847a6cd15c12e8f9f48f820e Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 15 Sep 2023 14:58:53 -0600 Subject: [PATCH 23/24] Updated docs and test cases --- src/array/array.jl | 10 ++++++++-- test/merged.jl | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index c9efcc7e0..6e61c8bb2 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -579,6 +579,7 @@ dimconstructor(dims::Tuple{}) = DimArray """ mergedims(old_dims => new_dim) => Dimension + Return a dimension `new_dim` whose indices are a [`MergedLookup`](@ref) of the indices of `old_dims`. """ @@ -589,11 +590,13 @@ end """ mergedims(dims, old_dims => new_dim, others::Pair...) => dims_new + If dimensions `old_dims`, `new_dim`, etc. are found in `dims`, then return new `dims_new` where all dims in `old_dims` have been combined into a single dim `new_dim`. The returned dimension will keep only the name of `new_dim`. Its coords will be a [`MergedLookup`](@ref) of the coords of the dims in `old_dims`. New dimensions are always placed at the end of `dims_new`. `others` contains other dimension pairs to be merged. + # Example ````jldoctest julia> ds = (X(0:0.1:0.4), Y(10:10:100), Ti([0, 3, 4])); @@ -628,6 +631,7 @@ end """ mergedims(A::AbstractDimArray, dim_pairs::Pair...) => AbstractDimArray mergedims(A::AbstractDimStack, dim_pairs::Pair...) => AbstractDimStack + Return a new array or stack whose dimensions are the result of [`mergedims(dims(A), dim_pairs)`](@ref). """ function mergedims(A::AbstractDimArray, dim_pairs::Pair...) @@ -642,10 +646,11 @@ function mergedims(A::AbstractDimArray, dim_pairs::Pair...) end """ - unmergedims(merged_dims) + unmergedims(merged_dims::Tuple{Vararg{Dimension}}) => Tuple{Vararg{Dimension}} + Return the unmerged dimensions from a tuple of merged dimensions. However, the order of the original dimensions are not necessarily preserved. """ -function unmergedims(merged_dims) +function unmergedims(merged_dims::Tuple{Vararg{Dimension}}) reduce(map(dims, merged_dims), init=Tuple([])) do acc, x x isa Tuple ? (acc..., x...) : (acc..., x) end @@ -654,6 +659,7 @@ end """ unmergedims(A::AbstractDimArray, original_dims) => AbstractDimArray unmergedims(A::AbstractDimStack, original_dims) => AbstractDimStack + Return a new array or stack whose dimensions are restored to their original prior to calling [`mergedims(A, dim_pairs)`](@ref). """ function unmergedims(A::AbstractDimArray, original_dims) diff --git a/test/merged.jl b/test/merged.jl index 70ea3ac9d..cdf18780d 100644 --- a/test/merged.jl +++ b/test/merged.jl @@ -69,6 +69,7 @@ end a = DimArray(rand(32, 32, 3), (X,Y,Dim{:band})) merged = mergedims(a, (X,Y)=>:geometry) unmerged = unmergedims(merged, dims(a)) + perm_unmerged = unmergedims(permutedims(merged, (2,1)), dims(a)) # Test Merge @test hasdim(merged, Dim{:band}) @@ -84,5 +85,6 @@ end @test !hasdim(unmerged, Dim{:geometry}) @test dims(unmerged) == dims(a) @test size(unmerged) == size(a) - @test all(a == unmerged) + @test all(a .== unmerged) + @test all(a .== perm_unmerged) end \ No newline at end of file From 59f10a7194d9beb32028a8fc9e06869dc7509f7a Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 15 Sep 2023 21:35:40 -0600 Subject: [PATCH 24/24] Removed Any[] --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 6e61c8bb2..db1d36101 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -651,7 +651,7 @@ end Return the unmerged dimensions from a tuple of merged dimensions. However, the order of the original dimensions are not necessarily preserved. """ function unmergedims(merged_dims::Tuple{Vararg{Dimension}}) - reduce(map(dims, merged_dims), init=Tuple([])) do acc, x + reduce(map(dims, merged_dims), init=()) do acc, x x isa Tuple ? (acc..., x...) : (acc..., x) end end