Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overhaul DimTable #536

Merged
merged 25 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c75746d
Add initial implementation of `mergedims`
sethaxen Apr 4, 2023
9058798
Use values instead of layers
sethaxen Apr 5, 2023
63627dc
Simplify base method
sethaxen Apr 5, 2023
ccb0f71
Perform all checks in main method
sethaxen Apr 5, 2023
61a5a2a
Simplify mergedims for arrays
sethaxen Apr 5, 2023
8c06a43
Handle case where different numbers of dims provided
sethaxen Apr 5, 2023
5d8123f
Add mergedims to docs
sethaxen Apr 5, 2023
af9381a
Implemented WideDimTable
JoshuaBillson Sep 9, 2023
d90b86a
Removed Dead Code
JoshuaBillson Sep 9, 2023
14ca271
mergedims now allows selection of specific dimensions
JoshuaBillson Sep 9, 2023
a3dfda7
Dropped MergedDimColumn
JoshuaBillson Sep 9, 2023
78938b3
cleaned up code
JoshuaBillson Sep 9, 2023
e02d7e1
More cleanup
JoshuaBillson Sep 9, 2023
7ff81cc
Added comparedims when constructing a DimTable from multiple Abstract…
JoshuaBillson Sep 9, 2023
d638436
mergedims can now accept dimensions that are not present in the stack…
JoshuaBillson Sep 10, 2023
4e43e11
Implemented unmergedims
JoshuaBillson Sep 10, 2023
c70b3bb
Removed unnecessary argument from undergedims
JoshuaBillson Sep 10, 2023
8721b46
DimTable now preserves name of AbstractDimArray if present.
JoshuaBillson Sep 10, 2023
2115e4e
Merged mergedims
JoshuaBillson Sep 11, 2023
6f54fd0
Added parent field to DimTable
JoshuaBillson Sep 11, 2023
2fc44d0
Passed test cases
JoshuaBillson Sep 11, 2023
aff11c7
Added test cases and updated docs
JoshuaBillson Sep 15, 2023
d7240f4
Updated docs for DimTable
JoshuaBillson Sep 15, 2023
1ecaee9
Updated docs and test cases
JoshuaBillson Sep 15, 2023
59f10a7
Removed Any[]
JoshuaBillson Sep 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/DimensionalData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export AbstractDimArray, DimArray

export AbstractDimStack, DimStack

export AbstractDimTable, DimTable
export AbstractDimTable, DimTable, WideDimTable

export DimIndices, DimKeys, DimPoints

Expand All @@ -66,7 +66,7 @@ export dims, refdims, metadata, name, lookup, bounds
export dimnum, hasdim, hasselection, otherdims

# utils
export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!
export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims

const DD = DimensionalData

Expand All @@ -88,6 +88,7 @@ include("stack/methods.jl")
include("stack/show.jl")
# Other
include("dimindices.jl")
include("mergedims.jl")
include("tables.jl")
# Combined (easier to work on these in one file)
include("plotrecipes.jl")
Expand Down
1 change: 1 addition & 0 deletions src/Dimensions/primitives.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ or are at least rotations/transformations of the same type.
"""
@inline dimsmatch(dims, query) = dimsmatch(<:, dims, query)
# Tuple-vs-tuple comparison: tuples of different length never match; otherwise every
# positional pair `(dims[i], query[i])` must match under `f`.
@inline function dimsmatch(f::Function, dims::Tuple, query::Tuple)
    length(dims) != length(query) && return false
    pairwise = map(dims, query) do d, q
        dimsmatch(f, d, q)
    end
    return all(pairwise)
end
@inline dimsmatch(f::Function, dim, query) = dimsmatch(f, typeof(dim), typeof(query))
Expand Down
93 changes: 93 additions & 0 deletions src/mergedims.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
    mergedims(old_dims => new_dim) => Dimension

Build a single dimension from the pair `old_dims => new_dim`.

The result is `basedims(new_dim)` rebuilt with a [`MergedLookup`](@ref) whose data are
the points of `old_dims` (a `DimPoints` over them, flattened to a vector).
"""
function mergedims(dim_pair::Pair)
    old_dims, new_dim = dim_pair
    points = vec(DimPoints(_astuple(old_dims)))
    target = basedims(new_dim)
    return rebuild(target, MergedLookup(points, old_dims))
end

"""
    mergedims(dims, old_dims => new_dim, others::Pair...) => dims_new

Combine groups of dimensions found in `dims` into single merged dimensions.

For every pair `old_dims => new_dim` whose `old_dims` are found in `dims`, the dims in
`old_dims` are replaced by one dimension that keeps only the name of `new_dim` and whose
coords are a [`MergedLookup`](@ref) of the coords of the dims in `old_dims`. Merged
dimensions are always placed at the end of `dims_new`. `others` holds further pairs to
merge.

Pairs whose `old_dims` are completely absent from `dims` are ignored; if none remain,
`dims` is returned unchanged. An `ArgumentError` is thrown when only some of a pair's
`old_dims` are present, or when the groups to merge are not all unique.

# Example

````jldoctest
julia> ds = (X(0:0.1:0.4), Y(10:10:100), Ti([0, 3, 4]));

julia> mergedims(ds, Ti => :time, (X, Y) => :space)
Dim{:time} MergedLookup{Tuple{Int64}} Tuple{Int64}[(0,), (3,), (4,)] Ti,
Dim{:space} MergedLookup{Tuple{Float64, Int64}} Tuple{Float64, Int64}[(0.0, 10), (0.1, 10), …, (0.3, 100), (0.4, 100)] X, Y
````
"""
function mergedims(all_dims, dim_pairs::Pair...)
    # Drop pairs whose old dims are entirely absent; a partially present pair is an error.
    present_pairs = filter(dim_pairs) do pair
        old_dims = first(pair)
        found = dims(all_dims, _astuple(old_dims))
        isempty(found) && return false
        if !all(hasdim(found, old_dims))
            throw(ArgumentError(
                "Not all dimensions $old_dims found in $(map(basetypeof, all_dims))"
            ))
        end
        return true
    end
    isempty(present_pairs) && return all_dims

    # Swap each requested query for the matching dimensions taken from `all_dims`.
    concrete_pairs = map(present_pairs) do pair
        return dims(all_dims, _astuple(first(pair))) => last(pair)
    end

    # Reject overlapping groups: the concatenated groups must match their combined dims.
    groups = map(first, concrete_pairs)
    dimsmatch(_cat_tuples(groups...), combinedims(groups...)) ||
        throw(ArgumentError("Dimensions to be merged are not all unique"))

    return _mergedims(all_dims, concrete_pairs...)
end

"""
    mergedims(A::AbstractDimArray, dim_pairs::Pair...) => AbstractDimArray

Return an array whose dimensions are `mergedims(dims(A), dim_pairs...)`.

`A` itself is returned when no pairs are given or when merging leaves its dimensions
unchanged. Otherwise the data of `A` is wrapped in a `PermutedDimsArray`, reshaped to
the lengths of the merged dimensions, and `A` is rebuilt around it.
"""
function mergedims(A::AbstractDimArray, dim_pairs::Pair...)
    isempty(dim_pairs) && return A
    original = dims(A)
    merged = mergedims(original, dim_pairs...)
    dimsmatch(original, merged) && return A
    # Expand every merged dim back into its source dims, in the order of `merged`,
    # to obtain the permutation of the original dimensions.
    perm = _unmergedims(merged, map(last, dim_pairs))
    permuted = PermutedDimsArray(A, perm)
    reshaped = reshape(data(permuted), map(length, merged))
    return rebuild(A, reshaped, merged)
end

"""
    mergedims(ds::AbstractDimStack, dim_pairs::Pair...) => AbstractDimStack

Apply `mergedims(A, dim_pairs...)` to every layer `A` of `ds` and rebuild the stack from
the merged layers. With no pairs, `ds` is returned as is.
"""
function mergedims(ds::AbstractDimStack, dim_pairs::Pair...)
    isempty(dim_pairs) && return ds
    merged_layers = map(values(ds)) do layer
        mergedims(layer, dim_pairs...)
    end
    return rebuild_from_arrays(ds, merged_layers)
end

# Merge one `old_dims => new_dim` pair at a time: the merged dimension is appended after
# the dims that were not merged, then the remaining pairs are handled recursively.
function _mergedims(all_dims, dim_pair::Pair, dim_pairs::Pair...)
    sources = dims(all_dims, _astuple(first(dim_pair)))
    merged = mergedims(sources => last(dim_pair))
    updated = (otherdims(all_dims, sources)..., merged)
    return isempty(dim_pairs) ? updated : _mergedims(updated, dim_pairs...)
end

# Expand the dims of `all_dims` selected by `merged_dims` into the dims stored on their
# lookups; every other dim is kept. The pieces are concatenated in the order of `all_dims`.
function _unmergedims(all_dims, merged_dims)
    targets = dims(all_dims, merged_dims)
    expanded = map(all_dims) do d
        return hasdim(targets, d) ? dims(lookup(d)) : _astuple(d)
    end
    return _cat_tuples(expanded...)
end
# Variant taking replacement pairs: entries of `all_dims` are substituted with `replace`
# and the result is flattened. The replaced tuple must be splatted so that `_cat_tuples`
# receives its entries; passing the tuple as one argument would return it unflattened.
_unmergedims(all_dims, dim_pairs::Pair...) =
    _cat_tuples(replace(all_dims, dim_pairs...)...)

# Concatenate the arguments into one flat tuple, passing each through `_astuple` first.
# `init=()` makes the zero-argument call return `()` instead of throwing on an empty
# reduction.
_cat_tuples(tuples...) = mapreduce(_astuple, (x, y) -> (x..., y...), tuples; init=())
142 changes: 83 additions & 59 deletions src/tables.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# Tables.jl interface
"""
AbstractDimTable <: Tables.AbstractColumns

Abstract supertype for dim tables
"""
abstract type AbstractDimTable <: Tables.AbstractColumns end

# Tables.jl interface for AbstractDimStack and AbstractDimArray

DimTableSources = Union{AbstractDimStack,AbstractDimArray}

Expand All @@ -20,6 +27,16 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s))
@inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) =
Tables.getcolumn(t, dimnum(t, dim))

# Column names of a stack table: one key per dimension first, then the layer keys,
# so the data columns always come last.
_colnames(s::AbstractDimStack) = (map(dim2key, dims(s))..., keys(s)...)


# DimColumn


"""
DimColumn{T,D<:Dimension} <: AbstractVector{T}

Expand Down Expand Up @@ -56,8 +73,6 @@ end
dim(c::DimColumn) = getfield(c, :dim)
dimstride(c::DimColumn) = getfield(c, :dimstride)

# Simple Array interface

Base.length(c::DimColumn) = getfield(c, :length)
@inline function Base.getindex(c::DimColumn, i::Int)
Base.@boundscheck checkbounds(c, i)
Expand All @@ -70,6 +85,10 @@ Base.axes(c::DimColumn) = (Base.OneTo(length(c)),)
Base.vec(c::DimColumn{T}) where T = [c[i] for i in eachindex(c)]
Base.Array(c::DimColumn) = vec(c)


# DimArrayColumn


struct DimArrayColumn{T,A<:AbstractDimArray{T},DS,DL,L} <: AbstractVector{T}
data::A
dimstrides::DS
Expand All @@ -89,8 +108,6 @@ Base.parent(c::DimArrayColumn) = getfield(c, :data)
dimstrides(c::DimArrayColumn) = getfield(c, :dimstrides)
dimlengths(c::DimArrayColumn) = getfield(c, :dimlengths)

# Simple Array interface

Base.length(c::DimArrayColumn) = getfield(c, :length)
@inline function Base.getindex(c::DimArrayColumn, i::Int)
Base.@boundscheck checkbounds(c, i)
Expand All @@ -107,12 +124,9 @@ Base.axes(c::DimArrayColumn) = (Base.OneTo(length(c)),)
Base.vec(c::DimArrayColumn{T}) where T = [c[i] for i in eachindex(c)]
Base.Array(c::DimArrayColumn) = vec(c)

"""
AbstractDimTable <: Tables.AbstractColumns

Abstract supertype for dim tables
"""
abstract type AbstractDimTable <: Tables.AbstractColumns end
#DimTable


"""
DimTable <: AbstractDimTable
Expand All @@ -132,91 +146,101 @@ column name `:Ti`, and `Dim{:custom}` becomes `:custom`.
To get dimension columns, you can index with `Dimension` (`X()`) or
`Dimension` type (`X`) as well as the regular `Int` or `Symbol`.
"""
struct DimTable{Keys,S,DC,DAC} <: AbstractDimTable
stack::S
dimcolumns::DC
dimarraycolumns::DAC
end
DimTable{K}(stack::S, dimcolumns::DC, strides::SD) where {K,S,DC,SD} =
DimTable{K,S,DC,SD}(stack, dimcolumns, strides)
DimTable(A::AbstractDimArray, As::AbstractDimArray...) = DimTable((A, As...))
function DimTable(As::Tuple{AbstractDimArray,Vararg{AbstractDimArray}}...)
DimTable(DimStack(As...))
end
function DimTable(s::AbstractDimStack)
struct DimTable <: AbstractDimTable
    # Names of all columns; the constructors in this file put the dimension names first
    # and the layer/data names after them.
    colnames::Vector{Symbol}
    # One `DimColumn` per dimension.
    dimcolumns::Vector{DimColumn}
    # One `DimArrayColumn` per layer/array.
    # NOTE(review): `DimColumn` and `DimArrayColumn` are parametric types, so both
    # vectors have non-concrete element types — confirm this is acceptable for
    # column-access performance.
    dimarraycolumns::Vector{DimArrayColumn}
end

function DimTable(s::AbstractDimStack; mergedims=nothing)
s = isnothing(mergedims) ? s : DimensionalData.mergedims(s, mergedims)
dims_ = dims(s)
dimcolumns = map(d -> DimColumn(d, dims_), dims_)
dimarraycolumns = map(A -> DimArrayColumn(A, dims_), s)
keys = _colnames(s)
DimTable{keys}(s, dimcolumns, dimarraycolumns)
return DimTable(collect(keys), collect(dimcolumns), collect(dimarraycolumns))
end

function DimTable(xs::Vararg{AbstractDimArray}; layernames=[Symbol("layer_$i") for i in eachindex(xs)], mergedims=nothing)
# Check that dims are compatible
comparedims(xs...)

# Construct DimColumns
JoshuaBillson marked this conversation as resolved.
Show resolved Hide resolved
xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs)
dims_ = dims(first(xs))
dimcolumns = collect(map(d -> DimColumn(d, dims_), dims_))
dimnames = collect(map(dim2key, dims_))

# Construct DimArrayColumns
dimarraycolumns = collect(map(A -> DimArrayColumn(A, dims_), xs))

# Return DimTable
colnames = vcat(dimnames, layernames)
return DimTable(colnames, dimcolumns, dimarraycolumns)
end

# Build a `DimTable` from a single array.
#
# Keywords
# - `layersfrom`: a dimension of `x`. When given and present in `x`, every index `i`
#   along it becomes its own data column, named `<dimkey>_<i>`. Otherwise `x` becomes a
#   single column named `:value`.
# - `mergedims`: forwarded unchanged to the `DimTable` method that builds the table.
function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing)
    if !isnothing(layersfrom) && any(hasdim(x, layersfrom))
        nlayers = size(x, layersfrom)
        layers = [(@view x[layersfrom(i)]) for i in 1:nlayers]
        layernames = Symbol.(["$(dim2key(layersfrom))_$i" for i in 1:nlayers])
        # Call the multi-array method explicitly. With a single layer, plain
        # `DimTable(layers...; layernames=...)` would dispatch back to this
        # single-array method, which does not accept `layernames`.
        return invoke(
            DimTable, Tuple{Vararg{AbstractDimArray}}, layers...;
            layernames=layernames, mergedims=mergedims,
        )
    else
        return DimTable(DimStack((;value=x)), mergedims=mergedims)
    end
end

dimcolumns(t::DimTable) = getfield(t, :dimcolumns)
dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns)
colnames(t::DimTable) = getfield(t, :colnames)
dims(t::DimTable) = dims(parent(t))

Base.parent(t::DimTable) = getfield(t, :stack)
Base.parent(t::DimTable) = getfield(t, :colnames)

for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, :bounds,
:locus, :name, :label, :units)
@eval $func(t::DimTable, args...) = $func(parent(t), args...)

JoshuaBillson marked this conversation as resolved.
Show resolved Hide resolved
end

# Tables interface

Tables.istable(::DimTable) = true
Tables.columnaccess(::Type{<:DimTable}) = true
Tables.columns(t::DimTable) = t
Tables.columnnames(c::DimTable{Keys}) where Keys = Keys
function Tables.schema(t::DimTable{Keys}) where Keys
s = parent(t)
types = (map(eltype, dims(s))..., map(eltype, parent(s))...)
Tables.Schema(Keys, types)
end

@inline function Tables.getcolumn(t::DimTable{Keys}, i::Int) where Keys
nkeys = length(Keys)
if i > length(dims(t))
dimarraycolumns(t)[i - length(dims(t))]
elseif i > 0 && i < nkeys
dimcolumns(t)[i]
else
throw(ArgumentError("There is no table column $i"))
end
end
@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType)
dimcolumns(t)[dimnum(t, dim)]
Tables.columnnames(c::DimTable) = parent(c)

# Schema of the table: the stored column names paired with the element types of the
# dimension columns followed by those of the data columns.
function Tables.schema(t::DimTable)
    columnkeys = parent(t)
    dimtypes = [map(eltype, dimcolumns(t))...]
    layertypes = [map(eltype, dimarraycolumns(t))...]
    return Tables.Schema(columnkeys, vcat(dimtypes, layertypes))
end
# Retrieve a column by name
@inline function Tables.getcolumn(t::DimTable{Keys}, key::Symbol) where Keys
if key in keys(dimarraycolumns(t))
dimarraycolumns(t)[key]

# Retrieve a column by name. Names are searched in the stored column-name vector: the
# first `length(dimcolumns(t))` positions map to dimension columns, the rest to data
# columns. Throws an `ArgumentError` when `key` is not a column of the table.
@inline function Tables.getcolumn(t::DimTable, key::Symbol)
    columnkeys = parent(t)
    i = findfirst(==(key), columnkeys)
    # `findfirst` returns `nothing` for an unknown name; fail with a clear message
    # rather than a MethodError from comparing `nothing` with an integer.
    isnothing(i) && throw(ArgumentError("There is no table column $key"))
    n_dimcols = length(dimcolumns(t))
    if i <= n_dimcols
        return dimcolumns(t)[i]
    else
        return dimarraycolumns(t)[i - n_dimcols]
    end
end

# Accessor taking an element type, a positional index and a name. The type and index are
# ignored; the column is looked up by `key`.
@inline Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where {T} =
    Tables.getcolumn(t, key)

# Column names of a stack table: one key per dimension first, then the layer keys,
# so the data columns always come last.
_colnames(s::AbstractDimStack) = (map(dim2key, dims(s))..., keys(s)...)


# TableTraits.jl interface


function IteratorInterfaceExtensions.getiterator(x::DimTableSources)
return Tables.datavaluerows(Tables.columntable(x))
return Tables.datavaluerows(Tables.dictcolumntable(x))
end
IteratorInterfaceExtensions.isiterable(::DimTableSources) = true
TableTraits.isiterabletable(::DimTableSources) = true

function IteratorInterfaceExtensions.getiterator(t::DimTable)
return Tables.datavaluerows(Tables.columntable(t))
JoshuaBillson marked this conversation as resolved.
Show resolved Hide resolved
return Tables.datavaluerows(Tables.dictcolumntable(t))
end
IteratorInterfaceExtensions.isiterable(::DimTable) = true
TableTraits.isiterabletable(::DimTable) = true
TableTraits.isiterabletable(::DimTable) = true