Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Breaking: DimVector of NamedTuple is a NamedTuple DimTable #839

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 85 additions & 52 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@ Abstract supertype for dim tables
"""
abstract type AbstractDimTable <: Tables.AbstractColumns end

# Singleton marker types used as the `Mode` parameter of `DimTable`:
# `Columns` when the data is stored as one vector per data column,
# `Rows` when it is stored as a single vector of NamedTuple rows.
struct Columns end
struct Rows end

# Tables.jl interface for AbstractDimStack and AbstractDimArray

DimTableSources = Union{AbstractDimStack,AbstractDimArray}

# Every stack or array is a column-access Tables.jl table; the columns
# themselves are produced by wrapping the object in a `DimTable`.
Tables.istable(::Type{<:DimTableSources}) = true
Tables.columnaccess(::Type{<:DimTableSources}) = true
Tables.columns(x::DimTableSources) = DimTable(x)

# Column names and schema are defined once for all source types, using the
# same `_colnames` that the `DimTable` constructors use, so they always agree
# with `DimTable(x)`. No narrower per-type methods are defined here: they
# would take precedence in dispatch and bypass `_colnames` for arrays.
Tables.columnnames(x::DimTableSources) = _colnames(x)
Tables.schema(x::DimTableSources) = Tables.schema(DimTable(x))

@inline Tables.getcolumn(x::DimTableSources, i::Int) = Tables.getcolumn(DimTable(x), i)
@inline Tables.getcolumn(x::DimTableSources, key::Symbol) =
Expand All @@ -27,11 +26,14 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s))
# A dimension (or dimension type) is translated to its dimension number,
# which is then used as the integer column index.
@inline function Tables.getcolumn(t::DimTableSources, dim::DimOrDimType)
    colindex = dimnum(t, dim)
    return Tables.getcolumn(t, colindex)
end

# Column names of a stack: dimension names first, then one name per layer.
# The data is always the last column/s.
_colnames(s::AbstractDimStack) = (map(name, dims(s))..., keys(s)...)

# Column names of an array: dimension names first, then a single data column.
# The data column takes the array's name, or `:value` when that name
# converts to the empty Symbol.
function _colnames(A::AbstractDimArray)
    arrayname = Symbol(name(A))
    datakey = arrayname == Symbol("") ? :value : arrayname
    return (map(name, dims(A))..., datakey)
end
# A vector whose elements are NamedTuples contributes one data column per
# NamedTuple field, placed after the dimension column names.
function _colnames(A::AbstractDimVector{T}) where {T<:NamedTuple}
    dimkeys = map(name, dims(A))
    return (dimkeys..., _colnames(T)...)
end
_colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys

# DimTable

Expand Down Expand Up @@ -88,18 +90,20 @@ julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band}))
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0

julia>
julia>

```
"""
struct DimTable{Mode} <: AbstractDimTable
    # `Mode` is `Columns` or `Rows` and selects how `dimarraycolumns` is read.
    # The array or stack the table was built from.
    parent::Union{AbstractDimArray,AbstractDimStack}
    # Names of all columns: dimension columns first, then data columns.
    colnames::Vector{Symbol}
    # One vector per dimension column.
    dimcolumns::Vector{AbstractVector}
    # `Columns` mode: one vector per data column.
    # `Rows` mode: a single vector of NamedTuple rows.
    dimarraycolumns::Vector
end

function DimTable(s::AbstractDimStack; mergedims=nothing)
function DimTable(s::AbstractDimStack;
mergedims=nothing,
)
s = isnothing(mergedims) ? s : DD.mergedims(s, mergedims)
dimcolumns = collect(_dimcolumns(s))
dimarraycolumns = if hassamedims(s)
Expand All @@ -108,40 +112,52 @@ function DimTable(s::AbstractDimStack; mergedims=nothing)
map(A -> vec(DimExtensionArray(A, dims(s))), layers(s))
end |> collect
keys = collect(_colnames(s))
return DimTable(s, keys, dimcolumns, dimarraycolumns)
return DimTable{Columns}(s, keys, dimcolumns, dimarraycolumns)
end
# Build a column-mode table from several arrays that share dimensions.
#
# Keywords
# - `layernames`: names for the data columns, one per array. When `nothing`
#   they are generated with `uniquekeys(As)`.
# - `mergedims`: when not `nothing`, passed to `DD.mergedims` for every array
#   before the columns are built.
function DimTable(As::Vararg{AbstractDimArray};
    layernames=nothing,
    mergedims=nothing,
)
    # Check that dims are compatible
    comparedims(As...)
    # Construct Layer Names
    layernames = isnothing(layernames) ? uniquekeys(As) : layernames
    # Construct dimension and array columns with DimExtensionArray
    As = isnothing(mergedims) ? As : map(x -> DD.mergedims(x, mergedims), As)
    dims_ = dims(first(As))
    dimcolumns = collect(_dimcolumns(dims_))
    dimnames = collect(map(name, dims_))
    dimarraycolumns = collect(map(vec ∘ parent, As))
    # `layernames` may be a Tuple (that is what `uniquekeys` returns for a
    # Tuple). `vcat` would append a Tuple as one single element, so collect it
    # into a `Vector{Symbol}` first.
    colnames = vcat(dimnames, collect(Symbol, layernames))

    # Return DimTable
    return DimTable{Columns}(first(As), colnames, dimcolumns, dimarraycolumns)
end
# Build a table from a single array.
#
# Keywords
# - `layersfrom`: when given and `A` has that dimension, `A` is sliced along
#   it and every slice becomes its own data column.
# - `mergedims`: when not `nothing`, passed to `DD.mergedims` before the
#   columns are built.
#
# A one-dimensional array of NamedTuples gives a `DimTable{Rows}` that keeps
# the rows as they are; any other array gives a `DimTable{Columns}` with one
# data column.
function DimTable(A::AbstractDimArray;
    layersfrom=nothing,
    mergedims=nothing,
)
    if !isnothing(layersfrom) && any(hasdim(A, layersfrom))
        d = dims(A, layersfrom)
        nlayers = size(A, d)
        layers = [view(A, rebuild(d, i)) for i in 1:nlayers]
        # Name each layer after the dimension plus either its lookup value
        # (categorical lookups) or its position along the dimension.
        layernames = if iscategorical(d)
            Symbol.((name(d),), '_', lookup(d))
        else
            Symbol.(("$(name(d))_$i" for i in 1:nlayers))
        end
        return DimTable(layers...; layernames, mergedims)
    else
        A = isnothing(mergedims) ? A : DD.mergedims(A, mergedims)
        dimcolumns = collect(_dimcolumns(A))
        colnames = collect(_colnames(A))
        if (ndims(A) == 1) && (eltype(A) <: NamedTuple)
            # Keep the NamedTuple rows as stored; columns are extracted lazily.
            dimarrayrows = parent(A)
            return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows)
        else
            dimarraycolumns = [vec(parent(A))]
            return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns)
        end
    end
end

Expand All @@ -156,8 +172,6 @@ function _dimcolumn(x, d::Dimension)
end
end



# Field accessors for `DimTable`. They read the fields with `getfield`
# rather than dot syntax (presumably because property access on a Tables.jl
# columns object is reserved for column lookup — confirm against Tables.jl).
function dimcolumns(t::DimTable)
    return getfield(t, :dimcolumns)
end
function dimarraycolumns(t::DimTable)
    return getfield(t, :dimarraycolumns)
end
# Column names are stored as a Vector but returned as a Tuple.
function colnames(t::DimTable)
    return Tuple(getfield(t, :colnames))
end
Expand All @@ -175,12 +189,26 @@ Tables.columnaccess(::Type{<:DimTable}) = true
# A `DimTable` is already its own columns object.
function Tables.columns(t::DimTable)
    return t
end
# Column names come straight from the table's stored names.
function Tables.columnnames(c::DimTable)
    return colnames(c)
end

# Schema of the table: the column names paired with their element types,
# dimension columns first and data columns after them.
function Tables.schema(t::DimTable)
    types = vcat([map(eltype, dimcolumns(t))...], _dimarraycolumn_eltypes(t))
    return Tables.Schema(colnames(t), types)
end

@inline function Tables.getcolumn(t::DimTable, i::Int)
# Element types of the data columns, in column order.
# Column mode: the eltype of each stored column vector.
function _dimarraycolumn_eltypes(t::DimTable{Columns})
    return [eltype(column) for column in dimarraycolumns(t)]
end
# Row mode: the field types of the NamedTuple row type.
function _dimarraycolumn_eltypes(t::DimTable{Rows})
    rowtype = eltype(dimarraycolumns(t))
    return _eltypes(rowtype)
end
_eltypes(::Type{T}) where T<:NamedTuple = collect(T.types)

# Integer column access for a row-backed table.
#
# Columns `1:ndimcols` are the dimension columns; the columns after them are
# the NamedTuple fields of the stored rows. Throws an `ArgumentError` when `i`
# is not a valid column number.
@inline function Tables.getcolumn(t::DimTable{Rows}, i::Int)
    nkeys = length(colnames(t))
    ndimcols = length(dims(t))
    if 0 < i <= ndimcols
        return dimcolumns(t)[i]
    elseif ndimcols < i <= nkeys
        # Rows only hold the data fields, so shift the table column number
        # past the dimension columns before indexing into each row.
        return _col_from_rows(dimarraycolumns(t), i - ndimcols)
    else
        throw(ArgumentError("There is no table column $i"))
    end
end
@inline function Tables.getcolumn(t::DimTable{Columns}, i::Int)
nkeys = length(colnames(t))
if i > length(dims(t))
dimarraycolumns(t)[i - length(dims(t))]
Expand All @@ -190,12 +218,19 @@ end
throw(ArgumentError("There is no table column $i"))
end
end

# Look up a dimension column by dimension instance or dimension type.
@inline function Tables.getcolumn(t::DimTable, dim::Union{Dimension,Type{<:Dimension}})
    return dimcolumns(t)[dimnum(t, dim)]
end

# Column access by name for a row-backed table.
#
# Names that match a dimension of the parent return the dimension column;
# any other name is read out of every stored NamedTuple row. Throws an
# `ArgumentError` when `key` is not one of the table's column names.
@inline function Tables.getcolumn(t::DimTable{Rows}, key::Symbol)
    key in colnames(t) || throw(ArgumentError("There is no table column $key"))
    if hasdim(parent(t), key)
        return dimcolumns(t)[dimnum(t, key)]
    else
        # Function barrier: the rows field is declared as an abstract
        # `Vector`, so the extraction is done in a separate function that is
        # called with the concrete rows vector.
        return _col_from_rows(dimarraycolumns(t), key)
    end
end
@inline function Tables.getcolumn(t::DimTable{Columns}, key::Symbol)
keys = colnames(t)
i = findfirst(==(key), keys)
if isnothing(i)
Expand All @@ -204,22 +239,20 @@ end
return Tables.getcolumn(t, i)
end
end

# Typed column access: the type and index arguments are ignored and the
# column is looked up by name.
@inline Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where {T} =
    Tables.getcolumn(t, key)

# Build one column from a collection of rows by indexing every row with `key`.
function _col_from_rows(rows, key)
    return map(rows) do row
        row[key]
    end
end

# TableTraits.jl interface
TableTraits.isiterabletable(::DimTableSources) = true
TableTraits.isiterabletable(::DimTable) = true

# IteratorInterfaceExtensions.jl interface
# Each method is defined exactly once; the row iterator is built from a
# dictionary-backed copy of the table's columns.
IteratorInterfaceExtensions.getiterator(x::DimTableSources) =
    Tables.datavaluerows(Tables.dictcolumntable(x))
IteratorInterfaceExtensions.getiterator(t::DimTable) =
    Tables.datavaluerows(Tables.dictcolumntable(t))
IteratorInterfaceExtensions.isiterable(::DimTableSources) = true
IteratorInterfaceExtensions.isiterable(::DimTable) = true
8 changes: 6 additions & 2 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,13 @@ function uniquekeys(keys::Vector{Symbol})
end
end
# Make a tuple of keys usable as unique layer names.
#
# A key that is empty, or that occurs more than once in `keys`, is replaced
# by `:layerN`, where `N` is its position in the tuple. Every other key is
# kept as it is.
function uniquekeys(keys::Tuple{Symbol,Vararg{Symbol}})
    ids = ntuple(identity, length(keys))
    map(keys, ids) do k, id
        if k == Symbol("") || count(==(k), keys) > 1
            Symbol(:layer, id)
        else
            k
        end
    end
end
uniquekeys(t::Tuple) = ntuple(i -> Symbol(:layer, i), length(t))
Expand Down
15 changes: 14 additions & 1 deletion test/tables.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames
using DimensionalData
using Test
using Tables
using IteratorInterfaceExtensions
using TableTraits
using DataFrames

using DimensionalData.Lookups, DimensionalData.Dimensions
using DimensionalData: DimTable, DimExtensionArray
Expand Down Expand Up @@ -154,3 +159,11 @@ end
@test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3)
@test Tables.columnnames(t4) == (:band, :geometry, :value)
end

@testset "DimTable NamedTuple" begin
    da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X)
    t = DimTable(da)
    s = Tables.schema(t)
    @test s.names == (:X, :a, :b)
    @test s.types == (Int, Float32, Float64)
    # The column names must agree with the schema
    @test Tables.columnnames(t) == (:X, :a, :b)
    # Data columns are read out of the NamedTuple rows by name
    @test Tables.getcolumn(t, :a) == Float32.(1:10)
    @test Tables.getcolumn(t, :b) == 2.0 .* (1:10)
    @test_throws ArgumentError Tables.getcolumn(t, :notacolumn)
end
Loading