From 60256a052e13dc743c467caf440e32301c37fe6f Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 18 Jun 2024 12:41:52 -0600 Subject: [PATCH 01/33] Table Materializer Methods --- src/DimensionalData.jl | 1 + src/array/array.jl | 7 ++++ src/stack/stack.jl | 11 +++++++ src/table_ops.jl | 72 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+) create mode 100644 src/table_ops.jl diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 9eb0c0ae2..0b176b383 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -84,6 +84,7 @@ const DD = DimensionalData # Common include("interface.jl") include("name.jl") +include("table_ops.jl") # Arrays include("array/array.jl") diff --git a/src/array/array.jl b/src/array/array.jl index d82f4492f..b6dd0320d 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -411,6 +411,13 @@ function DimArray(A::AbstractBasicDimArray; newdata = collect(data) DimArray(newdata, format(dims, newdata); refdims, name, metadata) end +# Write a single column from a table with one or more coordinate columns to a DimArray +function DimArray(table, dims::Tuple, col::Symbol; missingval=missing) + perm = _sort_coords(table, dims) + data = Tables.getcolumn(table, col) + dst = _write_vals(data, dims, perm, missingval) + return DimArray(reshape(dst, size(dims)), dims, name=col) +end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index afed49cdc..004ca9f23 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -423,5 +423,16 @@ function DimStack(data::NamedTuple, dims::Tuple; all(map(d -> axes(d) == axes(first(data)), data)) || _stack_size_mismatch() DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end +# Write each column from a table with one or more coordinate columns to a layer in a DimStack +function DimStack(table, dims::Tuple; missingval=missing) + arrays = Any[] + perm = _sort_coords(table, dims) + 
data_cols = _data_cols(table, dims) + for (name, data) in pairs(data_cols) + dst = _write_vals(data, dims, perm, missingval) + push!(arrays, reshape(dst, size(dims))) + end + return DimStack(NamedTuple{keys(data_cols)}(arrays), dims) +end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) diff --git a/src/table_ops.jl b/src/table_ops.jl new file mode 100644 index 000000000..031ceeae4 --- /dev/null +++ b/src/table_ops.jl @@ -0,0 +1,72 @@ +function _write_vals(data, dims::Tuple, perm, missingval) + # Allocate Destination Array + dst_size = reduce(*, length.(dims)) + dst = Vector{eltype(data)}(undef, dst_size) + dst[perm] .= data + + # Handle Missing Rows + _missingval = _cast_missing(data, missingval) + missing_rows = ones(Bool, dst_size) + missing_rows[perm] .= false + return ifelse.(missing_rows, _missingval, dst) +end + +# Find the order of the table's rows according to the coordinate values +_sort_coords(table, dims::Tuple) = _sort_coords(_dim_cols(table, dims), dims) +function _sort_coords(coords::NamedTuple, dims::Tuple) + ords = _coords_to_ords(coords, dims) + indices = _ords_to_indices(ords, dims) + return indices +end + +# Extract coordinate columns from table +function _dim_cols(table, dims::Tuple) + dim_cols = name.(dims) + return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) +end + +# Extract data columns from table +function _data_cols(table, dims::Tuple) + dim_cols = name.(dims) + data_cols = filter(x -> !(x in dim_cols), Tables.columnnames(table)) + return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) +end + +# Determine the ordinality of a set of numerical coordinates +function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) + stride = (last(dim) - first(dim)) / (length(dim) - 1) + return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1) +end + +# Determine the ordinality of a set of categorical coordinates +function 
_coords_to_ords(coords::AbstractVector, dim::AbstractVector) + d = Dict{eltype(dim),UInt32}() + for (i, x) in enumerate(dim) + d[x] = i + end + return map(x -> d[x], coords) +end + +# Preprocessing methods for _coords_to_ords +_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, collect(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(Tuple(coords[d] for d in name.(dims)), dims) + +# Determine the index from a tuple of coordinate orders +function _ords_to_indices(ords, dims) + stride = 1 + indices = ones(Int, length(ords[1])) + for (ord, dim) in zip(ords, dims) + indices .+= (ord .- 1) .* stride + stride *= length(dim) + end + return indices +end + +function _cast_missing(::AbstractArray{T}, missingval) where {T} + try + return convert(T, missingval) + catch e + return missingval + end +end \ No newline at end of file From eab2fa09afa78a0f825fd3b58c55d90fb7907108 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 18 Jun 2024 13:17:49 -0600 Subject: [PATCH 02/33] Made col Optional for DimArray --- src/array/array.jl | 3 ++- src/table_ops.jl | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 37851b4f2..ff8019055 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -412,8 +412,9 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims::Tuple, col::Symbol; missingval=missing) +function DimArray(table, dims; col=nothing, missingval=missing) perm = _sort_coords(table, dims) + col = isnothing(col) ? 
_data_col_names(table, dims) |> first : col data = Tables.getcolumn(table, col) dst = _write_vals(data, dims, perm, missingval) return DimArray(reshape(dst, size(dims)), dims, name=col) diff --git a/src/table_ops.jl b/src/table_ops.jl index 031ceeae4..1299b7bb2 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -27,11 +27,16 @@ end # Extract data columns from table function _data_cols(table, dims::Tuple) - dim_cols = name.(dims) - data_cols = filter(x -> !(x in dim_cols), Tables.columnnames(table)) + data_cols = _data_col_names(table, dims) return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) end +# Get names of data columns from table +function _data_col_names(table, dims::Tuple) + dim_cols = name.(dims) + return filter(x -> !(x in dim_cols), Tables.columnnames(table)) +end + # Determine the ordinality of a set of numerical coordinates function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) stride = (last(dim) - first(dim)) / (length(dim) - 1) From d4892df56cefd61a32f05964ee5b257d9b644e34 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 19 Jun 2024 23:30:39 -0600 Subject: [PATCH 03/33] Apply suggestions from code review Co-authored-by: Rafael Schouten --- src/table_ops.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 1299b7bb2..d6aaffb90 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,6 +1,6 @@ function _write_vals(data, dims::Tuple, perm, missingval) # Allocate Destination Array - dst_size = reduce(*, length.(dims)) + dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) dst[perm] .= data @@ -21,7 +21,7 @@ end # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = name.(dims) + dim_cols = map(name, dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end @@ -33,13 +33,13 @@ 
end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = name.(dims) + dim_cols = map(name, dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end # Determine the ordinality of a set of numerical coordinates function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) - stride = (last(dim) - first(dim)) / (length(dim) - 1) + step = (last(dim) - first(dim)) / (length(dim) - 1) return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1) end @@ -68,6 +68,7 @@ function _ords_to_indices(ords, dims) return indices end +_cast_missing(::AbstractArray, missingval::Missing) = missing function _cast_missing(::AbstractArray{T}, missingval) where {T} try return convert(T, missingval) From ea6751adad6a33182854c0bc0c64e5139379939c Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 20 Jun 2024 00:53:05 -0600 Subject: [PATCH 04/33] Handle coordinates with different loci --- src/table_ops.jl | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 1299b7bb2..0417805a0 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -37,23 +37,46 @@ function _data_col_names(table, dims::Tuple) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end -# Determine the ordinality of a set of numerical coordinates -function _coords_to_ords(coords::AbstractVector, dim::AbstractVector{<:Real}) - stride = (last(dim) - first(dim)) / (length(dim) - 1) - return round.(UInt32, ((coords .- first(dim)) ./ stride) .+ 1) +# Determine the ordinality of a set of regularly spaced numerical coordinates with a starting locus +function _coords_to_ords( + coords::AbstractVector, + dim::Dimension, + ::Type{<:Real}, + ::DimensionalData.Start, + ::DimensionalData.Regular) + step = (last(dim) - first(dim)) / (length(dim) - 1) + return floor.(Int, ((coords .- first(dim)) ./ step) .+ 1) end -# Determine the ordinality of a 
set of categorical coordinates -function _coords_to_ords(coords::AbstractVector, dim::AbstractVector) - d = Dict{eltype(dim),UInt32}() - for (i, x) in enumerate(dim) - d[x] = i - end - return map(x -> d[x], coords) +# Determine the ordinality of a set of regularly spaced numerical coordinates with a central locus +function _coords_to_ords( + coords::AbstractVector, + dim::Dimension, + ::Type{<:Real}, + ::DimensionalData.Center, + ::DimensionalData.Regular) + step = (last(dim) - first(dim)) / (length(dim) - 1) + return round.(Int, ((coords .- first(dim)) ./ step) .+ 1) +end + +# Determine the ordinality of a set of regularly spaced numerical coordinates with an end locus +function _coords_to_ords( + coords::AbstractVector, + dim::Dimension, + ::Type{<:Real}, + ::DimensionalData.End, + ::DimensionalData.Regular) + step = (last(dim) - first(dim)) / (length(dim) - 1) + return ceil.(Int, ((coords .- first(dim)) ./ step) .+ 1) +end + +# Determine the ordinality of a set of categorical or irregular coordinates +function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Any, ::Any, ::Any) + return map(c -> DimensionalData.selectindices(dim, At(c)), coords) end # Preprocessing methods for _coords_to_ords -_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, collect(dim)) +_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, dim, eltype(dim), locus(dim), span(dim)) _coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) _coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(Tuple(coords[d] for d in name.(dims)), dims) From 6a9d26e9f4d484a8ded27933df9e61e695986087 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 20 Jun 2024 01:56:19 -0600 Subject: [PATCH 05/33] replaced At() with Contains() in _coords_to_ords --- src/table_ops.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/table_ops.jl 
b/src/table_ops.jl index 175371066..d7fae5318 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -21,10 +21,14 @@ end # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = map(name, dims) + dim_cols = _dim_col_names(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end +function _dim_col_names(dims) + return map(name, dims) +end + # Extract data columns from table function _data_cols(table, dims::Tuple) data_cols = _data_col_names(table, dims) @@ -33,7 +37,7 @@ end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = map(name, dims) + dim_cols = _dim_col_names(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end @@ -72,13 +76,13 @@ end # Determine the ordinality of a set of categorical or irregular coordinates function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, At(c)), coords) + return map(c -> DimensionalData.selectindices(dim, Contains(c)), coords) end # Preprocessing methods for _coords_to_ords _coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, dim, eltype(dim), locus(dim), span(dim)) _coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(Tuple(coords[d] for d in name.(dims)), dims) +_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(map(x -> coords[x], _dim_col_names(dims)), dims) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) @@ -98,4 +102,4 @@ function _cast_missing(::AbstractArray{T}, missingval) where {T} catch e return missingval end -end \ No newline at end of file +end From 9164c228a6076b3f8a5d60cdab17f45e136235f9 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 25 Jun 2024 00:44:52 -0600 Subject: [PATCH 06/33] 
Added optional selectors and public methods for table materializer --- src/table_ops.jl | 98 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index d7fae5318..0ecaf8c1d 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,34 +1,89 @@ -function _write_vals(data, dims::Tuple, perm, missingval) +""" + restore_array(data, indices, dims; missingval=missing) + +Restore a dimensional array from a set of values and their corresponding indices. + +# Arguments +- `data`: An `AbstractVector` of values to write to the destination array. +- `indices`: The flat index of each value in `data`. +- `dims`: A `Tuple` of `Dimension` for the corresponding destination array. +- `missingval`: The value to store for missing indices. + +# Example +```julia +julia> d = DimArray(rand(256, 256), (X, Y)); + +julia> t = DimTable(d); + +julia> indices = index_by_coords(t, dims(d)); + +julia> restored = restore_array(Tables.getcolumn(t, :value), indices, dims(d)); + +julia> all(restored .== d) +true +``` +""" +function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple; missingval=missing) # Allocate Destination Array dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) - dst[perm] .= data + dst[indices] .= data # Handle Missing Rows _missingval = _cast_missing(data, missingval) missing_rows = ones(Bool, dst_size) - missing_rows[perm] .= false - return ifelse.(missing_rows, _missingval, dst) + missing_rows[indices] .= false + data = ifelse.(missing_rows, _missingval, dst) + + # Reshape Array + return reshape(data, size(dims)) +end + +""" + index_by_coords(table, dims; selector=Contains) + +Return the flat index of each row in `table` based on its associated coordinates. +Dimension columns are determined from the name of each dimension in `dims`. +It is assumed that the source/destination array has the same dimension order as `dims`. 
+ +# Arguments +- `table`: A table representation of a dimensional array. +- `dims`: A `Tuple` of `Dimension` corresponding to the source/destination array. +- `selector`: The selector type to use for non-numerical/irregular coordinates. + +# Example +```julia +julia> d = DimArray(rand(256, 256), (X, Y)); + +julia> t = DimTable(d); + +julia> index_by_coords(t, dims(d)) +65536-element Vector{Int64}: + 1 + 2 + ⋮ + 65535 + 65536 +``` +""" +function index_by_coords(table, dims::Tuple; selector=DimensionalData.Contains) + return _sort_coords(table, dims, selector) end # Find the order of the table's rows according to the coordinate values -_sort_coords(table, dims::Tuple) = _sort_coords(_dim_cols(table, dims), dims) -function _sort_coords(coords::NamedTuple, dims::Tuple) - ords = _coords_to_ords(coords, dims) +_sort_coords(table, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _sort_coords(_dim_cols(table, dims), dims, T) +function _sort_coords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} + ords = _coords_to_ords(coords, dims, T) indices = _ords_to_indices(ords, dims) return indices end # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = _dim_col_names(dims) + dim_cols = name(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end -function _dim_col_names(dims) - return map(name, dims) -end - # Extract data columns from table function _data_cols(table, dims::Tuple) data_cols = _data_col_names(table, dims) @@ -37,7 +92,7 @@ end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = _dim_col_names(dims) + dim_cols = name(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end @@ -45,6 +100,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, + ::Type{<:DimensionalData.Selector}, ::Type{<:Real}, ::DimensionalData.Start, ::DimensionalData.Regular) @@ -56,6 +112,7 @@ end 
function _coords_to_ords( coords::AbstractVector, dim::Dimension, + ::Type{<:DimensionalData.Selector}, ::Type{<:Real}, ::DimensionalData.Center, ::DimensionalData.Regular) @@ -67,6 +124,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, + ::Type{<:DimensionalData.Selector}, ::Type{<:Real}, ::DimensionalData.End, ::DimensionalData.Regular) @@ -75,14 +133,14 @@ function _coords_to_ords( end # Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, Contains(c)), coords) +function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Type{T}, ::Any, ::Any, ::Any) where {T<:DimensionalData.Selector} + return map(c -> DimensionalData.selectindices(dim, T(c)), coords) end -# Preprocessing methods for _coords_to_ords -_coords_to_ords(coords::AbstractVector, dim::Dimension) = _coords_to_ords(coords, dim, eltype(dim), locus(dim), span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple) = Tuple(_coords_to_ords(c, d) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple) = _coords_to_ords(map(x -> coords[x], _dim_col_names(dims)), dims) +# Determine the ordinality of a set of coordinates +_coords_to_ords(coords::AbstractVector, dim::Dimension, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(coords, dim, T, eltype(dim), locus(dim), span(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = Tuple(_coords_to_ords(c, d, T) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(map(x -> coords[x], name(dims)), dims, T) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) @@ -102,4 +160,4 @@ function _cast_missing(::AbstractArray{T}, missingval) where {T} catch e return 
missingval end -end +end \ No newline at end of file From 2ebec1c69d8f9c4daea3b96ce715102b7facbfc2 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 25 Jun 2024 00:54:32 -0600 Subject: [PATCH 07/33] Updated table constructors for DimArray and DimStack --- src/array/array.jl | 9 ++++----- src/stack/stack.jl | 10 +++------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index ff8019055..dc61cf9c1 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -412,12 +412,11 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; col=nothing, missingval=missing) - perm = _sort_coords(table, dims) +function DimArray(table, dims; col=nothing, missingval=missing, selector=DimensionalData.Contains) + indices = index_by_coords(table, dims; selector=selector) col = isnothing(col) ? 
_data_col_names(table, dims) |> first : col - data = Tables.getcolumn(table, col) - dst = _write_vals(data, dims, perm, missingval) - return DimArray(reshape(dst, size(dims)), dims, name=col) + data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missingval) + return DimArray(data, dims, name=col) end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 004ca9f23..165735a16 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -424,14 +424,10 @@ function DimStack(data::NamedTuple, dims::Tuple; DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; missingval=missing) - arrays = Any[] - perm = _sort_coords(table, dims) +function DimStack(table, dims::Tuple; missingval=missing, selector=DimensionalData.Contains) data_cols = _data_cols(table, dims) - for (name, data) in pairs(data_cols) - dst = _write_vals(data, dims, perm, missingval) - push!(arrays, reshape(dst, size(dims))) - end + indices = index_by_coords(table, dims; selector=selector) + arrays = [restore_array(d, indices, dims; missingval=missingval) for d in values(data_cols)] return DimStack(NamedTuple{keys(data_cols)}(arrays), dims) end From 8e791bfe43de768678859966011a3cd6a0315d4b Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 16:53:52 -0600 Subject: [PATCH 08/33] Updated DimArray and DimStack docs to include table materializer methods --- src/array/array.jl | 13 +++++++------ src/stack/stack.jl | 7 ++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index dc61cf9c1..c42870f28 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -334,7 +334,7 @@ end DimArray <: AbstractDimArray DimArray(data, dims, refdims, name, metadata) - DimArray(data, dims::Tuple; refdims=(), 
name=NoName(), metadata=NoMetadata()) + DimArray(data, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), selector=Contains) The main concrete subtype of [`AbstractDimArray`](@ref). @@ -344,12 +344,13 @@ moves dimensions to reference dimension `refdims` after reducing operations ## Arguments -- `data`: An `AbstractArray`. +- `data`: An `AbstractArray` or a table with coordinate columns corresponding to `dims`. - `dims`: A `Tuple` of `Dimension` - `name`: A string name for the array. Shows in plots and tables. - `refdims`: refence dimensions. Usually set programmatically to track past slices and reductions of dimension for labelling and reconstruction. - `metadata`: `Dict` or `Metadata` object, or `NoMetadata()` +- `selector`: The coordinate selector type to use when materializing from a table. Indexing can be done with all regular indices, or with [`Dimension`](@ref)s and/or [`Selector`](@ref)s. @@ -412,11 +413,11 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; col=nothing, missingval=missing, selector=DimensionalData.Contains) +function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains, kw...) indices = index_by_coords(table, dims; selector=selector) - col = isnothing(col) ? _data_col_names(table, dims) |> first : col - data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missingval) - return DimArray(data, dims, name=col) + col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) + data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) + return DimArray(data, dims, name=col; kw...) 
end """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 165735a16..acf6506e0 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -278,6 +278,7 @@ end """ DimStack <: AbstractDimStack + DimStack(table, dims; kw...) DimStack(data::AbstractDimArray...; kw...) DimStack(data::Tuple{Vararg{AbstractDimArray}}; kw...) DimStack(data::NamedTuple{Keys,Vararg{AbstractDimArray}}; kw...) @@ -424,11 +425,11 @@ function DimStack(data::NamedTuple, dims::Tuple; DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; missingval=missing, selector=DimensionalData.Contains) +function DimStack(table, dims::Tuple; selector=DimensionalData.Contains, kw...) data_cols = _data_cols(table, dims) indices = index_by_coords(table, dims; selector=selector) - arrays = [restore_array(d, indices, dims; missingval=missingval) for d in values(data_cols)] - return DimStack(NamedTuple{keys(data_cols)}(arrays), dims) + arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] + return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) 
end layerdims(s::DimStack{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,Nothing}, name::Symbol) = dims(s) From 4cd5f9d7c86dcfefcc26378da8c8dfd2afc4b98d Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 16:54:15 -0600 Subject: [PATCH 09/33] Table materializer test cases --- test/tables.jl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/tables.jl b/test/tables.jl index b5bd416ea..728cd00cb 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -154,3 +154,48 @@ end @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end + +@testset "Materialize from table" begin + a = DimArray(rand(UInt8, 100, 100), (X(100:-1:1), Y(-250:5:249))) + b = DimArray(rand(Float32, 100, 100), (X(100:-1:1), Y(-250:5:249))) + c = DimArray(rand(Float64, 100, 100), (X(100:-1:1), Y(-250:5:249))) + ds = DimStack((a=a, b=b, c=c)) + t = DataFrame(ds) + t1 = Random.shuffle(t) + t2 = t[101:end,:] + + # Restore DimArray from shuffled table + @test all(DimArray(t1, dims(ds)) .== a) + @test all(DimArray(t1, dims(ds), name="a") .== a) + @test all(DimArray(t1, dims(ds), name="b") .== b) + @test all(DimArray(t1, dims(ds), name="c") .== c) + + # Restore DimArray from table with missing rows + @test all(DimArray(t2, dims(ds), name="a")[Y(2:100)] .== a[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="b")[Y(2:100)] .== b[Y(2:100)]) + @test all(DimArray(t2, dims(ds), name="c")[Y(2:100)] .== c[Y(2:100)]) + @test DimArray(t2, dims(ds), name="a")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="b")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="c")[Y(1)] .|> ismissing |> all + @test DimArray(t2, dims(ds), name="a")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="b")[Y(2:100)] .|> ismissing .|> (!) |> all + @test DimArray(t2, dims(ds), name="c")[Y(2:100)] .|> ismissing .|> (!) 
|> all + + # Restore DimStack from shuffled table + restored_stack = DimStack(t1, dims(ds)) + @test all(restored_stack.a .== ds.a) + @test all(restored_stack.b .== ds.b) + @test all(restored_stack.c .== ds.c) + + # Restore DimStack from table with missing rows + restored_stack = DimStack(t2, dims(ds)) + @test all(restored_stack.a[Y(2:100)] .== ds.a[Y(2:100)]) + @test all(restored_stack.b[Y(2:100)] .== ds.b[Y(2:100)]) + @test all(restored_stack.c[Y(2:100)] .== ds.c[Y(2:100)]) + @test restored_stack.a[Y(1)] .|> ismissing |> all + @test restored_stack.b[Y(1)] .|> ismissing |> all + @test restored_stack.c[Y(1)] .|> ismissing |> all + @test restored_stack.a[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.b[Y(2:100)] .|> ismissing .|> (!) |> all + @test restored_stack.c[Y(2:100)] .|> ismissing .|> (!) |> all +end \ No newline at end of file From 0c1991a42f0edf3e198d3e25cc85b53c5abc0a85 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 17:03:20 -0600 Subject: [PATCH 10/33] export table materializer methods --- src/DimensionalData.jl | 3 +++ src/array/array.jl | 2 +- src/stack/stack.jl | 2 +- src/table_ops.jl | 8 ++++---- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 0b176b383..bccc406c1 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -77,6 +77,9 @@ export dimnum, hasdim, hasselection, otherdims # utils export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims +# table utils +export restore_array, coords_to_index + export groupby, seasons, months, hours, intervals, ranges const DD = DimensionalData diff --git a/src/array/array.jl b/src/array/array.jl index c42870f28..9f19ef5e7 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -414,7 +414,7 @@ function DimArray(A::AbstractBasicDimArray; end # Write a single column from a table with one or more coordinate columns to a DimArray function DimArray(table, dims; 
name=NoName(), selector=DimensionalData.Contains, kw...) - indices = index_by_coords(table, dims; selector=selector) + indices = coords_to_index(table, dims; selector=selector) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) return DimArray(data, dims, name=col; kw...) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index acf6506e0..98f4d78c7 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -427,7 +427,7 @@ end # Write each column from a table with one or more coordinate columns to a layer in a DimStack function DimStack(table, dims::Tuple; selector=DimensionalData.Contains, kw...) data_cols = _data_cols(table, dims) - indices = index_by_coords(table, dims; selector=selector) + indices = coords_to_index(table, dims; selector=selector) arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) end diff --git a/src/table_ops.jl b/src/table_ops.jl index 0ecaf8c1d..f93ffc388 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -15,7 +15,7 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> indices = index_by_coords(t, dims(d)); +julia> indices = coords_to_index(t, dims(d)); julia> restored = restore_array(Tables.getcolumn(t, :value), indices, dims(d)); @@ -40,7 +40,7 @@ function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, end """ - index_by_coords(table, dims; selector=Contains) + coords_to_index(table, dims; selector=Contains) Return the flat index of each row in `table` based on its associated coordinates. Dimension columns are determined from the name of each dimension in `dims`. 
@@ -57,7 +57,7 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> index_by_coords(t, dims(d)) +julia> coords_to_index(t, dims(d)) 65536-element Vector{Int64}: 1 2 @@ -66,7 +66,7 @@ julia> index_by_coords(t, dims(d)) 65536 ``` """ -function index_by_coords(table, dims::Tuple; selector=DimensionalData.Contains) +function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains) return _sort_coords(table, dims, selector) end From 4534de580576023b01f27fd2d3b1c4f65672ff02 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 5 Jul 2024 17:20:28 -0600 Subject: [PATCH 11/33] Added Random to tables.jl test cases --- test/tables.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tables.jl b/test/tables.jl index 728cd00cb..23ea9eed5 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,4 +1,4 @@ -using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames +using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames, Random using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray From ed395cac2fca04d1000a3778b38fa96a7d0a696c Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:59:45 -0600 Subject: [PATCH 12/33] Update src/array/array.jl Co-authored-by: Rafael Schouten --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 1946e1292..c1e4e6559 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -432,7 +432,7 @@ function DimArray(A::AbstractBasicDimArray; end # Write a single column from a table with one or more coordinate columns to a DimArray function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains, kw...) 
- indices = coords_to_index(table, dims; selector=selector) + indices = coords_to_index(table, dims; selector) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) return DimArray(data, dims, name=col; kw...) From 00336afd3dcc21b4b60170b041eaf0fe9f9ae089 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:00:00 -0600 Subject: [PATCH 13/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index f93ffc388..d7b8622e7 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -66,7 +66,7 @@ julia> coords_to_index(t, dims(d)) 65536 ``` """ -function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains) +function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains()) return _sort_coords(table, dims, selector) end From 532f887b4c1e5a8e38479272ca958e410ce4970c Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 8 Aug 2024 00:00:38 -0600 Subject: [PATCH 14/33] Removed exports --- src/DimensionalData.jl | 3 --- src/array/array.jl | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index bccc406c1..0b176b383 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -77,9 +77,6 @@ export dimnum, hasdim, hasselection, otherdims # utils export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims -# table utils -export restore_array, coords_to_index - export groupby, seasons, months, hours, intervals, ranges const DD = DimensionalData diff --git a/src/array/array.jl b/src/array/array.jl index 1946e1292..d7e3efcd3 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -334,7 +334,7 @@ end DimArray <: AbstractDimArray 
DimArray(data, dims, refdims, name, metadata) - DimArray(data, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), selector=Contains) + DimArray(data, dims::Tuple; refdims=(), name=NoName(), metadata=NoMetadata(), selector=Contains()) The main concrete subtype of [`AbstractDimArray`](@ref). From 06a2c912faf69345688d8d3004b6e5ad93f1efd6 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:05:29 -0600 Subject: [PATCH 15/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index d7b8622e7..e12083fd0 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -107,7 +107,6 @@ function _coords_to_ords( step = (last(dim) - first(dim)) / (length(dim) - 1) return floor.(Int, ((coords .- first(dim)) ./ step) .+ 1) end - # Determine the ordinality of a set of regularly spaced numerical coordinates with a central locus function _coords_to_ords( coords::AbstractVector, From 3bacf338eac5ff7d334e22c131ff786bc72b0b3c Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:18:29 -0600 Subject: [PATCH 16/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index e12083fd0..12f79a364 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -132,7 +132,7 @@ function _coords_to_ords( end # Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords(coords::AbstractVector, dim::Dimension, ::Type{T}, ::Any, ::Any, ::Any) where {T<:DimensionalData.Selector} +function _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DimensionalData.Selector, ::Any, ::Any, ::Any) return map(c -> DimensionalData.selectindices(dim, T(c)), coords) end From 
4ced6f7a445577dce25af1f6eeee84d8d72a5a83 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:18:46 -0600 Subject: [PATCH 17/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 12f79a364..03f6d05fa 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -111,7 +111,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, - ::Type{<:DimensionalData.Selector}, + ::DimensionalData.Selector, ::Type{<:Real}, ::DimensionalData.Center, ::DimensionalData.Regular) From c846dfdfe0d4ad00bf30ba46f24934851458a760 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:18:54 -0600 Subject: [PATCH 18/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 03f6d05fa..519ea20a1 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -100,7 +100,7 @@ end function _coords_to_ords( coords::AbstractVector, dim::Dimension, - ::Type{<:DimensionalData.Selector}, + ::DimensionalData.Selector, ::Type{<:Real}, ::DimensionalData.Start, ::DimensionalData.Regular) From fe2c871514be29e3c1e9d2a8024c219671a90311 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:19:05 -0600 Subject: [PATCH 19/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 519ea20a1..b0d45e6f9 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -133,7 +133,7 @@ end # Determine the ordinality of a set of categorical or irregular coordinates function _coords_to_ords(coords::AbstractVector, dim::Dimension, 
sel::DimensionalData.Selector, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, T(c)), coords) + return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) end # Determine the ordinality of a set of coordinates From 61f82204739d71845dec865d899e3439aba40dc7 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Thu, 8 Aug 2024 00:19:36 -0600 Subject: [PATCH 20/33] Replaced selector type with instance. --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 690d901f6..d488a6b32 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,7 +431,7 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains, kw...) +function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains(), kw...) indices = coords_to_index(table, dims; selector) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) From dbe7b991420c199ebdb87fa1fc434443853d95ec Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Sun, 11 Aug 2024 18:13:17 -0600 Subject: [PATCH 21/33] Table materializer can now infer dimensions from the coordinates. 
--- src/array/array.jl | 6 +- src/stack/stack.jl | 4 +- src/table_ops.jl | 249 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 202 insertions(+), 57 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index d488a6b32..0b222c6a0 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,12 +431,12 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=DimensionalData.Contains(), kw...) - indices = coords_to_index(table, dims; selector) +function DimArray(table, dims; name=NoName(), selector=DimensionalData.Near(), precision=6, kw...) + data = restore_array(table, dims; selector=selector, missingval=missing, name=name, precision=precision) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) - data = restore_array(Tables.getcolumn(table, col), indices, dims; missingval=missing) return DimArray(data, dims, name=col; kw...) end +DimArray(table; kw...) = DimArray(table, _guess_dims(table; kw...); kw...) """ DimArray(f::Function, dim::Dimension; [name]) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 5b19ae29a..80b46388e 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -422,9 +422,9 @@ function DimStack(data::NamedTuple, dims::Tuple; DimStack(data, format(dims, first(data)), refdims, layerdims, metadata, layermetadata) end # Write each column from a table with one or more coordinate columns to a layer in a DimStack -function DimStack(table, dims::Tuple; selector=DimensionalData.Contains, kw...) +function DimStack(table, dims::Tuple; selector=DimensionalData.Contains(), kw...) 
data_cols = _data_cols(table, dims) - indices = coords_to_index(table, dims; selector=selector) + indices = coords_to_indices(table, dims; selector=selector) arrays = [restore_array(d, indices, dims; missingval=missing) for d in values(data_cols)] return DimStack(NamedTuple{keys(data_cols)}(arrays), dims; kw...) end diff --git a/src/table_ops.jl b/src/table_ops.jl index b0d45e6f9..7008c674d 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,13 +1,22 @@ """ - restore_array(data, indices, dims; missingval=missing) + restore_array(table; kw...) + restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=Near(), precision=6) -Restore a dimensional array from a set of values and their corresponding indices. +Restore a dimensional array from its tabular representation. # Arguments -- `data`: An `AbstractVector` of values to write to the destination array. -- `indices`: The flat index of each value in `data`. -- `dims`: A `Tuple` of `Dimension` for the corresponding destination array. -- `missingval`: The value to store for missing indices. +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. +Rows can be missing or out of order. +- `dims`: The dimensions of the corresponding `DimArray`. The dimensions may be explicitly defined, or they +may be inferred from the data. In the second case, `restore_array` accepts the same arguments as `guess_dims`. + +# Keyword Arguments +- `name`: The name of the column in `table` from which to restore the array. Defaults to the +first non-dimensional column. +- `missingval`: The value to store for missing rows. +- `selector`: The `Selector` to use when matching coordinates in `table` to their corresponding +indices in `dims`. +- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). 
# Example ```julia @@ -15,15 +24,29 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> indices = coords_to_index(t, dims(d)); - -julia> restored = restore_array(Tables.getcolumn(t, :value), indices, dims(d)); +julia> restored = restore_array(t); julia> all(restored .== d) true ``` """ -function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple; missingval=missing) +restore_array(table; kw...) = restore_array(table, _dim_col_names(table); kw...) +function restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=DimensionalData.Near(), precision=6) + # Get array dimensions + dims = guess_dims(table, dims, precision=precision) + + # Determine row indices based on coordinate values + indices = coords_to_indices(table, dims; selector=selector) + + # Extract the data column correspondong to `name` + col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) + data = _get_column(table, col) + + # Restore array data + return _restore_array(data, indices, dims, missingval) +end + +function _restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) # Allocate Destination Array dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) @@ -40,7 +63,7 @@ function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, end """ - coords_to_index(table, dims; selector=Contains) + coords_to_indices(table, dims; selector=Near()) Return the flat index of each row in `table` based on its associated coordinates. Dimension columns are determined from the name of each dimension in `dims`. 
@@ -57,7 +80,7 @@ julia> d = DimArray(rand(256, 256), (X, Y)); julia> t = DimTable(d); -julia> coords_to_index(t, dims(d)) +julia> coords_to_indices(t, dims(d)) 65536-element Vector{Int64}: 1 2 @@ -66,24 +89,103 @@ julia> coords_to_index(t, dims(d)) 65536 ``` """ -function coords_to_index(table, dims::Tuple; selector=DimensionalData.Contains()) - return _sort_coords(table, dims, selector) +function coords_to_indices(table, dims::Tuple; selector=DimensionalData.Near()) + return _coords_to_indices(table, dims, selector) end # Find the order of the table's rows according to the coordinate values -_sort_coords(table, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _sort_coords(_dim_cols(table, dims), dims, T) -function _sort_coords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} - ords = _coords_to_ords(coords, dims, T) +_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_indices(_dim_cols(table, dims), dims, sel) +function _coords_to_indices(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) + ords = _coords_to_ords(coords, dims, sel) indices = _ords_to_indices(ords, dims) return indices end +""" + guess_dims(table; kw...) + guess_dims(table, dims; precision=6) + +Guesses the dimensions of an array based on the provided tabular representation. + +# Arguments +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. +The dimensions will be inferred from the corresponding coordinate collumns in the table. +- `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default +to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. Dimensions can be given as either a singular +value or as a `Pair` with both the dimensions and corresponding order. The order will be inferred from the data +when none is given. 
This should work for sorted coordinates, but will not be sufficient when the table's rows are +out of order. + +# Keyword Arguments +- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). + +# Returns +A tuple containing the inferred dimensions from the table. + +# Example +```julia +julia> xdims = X(LinRange{Float64}(610000.0, 661180.0, 2560)); + +julia> ydims = Y(LinRange{Float64}(6.84142e6, 6.79024e6, 2560)); + +julia> bdims = Dim{:Band}([:B02, :B03, :B04]); + +julia> d = DimArray(rand(UInt16, 2560, 2560, 3), (xdims, ydims, bdims)); + +julia> t = DataFrame(d); + +julia> t_rand = Random.shuffle(t); + +julia> dims(d) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered + +julia> DD.guess_dims(t) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered + +julia> DD.guess_dims(t, (X, Y, :Band)) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered + +julia> DD.guess_dims(t_rand, (X => DD.ForwardOrdered(), Y => DD.ReverseOrdered(), :Band => DD.ForwardOrdered())) +↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, +→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered +``` +""" +guess_dims(table; kw...) 
= guess_dims(table, filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)); kw...) +guess_dims(table, dims::Tuple; kw...) = map(dim -> guess_dims(table, dim; kw...), dims) +guess_dims(table, dim; precision=6) = _guess_dims(_get_column(table, dim), dim, precision) + +_guess_dims(coords::AbstractVector, dim::DD.Dimension, args...) = dim +_guess_dims(coords::AbstractVector, dim::Type{<:DD.Dimension}, args...) = _guess_dims(coords, DD.name(dim), args...) +_guess_dims(coords::AbstractVector, dim::Pair, args...) = _guess_dims(coords, first(dim), last(dim), args...) +function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) + dim_vals = _dim_vals(coords, precision) + order = _guess_dim_order(dim_vals) + span = _guess_dim_span(dim_vals, order, precision) + return _build_dim(dim_vals, dim, order, span) +end +function _guess_dims(coords::AbstractVector, dim::Symbol, order::DD.Order, precision::Int) + dim_vals = _dim_vals(coords, order, precision) + span = _guess_dim_span(dim_vals, order, precision) + return _build_dim(dim_vals, dim, order, span) +end + # Extract coordinate columns from table function _dim_cols(table, dims::Tuple) - dim_cols = name(dims) + dim_cols = DD.name(dims) return NamedTuple{dim_cols}(Tables.getcolumn(table, col) for col in dim_cols) end +# Extract dimension column names from the given table +_dim_col_names(table) = filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)) +_dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), DD.name(dims)) + # Extract data columns from table function _data_cols(table, dims::Tuple) data_cols = _data_col_names(table, dims) @@ -92,54 +194,97 @@ end # Get names of data columns from table function _data_col_names(table, dims::Tuple) - dim_cols = name(dims) + dim_cols = DD.name(dims) return filter(x -> !(x in dim_cols), Tables.columnnames(table)) end -# Determine the ordinality of a set of regularly spaced numerical coordinates with a starting locus -function 
_coords_to_ords( - coords::AbstractVector, - dim::Dimension, - ::DimensionalData.Selector, - ::Type{<:Real}, - ::DimensionalData.Start, - ::DimensionalData.Regular) - step = (last(dim) - first(dim)) / (length(dim) - 1) - return floor.(Int, ((coords .- first(dim)) ./ step) .+ 1) -end -# Determine the ordinality of a set of regularly spaced numerical coordinates with a central locus +# Determine the ordinality of a set of coordinates +_coords_to_ords(coords::AbstractVector, dim::DD.Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple, sel::DimensionalData.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, sel) + +# Determine the ordinality of a set of regularly spaced numerical coordinates function _coords_to_ords( - coords::AbstractVector, + coords::AbstractVector{<:Real}, dim::Dimension, - ::DimensionalData.Selector, - ::Type{<:Real}, - ::DimensionalData.Center, - ::DimensionalData.Regular) - step = (last(dim) - first(dim)) / (length(dim) - 1) - return round.(Int, ((coords .- first(dim)) ./ step) .+ 1) + ::DimensionalData.Near, + position::DimensionalData.Position, + span::DimensionalData.Regular) + step = DD.step(span) + float_ords = ((coords .- first(dim)) ./ step) .+ 1 + int_ords = _round_ords(float_ords, position) + return clamp!(int_ords, 1, length(dim)) end -# Determine the ordinality of a set of regularly spaced numerical coordinates with an end locus +# Determine the ordinality of a set of categorical or irregular coordinates function _coords_to_ords( coords::AbstractVector, dim::Dimension, - ::Type{<:DimensionalData.Selector}, - ::Type{<:Real}, - ::DimensionalData.End, - ::DimensionalData.Regular) - step = (last(dim) - first(dim)) / (length(dim) - 1) - return ceil.(Int, ((coords .- first(dim)) ./ step) .+ 1) 
+ sel::DimensionalData.Selector, + ::DimensionalData.Position, + ::DimensionalData.Span) + return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) end -# Determine the ordinality of a set of categorical or irregular coordinates -function _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DimensionalData.Selector, ::Any, ::Any, ::Any) - return map(c -> DimensionalData.selectindices(dim, rebuild(sel, c)), coords) +# Round coordinate ordinality to the appropriate integer given the specified locus +_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Start) = floor.(Int, ords) +_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.Center) = round.(Int, ords) +_round_ords(ords::AbstractVector{<:Real}, ::DimensionalData.End) = ceil.(Int, ords) + +# Extract dimension value from the given vector of coordinates +_dim_vals(coords::AbstractVector, precision::Int) = _unique_vals(coords, precision) +_dim_vals(coords::AbstractVector, ::DD.Order, precision::Int) = _unique_vals(coords, precision) +_dim_vals(coords::AbstractVector, ::DD.ForwardOrdered, precision::Int) = sort!(_unique_vals(coords, precision)) +_dim_vals(coords::AbstractVector, ::DD.ReverseOrdered, precision::Int) = sort!(_unique_vals(coords, precision), rev=true) + +# Extract all unique coordinates from the given vector +_unique_vals(coords::AbstractVector, precision::Int) = _round_dim_val.(coords, precision) |> unique + +# Round dimension value within the specified precision +_round_dim_val(x, ::Int) = x +_round_dim_val(x::Real, precision::Int) = round(x, digits=precision) + +# Determine if the given coordinates are forward ordered, reverse ordered, or unordered +function _guess_dim_order(coords::AbstractVector) + if issorted(coords) + return DD.ForwardOrdered() + elseif issorted(coords, rev=true) + return DD.ReverseOrdered() + else + return DD.Unordered() + end end -# Determine the ordinality of a set of coordinates -_coords_to_ords(coords::AbstractVector, 
dim::Dimension, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(coords, dim, T, eltype(dim), locus(dim), span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = Tuple(_coords_to_ords(c, d, T) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple, ::Type{T}) where {T <: DimensionalData.Selector} = _coords_to_ords(map(x -> coords[x], name(dims)), dims, T) +# Estimate the span between consecutive coordinates +_guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular() +_guess_dim_span(::AbstractVector{<:Real}, ::DD.Order, ::Int) = DD.Irregular() +function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision::Int) + steps = round.((@view coords[2:end]) .- (@view coords[1:end-1]), digits=precision) + span = argmin(abs, steps) + return all(isinteger, round.(steps ./ span, digits=precision)) ? DD.Regular(span) : DD.Irregular() +end + +function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) + return Dim{dim}(DD.Categorical(vals, order=order)) +end +function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Irregular) + return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) +end +function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Regular) + n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) + dim_vals = LinRange(first(vals), last(vals), n) + return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) +end + +_get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) +_get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) +_get_column(table, x::Symbol) = Tables.getcolumn(table, x) +_get_column(table, x::Pair) = _get_column(table, first(x)) + +_dim_name(x::Symbol) = x +_dim_name(x::DD.Dimension) = DD.name(x) +_dim_name(x::Type{<:DD.Dimension}) = DD.name(x) 
+_dim_name(x::Pair) = _dim_name(first(x)) => last(x) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) From f41098856ab8b41952904dc69730a1f5e3384dad Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:55:45 -0600 Subject: [PATCH 22/33] Update src/stack/stack.jl Co-authored-by: Rafael Schouten --- src/stack/stack.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 80b46388e..6e47f1ccb 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -278,7 +278,7 @@ end """ DimStack <: AbstractDimStack - DimStack(table, dims; kw...) + DimStack(table, [dims]; kw...) DimStack(data::AbstractDimArray...; kw...) DimStack(data::Tuple{Vararg{AbstractDimArray}}; kw...) DimStack(data::NamedTuple{Keys,Vararg{AbstractDimArray}}; kw...) From a17f06900044214b149926e62a0bd3ef273c41a0 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:56:08 -0600 Subject: [PATCH 23/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 7008c674d..e0add6489 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -94,7 +94,8 @@ function coords_to_indices(table, dims::Tuple; selector=DimensionalData.Near()) end # Find the order of the table's rows according to the coordinate values -_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_indices(_dim_cols(table, dims), dims, sel) +_coords_to_indices(table, dims::Tuple, sel::DimensionalData.Selector) = + _coords_to_indices(_dim_cols(table, dims), dims, sel) function _coords_to_indices(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) ords = _coords_to_ords(coords, dims, sel) indices = _ords_to_indices(ords, dims) From 
9bdded94e085fe54396234f6f15ac1e5a4041f7c Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Tue, 17 Sep 2024 23:56:23 -0600 Subject: [PATCH 24/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index e0add6489..04e7543b6 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -109,7 +109,7 @@ end Guesses the dimensions of an array based on the provided tabular representation. # Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. The dimensions will be inferred from the corresponding coordinate collumns in the table. - `dims`: One or more dimensions to be inferred. If no dimensions are specified, then `guess_dims` will default to any available dimensions in the set `(:X, :Y, :Z, :Ti, :Band)`. 
Dimensions can be given as either a singular From 545108758d23908baa149555aa131c247b664249 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:16:02 -0600 Subject: [PATCH 25/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 04e7543b6..791aa06a4 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -268,7 +268,7 @@ end function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) return Dim{dim}(DD.Categorical(vals, order=order)) end -function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Irregular) +function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Regular) From faf4d76ba019157c563794523ad82ac38fec3d1b Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:17:24 -0600 Subject: [PATCH 26/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 791aa06a4..48120e9b9 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -271,7 +271,7 @@ end function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end -function _build_dim(vals::AbstractVector{<:Real}, dim::Symbol, order::DD.Order, span::DD.Regular) +function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) n = round(Int, 
abs((last(vals) - first(vals)) / span.step) + 1) dim_vals = LinRange(first(vals), last(vals), n) return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) From 02f60a387d17c53de8ff5460ecc4917760fc8537 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:18:11 -0600 Subject: [PATCH 27/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 48120e9b9..bf7ea9969 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -258,7 +258,6 @@ end # Estimate the span between consecutive coordinates _guess_dim_span(::AbstractVector, ::DD.Order, ::Int) = DD.Irregular() -_guess_dim_span(::AbstractVector{<:Real}, ::DD.Order, ::Int) = DD.Irregular() function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision::Int) steps = round.((@view coords[2:end]) .- (@view coords[1:end-1]), digits=precision) span = argmin(abs, steps) From fafd3574ddf186b020247609b9292fcbe1414f61 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Sun, 22 Sep 2024 11:55:51 -0600 Subject: [PATCH 28/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index bf7ea9969..6fd56589e 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -281,10 +281,6 @@ _get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) _get_column(table, x::Symbol) = Tables.getcolumn(table, x) _get_column(table, x::Pair) = _get_column(table, first(x)) -_dim_name(x::Symbol) = x -_dim_name(x::DD.Dimension) = DD.name(x) -_dim_name(x::Type{<:DD.Dimension}) = DD.name(x) -_dim_name(x::Pair) = _dim_name(first(x)) => last(x) # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) From 
d7f15f5d30286e9a2086419fe2811c48147fc20a Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:47:44 -0600 Subject: [PATCH 29/33] Update src/array/array.jl Co-authored-by: Rafael Schouten --- src/array/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/array/array.jl b/src/array/array.jl index 0b222c6a0..65b4dc011 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,7 +431,7 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=DimensionalData.Near(), precision=6, kw...) +function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, kw...) data = restore_array(table, dims; selector=selector, missingval=missing, name=name, precision=precision) col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) return DimArray(data, dims, name=col; kw...) 
From 34a0a697fe14c79b2c93b527cc5dcc897420c7b8 Mon Sep 17 00:00:00 2001 From: Joshua Billson <61667893+JoshuaBillson@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:15:36 -0600 Subject: [PATCH 30/33] Update src/table_ops.jl Co-authored-by: Rafael Schouten --- src/table_ops.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 6fd56589e..052d46659 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -265,7 +265,7 @@ function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision end function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) - return Dim{dim}(DD.Categorical(vals, order=order)) + return rebuild(name2dim(dim), DD.Categorical(vals, order=order)) end function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) From d0b9eb721d227e22ac5428c5261bce2ff45b73c2 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Wed, 25 Sep 2024 18:16:12 -0600 Subject: [PATCH 31/33] Added support for guessing the dimension ordering and span for Dates and DateTimes --- src/table_ops.jl | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index 6fd56589e..b01a8cb1b 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -200,9 +200,9 @@ function _data_col_names(table, dims::Tuple) end # Determine the ordinality of a set of coordinates -_coords_to_ords(coords::AbstractVector, dim::DD.Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) -_coords_to_ords(coords::Tuple, dims::Tuple, sel::DimensionalData.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) -_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DimensionalData.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, 
sel) +_coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) +_coords_to_ords(coords::Tuple, dims::Tuple, sel::DD.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) +_coords_to_ords(coords::NamedTuple, dims::Tuple, sel::DD.Selector) = _coords_to_ords(map(x -> coords[x], DD.name(dims)), dims, sel) # Determine the ordinality of a set of regularly spaced numerical coordinates function _coords_to_ords( @@ -239,19 +239,20 @@ _dim_vals(coords::AbstractVector, ::DD.ForwardOrdered, precision::Int) = sort!(_ _dim_vals(coords::AbstractVector, ::DD.ReverseOrdered, precision::Int) = sort!(_unique_vals(coords, precision), rev=true) # Extract all unique coordinates from the given vector -_unique_vals(coords::AbstractVector, precision::Int) = _round_dim_val.(coords, precision) |> unique - -# Round dimension value within the specified precision -_round_dim_val(x, ::Int) = x -_round_dim_val(x::Real, precision::Int) = round(x, digits=precision) +_unique_vals(coords::AbstractVector, ::Int) = unique(coords) +_unique_vals(coords::AbstractVector{<:Real}, precision::Int) = round.(coords, digits=precision) |> unique # Determine if the given coordinates are forward ordered, reverse ordered, or unordered function _guess_dim_order(coords::AbstractVector) - if issorted(coords) - return DD.ForwardOrdered() - elseif issorted(coords, rev=true) - return DD.ReverseOrdered() - else + try + if issorted(coords) + return DD.ForwardOrdered() + elseif issorted(coords, rev=true) + return DD.ReverseOrdered() + else + return DD.Unordered() + end + catch return DD.Unordered() end end @@ -263,14 +264,19 @@ function _guess_dim_span(coords::AbstractVector{<:Real}, ::DD.Ordered, precision span = argmin(abs, steps) return all(isinteger, round.(steps ./ span, digits=precision)) ? 
DD.Regular(span) : DD.Irregular() end +function _guess_dim_span(coords::AbstractVector{<:Dates.AbstractTime}, ::DD.Ordered, precision::Int) + steps = (@view coords[2:end]) .- (@view coords[1:end-1]) + span = argmin(abs, steps) + return all(isinteger, round.(steps ./ span, digits=precision)) ? DD.Regular(span) : DD.Irregular() +end function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Span) return Dim{dim}(DD.Categorical(vals, order=order)) end -function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) +function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) return Dim{dim}(DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end -function _build_dim(vals::AbstractVector{<:Union{Number,DateTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) +function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) dim_vals = LinRange(first(vals), last(vals), n) return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) From 0ea72a05e3ff1088189cf9aab520e520ce597a75 Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Fri, 27 Sep 2024 14:19:10 -0600 Subject: [PATCH 32/33] Replaced LinRange with StepRangeLen in _build_dim --- src/table_ops.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/table_ops.jl b/src/table_ops.jl index dd995b79f..8f7fcbe14 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -274,12 +274,12 @@ function _build_dim(vals::AbstractVector, dim::Symbol, order::DD.Order, ::DD.Spa return rebuild(name2dim(dim), DD.Categorical(vals, order=order)) end function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Irregular) - return Dim{dim}(DD.Sampled(vals, 
order=order, span=span, sampling=DD.Points())) + return rebuild(name2dim(dim), DD.Sampled(vals, order=order, span=span, sampling=DD.Points())) end function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, dim::Symbol, order::DD.Order, span::DD.Regular) n = round(Int, abs((last(vals) - first(vals)) / span.step) + 1) - dim_vals = LinRange(first(vals), last(vals), n) - return Dim{dim}(DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) + dim_vals = StepRangeLen(first(vals), span.step, n) + return rebuild(name2dim(dim), DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) end _get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) From bc629320e37ae80cf5eeadb63890f5d28810abeb Mon Sep 17 00:00:00 2001 From: JoshuaBillson Date: Tue, 15 Oct 2024 00:41:38 -0600 Subject: [PATCH 33/33] Added Tables.istable check to DimArray constructor --- src/array/array.jl | 34 ++++++++++--- src/table_ops.jl | 120 +++++++++++++++++++++------------------------ 2 files changed, 83 insertions(+), 71 deletions(-) diff --git a/src/array/array.jl b/src/array/array.jl index 65b4dc011..f02449401 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -431,12 +431,34 @@ function DimArray(A::AbstractBasicDimArray; DimArray(newdata, format(dims, newdata); refdims, name, metadata) end # Write a single column from a table with one or more coordinate columns to a DimArray -function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, kw...) - data = restore_array(table, dims; selector=selector, missingval=missing, name=name, precision=precision) - col = name == NoName() ? _data_col_names(table, dims) |> first : Symbol(name) - return DimArray(data, dims, name=col; kw...) +function DimArray(table, dims; name=NoName(), selector=Near(), precision=6, missingval=missing, kw...) 
+ # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + + # Get array dimensions + dims = guess_dims(table, dims, precision=precision) + + # Determine row indices based on coordinate values + indices = coords_to_indices(table, dims; selector=selector) + + # Extract the data column corresponding to `name` + col = name == NoName() ? data_col_names(table, dims) |> first : Symbol(name) + data = Tables.getcolumn(table, col) + + # Restore array data + array = restore_array(data, indices, dims, missingval) + + # Return DimArray + return DimArray(array, dims, name=col; kw...) +end +# Same as above, but guess dimension names +function DimArray(table; kw...) + # Confirm that the Tables interface is implemented + Tables.istable(table) || throw(ArgumentError("`table` must satisfy the `Tables.jl` interface.")) + + # Use default dimension + return DimArray(table, guess_dims(table; kw...); kw...) end -DimArray(table; kw...) = DimArray(table, _guess_dims(table; kw...); kw...) """ DimArray(f::Function, dim::Dimension; [name]) @@ -445,7 +467,7 @@ Apply function `f` across the values of the dimension `dim` the given dimension. Optionally provide a name for the result. """ function DimArray(f::Function, dim::Dimension; name=Symbol(nameof(f), "(", name(dim), ")")) - DimArray(f.(val(dim)), (dim,); name) + DimArray(f.(val(dim)), (dim,); name) end const DimVector = DimArray{T,1} where T diff --git a/src/table_ops.jl b/src/table_ops.jl index 8f7fcbe14..73b3d2525 100644 --- a/src/table_ops.jl +++ b/src/table_ops.jl @@ -1,52 +1,19 @@ """ - restore_array(table; kw...) - restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=Near(), precision=6) + restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) Restore a dimensional array from its tabular representation. 
# Arguments -- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other tabular data structure. -Rows can be missing or out of order. -- `dims`: The dimensions of the corresponding `DimArray`. The dimensions may be explicitly defined, or they -may be inferred from the data. In the second case, `restore_array` accepts the same arguments as `guess_dims`. - -# Keyword Arguments -- `name`: The name of the column in `table` from which to restore the array. Defaults to the -first non-dimensional column. -- `missingval`: The value to store for missing rows. -- `selector`: The `Selector` to use when matching coordinates in `table` to their corresponding -indices in `dims`. -- `precision`: Specifies the number of digits to use for guessing dimensions (default = `6`). - -# Example -```julia -julia> d = DimArray(rand(256, 256), (X, Y)); - -julia> t = DimTable(d); +- `data`: An `AbstractVector` containing the flat data to be written to a `DimArray`. +- `indices`: An `AbstractVector` containing the flat indices corresponding to each element in `data`. +- `dims`: The dimensions of the destination `DimArray`. +- `missingval`: The value to write for missing elements in `data`. -julia> restored = restore_array(t); - -julia> all(restored .== d) -true +# Returns +An `Array` containing the ordered values in `data` with the size specified by `dims`. ``` """ -restore_array(table; kw...) = restore_array(table, _dim_col_names(table); kw...) -function restore_array(table, dims::Tuple; name=NoName(), missingval=missing, selector=DimensionalData.Near(), precision=6) - # Get array dimensions - dims = guess_dims(table, dims, precision=precision) - - # Determine row indices based on coordinate values - indices = coords_to_indices(table, dims; selector=selector) - - # Extract the data column correspondong to `name` - col = name == NoName() ? 
_data_col_names(table, dims) |> first : Symbol(name) - data = _get_column(table, col) - - # Restore array data - return _restore_array(data, indices, dims, missingval) -end - -function _restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) +function restore_array(data::AbstractVector, indices::AbstractVector{<:Integer}, dims::Tuple, missingval) # Allocate Destination Array dst_size = prod(map(length, dims)) dst = Vector{eltype(data)}(undef, dst_size) @@ -143,28 +110,63 @@ julia> dims(d) ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered julia> DD.guess_dims(t) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, +→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t, (X, Y, :Band)) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +julia> DD.guess_dims(t, X, Y, :Band) +↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered Regular Points, +→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered -julia> DD.guess_dims(t_rand, (X => DD.ForwardOrdered(), Y => DD.ReverseOrdered(), :Band => DD.ForwardOrdered())) -↓ X Sampled{Float64} LinRange{Float64}(610000.0, 661180.0, 2560) ForwardOrdered Regular Points, -→ Y Sampled{Float64} LinRange{Float64}(6.84142e6, 6.79024e6, 2560) ReverseOrdered Regular Points, +julia> DD.guess_dims(t_rand, X => DD.ForwardOrdered, Y => DD.ReverseOrdered, :Band => DD.ForwardOrdered) +↓ X Sampled{Float64} 610000.0:20.0:661180.0 ForwardOrdered 
Regular Points, +→ Y Sampled{Float64} 6.84142e6:-20.0:6.79024e6 ReverseOrdered Regular Points, ↗ Band Categorical{Symbol} [:B02, :B03, :B04] ForwardOrdered ``` """ -guess_dims(table; kw...) = guess_dims(table, filter(x -> x in Tables.columnnames(table), (:X,:Y,:Z,:Ti,:Band)); kw...) -guess_dims(table, dims::Tuple; kw...) = map(dim -> guess_dims(table, dim; kw...), dims) -guess_dims(table, dim; precision=6) = _guess_dims(_get_column(table, dim), dim, precision) +guess_dims(table; kw...) = guess_dims(table, _dim_col_names(table); kw...) +function guess_dims(table, dims::Tuple; precision=6) + map(dim -> _guess_dims(get_column(table, dim), dim, precision), dims) +end + +""" + get_column(table, dim::Type{<:DD.Dimension}) + get_column(table, dim::DD.Dimension) + get_column(table, dim::Symbol) + get_column(table, dim::Pair) + +Retrieve the coordinate data stored in the column specified by `dim`. + +# Arguments +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. +- `dim`: A single dimension to be retrieved, which may be a `Symbol`, a `Dimension`, or a `Dimension => Order` pair. +""" +get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) +get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) +get_column(table, x::Symbol) = Tables.getcolumn(table, x) +get_column(table, x::Pair) = get_column(table, first(x)) + +""" + data_col_names(table, dims::Tuple) + +Return the names of all columns that don't match the dimensions given by `dims`. + +# Arguments +- `table`: The input data table, which could be a `DataFrame`, `DimTable`, or any other Tables.jl compatible data structure. +- `dims`: A `Tuple` of one or more `Dimensions`. +""" +function data_col_names(table, dims::Tuple) + dim_cols = DD.name(dims) + return filter(x -> !(x in dim_cols), Tables.columnnames(table)) +end _guess_dims(coords::AbstractVector, dim::DD.Dimension, args...) 
= dim _guess_dims(coords::AbstractVector, dim::Type{<:DD.Dimension}, args...) = _guess_dims(coords, DD.name(dim), args...) _guess_dims(coords::AbstractVector, dim::Pair, args...) = _guess_dims(coords, first(dim), last(dim), args...) +function _guess_dims(coords::AbstractVector, dim::Symbol, ::Type{T}, precision::Int) where {T <: DD.Order} + return _guess_dims(coords, dim, T(), precision) +end function _guess_dims(coords::AbstractVector, dim::Symbol, precision::Int) dim_vals = _dim_vals(coords, precision) order = _guess_dim_order(dim_vals) @@ -189,16 +191,10 @@ _dim_col_names(table, dims::Tuple) = map(col -> Tables.getcolumn(table, col), DD # Extract data columns from table function _data_cols(table, dims::Tuple) - data_cols = _data_col_names(table, dims) + data_cols = data_col_names(table, dims) return NamedTuple{Tuple(data_cols)}(Tables.getcolumn(table, col) for col in data_cols) end -# Get names of data columns from table -function _data_col_names(table, dims::Tuple) - dim_cols = DD.name(dims) - return filter(x -> !(x in dim_cols), Tables.columnnames(table)) -end - # Determine the ordinality of a set of coordinates _coords_to_ords(coords::AbstractVector, dim::Dimension, sel::DD.Selector) = _coords_to_ords(coords, dim, sel, DD.locus(dim), DD.span(dim)) _coords_to_ords(coords::Tuple, dims::Tuple, sel::DD.Selector) = Tuple(_coords_to_ords(c, d, sel) for (c, d) in zip(coords, dims)) @@ -282,12 +278,6 @@ function _build_dim(vals::AbstractVector{<:Union{Number,Dates.AbstractTime}}, di return rebuild(name2dim(dim), DD.Sampled(dim_vals, order=order, span=span, sampling=DD.Points())) end -_get_column(table, x::Type{<:DD.Dimension}) = Tables.getcolumn(table, DD.name(x)) -_get_column(table, x::DD.Dimension) = Tables.getcolumn(table, DD.name(x)) -_get_column(table, x::Symbol) = Tables.getcolumn(table, x) -_get_column(table, x::Pair) = _get_column(table, first(x)) - - # Determine the index from a tuple of coordinate orders function _ords_to_indices(ords, dims) stride = 1