diff --git a/docs/src/reference.md b/docs/src/reference.md index fe0d3dbda..1e2670870 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -168,6 +168,8 @@ LookupArrays.LookupArray LookupArrays.Aligned LookupArrays.AbstractSampled LookupArrays.Sampled +LookupArrays.AbstractCyclic +LookupArrays.Cyclic LookupArrays.AbstractCategorical LookupArrays.Categorical LookupArrays.Unaligned diff --git a/src/LookupArrays/LookupArrays.jl b/src/LookupArrays/LookupArrays.jl index d76447235..02f6c6871 100644 --- a/src/LookupArrays/LookupArrays.jl +++ b/src/LookupArrays/LookupArrays.jl @@ -44,7 +44,7 @@ export AutoStep, AutoBounds, AutoIndex export LookupArray export AutoLookup, NoLookup -export Aligned, AbstractSampled, Sampled, AbstractCategorical, Categorical +export Aligned, AbstractSampled, Sampled, AbstractCyclic, Cyclic, AbstractCategorical, Categorical export Unaligned, Transformed const StandardIndices = Union{AbstractArray{<:Integer},Colon,Integer,CartesianIndex,CartesianIndices} diff --git a/src/LookupArrays/lookup_arrays.jl b/src/LookupArrays/lookup_arrays.jl index bce283374..ded3f9e85 100644 --- a/src/LookupArrays/lookup_arrays.jl +++ b/src/LookupArrays/lookup_arrays.jl @@ -6,7 +6,7 @@ Types defining the behaviour of a lookup index, how it is plotted and how [`Selector`](@ref)s like [`Between`](@ref) work. A `LookupArray` may be [`NoLookup`](@ref) indicating that the index is just the -underlying array axis, [`Categorical`](@ref) for ordered or unordered categories, +underlying array axis, [`Categorical`](@ref) for ordered or unordered categories, or a [`Sampled`](@ref) index for [`Points`](@ref) or [`Intervals`](@ref). 
""" abstract type LookupArray{T,N} <: AbstractArray{T,N} end @@ -14,7 +14,7 @@ abstract type LookupArray{T,N} <: AbstractArray{T,N} end const LookupArrayTuple = Tuple{LookupArray,Vararg{LookupArray}} -span(lookup::LookupArray) = NoSpan() +span(lookup::LookupArray) = NoSpan() sampling(lookup::LookupArray) = NoSampling() dims(::LookupArray) = nothing @@ -107,7 +107,7 @@ order(lookup::Aligned) = lookup.order NoLookup() -A [`LookupArray`](@ref) that is identical to the array axis. +A [`LookupArray`](@ref) that is identical to the array axis. [`Selector`](@ref)s can't be used on this lookup. ## Example @@ -170,9 +170,9 @@ locus(lookup::AbstractSampled) = locus(sampling(lookup)) Base.step(lookup::AbstractSampled) = step(span(lookup)) function Base.:(==)(l1::AbstractSampled, l2::AbstractSampled) - order(l1) == order(l2) && - span(l1) == span(l2) && - sampling(l1) == sampling(l2) && + order(l1) == order(l2) && + span(l1) == span(l2) && + sampling(l1) == sampling(l2) && parent(l1) == parent(l2) end @@ -216,11 +216,30 @@ _bounds(::Center, ::ReverseOrdered, span, lookup) = _bounds(::End, ::ForwardOrdered, span, lookup) = first(lookup) - step(span), last(lookup) _bounds(::End, ::ReverseOrdered, span, lookup) = last(lookup) + step(span), first(lookup) + +const SAMPLED_ARGUMENTS_DOC = """ +- `data`: An `AbstractVector` of index values, matching the length of the corresponding + array axis. +- `order`: [`Order`](@ref) indicating the order of the index, + [`AutoOrder`](@ref) by default, detected from the order of `data` + to be [`ForwardOrdered`](@ref), [`ReverseOrdered`](@ref) or [`Unordered`](@ref). + These can be provided explicitly if they are known and performance is important. +- `span`: indicates the size of intervals or distance between points, and will be set to + [`Regular`](@ref) for `AbstractRange` and [`Irregular`](@ref) for `AbstractArray`, + unless assigned manually. +- `sampling`: is assigned to [`Points`](@ref), unless set to [`Intervals`](@ref) manually. 
+ Using [`Intervals`](@ref) will change the behaviour of `bounds` and `Selector`s + to take account of the full size of the interval, rather than the point alone. +- `metadata`: a `Dict` or `Metadata` wrapper that holds any metadata object adding more + information about the array axis - useful for extending DimensionalData for specific + contexts, like geospatial data in GeoData.jl. By default it is `NoMetadata()`. +""" + """ Sampled <: AbstractSampled Sampled(data::AbstractVector, order::Order, span::Span, sampling::Sampling, metadata) - Sampled(; data=AutoIndex(), order=AutoOrder(), span=AutoSpan(), sampling=Points(), metadata=NoMetadata()) + Sampled(data=AutoIndex(); order=AutoOrder(), span=AutoSpan(), sampling=Points(), metadata=NoMetadata()) A concrete implementation of the [`LookupArray`](@ref) [`AbstractSampled`](@ref). It can be used to represent @@ -230,26 +249,12 @@ A concrete implementation of the [`LookupArray`](@ref) correct `bounds` and [`Selector`](@ref)s for points or intervals of regular, irregular, forward and reverse indexes. -On `AbstractDimArray` construction, `Sampled` lookup is assigned for all lookups of +On `AbstractDimArray` construction, `Sampled` lookup is assigned for all lookups of `AbstractRange` not assigned to [`Categorical`](@ref). ## Arguments -- `data`: An `AbstractVector` of index values, matching the length of the curresponding - array axis. -- `order`: [`Order`](@ref)) indicating the order of the index, - [`AutoOrder`](@ref) by default, detected from the order of `data` - to be [`ForwardOrdered`](@ref), [`ReverseOrdered`](@ref) or [`Unordered`](@ref). - These can be provided explicitly if they are known and performance is important. -- `span`: indicates the size of intervals or distance between points, and will be set to - [`Regular`](@ref) for `AbstractRange` and [`Irregular`](@ref) for `AbstractArray`, - unless assigned manually. -- `sampling`: is assigned to [`Points`](@ref), unless set to [`Intervals`](@ref) manually. 
- Using [`Intervals`](@ref) will change the behaviour of `bounds` and `Selectors`s - to take account for the full size of the interval, rather than the point alone. -- `metadata`: a `Dict` or `Metadata` wrapper that holds any metadata object adding more - information about the array axis - useful for extending DimensionalData for specific - contexts, like geospatial data in GeoData.jl. By default it is `NoMetadata()`. +$SAMPLED_ARGUMENTS_DOC ## Example @@ -284,26 +289,151 @@ struct Sampled{T,A<:AbstractVector{T},O,Sp,Sa,M} <: AbstractSampled{T,O,Sp,Sa} sampling::Sa metadata::M end -function Sampled( - data=AutoIndex(); +function Sampled(data=AutoIndex(); order=AutoOrder(), span=AutoSpan(), sampling=AutoSampling(), metadata=NoMetadata() ) Sampled(data, order, span, sampling, metadata) end -function rebuild(l::Sampled; +function rebuild(l::Sampled; data=parent(l), order=order(l), span=span(l), sampling=sampling(l), metadata=metadata(l), kw... ) Sampled(data, order, span, sampling, metadata) end +# These are used to specialise dispatch: +# When `Cycling`, we need to modify any `Selector`; after that +# we switch to `NotCycling` and use `AbstractSampled` fallbacks. +# We could switch to `Sampled` at that point, but it's less extensible. +abstract type CycleStatus end + +struct Cycling <: CycleStatus end +struct NotCycling <: CycleStatus end + +""" + AbstractCyclic <: AbstractSampled + +An abstract supertype for cyclic lookups. + +These are `AbstractSampled` lookups that are cyclic for `Selectors`. +""" +abstract type AbstractCyclic{X,T,O,Sp,Sa} <: AbstractSampled{T,O,Sp,Sa} end + +cycle(l::AbstractCyclic) = l.cycle +cycle_status(l::AbstractCyclic) = l.cycle_status + +bounds(l::AbstractCyclic{<:Any,T}) where T = (typemin(T), typemax(T)) + +# Indexing with `AbstractArray` must rebuild the lookup as +# `Sampled` as we no longer have the whole cycle. 
+for f in (:getindex, :view, :dotview) + @eval @propagate_inbounds Base.$f(l::AbstractCyclic, i::AbstractArray) = + Sampled(rebuild(l; data=Base.$f(parent(l), i))) +end + + +no_cycling(l::AbstractCyclic) = rebuild(l; cycle_status=NotCycling()) + +function cycle_val(l::AbstractCyclic, val) + cycle_start = ordered_first(l) + # Measure the cycle as `(start + cycle) - start`, like `val - start`: this formulation is necessary for dates + ncycles = (val - cycle_start) ÷ (cycle_start + cycle(l) - cycle_start) + res = val - ncycles * cycle(l) + # Catch precision errors + if (cycle_start + (ncycles + 1) * cycle(l)) <= val + i = 1 + while i < 10000 + if (cycle_start + (ncycles + i) * cycle(l)) > val + return val - (ncycles + i - 1) * cycle(l) + end + i += 1 + end + elseif res < cycle_start + i = 1 + while i < 10000 + res = val - (ncycles - i + 1) * cycle(l) + res >= cycle_start && return res + i += 1 + end + else + return res + end + error("`Cyclic` lookup too innacurate, value not found") +end + + + +""" + Cyclic <: AbstractCyclic + + Cyclic(data; order=AutoOrder(), span=AutoSpan(), sampling=Points(), metadata=NoMetadata(), cycle) + +A `Cyclic` lookup is similar to `Sampled` but out of range `Selectors` [`At`](@ref), +[`Near`](@ref), [`Contains`](@ref) will cycle the values to `typemin` or `typemax` +over the length of `cycle`. [`Where`](@ref) and `..` work as for [`Sampled`](@ref). + +This is useful when we are using mean annual datasets over a real time-span, +or for wrapping longitudes so that `-360` and `360` are the same. + +## Arguments + +$SAMPLED_ARGUMENTS_DOC +- `cycle`: the length of the cycle. This does not have to exactly match the data, + e.g. if the `step` size is `Week(1)` the cycle can be `Year(1)`. + +## Notes + +1. If you use dates and e.g. cycle over a `Year`, every year will have the + same number and spacing of `Week`s and `Day`s as the cycle year. Using `At` may not be reliable + in terms of exact dates, as it will be applied to the specified date plus or minus `n` years. +2. 
Indexing into a `Cyclic` with any `AbstractArray` or `AbstractRange` will return + a [`Sampled`](@ref) as the full cycle is likely no longer available. +3. `..` or `Between` selectors do not work in a cycled way: they work as for [`Sampled`](@ref). + This may change in future to return cycled values, but there are problems with this, such as + leap years breaking correct date cycling of a single year. If you actually need this behaviour, + please make a GitHub issue. +""" +struct Cyclic{X,T,A<:AbstractVector{T},O,Sp,Sa,M,C} <: AbstractCyclic{X,T,O,Sp,Sa} + data::A + order::O + span::Sp + sampling::Sa + metadata::M + cycle::C + cycle_status::X + function Cyclic( + data::A, order::O, span::Sp, sampling::Sa, metadata::M, cycle::C, cycle_status::X + ) where {A<:AbstractVector{T},O,Sp,Sa,M,C,X} where T + _check_ordered_cyclic(order) + new{X,T,A,O,Sp,Sa,M,C}(data, order, span, sampling, metadata, cycle, cycle_status) + end +end +function Cyclic(data=AutoIndex(); + order=AutoOrder(), span=AutoSpan(), + sampling=AutoSampling(), metadata=NoMetadata(), + cycle, # Mandatory keyword, there are too many possible bugs with auto detection +) + cycle_status = Cycling() + Cyclic(data, order, span, sampling, metadata, cycle, cycle_status) +end + +_check_ordered_cyclic(order::Order) = nothing # any other `Order`, e.g. the default `AutoOrder()`, is accepted +_check_ordered_cyclic(order::Unordered) = throw(ArgumentError("Cyclic lookups must be `Ordered`")) + +function rebuild(l::Cyclic; + data=parent(l), order=order(l), span=span(l), sampling=sampling(l), metadata=metadata(l), + cycle=cycle(l), cycle_status=cycle_status(l), kw... +) + Cyclic(data, order, span, sampling, metadata, cycle, cycle_status) +end + """ AbstractCategorical <: Aligned [`LookupArray`](@ref)s where the values are categories. -[`Categorical`](@ref) is the provided concrete implementation. +[`Categorical`](@ref) is the provided concrete implementation. but this can easily be extended - all methods are defined for `AbstractCategorical`. 
All `AbstractCategorical` must provide a `rebuild` @@ -337,7 +467,7 @@ This will be automatically assigned if the index contains `AbstractString`, ## Arguments - `data`: An `AbstractVector` of index values, matching the length of the curresponding - array axis. + array axis. - `order`: [`Order`](@ref)) indicating the order of the index, [`AutoOrder`](@ref) by default, detected from the order of `data` to be `ForwardOrdered`, `ReverseOrdered` or `Unordered`. @@ -372,7 +502,7 @@ function Categorical(data=AutoIndex(); order=AutoOrder(), metadata=NoMetadata()) Categorical(data, order, metadata) end -function rebuild(l::Categorical; +function rebuild(l::Categorical; data=parent(l), order=order(l), metadata=metadata(l), kw... ) Categorical(data, order, metadata) @@ -412,7 +542,7 @@ from CoordinateTransformations.jl may be useful. ## Keyword Arguments -- `metdata`: +- `metdata`: ## Example @@ -441,7 +571,7 @@ function Transformed(f, dim; metadata=NoMetadata()) Transformed(AutoIndex(), f, basetypeof(dim)(), metadata) end -function rebuild(l::Transformed; +function rebuild(l::Transformed; data=parent(l), f=f(l), dim=dim(l), metadata=metadata(l) ) Transformed(data, f, dim, metadata) @@ -597,7 +727,7 @@ function _slicebounds(locus::Center, span::Irregular, l::LookupArray{T}, i::Abst if isrev(order(l)) op(l[first(i)] - l[first(i) - 1], 2) + l[first(i) - 1] else - op(l[first(i) - 1] - l[first(i)], 2) + l[first(i)] + op(l[first(i) - 1] - l[first(i)], 2) + l[first(i)] end end lst = if last(i) >= lastindex(l) @@ -617,7 +747,7 @@ end # TODO what should this do? @inline reducelookup(lookup::Unaligned) = NoLookup(OneTo(1)) # Categories are combined. 
-@inline reducelookup(lookup::Categorical{<:AbstractString}) = +@inline reducelookup(lookup::Categorical{<:AbstractString}) = rebuild(lookup; data=["combined"]) @inline reducelookup(lookup::Categorical) = rebuild(lookup; data=[:combined]) # Sampled is resampled @@ -673,7 +803,7 @@ _mayberange(x, step::Nothing) = [x] @inline function centerval(index::AbstractArray{<:DateTime}, len) f = first(index) l = last(index) - if f <= l + if f <= l return (l - f) / 2 + first(index) else return (f - l) / 2 + last(index) diff --git a/src/LookupArrays/lookup_traits.jl b/src/LookupArrays/lookup_traits.jl index 8778a44f4..a2f7cf1f2 100644 --- a/src/LookupArrays/lookup_traits.jl +++ b/src/LookupArrays/lookup_traits.jl @@ -269,4 +269,3 @@ change the `LookupArray` type without changing the index values. struct AutoIndex <: AbstractVector{Int} end Base.size(::AutoIndex) = (0,) - diff --git a/src/LookupArrays/selector.jl b/src/LookupArrays/selector.jl index 2440a1a6a..72a78f7ee 100644 --- a/src/LookupArrays/selector.jl +++ b/src/LookupArrays/selector.jl @@ -122,6 +122,10 @@ selectindices(l::LookupArray, sel::At{<:AbstractVector}) = _selectvec(l, sel) _selectvec(l, sel) = [selectindices(l, rebuild(sel; val=v)) for v in val(sel)] +function at(lookup::AbstractCyclic{Cycling}, sel::At; kw...) + cycled_sel = rebuild(sel; val=cycle_val(lookup, val(sel))) + return at(no_cycling(lookup), cycled_sel; kw...) +end function at(lookup::NoLookup, sel::At; kw...) 
v = val(sel) r = round(Int, v) @@ -226,6 +230,10 @@ end selectindices(l::LookupArray, sel::Near) = near(l, sel) selectindices(l::LookupArray, sel::Near{<:AbstractVector}) = _selectvec(l, sel) +function near(lookup::AbstractCyclic{Cycling}, sel::Near) + cycled_sel = rebuild(sel; val=cycle_val(lookup, val(sel))) + near(no_cycling(lookup), cycled_sel) +end near(lookup::NoLookup, sel::Near{<:Real}) = max(1, min(round(Int, val(sel)), lastindex(lookup))) function near(lookup::LookupArray, sel::Near) span(lookup) isa Union{Irregular,Explicit} && locus(lookup) isa Union{Start,End} && @@ -306,6 +314,10 @@ end selectindices(l::LookupArray, sel::Contains; kw...) = contains(l, sel) selectindices(l::LookupArray, sel::Contains{<:AbstractVector}) = _selectvec(l, sel) +function contains(lookup::AbstractCyclic{Cycling}, sel::Contains; kw...) + cycled_sel = rebuild(sel; val=cycle_val(lookup, val(sel))) + return contains(no_cycling(lookup), cycled_sel; kw...) +end function contains(l::NoLookup, sel::Contains; kw...) i = Int(val(sel)) i in l || throw(SelectorError(l, i)) @@ -484,6 +496,11 @@ function between(l::NoLookup, sel::Interval) x = intersect(sel, first(axes(l, 1))..last(axes(l, 1))) return ceil(Int, x.left):floor(Int, x.right) end +# function between(l::AbstractCyclic{Cycling}, sel::Interval) +# cycle_val(l, sel.x)..cycle_val(l, sel.x) +# cycled_sel = rebuild(sel; val=) +# near(no_cycling(lookup), cycled_sel; kw...) 
+# end between(l::LookupArray, interval::Interval) = between(sampling(l), l, interval) # This is the main method called above function between(sampling::Sampling, l::LookupArray, interval::Interval) diff --git a/test/lookup.jl b/test/lookup.jl index 134e08b9e..11bb8e9e0 100644 --- a/test/lookup.jl +++ b/test/lookup.jl @@ -245,6 +245,29 @@ end @test_throws ErrorException intervalbounds(dim) end + @testset "Cyclic" begin + vals = -180.0:1:179.0 + l = Cyclic(vals; cycle=360.0, order=ForwardOrdered(), span=Regular(1.0), sampling=Intervals(Start())) + dim = X(l) + @test order(dim) == ForwardOrdered() + @test step(dim) == 1.0 + @test span(dim) == Regular(1.0) + @test sampling(dim) == Intervals(Start()) + @test locus(dim) == Start() + @test bounds(dim) == (-Inf, Inf) + # Indexing with AbstractArray returns Sampled + for f in (getindex, view, Base.dotview) + @test f(l, 1:10) isa Sampled + end + # TODO clarify intervalbounds - we cant return the whole set to typemax, so we return onecycle? + # @test intervalbounds(dim) + dim = X(Cyclic(reverse(vals); cycle=360.0, order=ReverseOrdered(), span=Regular(1.0), sampling=Intervals(Start()))) + @test bounds(dim) == (typemin(Float64), typemax(Float64)) + @test order(dim) == ReverseOrdered() + @test bounds(dim) == (-Inf, Inf) + @test_throws ArgumentError Cyclic(vals; cycle=360, order=Unordered()) + end + end @testset "dims2indices with Transformed" begin diff --git a/test/selector.jl b/test/selector.jl index e1d5ce50a..1b26dacd6 100644 --- a/test/selector.jl +++ b/test/selector.jl @@ -1,6 +1,6 @@ using DimensionalData, Test, Unitful, Combinatorics, Dates, IntervalSets, Extents using DimensionalData.LookupArrays, DimensionalData.Dimensions -using .LookupArrays: between, touches, at, near, contains, bounds, SelectorError +using .LookupArrays: between, touches, at, near, contains, bounds, SelectorError, cycle_val a = [1 2 3 4 5 6 7 8 @@ -1326,7 +1326,67 @@ end end -@testset "NoIndex" begin +@testset "Cyclic lookup" begin + lookups = ( + 
day=Cyclic(DateTime(2001):Day(1):DateTime(2002, 12, 31); cycle=Year(1), order=ForwardOrdered(), span=Regular(Day(1)), sampling=Intervals(Start())), + week=Cyclic(DateTime(2001):Week(1):DateTime(2002, 12, 31); cycle=Year(1), order=ForwardOrdered(), span=Regular(Week(1)), sampling=Intervals(Start())), + month=Cyclic(DateTime(2001):Month(1):DateTime(2002, 12, 31); cycle=Year(1), order=ForwardOrdered(), span=Regular(Month(1)), sampling=Intervals(Start())), + month_month=Cyclic(DateTime(2001):Month(1):DateTime(2002, 1, 31); cycle=Month(1), order=ForwardOrdered(), span=Regular(Month(1)), sampling=Intervals(Start())), + ) + + for l in lookups + # Test exact cycles + @test at(l, At(DateTime(1))) == 1 + @test at(l, At(DateTime(1999))) == 1 + @test at(l, At(DateTime(2000))) == 1 + @test at(l, At(DateTime(2001))) == 1 + @test at(l, At(DateTime(4000))) == 1 + @test near(l, Near(DateTime(1))) == 1 + @test near(l, Near(DateTime(1999))) == 1 + @test near(l, Near(DateTime(2000))) == 1 + @test near(l, Near(DateTime(2001))) == 1 + @test near(l, Near(DateTime(4000))) == 1 + @test contains(l, Contains(DateTime(1))) == 1 + @test contains(l, Contains(DateTime(1999))) == 1 + @test contains(l, Contains(DateTime(2000))) == 1 + @test contains(l, Contains(DateTime(2001))) == 1 + @test contains(l, Contains(DateTime(4000))) == 1 + end + + l = lookups.month + @test at(l, At(DateTime(1, 12))) == 12 + @test at(l, At(DateTime(1999, 12))) == 12 + @test at(l, At(DateTime(2000, 12))) == 12 + @test at(l, At(DateTime(2001, 12))) == 12 + @test at(l, At(DateTime(3000, 12))) == 12 + l = lookups.day + @test at(l, At(DateTime(1, 12, 31))) == 365 + @test at(l, At(DateTime(1999, 12, 31))) == 365 + # This is kinda wrong, as there are 366 days in 2000 + # But our l has 365. 
Leap years would be handled + # properly with a four year cycle + @test at(l, At(DateTime(2000, 12, 31))) == 365 + @test at(l, At(DateTime(2001, 12, 31))) == 365 + @test at(l, At(DateTime(3000, 12, 31))) == 365 + + @testset "Leap years are correct with four year cycles" begin + l = Cyclic(DateTime(2000):Day(1):DateTime(2003, 12, 31); cycle=Year(4), order=ForwardOrdered(), span=Regular(Day(1)), sampling=Intervals(Start())) + @test at(l, At(DateTime(1, 12, 31))) == findfirst(==(DateTime(2001, 12, 31)), l) + @test at(l, At(DateTime(1999, 12, 31))) == findfirst(==(DateTime(1999 + 4, 12, 31)), l) + @test at(l, At(DateTime(2000, 12, 31))) == 366 == findfirst(==(DateTime(2000, 12, 31)), l) + @test at(l, At(DateTime(2007, 12, 31))) == findfirst(==(DateTime(2007 - 4, 12, 31)), l) + @test at(l, At(DateTime(3000, 12, 31))) == 366 == findfirst(==(DateTime(3000 - 250 * 4, 12, 31)), l) + end + + @testset "Cycling works with floats too" begin + l = Cyclic(-180.0:1:179.0; cycle=360.0, order=ForwardOrdered(), span=Regular(1.0), sampling=Intervals(Start())) + @test contains(l, Contains(360)) == 181 + @test contains(l, Contains(-360)) == 181 + @test contains(l, Contains(180)) == 1 + end +end + +@testset "NoLookup" begin l = NoLookup(1:100) @test_throws SelectorError selectindices(l, At(0)) @test_throws SelectorError selectindices(l, At(200))