Skip to content

Commit

Permalink
add a cyclic lookup (#565)
Browse files Browse the repository at this point in the history
* add a cyclic lookup

* cleanup printing

* tweaks

* docs tweaks

* test Sampled return value

* add Cyclic to docs

* fix tests

* fix again

* fix tests

* move constructor
  • Loading branch information
rafaqz authored Nov 5, 2023
1 parent 5ae55cb commit ef99985
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 38 deletions.
2 changes: 2 additions & 0 deletions docs/src/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ LookupArrays.LookupArray
LookupArrays.Aligned
LookupArrays.AbstractSampled
LookupArrays.Sampled
LookupArrays.AbstractCyclic
LookupArrays.Cyclic
LookupArrays.AbstractCategorical
LookupArrays.Categorical
LookupArrays.Unaligned
Expand Down
2 changes: 1 addition & 1 deletion src/LookupArrays/LookupArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export AutoStep, AutoBounds, AutoIndex

export LookupArray
export AutoLookup, NoLookup
export Aligned, AbstractSampled, Sampled, AbstractCategorical, Categorical
export Aligned, AbstractSampled, Sampled, AbstractCyclic, Cyclic, AbstractCategorical, Categorical
export Unaligned, Transformed

const StandardIndices = Union{AbstractArray{<:Integer},Colon,Integer,CartesianIndex,CartesianIndices}
Expand Down
198 changes: 164 additions & 34 deletions src/LookupArrays/lookup_arrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ Types defining the behaviour of a lookup index, how it is plotted
and how [`Selector`](@ref)s like [`Between`](@ref) work.
A `LookupArray` may be [`NoLookup`](@ref) indicating that the index is just the
underlying array axis, [`Categorical`](@ref) for ordered or unordered categories,
underlying array axis, [`Categorical`](@ref) for ordered or unordered categories,
or a [`Sampled`](@ref) index for [`Points`](@ref) or [`Intervals`](@ref).
"""
abstract type LookupArray{T,N} <: AbstractArray{T,N} end


const LookupArrayTuple = Tuple{LookupArray,Vararg{LookupArray}}

# Trait fallbacks for a generic `LookupArray`: it reports `NoSpan()`,
# `NoSampling()` and no `dims` unless a subtype defines otherwise.
span(lookup::LookupArray) = NoSpan()
sampling(lookup::LookupArray) = NoSampling()

dims(::LookupArray) = nothing
Expand Down Expand Up @@ -107,7 +107,7 @@ order(lookup::Aligned) = lookup.order
NoLookup()
A [`LookupArray`](@ref) that is identical to the array axis.
A [`LookupArray`](@ref) that is identical to the array axis.
[`Selector`](@ref)s can't be used on this lookup.
## Example
Expand Down Expand Up @@ -170,9 +170,9 @@ locus(lookup::AbstractSampled) = locus(sampling(lookup))
Base.step(lookup::AbstractSampled) = step(span(lookup))

# Two sampled lookups are equal when their `order`, `span` and `sampling`
# traits compare equal and their parent index values compare equal.
function Base.:(==)(l1::AbstractSampled, l2::AbstractSampled)
    return order(l1) == order(l2) &&
        span(l1) == span(l2) &&
        sampling(l1) == sampling(l2) &&
        parent(l1) == parent(l2)
end

Expand Down Expand Up @@ -216,11 +216,30 @@ _bounds(::Center, ::ReverseOrdered, span, lookup) =
# `End` locus: the returned pair extends one `step(span)` past `first(lookup)`
# for forward order, or past `last(lookup)` for reverse order.
function _bounds(::End, ::ForwardOrdered, span, lookup)
    return (first(lookup) - step(span), last(lookup))
end
function _bounds(::End, ::ReverseOrdered, span, lookup)
    return (last(lookup) + step(span), first(lookup))
end


# Shared "Arguments" text, interpolated into the docstrings of the sampled lookups
# so the argument descriptions are written only once.
const SAMPLED_ARGUMENTS_DOC = """
- `data`: An `AbstractVector` of index values, matching the length of the corresponding
    array axis.
- `order`: [`Order`](@ref) indicating the order of the index,
    [`AutoOrder`](@ref) by default, detected from the order of `data`
    to be [`ForwardOrdered`](@ref), [`ReverseOrdered`](@ref) or [`Unordered`](@ref).
    These can be provided explicitly if they are known and performance is important.
- `span`: indicates the size of intervals or distance between points, and will be set to
    [`Regular`](@ref) for `AbstractRange` and [`Irregular`](@ref) for `AbstractArray`,
    unless assigned manually.
- `sampling`: is assigned to [`Points`](@ref), unless set to [`Intervals`](@ref) manually.
    Using [`Intervals`](@ref) will change the behaviour of `bounds` and `Selector`s
    to take account of the full size of the interval, rather than the point alone.
- `metadata`: a `Dict` or `Metadata` wrapper that holds any metadata object adding more
    information about the array axis - useful for extending DimensionalData for specific
    contexts, like geospatial data in GeoData.jl. By default it is `NoMetadata()`.
"""

"""
Sampled <: AbstractSampled
Sampled(data::AbstractVector, order::Order, span::Span, sampling::Sampling, metadata)
Sampled(; data=AutoIndex(), order=AutoOrder(), span=AutoSpan(), sampling=Points(), metadata=NoMetadata())
Sampled(data=AutoIndex(); order=AutoOrder(), span=AutoSpan(), sampling=Points(), metadata=NoMetadata())
A concrete implementation of the [`LookupArray`](@ref)
[`AbstractSampled`](@ref). It can be used to represent
Expand All @@ -230,26 +249,12 @@ A concrete implementation of the [`LookupArray`](@ref)
correct `bounds` and [`Selector`](@ref)s for points or intervals of regular,
irregular, forward and reverse indexes.
On `AbstractDimArray` construction, `Sampled` lookup is assigned for all lookups of
On `AbstractDimArray` construction, `Sampled` lookup is assigned for all lookups of
`AbstractRange` not assigned to [`Categorical`](@ref).
## Arguments
- `data`: An `AbstractVector` of index values, matching the length of the curresponding
array axis.
- `order`: [`Order`](@ref)) indicating the order of the index,
[`AutoOrder`](@ref) by default, detected from the order of `data`
to be [`ForwardOrdered`](@ref), [`ReverseOrdered`](@ref) or [`Unordered`](@ref).
These can be provided explicitly if they are known and performance is important.
- `span`: indicates the size of intervals or distance between points, and will be set to
[`Regular`](@ref) for `AbstractRange` and [`Irregular`](@ref) for `AbstractArray`,
unless assigned manually.
- `sampling`: is assigned to [`Points`](@ref), unless set to [`Intervals`](@ref) manually.
Using [`Intervals`](@ref) will change the behaviour of `bounds` and `Selectors`s
to take account for the full size of the interval, rather than the point alone.
- `metadata`: a `Dict` or `Metadata` wrapper that holds any metadata object adding more
information about the array axis - useful for extending DimensionalData for specific
contexts, like geospatial data in GeoData.jl. By default it is `NoMetadata()`.
$SAMPLED_ARGUMENTS_DOC
## Example
Expand Down Expand Up @@ -284,26 +289,151 @@ struct Sampled{T,A<:AbstractVector{T},O,Sp,Sa,M} <: AbstractSampled{T,O,Sp,Sa}
sampling::Sa
metadata::M
end
# Convenience constructor: `data` is an optional positional argument and every
# other field is a keyword with an `Auto*`/`NoMetadata` placeholder default.
# Forwards to the positional five-argument constructor.
function Sampled(data=AutoIndex();
    order=AutoOrder(), span=AutoSpan(),
    sampling=AutoSampling(), metadata=NoMetadata()
)
    return Sampled(data, order, span, sampling, metadata)
end

# Build a new `Sampled` from `l`, replacing any fields passed as keywords.
# Unrecognised keywords are accepted and ignored (`kw...`).
function rebuild(l::Sampled;
    data=parent(l), order=order(l), span=span(l), sampling=sampling(l), metadata=metadata(l), kw...
)
    return Sampled(data, order, span, sampling, metadata)
end

# These are used to specialise dispatch:
# When `Cycling`, we need to modify any `Selector` first. After that
# we switch to `NotCycling` and use the `AbstractSampled` fallbacks.
# We could switch to `Sampled` at that point, but it's less extensible.
abstract type CycleStatus end

struct Cycling <: CycleStatus end
struct NotCycling <: CycleStatus end

"""
    AbstractCyclic <: AbstractSampled

An abstract supertype for cyclic lookups.

These are `AbstractSampled` lookups that are cyclic for `Selectors`.
"""
abstract type AbstractCyclic{X,T,O,Sp,Sa} <: AbstractSampled{T,O,Sp,Sa} end

# Field accessors: read the `cycle` and `cycle_status` fields of the lookup.
function cycle(l::AbstractCyclic)
    return l.cycle
end
function cycle_status(l::AbstractCyclic)
    return l.cycle_status
end

# The bounds of a cyclic lookup are the extreme values of its element type `T`.
function bounds(l::AbstractCyclic{<:Any,T}) where {T}
    return (typemin(T), typemax(T))
end

# Indexing with an `AbstractArray` must rebuild the lookup as `Sampled`,
# because the result no longer holds the whole cycle.
# One method is generated for each of `getindex`, `view` and `dotview`.
for fname in (:getindex, :view, :dotview)
    @eval @propagate_inbounds function Base.$fname(l::AbstractCyclic, i::AbstractArray)
        return Sampled(rebuild(l; data=Base.$fname(parent(l), i)))
    end
end


# Rebuild `l`, asking for a `NotCycling()` cycle status.
function no_cycling(l::AbstractCyclic)
    return rebuild(l; cycle_status=NotCycling())
end

# Shift `val` by whole multiples of `cycle(l)` so that it falls in the cycle
# starting at `ordered_first(l)`, and return the shifted value.
#
# The number of cycles is estimated with integer division, then corrected one
# cycle at a time (at most 9999 attempts) in case the estimate is off.
# Throws an error if no suitable value is found within those attempts.
function cycle_val(l::AbstractCyclic, val)
    cycle_start = ordered_first(l)
    period = cycle(l)
    # The length of one cycle is written as `start + period - start`
    # rather than `period`: this formulation is necessary for dates.
    ncycles = (val - cycle_start) ÷ (cycle_start + period - cycle_start)
    res = val - ncycles * period
    # Catch precision errors
    if (cycle_start + (ncycles + 1) * period) <= val
        # `val` is still at or past the end of the estimated cycle:
        # try successively larger cycle counts.
        for i in 1:9999
            if (cycle_start + (ncycles + i) * period) > val
                return val - (ncycles + i - 1) * period
            end
        end
    elseif res < cycle_start
        # The shifted value fell before the start of the cycle:
        # try successively smaller cycle counts.
        for i in 1:9999
            res = val - (ncycles - i + 1) * period
            res >= cycle_start && return res
        end
    else
        return res
    end
    error("`Cyclic` lookup too inaccurate, value not found")
end



"""
    Cyclic <: AbstractCyclic

    Cyclic(data; order=AutoOrder(), span=AutoSpan(), sampling=Points(), metadata=NoMetadata(), cycle)

A `Cyclic` lookup is similar to `Sampled`, but out-of-range values passed to the
`Selectors` [`At`](@ref), [`Near`](@ref) and [`Contains`](@ref) are cycled, all the way to
`typemin` or `typemax`, over the length of `cycle`. [`Where`](@ref) and `..` work as
for [`Sampled`](@ref).

This is useful when we are using mean annual datasets over a real time-span,
or for wrapping longitudes so that `-360` and `360` are the same.

## Arguments

$SAMPLED_ARGUMENTS_DOC
- `cycle`: the length of the cycle. This does not have to exactly match the data,
    e.g. when the `step` size is `Week(1)` the cycle can be `Year(1)`.

## Notes

1. If you use dates and e.g. cycle over a `Year`, every year will have the same
    number and spacing of `Week`s and `Day`s as the cycle year. Using `At` may not be
    reliable in terms of exact dates, as it will be applied to the specified date plus
    or minus `n` years.
2. Indexing into a `Cyclic` with any `AbstractArray` or `AbstractRange` will return
    a [`Sampled`](@ref), as the full cycle is likely no longer available.
3. `..` or `Between` selectors do not work in a cycled way: they work as for
    [`Sampled`](@ref). This may change in future to return cycled values, but there are
    problems with this, such as leap years breaking correct date cycling of a single
    year. If you actually need this behaviour, please make a GitHub issue.
"""
struct Cyclic{X,T,A<:AbstractVector{T},O,Sp,Sa,M,C} <: AbstractCyclic{X,T,O,Sp,Sa}
    data::A
    order::O
    span::Sp
    sampling::Sa
    metadata::M
    cycle::C
    cycle_status::X
    # The inner constructor checks `order` before building the object.
    function Cyclic(
        data::A, order::O, span::Sp, sampling::Sa, metadata::M, cycle::C, cycle_status::X
    ) where {T,A<:AbstractVector{T},O,Sp,Sa,M,C,X}
        _check_ordered_cyclic(order)
        return new{X,T,A,O,Sp,Sa,M,C}(
            data, order, span, sampling, metadata, cycle, cycle_status
        )
    end
end
# Keyword constructor. A newly constructed `Cyclic` always starts out `Cycling()`.
function Cyclic(data=AutoIndex();
    order=AutoOrder(), span=AutoSpan(),
    sampling=AutoSampling(), metadata=NoMetadata(),
    cycle, # Mandatory keyword, there are too many possible bugs with auto detection
)
    return Cyclic(data, order, span, sampling, metadata, cycle, Cycling())
end

# Validate the `order` of a `Cyclic` lookup.
# `AutoOrder` is the keyword constructor's default and is accepted as-is;
# presumably it is resolved to a concrete order later - verify against the caller.
# `Unordered` is rejected with an `ArgumentError`.
_check_ordered_cyclic(order::AutoOrder) = nothing
_check_ordered_cyclic(order::Ordered) = nothing
function _check_ordered_cyclic(order::Unordered)
    throw(ArgumentError("Cyclic lookups must be `Ordered`"))
end

# Build a new `Cyclic` from `l`, replacing any fields passed as keywords.
# `cycle_status` must be a real keyword here: `no_cycling` relies on passing
# `cycle_status=NotCycling()`, and without it the name `cycle_status` in the body
# would refer to the accessor function rather than a status value.
# Unrecognised keywords are accepted and ignored (`kw...`).
function rebuild(l::Cyclic;
    data=parent(l), order=order(l), span=span(l), sampling=sampling(l), metadata=metadata(l),
    cycle=cycle(l), cycle_status=cycle_status(l), kw...
)
    return Cyclic(data, order, span, sampling, metadata, cycle, cycle_status)
end

"""
AbstractCategorical <: Aligned
[`LookupArray`](@ref)s where the values are categories.
[`Categorical`](@ref) is the provided concrete implementation.
[`Categorical`](@ref) is the provided concrete implementation.
but this can easily be extended - all methods are defined for `AbstractCategorical`.
All `AbstractCategorical` must provide a `rebuild`
Expand Down Expand Up @@ -337,7 +467,7 @@ This will be automatically assigned if the index contains `AbstractString`,
## Arguments
- `data`: An `AbstractVector` of index values, matching the length of the curresponding
array axis.
array axis.
- `order`: [`Order`](@ref)) indicating the order of the index,
[`AutoOrder`](@ref) by default, detected from the order of `data`
to be `ForwardOrdered`, `ReverseOrdered` or `Unordered`.
Expand Down Expand Up @@ -372,7 +502,7 @@ function Categorical(data=AutoIndex(); order=AutoOrder(), metadata=NoMetadata())
Categorical(data, order, metadata)
end

function rebuild(l::Categorical;
function rebuild(l::Categorical;
data=parent(l), order=order(l), metadata=metadata(l), kw...
)
Categorical(data, order, metadata)
Expand Down Expand Up @@ -412,7 +542,7 @@ from CoordinateTransformations.jl may be useful.
## Keyword Arguments
- `metdata`:
- `metdata`:
## Example
Expand Down Expand Up @@ -441,7 +571,7 @@ function Transformed(f, dim; metadata=NoMetadata())
Transformed(AutoIndex(), f, basetypeof(dim)(), metadata)
end

function rebuild(l::Transformed;
function rebuild(l::Transformed;
data=parent(l), f=f(l), dim=dim(l), metadata=metadata(l)
)
Transformed(data, f, dim, metadata)
Expand Down Expand Up @@ -597,7 +727,7 @@ function _slicebounds(locus::Center, span::Irregular, l::LookupArray{T}, i::Abst
if isrev(order(l))
op(l[first(i)] - l[first(i) - 1], 2) + l[first(i) - 1]
else
op(l[first(i) - 1] - l[first(i)], 2) + l[first(i)]
op(l[first(i) - 1] - l[first(i)], 2) + l[first(i)]
end
end
lst = if last(i) >= lastindex(l)
Expand All @@ -617,7 +747,7 @@ end
# TODO what should this do?
@inline reducelookup(lookup::Unaligned) = NoLookup(OneTo(1))
# Categories are combined into a single placeholder category:
# a `String` for string lookups, otherwise a `Symbol`.
@inline reducelookup(lookup::Categorical{<:AbstractString}) =
    rebuild(lookup; data=["combined"])
@inline reducelookup(lookup::Categorical) = rebuild(lookup; data=[:combined])
# Sampled is resampled
Expand Down Expand Up @@ -673,7 +803,7 @@ _mayberange(x, step::Nothing) = [x]
@inline function centerval(index::AbstractArray{<:DateTime}, len)
f = first(index)
l = last(index)
if f <= l
if f <= l
return (l - f) / 2 + first(index)
else
return (f - l) / 2 + last(index)
Expand Down
1 change: 0 additions & 1 deletion src/LookupArrays/lookup_traits.jl
Original file line number Diff line number Diff line change
Expand Up @@ -269,4 +269,3 @@ change the `LookupArray` type without changing the index values.
struct AutoIndex <: AbstractVector{Int} end

# An `AutoIndex` always reports a single dimension of length zero.
function Base.size(::AutoIndex)
    return (0,)
end

17 changes: 17 additions & 0 deletions src/LookupArrays/selector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ selectindices(l::LookupArray, sel::At{<:AbstractVector}) = _selectvec(l, sel)

# Apply the selector to each value it wraps, one at a time, collecting the results.
function _selectvec(l, sel)
    return [selectindices(l, rebuild(sel; val=v)) for v in val(sel)]
end

# `At` on a cycling lookup: cycle the selector value first, then
# defer to the regular `at` method on the non-cycling lookup.
function at(lookup::AbstractCyclic{Cycling}, sel::At; kw...)
    wrapped = cycle_val(lookup, val(sel))
    return at(no_cycling(lookup), rebuild(sel; val=wrapped); kw...)
end
function at(lookup::NoLookup, sel::At; kw...)
v = val(sel)
r = round(Int, v)
Expand Down Expand Up @@ -226,6 +230,10 @@ end
# `Near` selectors: a single value goes to `near`, a vector of values is
# handled element by element.
function selectindices(l::LookupArray, sel::Near)
    return near(l, sel)
end
function selectindices(l::LookupArray, sel::Near{<:AbstractVector})
    return _selectvec(l, sel)
end

# `Near` on a cycling lookup: cycle the selector value first, then
# defer to the regular `near` method on the non-cycling lookup.
function near(lookup::AbstractCyclic{Cycling}, sel::Near)
    wrapped = cycle_val(lookup, val(sel))
    return near(no_cycling(lookup), rebuild(sel; val=wrapped))
end
# For `NoLookup` the value is rounded to an integer index, capped at
# `lastindex(lookup)` and then raised to at least 1.
function near(lookup::NoLookup, sel::Near{<:Real})
    nearest = round(Int, val(sel))
    return max(1, min(nearest, lastindex(lookup)))
end
function near(lookup::LookupArray, sel::Near)
span(lookup) isa Union{Irregular,Explicit} && locus(lookup) isa Union{Start,End} &&
Expand Down Expand Up @@ -306,6 +314,10 @@ end
# `Contains` selectors: a single value goes to `contains` (keywords are accepted
# but not forwarded), a vector of values is handled element by element.
function selectindices(l::LookupArray, sel::Contains; kw...)
    return contains(l, sel)
end
function selectindices(l::LookupArray, sel::Contains{<:AbstractVector})
    return _selectvec(l, sel)
end

# `Contains` on a cycling lookup: cycle the selector value first, then
# defer to the regular `contains` method on the non-cycling lookup.
function contains(lookup::AbstractCyclic{Cycling}, sel::Contains; kw...)
    wrapped = cycle_val(lookup, val(sel))
    return contains(no_cycling(lookup), rebuild(sel; val=wrapped); kw...)
end
function contains(l::NoLookup, sel::Contains; kw...)
i = Int(val(sel))
i in l || throw(SelectorError(l, i))
Expand Down Expand Up @@ -484,6 +496,11 @@ function between(l::NoLookup, sel::Interval)
x = intersect(sel, first(axes(l, 1))..last(axes(l, 1)))
return ceil(Int, x.left):floor(Int, x.right)
end
# function between(l::AbstractCyclic{Cycling}, sel::Interval)
# cycle_val(l, sel.x)..cycle_val(l, sel.x)
# cycled_sel = rebuild(sel; val=)
# near(no_cycling(lookup), cycled_sel; kw...)
# end
# Entry point: dispatch on the sampling trait of the lookup.
function between(l::LookupArray, interval::Interval)
    return between(sampling(l), l, interval)
end
# This is the main method called above
function between(sampling::Sampling, l::LookupArray, interval::Interval)
Expand Down
Loading

0 comments on commit ef99985

Please sign in to comment.