Skip to content

Commit

Permalink
allow DiskArrays.cache to operate on the parent of a dimarray. (#849
Browse files Browse the repository at this point in the history
)

* Add a DiskArrays extension to DimensionalData

* Implement `cache`, `haschunks`, `eachchunk` for DimArrays

Basically everything forwards to the parent array, and `cache` rebuilds the DimArray with the cached parent.

* Add tests

* Fix tests

* Add docs

* Update ext/DimensionalDataDiskArraysExt.jl

Co-authored-by: Rafael Schouten <[email protected]>

* Update ext/DimensionalDataDiskArraysExt.jl

* Update ext/DimensionalDataDiskArraysExt.jl

---------

Co-authored-by: Rafael Schouten <[email protected]>
  • Loading branch information
asinghvi17 and rafaqz authored Nov 9, 2024
1 parent b39c4fc commit b73b6ee
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 5 deletions.
8 changes: 6 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
[weakdeps]
AlgebraOfGraphics = "cbdf2221-f076-402e-a563-3d30da359d67"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[extensions]
DimensionalDataAlgebraOfGraphicsExt = "AlgebraOfGraphics"
DimensionalDataCategoricalArraysExt = "CategoricalArrays"
DimensionalDataDiskArraysExt = "DiskArrays"
DimensionalDataMakie = "Makie"
DimensionalDataStatsBase = "StatsBase"

Expand All @@ -41,8 +43,8 @@ AlgebraOfGraphics = "0.8"
Aqua = "0.8"
ArrayInterface = "7"
BenchmarkTools = "1"
CategoricalArrays = "0.10"
CairoMakie = "0.10, 0.11, 0.12"
CategoricalArrays = "0.10"
ColorTypes = "0.11"
Combinatorics = "1"
ConstructionBase = "1"
Expand All @@ -51,6 +53,7 @@ DataAPI = "1.16"
DataFrames = "1"
Dates = "1"
Distributions = "0.25"
DiskArrays = "0.3, 0.4"
Documenter = "1"
Extents = "0.1"
GPUArrays = "10"
Expand Down Expand Up @@ -90,6 +93,7 @@ ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
Expand All @@ -107,4 +111,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"

[targets]
test = ["AlgebraOfGraphics", "Aqua", "ArrayInterface", "BenchmarkTools", "CategoricalArrays", "ColorTypes", "Combinatorics", "CoordinateTransformations", "DataFrames", "Distributions", "Documenter", "GPUArrays", "ImageFiltering", "ImageTransformations", "JLArrays", "CairoMakie", "OffsetArrays", "Plots", "Random", "SafeTestsets", "StatsBase", "StatsPlots", "Test", "Unitful"]
test = ["AlgebraOfGraphics", "Aqua", "ArrayInterface", "BenchmarkTools", "CategoricalArrays", "ColorTypes", "Combinatorics", "CoordinateTransformations", "DataFrames", "DiskArrays", "Distributions", "Documenter", "GPUArrays", "ImageFiltering", "ImageTransformations", "JLArrays", "CairoMakie", "OffsetArrays", "Plots", "Random", "SafeTestsets", "StatsBase", "StatsPlots", "Test", "Unitful"]
13 changes: 10 additions & 3 deletions docs/src/diskarrays.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,22 @@
- generators
- zip

as well as caching chunks in RAM via `DiskArrays.cache(dimarray)`.

It is rarely used directly, but is present in most
disk- and cloud-based spatial data packages in Julia, including:
ArchGDAL.jl, NetCDF.jl, Zarr.jl, NCDatasets.jl, GRIBDatasets.jl and CommonDataModel.jl
[ArchGDAL.jl](https://github.com/yeesian/ArchGDAL.jl),
[NetCDF.jl](https://github.com/JuliaGeo/NetCDF.jl),
[Zarr.jl](https://github.com/JuliaIO/Zarr.jl),
[NCDatasets.jl](https://github.com/Alexander-Barth/NCDatasets.jl),
[GRIBDatasets.jl](https://github.com/JuliaGeo/GRIBDatasets.jl) and
[CommonDataModel.jl](https://github.com/JuliaGeo/CommonDataModel.jl).

The combination of DiskArrays.jl and DimensionalData.jl is Julia's answer to
Python's [xarray](https://xarray.dev/). Rasters.jl and YAXArrays.jl are user-facing
Python's [xarray](https://xarray.dev/). [Rasters.jl](https://github.com/rafaqz/Rasters.jl) and [YAXArrays.jl](https://github.com/JuliaDataCubes/YAXArrays.jl) are user-facing
tools building on this combination.


They have no direct dependency relationships, but are intentionally
They have no meaningful direct dependency relationships, but are intentionally
designed to integrate via both adherence to Julia's `AbstractArray`
interface, and by coordination during development of both packages.
19 changes: 19 additions & 0 deletions ext/DimensionalDataDiskArraysExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
DimensionalDataDiskArraysExt
Extend some methods of DiskArrays (`cache`, `haschunks`, `eachchunk`) to work on the parent data of a DimArray; `cache` also accepts stacks.
"""
module DimensionalDataDiskArraysExt

using DimensionalData
import DimensionalData: AbstractBasicDimArray
import DiskArrays

# `cache` for dim arrays and stacks: hand the wrapped data to `DiskArrays.cache`
# through `modify`, passing every keyword argument along untouched.
function DiskArrays.cache(x::Union{AbstractDimStack,AbstractDimArray}; kw...)
    return modify(x) do data
        DiskArrays.cache(data; kw...)
    end
end

# Chunk queries are answered by whatever `parent(da)` returns.
function DiskArrays.haschunks(da::AbstractBasicDimArray)
    return DiskArrays.haschunks(parent(da))
end

function DiskArrays.eachchunk(da::AbstractBasicDimArray)
    return DiskArrays.eachchunk(parent(da))
end

end
18 changes: 18 additions & 0 deletions test/ecosystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using ImageFiltering
using ImageTransformations
using ArrayInterface
using StatsBase
using DiskArrays

using DimensionalData.Lookups

Expand Down Expand Up @@ -84,3 +85,20 @@ end
@test mean(da, weights([0.3,0.3,0.4]); dims=Y) == mean(parent(da), weights([0.3,0.3,0.4]); dims=2)
@test sum(da, weights([0.3,0.3,0.4]); dims=Y) == sum(parent(da), weights([0.3,0.3,0.4]); dims=2)
end

@testset "DiskArrays" begin
    # Build a DimArray whose parent is DiskArrays' chunked test array.
    # NOTE(review): `(10, 10)` is presumably the chunk size - confirm against DiskArrays.TestTypes.
    values = rand(100, 100)
    chunked = DiskArrays.TestTypes.ChunkedDiskArray(values, (10, 10))
    da = DimArray(chunked, (X, Y))

    @testset "cache" begin
        # Caching should keep the DimArray wrapper and swap only the parent.
        cached = DiskArrays.cache(da)
        @test parent(da) isa DiskArrays.TestTypes.ChunkedDiskArray
        @test cached isa DimArray
        @test parent(cached) isa DiskArrays.CachedDiskArray
        @test da == cached
    end

    @testset "chunks" begin
        # Chunk queries on the DimArray must agree with those on its parent.
        @test DiskArrays.haschunks(da) == DiskArrays.haschunks(chunked)
        @test DiskArrays.eachchunk(da) == DiskArrays.eachchunk(chunked)
    end
end

0 comments on commit b73b6ee

Please sign in to comment.