Skip to content

Commit

Permalink
docs: Update readme for release (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
paleolimbot authored May 30, 2024
1 parent 8558d1d commit 6084e0a
Show file tree
Hide file tree
Showing 12 changed files with 374 additions and 42 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
nanoarrow (>= 0.5.0),
wk (>= 0.6.0)
wk (>= 0.9.0)
LinkingTo:
wk
Config/testthat/edition: 3
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ S3method(wk_handle,geoarrow_vctr)
S3method(wk_is_geodesic,geoarrow_vctr)
export(as_geoarrow_array)
export(as_geoarrow_array_stream)
export(as_geoarrow_schema)
export(as_geoarrow_vctr)
export(geoarrow_handle)
export(geoarrow_large_wkb)
Expand All @@ -51,6 +52,7 @@ export(geoarrow_wkb)
export(geoarrow_wkt)
export(geoarrow_writer)
export(infer_geoarrow_schema)
export(is_geoarrow_schema)
export(na_extension_geoarrow)
export(na_extension_large_wkb)
export(na_extension_large_wkt)
Expand Down
102 changes: 81 additions & 21 deletions R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,56 @@ as_geoarrow_array <- function(x, ..., schema = NULL) {
as_geoarrow_array.default <- function(x, ..., schema = NULL) {
if (is.null(schema)) {
schema <- infer_geoarrow_schema(x)
} else {
schema <- nanoarrow::as_nanoarrow_schema(schema)
}

wk::wk_handle(x, geoarrow_writer(schema))
}

#' @export
as_geoarrow_array.nanoarrow_array <- function(x, ..., schema = NULL) {
schema_src <- nanoarrow::infer_nanoarrow_schema(x)
x_schema <- nanoarrow::infer_nanoarrow_schema(x)

if (!is.null(schema)) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
}

# If this is not already a geoarrow array, see if we can interpret it as one
if (!is_geoarrow_schema(x_schema)) {
x_schema <- as_geoarrow_schema(x_schema)

# Try to copy metadata from requested schema if present (e.g., so that
# a CRS or edge type can be assigned)
request_metadata <- schema$metadata[["ARROW:extension:metadata"]]
if (!is.null(request_metadata)) {
x_schema$metadata[["ARROW:extension:metadata"]] <- request_metadata
}

# Reinterpret the array
x <- reinterpret_array(x, x_schema)
}


if (is.null(schema)) {
schema <- infer_geoarrow_schema(x)
return(x)
}

parsed <- geoarrow_schema_parse(schema)
parsed_src <- geoarrow_schema_parse(schema_src)
if (identical(parsed, parsed_src)) {
x
} else {
stream <- geoarrow_kernel_call_scalar(
"as_geoarrow",
nanoarrow::basic_array_stream(list(x), schema = schema_src, validate = FALSE),
list("type" = parsed$id)
)
stream$get_next()
# If the source and request type are the same, return x
x_schema_parsed <- geoarrow_schema_parse(x_schema)
schema_parsed <- geoarrow_schema_parse(schema)
if (identical(x_schema_parsed, schema_parsed)) {
return(x)
}

# Otherwise, let as_geoarrow() sort this out
stream <- geoarrow_kernel_call_scalar(
"as_geoarrow",
nanoarrow::basic_array_stream(list(x), schema = x_schema, validate = FALSE),
list("type" = schema_parsed$id)
)

stream$get_next()
}

#' @export
Expand All @@ -58,6 +84,10 @@ as_geoarrow_array_stream <- function(x, ..., schema = NULL) {

#' @export
as_geoarrow_array_stream.default <- function(x, ..., schema = NULL) {
if (!is.null(schema)) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
}

nanoarrow::basic_array_stream(
list(as_geoarrow_array(x, schema = schema)),
schema = schema
Expand All @@ -66,30 +96,60 @@ as_geoarrow_array_stream.default <- function(x, ..., schema = NULL) {

#' @export
as_geoarrow_array_stream.nanoarrow_array_stream <- function(x, ..., schema = NULL) {
x_schema <- x$get_schema()

if (!is.null(schema)) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
}

# If this is not already a geoarrow array, see if we can interpret it as one
if (!is_geoarrow_schema(x_schema)) {
x_schema <- as_geoarrow_schema(x_schema)

# Try to copy metadata from requested schema if present (e.g., so that
# a CRS or edge type can be assigned)
request_metadata <- schema$metadata[["ARROW:extension:metadata"]]
if (!is.null(request_metadata)) {
x_schema$metadata[["ARROW:extension:metadata"]] <- request_metadata
}

# Reinterpret the array
x <- reinterpret_stream(x, x_schema)
}

# If there is no requested schema, just return the stream (i.e., don't attempt to
# consume a stream to find its optimal output type)
if (is.null(schema)) {
return(x)
}

x_schema <- x$get_schema()
# If the source is the same as the destination, return it
x_schema_parsed <- geoarrow_schema_parse(x_schema)
schema_parsed <- geoarrow_schema_parse(schema)
if (identical(x_schema_parsed, schema_parsed)) {
return(x)
}

collected <- nanoarrow::collect_array_stream(
x,
schema = x_schema,
validate = FALSE
)

geoarrow_kernel_call_scalar(
"as_geoarrow",
nanoarrow::basic_array_stream(collected),
reinterpret_stream(x, x_schema, validate = FALSE),
list("type" = schema_parsed$id)
)
}

reinterpret_array <- function(array, schema, validate = TRUE) {
array2 <- nanoarrow::nanoarrow_allocate_array()
nanoarrow::nanoarrow_pointer_export(array, array2)
nanoarrow::nanoarrow_array_set_schema(array2, schema, validate = validate)
array2
}

reinterpret_stream <- function(stream, schema, validate = TRUE) {
# Eventually we should stream this somehow instead of collecting it all
chunks <- nanoarrow::collect_array_stream(stream, schema = schema, validate = FALSE)
nanoarrow::basic_array_stream(chunks, schema, validate = validate)
}

geoarrow_array_from_buffers <- function(schema, buffers) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
extension_name <- schema$metadata[["ARROW:extension:name"]]
Expand Down
16 changes: 9 additions & 7 deletions R/pkg-nanoarrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,7 @@
# Runs before coverage starts on load
# nocov start
register_geoarrow_extension <- function() {
all_ext_name <- c(
"geoarrow.wkt", "geoarrow.wkb", "geoarrow.point", "geoarrow.linestring",
"geoarrow.polygon", "geoarrow.multipoint", "geoarrow.mutlilinestring",
"geoarrow.multipolygon"
)

for (ext_name in all_ext_name) {
for (ext_name in all_extension_names()) {
nanoarrow::register_nanoarrow_extension(
ext_name,
nanoarrow::nanoarrow_extension_spec(subclass = "geoarrow_extension_spec")
Expand Down Expand Up @@ -50,3 +44,11 @@ as_nanoarrow_array_extension.geoarrow_extension_spec <- function(
convert_array.geoarrow_vctr <- function(array, to, ...) {
as_geoarrow_vctr(array, schema = nanoarrow::as_nanoarrow_schema(to))
}

all_extension_names <- function() {
c(
"geoarrow.wkt", "geoarrow.wkb", "geoarrow.point", "geoarrow.linestring",
"geoarrow.polygon", "geoarrow.multipoint", "geoarrow.mutlilinestring",
"geoarrow.multipolygon"
)
}
3 changes: 3 additions & 0 deletions R/pkg-wk.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ wk_is_geodesic.geoarrow_vctr <- function(x) {
#' @export
as_geoarrow_array.wk_wkt <- function(x, ..., schema = NULL) {
if (!is.null(schema)) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
if (!identical(schema$metadata[["ARROW:extension:name"]], "geoarrow.wkt")) {
return(NextMethod())
}
Expand All @@ -48,6 +49,7 @@ as_geoarrow_array.wk_wkt <- function(x, ..., schema = NULL) {
#' @export
as_geoarrow_array.wk_wkb <- function(x, ..., schema = NULL) {
if (!is.null(schema)) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
if (!identical(schema$metadata[["ARROW:extension:name"]], "geoarrow.wkb")) {
return(NextMethod())
}
Expand All @@ -70,6 +72,7 @@ as_geoarrow_array.wk_wkb <- function(x, ..., schema = NULL) {
#' @export
as_geoarrow_array.wk_xy <- function(x, ..., schema = NULL) {
if (!is.null(schema)) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
if (!identical(schema$metadata[["ARROW:extension:name"]], "geoarrow.point")) {
return(NextMethod())
}
Expand Down
38 changes: 37 additions & 1 deletion R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,22 +202,58 @@ geoarrow_multipolygon <- function(dimensions = "XY",
#'
#' @param schema A [nanoarrow_schema][nanoarrow::as_nanoarrow_schema]
#' @param extension_name An extension name to use if schema is a storage type.
#' @param infer_from_storage Attempt to guess an extension name if schema is not
#' a geoarrow extension type.
#'
#' @return A list of parsed properties
#' @export
#'
#' @examples
#' geoarrow_schema_parse(na_extension_geoarrow("POINT"))
#'
geoarrow_schema_parse <- function(schema, extension_name = NULL) {
geoarrow_schema_parse <- function(schema, extension_name = NULL,
infer_from_storage = FALSE) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
if (!is.null(extension_name)) {
extension_name <- as.character(extension_name)[1]
} else if (infer_from_storage && is.null(schema$metadata[["ARROW:extension:name"]])) {
# Only a few storage types have unambiguous representations
extension_name <- switch(
schema$format,
"z" = ,
"Z" = "geoarrow.wkb",
"u" = ,
"U" = "geoarrow.wkt",
"+s" = "geoarrow.point"
)
}

.Call(geoarrow_c_schema_parse, schema, extension_name)
}

#' @rdname geoarrow_schema_parse
#' @export
is_geoarrow_schema <- function(schema) {
schema <- nanoarrow::as_nanoarrow_schema(schema)
ext <- schema$metadata[["ARROW:extension:name"]]
!is.null(ext) && (ext %in% all_extension_names())
}

#' @rdname geoarrow_schema_parse
#' @export
as_geoarrow_schema <- function(schema) {
schema <- nanoarrow::as_nanoarrow_schema(schema)

# If this is already a geoarrow extension type, we're ready!
if (is_geoarrow_schema(schema)) {
return(schema)
}

# Otherwise, try to infer
parsed <- geoarrow_schema_parse(schema, infer_from_storage = TRUE)
na_extension_geoarrow_internal(parsed$id, NULL, parsed$edge_type)
}

na_extension_geoarrow_internal <- function(type_id, crs, edges) {
metadata <- na_extension_metadata_internal(crs, edges)
schema <- nanoarrow::nanoarrow_allocate_schema()
Expand Down
48 changes: 47 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,54 @@ pak::pak("geoarrow/geoarrow-r")

## Example

This is a basic example which shows you how to solve a common problem:
The geoarrow package implements conversions to/from various geospatial types (e.g., sf, sfc, s2, wk) with various Arrow representations (e.g., arrow, nanoarrow). The most useful conversions are between the **arrow** and **sf** packages, which in most cases allow sf objects to be passed to **arrow** functions directly after `library(geoarrow)` or `requireNamespace("geoarrow")` has been called.

```{r example}
library(geoarrow)
library(arrow, warn.conflicts = FALSE)
library(sf)
nc <- read_sf(system.file("gpkg/nc.gpkg", package = "sf"))
tf <- tempfile(fileext = ".parquet")
nc |>
tibble::as_tibble() |>
write_parquet(tf)
open_dataset(tf) |>
dplyr::filter(startsWith(NAME, "A")) |>
dplyr::select(NAME, geom) |>
st_as_sf()
```

By default, arrow objects are converted to a neutral wrapper around chunked Arrow memory, which in turn implements conversions to most spatial types:

```{r}
df <- read_parquet(tf)
df$geom
st_as_sfc(df$geom)
```

The entry point to creating arrays is `as_geoarrow_vctr()`:

```{r}
as_geoarrow_vctr(c("POINT (0 1)", "POINT (2 3)"))
```

By default these do not attempt to create a new storage type; however, you can request a storage type or infer one from the data:

```{r}
as_geoarrow_vctr(c("POINT (0 1)", "POINT (2 3)"), schema = geoarrow_native("POINT"))
vctr <- as_geoarrow_vctr(c("POINT (0 1)", "POINT (2 3)"))
as_geoarrow_vctr(vctr, schema = infer_geoarrow_schema(vctr))
```

There are a number of files to use as examples at <https://geoarrow.org/data> that can be read with `arrow::read_ipc_file()`:

```{r}
url <- "https://github.com/geoarrow/geoarrow-data/releases/download/v0.1.0/ns-water-basin_point.arrow"
tab <- read_ipc_file(url, as_data_frame = FALSE)
tab$geometry$type
```

Loading

0 comments on commit 6084e0a

Please sign in to comment.