Skip to content

Commit

Permalink
Merge pull request #2 from ecohealthalliance/refactor-s7
Browse files Browse the repository at this point in the history
Major refactor
  • Loading branch information
Noam Ross authored Nov 28, 2023
2 parents f515e5a + 8b8efa3 commit c936f7a
Show file tree
Hide file tree
Showing 48 changed files with 908 additions and 911 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@
^pkgdown$
^inst/scratch$
^inst/mc$
^inst/cache_timestamp\.rds$
^check$
^artifacts$
8 changes: 1 addition & 7 deletions .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ This outlines how to propose a change to `relic`.
- `relic` is a high-level interface designed for working with git repositories
of data science workflows. It is not intended to be a general-purpose client
for git.
- `relic`'s primary features are for extracting and comparing files and data
from git history and running code within and across git history. Functions
in the package are generally _read-only_ and do not commit or modify history.
- `relic` aims to have relatively few dependencies for its core functions (`git2r`
and low-level packages such as `fs` and `rlang`). For extended functionality,
other packages may be used, but these should live under `Suggests:` and
Expand All @@ -21,14 +18,11 @@ This outlines how to propose a change to `relic`.
- `relic` has specific support for workflows using
[`targets`](https://books.ropensci.org/targets/). Similar functionality
for other workflow managers may be considered in the future, as may high-level
interfaces for dealing with other versioned data such as S3 buckets.
interfaces for dealing with other versioned data.
- `relic` uses `git2r` to interface with git/libgit2. In general `relic` functions
should not call `libgit2` directly nor call `git` via the command line. If
`git2r` does not expose needed functionality in `libgit2`, consider making
a contribution to `git2r`.
- In general `relic` only deals with the local git repository. It does not
interface with remote repositories, nor interact with the APIs of services like
GitLab, GitHub, or gitea.

## Testing

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
use-public-rspm: true
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::sessioninfo, any::rcmdcheck, any::pkgdepends, any::covr, ropensci-review-tools/pkgcheck
extra-packages: any::sessioninfo, any::rcmdcheck, any::pkgdepends, any::covr, any::riskmetric, github::ropensci-review-tools/pkgcheck
needs: check
- name: Run just package tests and record test coverage
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
inst/doc
inst/scratch
inst/cache_timestamp.rds
inst/mc
docs
tests/testthat/*.log
check
artifacts
relic-test
### R ###
# History files
*Rhistory
Expand Down
7 changes: 6 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
linters: linters_with_defaults() # see vignette("lintr")
linters: linters_with_defaults(line_length_linter = NULL, indentation_linter = NULL)
exclusions: list(
"inst/scratch",
"inst/example-repo" = list(undesirable_function_linter = Inf)
)
encoding: "UTF-8"

13 changes: 8 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ Authors@R: c(
person("EcoHealth Alliance", role = c("cph", "fnd"))
)
Description: The 'relic' package provides tools for extracting files and
objects from the history of a git repository. It is a high-level
objects from the revision history, including local and remote git
repositories and S3 buckets. It is a high-level
interface designed to enable comparison of objects in reproducible
research workflows, especially pipelines that use the 'targets'
package.
Expand All @@ -18,26 +19,28 @@ BugReports: https://github.com/ecohealthalliance/relic/issues
Imports:
fs,
git2r,
gh,
rlang
Suggests:
callr,
devtag,
glue,
knitr,
lintr,
minioclient,
paws,
paws.storage,
openssl,
rmarkdown,
rprojroot,
spelling,
styler,
targets,
testthat (>= 3.0.0),
withr
VignetteBuilder:
knitr
Remotes:
cboettig/minioclient#14,
ropensci/git2r,
moodymudskipper/devtag
ropensci/git2r
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
Expand Down
28 changes: 15 additions & 13 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
# Generated by roxygen2: do not edit by hand

export(commits_between)
S3method(extract_relic,default)
S3method(extract_relic,relic_git_blob)
S3method(extract_relic,relic_git_tree)
S3method(extract_relic,relic_github_blob)
S3method(extract_relic,relic_github_tree)
S3method(extract_relic,relic_s3_obj)
export(create_example_repo)
export(dir_ls_version)
export(dir_ls_versions)
export(file_copy_version)
export(file_copy_versions)
export(file_read_version)
export(file_read_versions)
export(relic)
export(get_file_version)
export(relic_cache)
export(relic_cache_clear)
export(tar_exists_version)
export(tar_exists_version_raw)
export(relic_cache_cleanup)
export(relic_cache_cleanup_time)
export(relic_cache_delete)
export(relic_cache_max_age)
export(relic_cache_max_size)
export(relic_cache_regular_cleanup)
export(tar_meta_version)
export(tar_read_raw_version)
export(tar_read_raw_versions)
export(tar_read_version)
export(tar_read_versions)
import(fs)
import(git2r)
importFrom(gh,gh)
importFrom(rlang,abort)
importFrom(rlang,check_installed)
importFrom(rlang,inform)
importFrom(rlang,is_scalar_character)
importFrom(rlang,warn)
165 changes: 165 additions & 0 deletions R/cache.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#' The relic cache
#'
#' @description Files retrieved from local and remote repositories are kept in
#'   the relic cache directory so they do not have to be extracted or
#'   downloaded again. The directory is taken from the environment variable
#'   `RELIC_CACHE_DIR` if it is set, otherwise from
#'   `options("relic.cache.dir")`, and otherwise falls back to the user cache
#'   directory. The cache is cleaned up regularly at package startup; a cleanup
#'   can also be run manually with `relic_cache_cleanup()`, and
#'   `relic_cache_delete()` removes the cache entirely.
#' @export
#' @return The path to the relic cache directory
#' @examples
#' relic_cache()
relic_cache <- function() {
  # The option (or the per-user default) is only the fallback: Sys.getenv()
  # returns it when RELIC_CACHE_DIR is unset.
  fallback_dir <- getOption("relic.cache.dir",
    default = tools::R_user_dir("relic", "cache")
  )
  configured_dir <- Sys.getenv("RELIC_CACHE_DIR", fallback_dir)

  # Make sure the directory exists before handing its path back.
  dir_create(path_tidy(configured_dir))
}

#' @export
#' @rdname relic_cache
relic_cache_delete <- function() {
  # Resolve the cache location first, then remove that directory.
  cache_dir <- relic_cache()
  dir_delete(cache_dir)
}

#' @param max_age The maximum age of files to keep in the cache, as a [difftime][base::difftime()]
#'   object. Files and links last modified longer ago than this will be
#'   deleted. Defaults to `Inf`. Can be set with the environment variable
#'   `RELIC_CACHE_MAX_AGE` or `options("relic.cache.max.age")`, which take
#'   numeric time in days or a string with units, e.g., "1 day" or "2 weeks".
#' @param max_size The maximum size of the cache, as an [fs::fs_bytes()] object
#'   or a value that can be parsed by [fs::fs_bytes()]. Defaults to "20 MB".
#'   Can be set with the environment variable `RELIC_CACHE_MAX_SIZE` or
#'   `options("relic.cache.max.size")`. Cached files will be deleted from
#'   oldest to youngest until the cache size is under this limit.
#' @export
#' @rdname relic_cache
relic_cache_cleanup <- function(max_age = relic_cache_max_age(), max_size = relic_cache_max_size()) {
  # One snapshot of the cache is taken up front. Every later step only deletes
  # paths that no earlier step can have removed, because file_delete() errors
  # on paths that no longer exist.
  entries <- dir_info(relic_cache(), recurse = TRUE, include_dirs = TRUE, all = TRUE)
  is_directory <- entries$type %in% "directory"

  # 1. Expire files and links older than `max_age`. Directories are left to
  #    the empty-directory sweep below: deleting a directory here would also
  #    remove children that are still listed in `entries`.
  cutoff <- Sys.time() - max_age
  expired <- !is_directory & (entries$modification_time < cutoff) %in% TRUE
  file_delete(entries$path[expired])

  # 2. Enforce the size limit. Surviving regular files are ranked newest
  #    first; every file from the point where the running total exceeds the
  #    limit is deleted, so the oldest files go first.
  survivors <- which(entries$type %in% "file" & !expired)
  newest_first <- survivors[order(entries$modification_time[survivors], decreasing = TRUE)]
  running_total <- cumsum(as.numeric(entries$size[newest_first]))
  over_budget <- (running_total > as.numeric(fs_bytes(max_size))) %in% TRUE
  file_delete(entries$path[newest_first[over_budget]])

  # 3. Delete symlinks whose targets no longer exist, and remember which links
  #    are still present for the second pass.
  links <- entries$path[entries$type %in% "symlink" & !expired]
  dangling <- unname(!file_exists(links))
  file_delete(links[dangling])
  links <- links[!dangling]

  # 4. Delete empty directories, longest path first. A child's path is always
  #    longer than its parent's, so a parent is checked only after its
  #    children have had the chance to be removed.
  dirs <- entries$path[is_directory]
  for (cache_subdir in dirs[order(nchar(dirs), decreasing = TRUE)]) {
    if (dir_exists(cache_subdir) && !length(dir_ls(cache_subdir, all = TRUE))) {
      dir_delete(cache_subdir)
    }
  }

  # 5. Removing directories may have broken more links; delete those too.
  file_delete(links[unname(!file_exists(links))])
}

#' @param cleanup_time The time between cache cleanups, as a [difftime][base::difftime()] object.
#'   Defaults to 1 day. Can be set with the environment variable
#'   `RELIC_CACHE_CLEANUP_TIME` or `options("relic.cache.cleanup.time")`, which
#'   take numeric time in days or a string with units, e.g., "1 day" or "2
#'   weeks". If set to "Inf", no cleanup will be performed at startup.
#' @export
#' @rdname relic_cache
relic_cache_regular_cleanup <- function(cleanup_time = relic_cache_cleanup_time()) {
  # An infinite interval disables the scheduled cleanup entirely, even when no
  # timestamp has been recorded yet.
  if (isTRUE(as.numeric(cleanup_time) == Inf)) {
    return(invisible(NULL))
  }

  # The time of the last cleanup is stored next to the installed package.
  cache_timestamp_file <- path(path_package("relic"), "cache_timestamp.rds")
  cleanup_due <- !file_exists(cache_timestamp_file) ||
    readRDS(cache_timestamp_file) < (Sys.time() - cleanup_time)

  if (cleanup_due) {
    relic_cache_cleanup()
    # Only record a new timestamp when a cleanup actually ran. Refreshing it
    # on every call would keep postponing the cleanup for anyone who loads
    # the package more often than `cleanup_time`.
    saveRDS(Sys.time(), cache_timestamp_file)
  }
  invisible(NULL)
}

#' @export
#' @rdname relic_cache
relic_cache_max_size <- function() {
  # The environment variable wins; the option (or "20 MB") is the fallback
  # used when RELIC_CACHE_MAX_SIZE is unset.
  fallback_size <- getOption("relic.cache.max.size", default = "20 MB")
  configured_size <- Sys.getenv("RELIC_CACHE_MAX_SIZE", fallback_size)
  fs_bytes(configured_size)
}

#' @export
#' @rdname relic_cache
relic_cache_max_age <- function() {
  # The environment variable wins; the option (or Inf) is the fallback used
  # when RELIC_CACHE_MAX_AGE is unset.
  fallback_age <- getOption("relic.cache.max.age", default = Inf)
  configured_age <- Sys.getenv("RELIC_CACHE_MAX_AGE", fallback_age)
  parse_age(configured_age)
}

#' @export
#' @rdname relic_cache
relic_cache_cleanup_time <- function() {
  # The environment variable wins; the option (or 1, meaning one day) is the
  # fallback used when RELIC_CACHE_CLEANUP_TIME is unset.
  fallback_interval <- getOption("relic.cache.cleanup.time", default = 1)
  configured_interval <- Sys.getenv("RELIC_CACHE_CLEANUP_TIME", fallback_interval)
  parse_age(configured_interval)
}

# Interpret a configuration value as a logical.
#
# Logical input is returned unchanged and numeric input is TRUE when non-zero.
# A string is matched case-insensitively (ignoring surrounding whitespace)
# against "true"/"t"/"yes"/"y"/"1" and "false"/"f"/"no"/"n"/"0". Anything
# else, including an unrecognised string, is an error.
parse_bool <- function(x) {
  if (is.logical(x)) {
    return(x)
  }
  if (is.numeric(x)) {
    return(x != 0)
  }
  if (is.character(x)) {
    token <- tolower(trimws(x))
    if (token %in% c("true", "t", "yes", "y", "1")) {
      return(TRUE)
    }
    if (token %in% c("false", "f", "no", "n", "0")) {
      return(FALSE)
    }
  }
  # Reached for unsupported types and for strings that match neither list.
  # The value is pasted into the message because abort()'s second positional
  # argument is `class`, not a further message part.
  abort(paste0("Invalid boolean value: ", paste(deparse(x), collapse = " ")))
}


# nolint start: cyclocomp_linter
# Convert a configured age into a difftime.
#
# Accepted input:
#   * NULL, zero-length, NA, "" or infinite values -> an infinite number of days
#   * a difftime                                   -> returned unchanged
#   * a number                                     -> that many days
#   * a string "<number>" or "<number> <unit>"     -> that amount in the unit
#
# The unit defaults to days and is matched case-insensitively. Singular forms
# and unique prefixes of secs/mins/hours/days/weeks are accepted (e.g. "day",
# "week", "hour", "minutes"), because as.difftime() itself only accepts the
# exact plural unit names.
parse_age <- function(x) {
  never <- as.difftime(Inf, units = "days")
  if (is.null(x) || !length(x)) {
    return(never)
  }
  if (inherits(x, "difftime")) {
    return(x)
  }
  if (is.na(x)) {
    return(never)
  }
  if (is.numeric(x)) {
    if (is.infinite(x)) {
      return(never)
    }
    return(as.difftime(x, units = "days"))
  }

  x <- trimws(as.character(x))
  if (!nzchar(x) || x == "Inf") {
    return(never)
  }

  # Split on runs of whitespace so that "2  weeks" parses like "2 weeks"
  parts <- strsplit(x, "\\s+")[[1]]
  amount <- suppressWarnings(as.numeric(parts[1]))
  if (is.na(amount)) {
    abort(paste0("Invalid age: '", x, "'"))
  }

  valid_units <- c("secs", "mins", "hours", "days", "weeks")
  unit <- if (is.na(parts[2])) "days" else tolower(parts[2])
  # "second(s)"/"minute(s)" are not prefixes of "secs"/"mins"; shorten them
  unit <- sub("^(sec|min)(ond|ute)s?$", "\\1s", unit)
  unit <- valid_units[pmatch(unit, valid_units)]
  if (is.na(unit)) {
    abort(paste0(
      "Invalid time unit in age '", x, "'; use one of: ",
      paste(valid_units, collapse = ", ")
    ))
  }
  as.difftime(amount, units = unit)
}
# nolint end

cache_sha <- function() {
  # Create the "sha" directory under the relic cache and return its path
  sha_dir <- dir_create(relic_cache(), "sha")
  path(sha_dir)
}

cache_git <- function() {
  # Create the "git" directory under the relic cache and return its path
  git_dir <- dir_create(relic_cache(), "git")
  path(git_dir)
}

cache_gh <- function() {
  # Create the "gh" directory under the relic cache and return its path
  gh_dir <- dir_create(relic_cache(), "gh")
  path(gh_dir)
}

cache_s3 <- function() {
  # Create the "s3" directory under the relic cache and return its path
  s3_dir <- dir_create(relic_cache(), "s3")
  path(s3_dir)
}


relic_git_cache_path <- function(relic) {
  # Build <cache root>/<relic@commit$sha>/<relic@path>.
  # NOTE(review): this sits directly under the cache root rather than under
  # cache_git() - confirm that is intended.
  cache_root <- relic_cache()
  commit_sha <- relic@commit$sha
  path(cache_root, commit_sha, relic@path)
}
Loading

0 comments on commit c936f7a

Please sign in to comment.