Skip to content

Commit

Permalink
v0.1.14 option to read large integers as doubles
Browse files Browse the repository at this point in the history
  • Loading branch information
coolbutuseless committed Jan 13, 2024
1 parent 268bc2c commit 80a439c
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: yyjsonr
Type: Package
Title: Fast JSON Parser and Generator
Version: 0.1.13
Version: 0.1.14
Authors@R: c(
person("Mike", "Cheng", role = c("aut", "cre", 'cph'),
email = "[email protected]"),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@

# yyjsonr 0.1.14 2024-01-13

* Add `int64 = "double"` option to `opts_read_json()`
* Preparations for CRAN

# yyjsonr 0.1.13 2024-01-05

Expand Down
5 changes: 3 additions & 2 deletions R/json-opts.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ yyjson_write_flag <- list(
#' Create named list of options for parsing R from JSON
#'
#' @param int64 how to encode large integers which do not fit into R's integer
#' type. 'string' imports them as a character vector. 'bit64' will
#' type. 'string' imports them as a character vector. 'double' will
#' convert the integer to a double precision numeric value. 'bit64' will
#' use the 'integer64' type from the 'bit64' package. Note that the
#' 'integer64' type is a \emph{signed} integer type, and a warning will
#' be issued if JSON contains an \emph{unsigned} integer which cannot
Expand Down Expand Up @@ -207,7 +208,7 @@ opts_read_json <- function(
arr_of_objs_to_df = TRUE,
str_specials = c('string', 'special'),
num_specials = c('special', 'string'),
int64 = c('string', 'bit64'),
int64 = c('string', 'double', 'bit64'),
length1_array_asis = FALSE,
yyjson_read_flag = 0L
) {
Expand Down
5 changes: 3 additions & 2 deletions man/opts_read_json.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 41 additions & 11 deletions src/R-yyjson-parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,13 @@ parse_options create_parse_options(SEXP parse_opts_) {
opt.length1_array_asis = asLogical(val_);
} else if (strcmp(opt_name, "int64") == 0) {
const char *val = CHAR(STRING_ELT(val_, 0));
opt.int64 = strcmp(val, "string") == 0 ? INT64_AS_STR : INT64_AS_BIT64;
if (strcmp(val, "double") == 0) {
opt.int64 = INT64_AS_DBL;
} else if (strcmp(val, "bit64") == 0) {
opt.int64 = INT64_AS_BIT64;
} else {
opt.int64 = INT64_AS_STR;
}
} else if (strcmp(opt_name, "df_missing_list_elem") == 0) {
opt.df_missing_list_elem = val_;
} else if (strcmp(opt_name, "yyjson_read_flag") == 0) {
Expand Down Expand Up @@ -504,7 +510,9 @@ unsigned int update_type_bitset(unsigned int type_bitset, yyjson_val *val, parse
{
uint64_t tmp = yyjson_get_uint(val);
if (tmp > INT32_MAX) {
if (opt->int64 == INT64_AS_BIT64) {
if (opt->int64 == INT64_AS_DBL) {
type_bitset |= VAL_REAL;
} else if (opt->int64 == INT64_AS_BIT64) {
// Signed INT32_MAX
// Signed INT64_MAX = 2^63-1 = 9223372036854775807
// Signed INT64_MIN = -2^63 = -9223372036854775808
Expand All @@ -524,7 +532,9 @@ unsigned int update_type_bitset(unsigned int type_bitset, yyjson_val *val, parse
{
int64_t tmp = yyjson_get_sint(val);
if (tmp < INT32_MIN || tmp > INT32_MAX) {
if (opt->int64 == INT64_AS_BIT64) {
if (opt->int64 == INT64_AS_DBL) {
type_bitset |= VAL_REAL;
} else if (opt->int64 == INT64_AS_BIT64) {
type_bitset |= VAL_INT64;
} else {
type_bitset |= VAL_STR_INT;
Expand Down Expand Up @@ -1716,7 +1726,17 @@ SEXP json_as_robj(yyjson_val *val, parse_options *opt) {
{
uint64_t tmp = yyjson_get_uint(val);
if (tmp > INT32_MAX) {
if (opt->int64 == INT64_AS_BIT64) {
if (opt->int64 == INT64_AS_STR) {
#if defined(__APPLE__) || defined(_WIN32)
snprintf(buf, 128, "%llu", yyjson_get_uint(val));
#else
snprintf(buf, 128, "%lu", yyjson_get_uint(val));
#endif
res_ = PROTECT(mkString(buf)); nprotect++;
} else if (opt->int64 == INT64_AS_DBL) {
double x = json_val_to_double(val, opt);
res_ = PROTECT(ScalarReal(x)); nprotect++;
} else if (opt->int64 == INT64_AS_BIT64) {
if (tmp > INT64_MAX) {
warning("64bit unsigned integer values exceed bit64::integer64. Expect overflow");
}
Expand All @@ -1725,12 +1745,7 @@ SEXP json_as_robj(yyjson_val *val, parse_options *opt) {
((long long *)(REAL(res_)))[0] = x;
setAttrib(res_, R_ClassSymbol, mkString("integer64"));
} else {
#if defined(__APPLE__) || defined(_WIN32)
snprintf(buf, 128, "%lld", yyjson_get_sint(val));
#else
snprintf(buf, 128, "%lu", yyjson_get_uint(val));
#endif
res_ = mkString(buf);
error("Unhandled opt.bit64 option for YYJSON_SUBTYPE_UINT");
}
} else {
res_ = PROTECT(ScalarInteger(tmp)); nprotect++;
Expand All @@ -1741,12 +1756,27 @@ SEXP json_as_robj(yyjson_val *val, parse_options *opt) {
{
int64_t tmp = yyjson_get_sint(val);
if (tmp < INT32_MIN || tmp > INT32_MAX) {
if (opt->int64 == INT64_AS_STR) {
#if defined(__APPLE__) || defined(_WIN32)
snprintf(buf, 128, "%lld", yyjson_get_sint(val));
#else
snprintf(buf, 128, "%ld", yyjson_get_sint(val));
#endif
res_ = mkString(buf);
res_ = PROTECT(mkString(buf)); nprotect++;
} else if (opt->int64 == INT64_AS_DBL) {
double x = json_val_to_double(val, opt);
res_ = PROTECT(ScalarReal(x)); nprotect++;
} else if (opt->int64 == INT64_AS_BIT64) {
if (tmp > INT64_MAX || tmp < INT64_MIN) {
warning("64bit signed integer values exceed bit64::integer64. Expect overflow");
}
long long x = json_val_to_integer64(val, opt);
res_ = PROTECT(ScalarReal(0)); nprotect++;
((long long *)(REAL(res_)))[0] = x;
setAttrib(res_, R_ClassSymbol, mkString("integer64"));
} else {
error("Unhandled opt.bit64 option for YYJSON_SUBTYPE_SINT");
}
} else {
res_ = PROTECT(ScalarInteger(tmp)); nprotect++;
}
Expand Down
3 changes: 2 additions & 1 deletion src/R-yyjson-parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
// E.g. Maybe want to add "INT64_AS_DBL" or "STR_SPECIALS_AS_NULL"
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#define INT64_AS_STR 1 << 0
#define INT64_AS_BIT64 1 << 1
#define INT64_AS_DBL 1 << 1
#define INT64_AS_BIT64 1 << 2

#define STR_SPECIALS_AS_SPECIAL 0
#define STR_SPECIALS_AS_STRING 1
Expand Down
21 changes: 21 additions & 0 deletions tests/testthat/test-int64-double.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@


test_that("unsigned int64 to double works", {
  # 2^32 exceeds INT32_MAX, so the parser must take the large-integer path;
  # with int64 = "double" the whole array should come back as a plain
  # double vector rather than a list or character vector.
  json <- "[1.0, 4294967296]"
  parsed <- read_json_str(json, int64 = "double")

  expect_true(is.double(parsed))
  expect_equal(parsed, c(1, 2^32))
})



test_that("signed int64 to double works", {
  # -2^32 is below INT32_MIN, so the parser must take the signed
  # large-integer path; with int64 = "double" the whole array should come
  # back as a plain double vector.
  json <- "[1.0, -4294967296]"
  parsed <- read_json_str(json, int64 = "double")

  expect_true(is.double(parsed))
  expect_equal(parsed, c(1, -2^32))
})
15 changes: 14 additions & 1 deletion vignettes/from_json_options.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ value's representation in R.
The possible values for the `int64` option are:

* `string` store JSON integer as a string in R
* `double` will store the JSON integer as a double precision numeric. If the
integer is outside the range +/- 2^53, then it may not be stored perfectly in
the double.
* `bit64` convert to a 64-bit integer supported by the [`{bit64}`](https://cran.r-project.org/package=bit64)
package.

Expand All @@ -248,7 +251,17 @@ suppressPackageStartupMessages(

```{r}
str <- '[1, 274877906944]'
read_json_str(str) # default: int64 = 'string'
# default: int64 = 'string'
# Since result is a mix of types, a list is returned
read_json_str(str)
# Read large integer as double
robj <- read_json_str(str, int64 = 'double')
class(robj)
robj
# Read large integer as 'bit64::integer64' type
library(bit64)
read_json_str(str, int64 = 'bit64')
```
Expand Down

0 comments on commit 80a439c

Please sign in to comment.