diff --git a/DESCRIPTION b/DESCRIPTION index 3585598..6195232 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: yyjsonr Type: Package Title: Fast JSON Parser and Generator -Version: 0.1.13 +Version: 0.1.14 Authors@R: c( person("Mike", "Cheng", role = c("aut", "cre", 'cph'), email = "mikefc@coolbutuseless.com"), diff --git a/NEWS.md b/NEWS.md index d4219f8..391ab8e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ +# yyjsonr 0.1.14 2024-01-13 + +* Add `int64 = "double"` option to `opts_read_json()` +* Preparations for CRAN # yyjsonr 0.1.13 2024-01-05 diff --git a/R/json-opts.R b/R/json-opts.R index 919b634..1f83574 100644 --- a/R/json-opts.R +++ b/R/json-opts.R @@ -161,7 +161,8 @@ yyjson_write_flag <- list( #' Create named list of options for parsing R from JSON #' #' @param int64 how to encode large integers which do not fit into R's integer -#' type. 'string' imports them as a character vector. 'bit64' will +#' type. 'string' imports them as a character vector. 'double' will +#' convert the integer to a double precision numeric value. 'bit64' will #' use the 'integer64' type from the 'bit64' package. 
Note that the #' 'integer64' type is a \emph{signed} integer type, and a warning will #' be issued if JSON contains an \emph{unsigned} integer which cannot @@ -207,7 +208,7 @@ opts_read_json <- function( arr_of_objs_to_df = TRUE, str_specials = c('string', 'special'), num_specials = c('special', 'string'), - int64 = c('string', 'bit64'), + int64 = c('string', 'double', 'bit64'), length1_array_asis = FALSE, yyjson_read_flag = 0L ) { diff --git a/man/opts_read_json.Rd b/man/opts_read_json.Rd index b977647..194f817 100644 --- a/man/opts_read_json.Rd +++ b/man/opts_read_json.Rd @@ -11,7 +11,7 @@ opts_read_json( arr_of_objs_to_df = TRUE, str_specials = c("string", "special"), num_specials = c("special", "string"), - int64 = c("string", "bit64"), + int64 = c("string", "double", "bit64"), length1_array_asis = FALSE, yyjson_read_flag = 0L ) @@ -44,7 +44,8 @@ be converted to the \code{'special'} R numeric values \code{NA, Inf, NaN}, or left as a \code{'string'}. Default: 'special'} \item{int64}{how to encode large integers which do not fit into R's integer -type. 'string' imports them as a character vector. 'bit64' will +type. 'string' imports them as a character vector. 'double' will +convert the integer to a double precision numeric value. 'bit64' will use the 'integer64' type from the 'bit64' package. Note that the 'integer64' type is a \emph{signed} integer type, and a warning will be issued if JSON contains an \emph{unsigned} integer which cannot diff --git a/src/R-yyjson-parse.c b/src/R-yyjson-parse.c index 735a33a..06da855 100644 --- a/src/R-yyjson-parse.c +++ b/src/R-yyjson-parse.c @@ -64,7 +64,13 @@ parse_options create_parse_options(SEXP parse_opts_) { opt.length1_array_asis = asLogical(val_); } else if (strcmp(opt_name, "int64") == 0) { const char *val = CHAR(STRING_ELT(val_, 0)); - opt.int64 = strcmp(val, "string") == 0 ? 
INT64_AS_STR : INT64_AS_BIT64; + if (strcmp(val, "double") == 0) { + opt.int64 = INT64_AS_DBL; + } else if (strcmp(val, "bit64") == 0) { + opt.int64 = INT64_AS_BIT64; + } else { + opt.int64 = INT64_AS_STR; + } } else if (strcmp(opt_name, "df_missing_list_elem") == 0) { opt.df_missing_list_elem = val_; } else if (strcmp(opt_name, "yyjson_read_flag") == 0) { @@ -504,7 +510,9 @@ unsigned int update_type_bitset(unsigned int type_bitset, yyjson_val *val, parse { uint64_t tmp = yyjson_get_uint(val); if (tmp > INT32_MAX) { - if (opt->int64 == INT64_AS_BIT64) { + if (opt->int64 == INT64_AS_DBL) { + type_bitset |= VAL_REAL; + } else if (opt->int64 == INT64_AS_BIT64) { // Signed INT32_MAX // Signed INT64_MAX = 2^63-1 = 9223372036854775807 // Signed INT64_MIN = -2^63 = -9223372036854775808 @@ -524,7 +532,9 @@ unsigned int update_type_bitset(unsigned int type_bitset, yyjson_val *val, parse { int64_t tmp = yyjson_get_sint(val); if (tmp < INT32_MIN || tmp > INT32_MAX) { - if (opt->int64 == INT64_AS_BIT64) { + if (opt->int64 == INT64_AS_DBL) { + type_bitset |= VAL_REAL; + } else if (opt->int64 == INT64_AS_BIT64) { type_bitset |= VAL_INT64; } else { type_bitset |= VAL_STR_INT; @@ -1716,7 +1726,17 @@ SEXP json_as_robj(yyjson_val *val, parse_options *opt) { { uint64_t tmp = yyjson_get_uint(val); if (tmp > INT32_MAX) { - if (opt->int64 == INT64_AS_BIT64) { + if (opt->int64 == INT64_AS_STR) { +#if defined(__APPLE__) || defined(_WIN32) + snprintf(buf, 128, "%llu", yyjson_get_uint(val)); +#else + snprintf(buf, 128, "%lu", yyjson_get_uint(val)); +#endif + res_ = PROTECT(mkString(buf)); nprotect++; + } else if (opt->int64 == INT64_AS_DBL) { + double x = json_val_to_double(val, opt); + res_ = PROTECT(ScalarReal(x)); nprotect++; + } else if (opt->int64 == INT64_AS_BIT64) { if (tmp > INT64_MAX) { warning("64bit unsigned integer values exceed bit64::integer64. 
Expect overflow"); } @@ -1725,12 +1745,7 @@ SEXP json_as_robj(yyjson_val *val, parse_options *opt) { ((long long *)(REAL(res_)))[0] = x; setAttrib(res_, R_ClassSymbol, mkString("integer64")); } else { -#if defined(__APPLE__) || defined(_WIN32) - snprintf(buf, 128, "%lld", yyjson_get_sint(val)); -#else - snprintf(buf, 128, "%lu", yyjson_get_uint(val)); -#endif - res_ = mkString(buf); + error("Unhandled opt.bit64 option for YYJSON_SUBTYPE_UINT"); } } else { res_ = PROTECT(ScalarInteger(tmp)); nprotect++; @@ -1741,12 +1756,27 @@ SEXP json_as_robj(yyjson_val *val, parse_options *opt) { { int64_t tmp = yyjson_get_sint(val); if (tmp < INT32_MIN || tmp > INT32_MAX) { + if (opt->int64 == INT64_AS_STR) { #if defined(__APPLE__) || defined(_WIN32) snprintf(buf, 128, "%lld", yyjson_get_sint(val)); #else snprintf(buf, 128, "%ld", yyjson_get_sint(val)); #endif - res_ = mkString(buf); + res_ = PROTECT(mkString(buf)); nprotect++; + } else if (opt->int64 == INT64_AS_DBL) { + double x = json_val_to_double(val, opt); + res_ = PROTECT(ScalarReal(x)); nprotect++; + } else if (opt->int64 == INT64_AS_BIT64) { + if (tmp > INT64_MAX || tmp < INT64_MIN) { + warning("64bit signed integer values exceed bit64::integer64. Expect overflow"); + } + long long x = json_val_to_integer64(val, opt); + res_ = PROTECT(ScalarReal(0)); nprotect++; + ((long long *)(REAL(res_)))[0] = x; + setAttrib(res_, R_ClassSymbol, mkString("integer64")); + } else { + error("Unhandled opt.bit64 option for YYJSON_SUBTYPE_SINT"); + } } else { res_ = PROTECT(ScalarInteger(tmp)); nprotect++; } diff --git a/src/R-yyjson-parse.h b/src/R-yyjson-parse.h index 3eb1f06..4a291ce 100644 --- a/src/R-yyjson-parse.h +++ b/src/R-yyjson-parse.h @@ -35,7 +35,8 @@ // E.g. 
Maybe want to add "INT64_AS_DBL" or "STR_SPECIALS_AS_NULL" //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #define INT64_AS_STR 1 << 0 -#define INT64_AS_BIT64 1 << 1 +#define INT64_AS_DBL 1 << 1 +#define INT64_AS_BIT64 1 << 2 #define STR_SPECIALS_AS_SPECIAL 0 #define STR_SPECIALS_AS_STRING 1 diff --git a/tests/testthat/test-int64-double.R b/tests/testthat/test-int64-double.R new file mode 100644 index 0000000..a84a104 --- /dev/null +++ b/tests/testthat/test-int64-double.R @@ -0,0 +1,21 @@ + + +test_that("unsigned int64 to double works", { + + str <- '[1.0, 4294967296]' + robj <- read_json_str(str, int64 = 'double') + + expect_true(is.double(robj)) + expect_equal(robj, c(1, 4294967296)) +}) + + + +test_that("signed int64 to double works", { + + str <- '[1.0, -4294967296]' + robj <- read_json_str(str, int64 = 'double') + + expect_true(is.double(robj)) + expect_equal(robj, c(1, -4294967296)) +}) diff --git a/vignettes/from_json_options.Rmd b/vignettes/from_json_options.Rmd index 81f92d6..ff65217 100644 --- a/vignettes/from_json_options.Rmd +++ b/vignettes/from_json_options.Rmd @@ -236,6 +236,9 @@ value's representation in R. The possible values for the `int64` option are: * `string` store JSON integer as a string in R +* `double` will store the JSON integer as a double precision numeric. If the + integer is outside the range +/- 2^53, then it may not be stored perfectly in + the double. * `bit64` convert to a 64-bit integer supported by the [`{bit64}`](https://cran.r-project.org/package=bit64) package. 
@@ -248,7 +251,17 @@ suppressPackageStartupMessages( ```{r} str <- '[1, 274877906944]' -read_json_str(str) # default: int64 = 'string' + +# default: int64 = 'string' +# Since result is a mix of types, a list is returned +read_json_str(str) + +# Read large integer as double +robj <- read_json_str(str, int64 = 'double') +class(robj) +robj + +# Read large integer as 'bit64::integer64' type library(bit64) read_json_str(str, int64 = 'bit64') ```