From f7438f39872c7585a309bda3f800ae872ef83a0c Mon Sep 17 00:00:00 2001 From: ijl Date: Thu, 22 Feb 2024 21:21:52 +0000 Subject: [PATCH] yyjson 0eca326, recursion limit --- README.md | 3 + include/yyjson/yyjson.c | 327 ++++++++++++++++++++++++++++++++-------- include/yyjson/yyjson.h | 152 ++++++++++++++----- script/vendor-yyjson | 4 +- test/test_api.py | 38 ++++- 5 files changed, 425 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index c35b8f50..167518a9 100644 --- a/README.md +++ b/README.md @@ -625,6 +625,9 @@ It raises `JSONDecodeError` if given an invalid type or invalid JSON. This includes if the input contains `NaN`, `Infinity`, or `-Infinity`, which the standard library allows, but is not valid JSON. +It raises `JSONDecodeError` if a combination of array or object recurses +1024 levels deep. + `JSONDecodeError` is a subclass of `json.JSONDecodeError` and `ValueError`. This is for compatibility with the standard library. diff --git a/include/yyjson/yyjson.c b/include/yyjson/yyjson.c index 17db9778..b2e2a233 100644 --- a/include/yyjson/yyjson.c +++ b/include/yyjson/yyjson.c @@ -307,6 +307,9 @@ uint32_t yyjson_version(void) { #define YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE (0x10 * sizeof(yyjson_mut_val)) #define YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE (0x1000000 * sizeof(yyjson_mut_val)) +/* The minimum size of the dynamic allocator's chunk. */ +#define YYJSON_ALC_DYN_MIN_SIZE 0x1000 + /* Default value for compile-time options. */ #ifndef YYJSON_DISABLE_READER #define YYJSON_DISABLE_READER 0 @@ -326,8 +329,9 @@ uint32_t yyjson_version(void) { #ifndef YYJSON_DISABLE_UTF8_VALIDATION #define YYJSON_DISABLE_UTF8_VALIDATION 0 #endif - - +#ifndef YYJSON_READER_CONTAINER_RECURSION_LIMIT +#define YYJSON_READER_CONTAINER_RECURSION_LIMIT 1024 +#endif /*============================================================================== * Macros @@ -966,6 +970,15 @@ static const yyjson_alc YYJSON_DEFAULT_ALC = { NULL }; + + +/*============================================================================== + * Null Memory Allocator + * + * This allocator is just a placeholder to ensure that the internal + * malloc/realloc/free function pointers are not null. + *============================================================================*/ + static void *null_malloc(void *ctx, usize size) { return NULL; } @@ -989,29 +1002,37 @@ static const yyjson_alc YYJSON_NULL_ALC = { /*============================================================================== * Pool Memory Allocator - * This is a simple memory allocator that uses linked list memory chunk. - * The following code will be executed only when the library user creates - * this allocator manually. + * + * This allocator is initialized with a fixed-size buffer. + * The buffer is split into multiple memory chunks for memory allocation. *============================================================================*/ -/** chunk header */ +/** memory chunk header */ typedef struct pool_chunk { - usize size; /* chunk memory size (include chunk header) */ - struct pool_chunk *next; + usize size; /* chunk memory size, include chunk header */ + struct pool_chunk *next; /* linked list, nullable */ + /* char mem[]; flexible array member */ } pool_chunk; -/** ctx header */ +/** allocator ctx header */ typedef struct pool_ctx { - usize size; /* total memory size (include ctx header) */ - pool_chunk *free_list; + usize size; /* total memory size, include ctx header */ + pool_chunk *free_list; /* linked list, nullable */ + /* pool_chunk chunks[]; flexible array member */ } pool_ctx; +/** align up the input size to chunk size */ +static_inline void pool_size_align(usize *size) { + *size = size_align_up(*size, sizeof(pool_chunk)) + sizeof(pool_chunk); +} + static void *pool_malloc(void *ctx_ptr, usize size) { + /* assert(size != 0) */ pool_ctx *ctx = (pool_ctx *)ctx_ptr; pool_chunk *next, *prev = NULL, *cur = ctx->free_list; - if (unlikely(size == 0 || size >= ctx->size)) return NULL; - size = size_align_up(size, sizeof(pool_chunk)) + sizeof(pool_chunk); + if (unlikely(size >= ctx->size)) return NULL; + pool_size_align(&size); while (cur) { if (cur->size < size) { @@ -1038,6 +1059,7 @@ static void *pool_malloc(void *ctx_ptr, usize size) { } static void pool_free(void *ctx_ptr, void *ptr) { + /* assert(ptr != NULL) */ pool_ctx *ctx = (pool_ctx *)ctx_ptr; pool_chunk *cur = ((pool_chunk *)ptr) - 1; pool_chunk *prev = NULL, *next = ctx->free_list; @@ -1064,25 +1086,15 @@ static void pool_free(void *ctx_ptr, void *ptr) { static void *pool_realloc(void *ctx_ptr, void *ptr, usize old_size, usize size) { + /* assert(ptr != NULL && size != 0 && old_size < size) */ pool_ctx *ctx = (pool_ctx *)ctx_ptr; pool_chunk *cur = ((pool_chunk *)ptr) - 1, *prev, *next, *tmp; - usize free_size; - void *new_ptr; - - if (unlikely(size == 0 || size >= ctx->size)) return NULL; - size = size_align_up(size, sizeof(pool_chunk)) + sizeof(pool_chunk); - /* reduce size */ - if (unlikely(size <= cur->size)) { - free_size = cur->size - size; - if (free_size >= sizeof(pool_chunk) * 2) { - tmp = (pool_chunk *)(void *)((u8 *)cur + cur->size - free_size); - tmp->size = free_size; - pool_free(ctx_ptr, (void *)(tmp + 1)); - cur->size -= free_size; - } - return ptr; - } + /* check size */ + if (unlikely(size >= ctx->size)) return NULL; + pool_size_align(&old_size); + pool_size_align(&size); + if (unlikely(old_size == size)) return ptr; /* find next and prev chunk */ prev = NULL; @@ -1092,10 +1104,9 @@ static void *pool_realloc(void *ctx_ptr, void *ptr, next = next->next; } - /* merge to higher chunk if they are contiguous */ - if ((u8 *)cur + cur->size == (u8 *)next && - cur->size + next->size >= size) { - free_size = cur->size + next->size - size; + if ((u8 *)cur + cur->size == (u8 *)next && cur->size + next->size >= size) { + /* merge to higher chunk if they are contiguous */ + usize free_size = cur->size + next->size - size; if (free_size > sizeof(pool_chunk) * 2) { tmp = (pool_chunk *)(void *)((u8 *)cur + size); if (prev) prev->next = tmp; @@ -1109,15 +1120,15 @@ static void *pool_realloc(void *ctx_ptr, void *ptr, cur->size += next->size; } return ptr; + } else { + /* fallback to malloc and memcpy */ + void *new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk)); + if (new_ptr) { + memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk)); + pool_free(ctx_ptr, ptr); + } + return new_ptr; } - - /* fallback to malloc and memcpy */ - new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk)); - if (new_ptr) { - memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk)); - pool_free(ctx_ptr, ptr); - } - return new_ptr; } bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { @@ -1147,6 +1158,161 @@ bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { +/*============================================================================== + * Dynamic Memory Allocator + * + * This allocator allocates memory on demand and does not immediately release + * unused memory. Instead, it places the unused memory into a freelist for + * potential reuse in the future. It is only when the entire allocator is + * destroyed that all previously allocated memory is released at once. + *============================================================================*/ + +/** memory chunk header */ +typedef struct dyn_chunk { + usize size; /* chunk size, include header */ + struct dyn_chunk *next; + /* char mem[]; flexible array member */ +} dyn_chunk; + +/** allocator ctx header */ +typedef struct { + dyn_chunk free_list; /* dummy header, sorted from small to large */ + dyn_chunk used_list; /* dummy header */ +} dyn_ctx; + +/** align up the input size to chunk size */ +static_inline bool dyn_size_align(usize *size) { + usize alc_size = *size + sizeof(dyn_chunk); + alc_size = size_align_up(alc_size, YYJSON_ALC_DYN_MIN_SIZE); + if (unlikely(alc_size < *size)) return false; /* overflow */ + *size = alc_size; + return true; +} + +/** remove a chunk from list (the chunk must already be in the list) */ +static_inline void dyn_chunk_list_remove(dyn_chunk *list, dyn_chunk *chunk) { + dyn_chunk *prev = list, *cur; + for (cur = prev->next; cur; cur = cur->next) { + if (cur == chunk) { + prev->next = cur->next; + cur->next = NULL; + return; + } + prev = cur; + } +} + +/** add a chunk to list header (the chunk must not be in the list) */ +static_inline void dyn_chunk_list_add(dyn_chunk *list, dyn_chunk *chunk) { + chunk->next = list->next; + list->next = chunk; +} + +static void *dyn_malloc(void *ctx_ptr, usize size) { + /* assert(size != 0) */ + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *chunk, *prev, *next; + if (unlikely(!dyn_size_align(&size))) return NULL; + + /* freelist is empty, create new chunk */ + if (!ctx->free_list.next) { + chunk = (dyn_chunk *)def.malloc(def.ctx, size); + if (unlikely(!chunk)) return NULL; + chunk->size = size; + chunk->next = NULL; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + + /* find a large enough chunk, or resize the largest chunk */ + prev = &ctx->free_list; + while (true) { + chunk = prev->next; + if (chunk->size >= size) { /* enough size, reuse this chunk */ + prev->next = chunk->next; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + if (!chunk->next) { /* resize the largest chunk */ + chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); + if (unlikely(!chunk)) return NULL; + prev->next = NULL; + chunk->size = size; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + prev = chunk; + } +} + +static void *dyn_realloc(void *ctx_ptr, void *ptr, + usize old_size, usize size) { + /* assert(ptr != NULL && size != 0 && old_size < size) */ + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *prev, *next, *new_chunk; + dyn_chunk *chunk = (dyn_chunk *)ptr - 1; + if (unlikely(!dyn_size_align(&size))) return NULL; + if (chunk->size >= size) return ptr; + + dyn_chunk_list_remove(&ctx->used_list, chunk); + new_chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); + if (likely(new_chunk)) { + new_chunk->size = size; + chunk = new_chunk; + } + dyn_chunk_list_add(&ctx->used_list, chunk); + return new_chunk ? (void *)(new_chunk + 1) : NULL; +} + +static void dyn_free(void *ctx_ptr, void *ptr) { + /* assert(ptr != NULL) */ + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *chunk = (dyn_chunk *)ptr - 1, *prev; + + dyn_chunk_list_remove(&ctx->used_list, chunk); + for (prev = &ctx->free_list; prev; prev = prev->next) { + if (!prev->next || prev->next->size >= chunk->size) { + chunk->next = prev->next; + prev->next = chunk; + break; + } + } +} + +yyjson_alc *yyjson_alc_dyn_new(void) { + const yyjson_alc def = YYJSON_DEFAULT_ALC; + usize hdr_len = sizeof(yyjson_alc) + sizeof(dyn_ctx); + yyjson_alc *alc = (yyjson_alc *)def.malloc(def.ctx, hdr_len); + dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); + if (unlikely(!alc)) return NULL; + alc->malloc = dyn_malloc; + alc->realloc = dyn_realloc; + alc->free = dyn_free; + alc->ctx = alc + 1; + memset(ctx, 0, sizeof(*ctx)); + return alc; +} + +void yyjson_alc_dyn_free(yyjson_alc *alc) { + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); + dyn_chunk *chunk, *next; + if (unlikely(!alc)) return; + for (chunk = ctx->free_list.next; chunk; chunk = next) { + next = chunk->next; + def.free(def.ctx, chunk); + } + for (chunk = ctx->used_list.next; chunk; chunk = next) { + next = chunk->next; + def.free(def.ctx, chunk); + } + def.free(def.ctx, alc); +} + + + /*============================================================================== * JSON document and value *============================================================================*/ @@ -1242,6 +1408,7 @@ bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, size_t count) { void yyjson_mut_doc_free(yyjson_mut_doc *doc) { if (doc) { yyjson_alc alc = doc->alc; + memset(&doc->alc, 0, sizeof(alc)); unsafe_yyjson_str_pool_release(&doc->str_pool, &alc); unsafe_yyjson_val_pool_release(&doc->val_pool, &alc); alc.free(alc.ctx, doc); @@ -1305,7 +1472,6 @@ yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, We copy them to another contiguous memory as mutable values, then reconnect the mutable values with the original relationship. */ - usize i_vals_len; yyjson_mut_val *m_vals, *m_val; yyjson_val *i_val, *i_end; @@ -1375,7 +1541,6 @@ static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc, second to last item, which needs to be linked to the last item to close the circle. */ - yyjson_mut_val *m_val = unsafe_yyjson_mut_val(m_doc, 1); if (unlikely(!m_val)) return NULL; m_val->tag = m_vals->tag; @@ -1540,12 +1705,13 @@ static_inline bool unsafe_yyjson_num_equals(void *lhs, void *rhs) { yyjson_val_uni *runi = &((yyjson_val *)rhs)->uni; yyjson_subtype lt = unsafe_yyjson_get_subtype(lhs); yyjson_subtype rt = unsafe_yyjson_get_subtype(rhs); - if (lt == rt) - return luni->u64 == runi->u64; - if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) + if (lt == rt) return luni->u64 == runi->u64; + if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) { return luni->i64 >= 0 && luni->u64 == runi->u64; - if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) + } + if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) { return runi->i64 >= 0 && luni->u64 == runi->u64; + } return false; } @@ -1571,8 +1737,8 @@ bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { while (len-- > 0) { rhs = yyjson_obj_iter_getn(&iter, lhs->uni.str, unsafe_yyjson_get_len(lhs)); - if (!rhs || !unsafe_yyjson_equals(lhs + 1, rhs)) - return false; + if (!rhs) return false; + if (!unsafe_yyjson_equals(lhs + 1, rhs)) return false; lhs = unsafe_yyjson_get_next(lhs + 1); } } @@ -1626,8 +1792,8 @@ bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { while (len-- > 0) { rhs = yyjson_mut_obj_iter_getn(&iter, lhs->uni.str, unsafe_yyjson_get_len(lhs)); - if (!rhs || !unsafe_yyjson_mut_equals(lhs->next, rhs)) - return false; + if (!rhs) return false; + if (!unsafe_yyjson_mut_equals(lhs->next, rhs)) return false; lhs = lhs->next->next; } } @@ -2506,6 +2672,7 @@ yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, builder = yyjson_mut_obj(doc); if (unlikely(!builder)) return NULL; + memset(&local_orig, 0, sizeof(local_orig)); if (!yyjson_is_obj(orig)) { orig = &local_orig; orig->tag = builder->tag; @@ -2557,6 +2724,7 @@ yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, builder = yyjson_mut_obj(doc); if (unlikely(!builder)) return NULL; + memset(&local_orig, 0, sizeof(local_orig)); if (!yyjson_mut_is_obj(orig)) { orig = &local_orig; orig->tag = builder->tag; @@ -3318,8 +3486,6 @@ static_inline void pow10_table_get_exp(i32 exp10, i32 *exp2) { -#if !YYJSON_DISABLE_READER - /*============================================================================== * JSON Character Matcher *============================================================================*/ @@ -3513,6 +3679,8 @@ static_inline bool digi_is_digit_or_fp(u8 d) { +#if !YYJSON_DISABLE_READER + /*============================================================================== * Hex Character Reader * This function is used by JSON reader to read escaped characters. @@ -5631,6 +5799,8 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, return_err(cur, UNEXPECTED_CHARACTER, "unexpected character"); fail_garbage: return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document"); +fail_recursion: + return_err(cur, RECURSION_DEPTH, "array and object recursion depth exceeded"); #undef return_err } @@ -5687,7 +5857,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, yyjson_val *ctn_parent; /* parent of current container */ yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - + u32 container_depth = 0; /* limit on number of open array and map */ bool raw; /* read number as raw */ bool inv; /* allow invalid unicode */ u8 *raw_end; /* raw end for null-terminator */ @@ -5722,6 +5892,11 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, } arr_begin: + container_depth++; + if (unlikely(container_depth >= YYJSON_READER_CONTAINER_RECURSION_LIMIT)) { + goto fail_recursion; + } + /* save current container */ ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK); @@ -5821,6 +5996,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, goto fail_character; arr_end: + container_depth--; /* get parent container */ ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); @@ -5839,6 +6015,11 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, } obj_begin: + container_depth++; + if (unlikely(container_depth >= YYJSON_READER_CONTAINER_RECURSION_LIMIT)) { + goto fail_recursion; + } + /* push container */ ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK); @@ -5967,6 +6148,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, goto fail_character; obj_end: + container_depth--; /* pop container */ ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); /* point to the next value */ @@ -6018,6 +6200,8 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, return_err(cur, UNEXPECTED_CHARACTER, "unexpected character"); fail_garbage: return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document"); +fail_recursion: + return_err(cur, RECURSION_DEPTH, "array and object recursion depth exceeded"); #undef val_incr #undef return_err @@ -8143,13 +8327,15 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, bool cpy = (enc_table == enc_table_cpy); bool esc = has_write_flag(ESCAPE_UNICODE) != 0; bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; + const usize end_len = 2; /* '\n' and '\0' */ switch (unsafe_yyjson_get_type(val)) { case YYJSON_TYPE_RAW: str_len = unsafe_yyjson_get_len(val); str_ptr = (const u8 *)unsafe_yyjson_get_str(val); check_str_len(str_len); - incr_len(str_len + 1); + incr_len(str_len + end_len); cur = write_raw(cur, str_ptr, str_len); break; @@ -8157,7 +8343,7 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, str_len = unsafe_yyjson_get_len(val); str_ptr = (const u8 *)unsafe_yyjson_get_str(val); check_str_len(str_len); - incr_len(str_len * 6 + 4); + incr_len(str_len * 6 + 2 + end_len); if (likely(cpy) && unsafe_yyjson_get_subtype(val)) { cur = write_string_noesc(cur, str_ptr, str_len); } else { @@ -8167,7 +8353,7 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, break; case YYJSON_TYPE_NUM: - incr_len(32); + incr_len(32 + end_len); cur = write_number(cur, val, flg); if (unlikely(!cur)) goto fail_num; break; @@ -8183,13 +8369,13 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, break; case YYJSON_TYPE_ARR: - incr_len(4); + incr_len(2 + end_len); byte_copy_2(cur, "[]"); cur += 2; break; case YYJSON_TYPE_OBJ: - incr_len(4); + incr_len(2 + end_len); byte_copy_2(cur, "{}"); cur += 2; break; @@ -8198,6 +8384,7 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, goto fail_type; } + if (newline) *cur++ = '\n'; *cur = '\0'; *dat_len = (usize)(cur - hdr); memset(err, 0, sizeof(yyjson_write_err)); @@ -8270,6 +8457,7 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, bool cpy = (enc_table == enc_table_cpy); bool esc = has_write_flag(ESCAPE_UNICODE) != 0; bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; alc_len = root->uni.ofs / sizeof(yyjson_val); alc_len = alc_len * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; @@ -8376,6 +8564,11 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, } doc_end: + if (newline) { + incr_len(2); + *(cur - 1) = '\n'; + cur++; + } *--cur = '\0'; *dat_len = (usize)(cur - hdr); memset(err, 0, sizeof(yyjson_write_err)); @@ -8449,6 +8642,7 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, bool esc = has_write_flag(ESCAPE_UNICODE) != 0; bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; alc_len = root->uni.ofs / sizeof(yyjson_val); alc_len = alc_len * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; @@ -8579,6 +8773,10 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, } doc_end: + if (newline) { + incr_len(2); + *cur++ = '\n'; + } *cur = '\0'; *dat_len = (usize)(cur - hdr); memset(err, 0, sizeof(yyjson_write_err)); @@ -8811,6 +9009,7 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, bool cpy = (enc_table == enc_table_cpy); bool esc = has_write_flag(ESCAPE_UNICODE) != 0; bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx)); @@ -8921,6 +9120,11 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, } doc_end: + if (newline) { + incr_len(2); + *(cur - 1) = '\n'; + cur++; + } *--cur = '\0'; *dat_len = (usize)(cur - hdr); err->code = YYJSON_WRITE_SUCCESS; @@ -8996,6 +9200,7 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, bool esc = has_write_flag(ESCAPE_UNICODE) != 0; bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx)); @@ -9130,6 +9335,10 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, } doc_end: + if (newline) { + incr_len(2); + *cur++ = '\n'; + } *cur = '\0'; *dat_len = (usize)(cur - hdr); err->code = YYJSON_WRITE_SUCCESS; diff --git a/include/yyjson/yyjson.h b/include/yyjson/yyjson.h index 16d13058..bc688e0f 100644 --- a/include/yyjson/yyjson.h +++ b/include/yyjson/yyjson.h @@ -527,16 +527,16 @@ extern "C" { #define YYJSON_VERSION_MAJOR 0 /** The minor version of yyjson. */ -#define YYJSON_VERSION_MINOR 7 +#define YYJSON_VERSION_MINOR 8 /** The patch version of yyjson. */ #define YYJSON_VERSION_PATCH 0 /** The version of yyjson in hex: `(major << 16) | (minor << 8) | (patch)`. */ -#define YYJSON_VERSION_HEX 0x000700 +#define YYJSON_VERSION_HEX 0x000800 /** The version string of yyjson. */ -#define YYJSON_VERSION_STRING "0.7.0" +#define YYJSON_VERSION_STRING "0.8.0" /** The version of yyjson in hex, same as `YYJSON_VERSION_HEX`. */ yyjson_api uint32_t yyjson_version(void); @@ -635,11 +635,11 @@ typedef struct yyjson_alc { function, but the amount of memory required to write a JSON cannot be directly calculated. - This is not a general-purpose allocator. If used to read multiple JSON - documents and only some of them are released, it may cause memory - fragmentation, leading to performance degradation and memory waste. Therefore, - it is recommended to use this allocator only for reading or writing a single - JSON document. + This is not a general-purpose allocator. It is designed to handle a single JSON + data at a time. If it is used for overly complex memory tasks, such as parsing + multiple JSON documents using the same allocator but releasing only a few of + them, it may cause memory fragmentation, resulting in performance degradation + and memory waste. @param alc The allocator to be initialized. If this parameter is NULL, the function will fail and return false. @@ -662,9 +662,31 @@ typedef struct yyjson_alc { yyjson_doc *doc = yyjson_read_opts(json, strlen(json), 0, &alc, NULL); // the memory of `doc` is on the stack @endcode + + @warning This Allocator is not thread-safe. */ yyjson_api bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, size_t size); +/** + A dynamic allocator. + + This allocator has a similar usage to the pool allocator above. However, when + there is not enough memory, this allocator will dynamically request more memory + using libc's `malloc` function, and frees it all at once when it is destroyed. + + @return A new dynamic allocator, or NULL if memory allocation failed. + @note The returned value should be freed with `yyjson_alc_dyn_free()`. + + @warning This Allocator is not thread-safe. + */ +yyjson_api yyjson_alc *yyjson_alc_dyn_new(void); + +/** + Free a dynamic allocator which is created by `yyjson_alc_dyn_new()`. + @param alc The dynamic allocator to be destroyed. + */ +yyjson_api void yyjson_alc_dyn_free(yyjson_alc *alc); + /*============================================================================== @@ -716,7 +738,7 @@ typedef uint32_t yyjson_read_flag; - Report error if double number is infinity. - Report error if string contains invalid UTF-8 character or BOM. - Report error on trailing commas, comments, inf and nan literals. */ -static const yyjson_read_flag YYJSON_READ_NOFLAG = 0 << 0; +static const yyjson_read_flag YYJSON_READ_NOFLAG = 0; /** Read the input data in-situ. This option allows the reader to modify and use input data to store string @@ -809,6 +831,9 @@ static const yyjson_read_code YYJSON_READ_ERROR_FILE_OPEN = 12; /** Failed to read a file. */ static const yyjson_read_code YYJSON_READ_ERROR_FILE_READ = 13; +/** Document exceeded YYJSON_READER_CONTAINER_RECURSION_LIMIT. */ +static const yyjson_read_code YYJSON_READ_ERROR_RECURSION_DEPTH = 14; + /** Error information for JSON reader. */ typedef struct yyjson_read_err { /** Error code, see `yyjson_read_code` for all possible values. */ @@ -1045,7 +1070,7 @@ typedef uint32_t yyjson_write_flag; - Report error on inf or nan number. - Report error on invalid UTF-8 string. - Do not escape unicode or slash. */ -static const yyjson_write_flag YYJSON_WRITE_NOFLAG = 0 << 0; +static const yyjson_write_flag YYJSON_WRITE_NOFLAG = 0; /** Write JSON pretty with 4 space indent. */ static const yyjson_write_flag YYJSON_WRITE_PRETTY = 1 << 0; @@ -1074,6 +1099,10 @@ static const yyjson_write_flag YYJSON_WRITE_ALLOW_INVALID_UNICODE = 1 << 5; This flag will override `YYJSON_WRITE_PRETTY` flag. */ static const yyjson_write_flag YYJSON_WRITE_PRETTY_TWO_SPACES = 1 << 6; +/** Adds a newline character `\n` at the end of the JSON. + This can be helpful for text editors or NDJSON. */ +static const yyjson_write_flag YYJSON_WRITE_NEWLINE_AT_END = 1 << 7; + /** Result code for JSON writer */ @@ -3556,7 +3585,7 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_null(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3566,7 +3595,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_null(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_true(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3576,7 +3605,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_true(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_false(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3586,7 +3615,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_false(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_bool(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3596,7 +3625,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_bool(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_uint(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3606,7 +3635,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_uint(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_sint(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3616,7 +3645,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_sint(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_int(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3626,7 +3655,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_int(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_real(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3636,7 +3665,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_real(yyjson_mut_doc *doc, The `key` and `val` should be null-terminated UTF-8 strings. This function allows duplicated key in one object. - @warning The key/value string are not copied, you should keep these strings + @warning The key/value strings are not copied, you should keep these strings unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_str(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3648,7 +3677,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_str(yyjson_mut_doc *doc, The `len` should be the length of the `val`, in bytes. This function allows duplicated key in one object. - @warning The key/value string are not copied, you should keep these strings + @warning The key/value strings are not copied, you should keep these strings unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_strn(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3660,7 +3689,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_strn(yyjson_mut_doc *doc, The value string is copied. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_strcpy(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3673,18 +3702,44 @@ yyjson_api_inline bool yyjson_mut_obj_add_strcpy(yyjson_mut_doc *doc, The `len` should be the length of the `val`, in bytes. This function allows duplicated key in one object. - @warning The key/value string are not copied, you should keep these strings + @warning The key/value strings are not copied, you should keep these strings unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_strncpy(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *key, const char *val, size_t len); +/** + Creates and adds a new array to the target object. + The `key` should be a null-terminated UTF-8 string. + This function allows duplicated key in one object. + + @warning The key string is not copied, you should keep these strings + unmodified for the lifetime of this JSON document. + @return The new array, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_arr(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *key); + +/** + Creates and adds a new object to the target object. + The `key` should be a null-terminated UTF-8 string. + This function allows duplicated key in one object. + + @warning The key string is not copied, you should keep these strings + unmodified for the lifetime of this JSON document. + @return The new object, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_obj(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *key); + /** Adds a JSON value at the end of the object. The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_val(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -4781,6 +4836,7 @@ yyjson_api_inline size_t yyjson_doc_get_val_count(yyjson_doc *doc) { yyjson_api void yyjson_doc_free(yyjson_doc *doc) { if (doc) { yyjson_alc alc = doc->alc; + memset(&doc->alc, 0, sizeof(alc)); if (doc->str_pool) alc.free(alc.ctx, doc->str_pool); alc.free(alc.ctx, doc); } @@ -6920,6 +6976,22 @@ yyjson_api_inline bool yyjson_mut_obj_add_strncpy(yyjson_mut_doc *doc, }); } +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_arr(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *_key) { + yyjson_mut_val *key = yyjson_mut_str(doc, _key); + yyjson_mut_val *val = yyjson_mut_arr(doc); + return yyjson_mut_obj_add(obj, key, val) ? val : NULL; +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_obj(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *_key) { + yyjson_mut_val *key = yyjson_mut_str(doc, _key); + yyjson_mut_val *val = yyjson_mut_obj(doc); + return yyjson_mut_obj_add(obj, key, val) ? val : NULL; +} + yyjson_api_inline bool yyjson_mut_obj_add_val(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *_key, @@ -7666,33 +7738,39 @@ yyjson_api_inline bool yyjson_ptr_get_bool( } /** - Set provided `value` if the JSON Pointer (RFC 6901) exists and is type uint. - Returns true if value at `ptr` exists and is the correct type, otherwise false. + Set provided `value` if the JSON Pointer (RFC 6901) exists and is an integer + that fits in `uint64_t`. Returns true if successful, otherwise false. */ yyjson_api_inline bool yyjson_ptr_get_uint( yyjson_val *root, const char *ptr, uint64_t *value) { yyjson_val *val = yyjson_ptr_get(root, ptr); - if (value && yyjson_is_uint(val)) { - *value = unsafe_yyjson_get_uint(val); - return true; - } else { - return false; + if (value && val) { + uint64_t ret = val->uni.u64; + if (unsafe_yyjson_is_uint(val) || + (unsafe_yyjson_is_sint(val) && !(ret >> 63))) { + *value = ret; + return true; + } } + return false; } /** - Set provided `value` if the JSON Pointer (RFC 6901) exists and is type sint. - Returns true if value at `ptr` exists and is the correct type, otherwise false. + Set provided `value` if the JSON Pointer (RFC 6901) exists and is an integer + that fits in `int64_t`. Returns true if successful, otherwise false. */ yyjson_api_inline bool yyjson_ptr_get_sint( yyjson_val *root, const char *ptr, int64_t *value) { yyjson_val *val = yyjson_ptr_get(root, ptr); - if (value && yyjson_is_sint(val)) { - *value = unsafe_yyjson_get_sint(val); - return true; - } else { - return false; + if (value && val) { + int64_t ret = val->uni.i64; + if (unsafe_yyjson_is_sint(val) || + (unsafe_yyjson_is_uint(val) && ret >= 0)) { + *value = ret; + return true; + } } + return false; } /** diff --git a/script/vendor-yyjson b/script/vendor-yyjson index 9d755b02..d86f1a4d 100755 --- a/script/vendor-yyjson +++ b/script/vendor-yyjson @@ -2,7 +2,7 @@ set -eou pipefail -yyjson_version="5e3b26d2659287d31e2f8e10f95f95feb7e5ab3a" +yyjson_version="0eca326fe57aeeb866e6f04c9ef9ea9f8343157e" curl -Ls -o include/yyjson/yyjson.c "https://raw.githubusercontent.com/ibireme/yyjson/${yyjson_version}/src/yyjson.c" curl -Ls -o include/yyjson/yyjson.h "https://raw.githubusercontent.com/ibireme/yyjson/${yyjson_version}/src/yyjson.h" @@ -26,3 +26,5 @@ sed -i 's/ if (!err) err = &dummy_err;//g' include/yyjson/yyjson.c sed -i 's/likely(!alc_ptr)/!alc_ptr/g' include/yyjson/yyjson.c sed -i 's/unlikely(read_flag_eq(flg, YYJSON_READ_##_flag))/false/g' include/yyjson/yyjson.c + +git apply include/yyjson/recursion-limit.patch diff --git a/test/test_api.py b/test/test_api.py index 7fbc6e83..ed80c7c3 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -11,6 +11,8 @@ SIMPLE_TYPES = (1, 1.0, -1, None, "str", True, False) +LOADS_RECURSION_LIMIT = 1024 + def default(obj): return str(obj) @@ -50,12 +52,44 @@ def test_loads_type(self): for val in (1, 3.14, [], {}, None): pytest.raises(orjson.JSONDecodeError, orjson.loads, val) - def test_loads_recursion(self): + def test_loads_recursion_partial(self): """ - loads() recursion limit + loads() recursion limit partial """ pytest.raises(orjson.JSONDecodeError, orjson.loads, "[" * (1024 * 1024)) + def test_loads_recursion_valid_limit_array(self): + """ + loads() recursion limit at limit array + """ + n = LOADS_RECURSION_LIMIT + 1 + value = b"[" * n + b"]" * n + pytest.raises(orjson.JSONDecodeError, orjson.loads, value) + + def test_loads_recursion_valid_limit_object(self): + """ + loads() recursion limit at limit object + """ + n = LOADS_RECURSION_LIMIT + value = b'{"key":' * n + b'{"key":true}' + b"}" * n + pytest.raises(orjson.JSONDecodeError, orjson.loads, value) + + def test_loads_recursion_valid_limit_mixed(self): + """ + loads() recursion limit at limit mixed + """ + n = LOADS_RECURSION_LIMIT - 1 + value = b"[" b'{"key":' * n + b'{"key":true}' + b"}" * n + b"]" + pytest.raises(orjson.JSONDecodeError, orjson.loads, value) + + def test_loads_recursion_valid_excessive_array(self): + """ + loads() recursion limit excessively high value + """ + n = 10000000 + value = b"[" * n + b"]" * n + pytest.raises(orjson.JSONDecodeError, orjson.loads, value) + def test_version(self): """ __version__