-
Notifications
You must be signed in to change notification settings - Fork 142
/
json.h
1422 lines (1270 loc) · 42.5 KB
/
json.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* This file is a rollup of the json, ptrvec, twine and sanity modules from the
* jsoncvt project into a single self-contained compilation unit, with the
* following modifications:
*
* - The 'sanity' functions have been reimplemented using err(3) and internal
* calls to err() and die() have been changed to use errx(3).
* - All public functions are declared as 'static'.
* - Public functions which are not required by Solo5's elftool are removed from
* compilation with #if 0 blocks and marked as UNUSED.
*
* The following license applies to this file:
*
* == License ==
*
* Copyright ⓒ 2014, 2015 Robert S. Krzaczek.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* “Software”), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* == Other Copyrights ==
*
* While the code presented in *sanity.h* and *sanity.c* is original, it
* is certainly inspired by the excellent book, “The Practice of
* Programming” by Brian W. Kernighan and Rob Pike. Quoting from that
* source:
*
* [quote,'http://cm.bell-labs.com/cm/cs/tpop/[The Practice Of Programming]']
* _____________________________________________________________________
* You may use this code for any purpose, as long as you leave the
* copyright notice and book citation attached. Copyright © 1999 Lucent
* Technologies. All rights reserved. Mon Mar 19 13:59:27 EST 2001
* _____________________________________________________________________
*/
#ifndef MFT_JSON_H
#define MFT_JSON_H
#define _GNU_SOURCE
#include <ctype.h>
#include <err.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define extern static
/* See one of the index files for license and other details. */
#ifndef jsoncvt_json_h
#define jsoncvt_json_h
/** Discriminator for every kind of value this JSON parser produces.
 * Unlike the JSON standard, numbers that have been converted to a
 * native type get their own tags (jint, jreal) next to the unparsed
 * jnumber. */
enum jtypes {
    jnull = 0,  /**< JSON "null". */
    jtrue,      /**< JSON "true". */
    jfalse,     /**< JSON "false". */
    jstring,    /**< A string. */
    jnumber,    /**< A number, still kept in its textual form. */
    jarray,     /**< A vector of values. */
    jobject,    /**< An associative array of named values. */
    jint,       /**< A number parsed into a native integer. */
    jreal,      /**< A number parsed into a long double. */
};
/** A jvalue represents one value found while parsing a JSON document.
 * A value can be terminal, like a string or a number, or it can nest,
 * as with arrays and objects. The value of #d tells which member of
 * the union #u is valid. */
typedef struct jvalue {
/** The discriminator, describing which part of the union below is
 * active. When this is jtrue, jfalse, or jnull, nothing in #u is
 * valid (being unnecessary); all other values correspond to one of
 * the #u members as described below. */
enum jtypes d;
/** To allow for better error reporting by application code after
 * calling jparse(file), we store the line number where this jvalue
 * begins in the file. */
size_t line;
/** Some values have a name associated with them; in a JSON
 * object, for example, the value is assigned to a specific name.
 * When #d is jobject, this string should point to the name of a
 * member (whose value is in #u). For other values of #d, this
 * member should be null. A previous implementation used a
 * separate structure for these pairings, but placing the name
 * inside each value only costs one extra pointer yet simplifies
 * the tree quite a bit for our client. jclear() frees this string. */
char *n;
/** According to #d above, one or none of these is the active value. */
union {
/** When #d is jstring or jnumber, this string is active in
 * the union. It is used for jnumber as well because there is
 * often no need to parse the number into something native, and
 * converting real numbers can lose precision, so conversion is
 * deferred as long as possible. A client that actually wants a
 * parsed value can convert the string, cache the result in the
 * #i or #r members, and change the discriminator to jint or
 * jreal accordingly. See jupdate() as a function the client can
 * call to do just that. */
char *s;
/** When the discriminator is jint, this integer is active. */
long long i;
/** When the discriminator is jreal, this long double is
 * active. */
long double r;
/** When the discriminator is jarray or jobject, this
 * zero-terminated vector of pointers to jvalue is active.
 * The ptrvec routines make building these easy. */
struct jvalue **v;
} u;
} jvalue;
/* Forward declarations for the JSON value API. Note that this file
 * redefines `extern` as `static` before this point, so every function
 * declared here has internal linkage. jdump() is compiled out. */
extern jvalue *jnew(void);
extern jvalue *jclear( jvalue * );
extern void jdel( jvalue * );
extern jvalue *jparse( FILE *fp );
extern jvalue *jupdate( jvalue * );
#if 0 /* UNUSED */
extern int jdump( FILE *fp, const jvalue *j );
#endif
#endif
/* See one of the index files for license and other details. */
#ifndef jsoncvt_ptrvec_h
#define jsoncvt_ptrvec_h
/** ptrvec is just used to make creating pointer-to-pointer lists
 * (like an argv) easy to build. It automatically manages its memory,
 * reallocating when necessary, and so on.
 *
 * Expected usage is something like
 *
 * 1. Obtain a new ptrvec via pvnew(), or initialize one to all zeroes.
 *
 * 2. Use pvadd() to add pointers to the vector. The underlying
 * vector is always null terminated, even while building, so you can
 * access the in-progress vector safely via p.
 *
 * 3. Use pvdup() to create a new void** that is exactly the size
 * needed for the resulting vector, or use pvfinal() below. The
 * elements of the vector are just copied; only the vector itself is
 * allocated anew.
 *
 * 4. Free up any current space via pvclear(), resetting things
 * to "empty" again. Set a hard size via pvsize().
 *
 * 5. If you called pvnew() earlier, call pvdel() to free it. If you
 * just want to free up the memory it uses but not the ptrvec itself,
 * call pvclear(). pvfinal() combines both pvdup() and pvclear(). */
typedef struct ptrvec {
/** A table of pointers to anything living here. The number of
 * actual pointers allocated is tracked in sz, and the number of
 * active pointers is tracked in len. */
void **p;
/** How many of the pointers at p are in use (the terminating null
 * is not counted)? */
size_t len;
/** How many pointers have been allocated at p? */
size_t sz;
} ptrvec;
/* Forward declarations for the ptrvec routines. This file redefines
 * `extern` as `static` before this point, so these have internal
 * linkage; pvnew() and pvdel() are compiled out. */
#if 0 /* UNUSED */
extern ptrvec *pvnew(void);
#endif
extern ptrvec *pvclear( ptrvec * );
extern void **pvfinal( ptrvec * );
#if 0 /* UNUSED */
extern void pvdel( ptrvec * );
#endif
extern void **pvdup( const ptrvec * );
extern ptrvec *pvsize( ptrvec *, size_t );
extern ptrvec *pvensure( ptrvec *, size_t );
extern ptrvec *pvadd( ptrvec *, void * );
#endif
/* See one of the index files for license and other details. */
#ifndef jsoncvt_twine_h
#define jsoncvt_twine_h
/** twine is like a string: it manages memory for a string, and can
 * be used to create new C strings as necessary. It's called twine
 * because it's like a string, but a little thicker; there's usually a
 * spool of it handy, like the memory management it comes with; and
 * it's easy to extract a string from it when you're done.
 *
 * It serves primarily as a C-string builder with memory management.
 * With it, you can build strings incrementally from a variety of
 * sources, without the overhead of length calculations and
 * realloc(3) calls every time. A trailing null is included even in
 * the in-progress string, so that the internal string can be used
 * directly when necessary.
 *
 * Expected usage is something like
 *
 * 1. Obtain a new twine via twnew(), or initialize one to all zeroes.
 *
 * 2. Use the twadd*() and twset*() functions to build up its contents.
 * You can access the in-progress string via the p member at any
 * time. Under the hood, p points to a null-terminated region bigger
 * than necessary to reduce the number of reallocs necessary.
 *
 * 3. Use twdup() to create a new C string that is exactly the size
 * needed for the resulting string, or use twfinal() below.
 *
 * 4. Free up any current space via twclear(), resetting things
 * to "empty" again. Set a hard size via twsize().
 *
 * 5. If you called twnew() earlier, call twdel() to free it. If you
 * just want to free up the memory it uses but not the twine itself,
 * call twclear(). twfinal() combines both twdup() and twclear().
 */
typedef struct twine {
char *p; /**< null terminated C string data */
size_t len; /**< size of the string, not counting null */
size_t sz; /**< size of the underlying buffer */
} twine;
/* Forward declarations for the twine routines. This file redefines
 * `extern` as `static` before this point, so these have internal
 * linkage. Everything inside an `#if 0` is compiled out. */
#if 0 /* UNUSED */
extern twine *twnew(void);
#endif
extern twine *twclear( twine * );
extern char *twfinal( twine * );
#if 0 /* UNUSED */
extern void twdel( twine * );
#endif
extern char *twdup( const twine * );
extern twine *twsize( twine *, size_t );
#if 0 /* UNUSED */
extern twine *twset( twine *, const char *, size_t );
extern twine *twsetz( twine *, const char * );
#endif
#if 0 /* UNUSED */
extern twine *twadd( twine *, const twine * );
#endif
extern twine *twaddc( twine *, char );
extern twine *twaddu( twine *, uint32_t );
#if 0 /* UNUSED */
extern twine *twaddz( twine *, const char * );
#endif
#endif
/* See one of the index files for license and other details. */
#ifndef jsoncvt_sanity_h
#define jsoncvt_sanity_h
/* While this code is original, it is certainly inspired by the
 * excellent book, "The Practice of Programming" by Brian W. Kernighan
 * and Rob Pike.
 *
 * Quoting from the book: "You may use this code for any purpose, as
 * long as you leave the copyright notice and book citation attached.
 * Copyright © 1999 Lucent Technologies. All rights reserved. Mon Mar
 * 19 13:59:27 EST 2001"
 *
 * The declarations below are the allocate-or-exit helpers. This file
 * redefines `extern` as `static` before this point, so they have
 * internal linkage; estrdup() is compiled out. */
extern void *emalloc( size_t );
extern void *erealloc( void *, size_t );
#if 0 /* UNUSED */
extern char *estrdup( const char * );
#endif
#endif
/* See one of the index files for license and other details. */
/** malloc(3) that never hands back a null pointer: returns \a nb
 * freshly allocated bytes, or reports the failure through err(3) and
 * exits with status 1. */
void *
emalloc( size_t nb )
{
    void *mem = malloc( nb );

    if( mem == NULL )
        err( 1, "unable to allocate %zu bytes", nb );
    return mem;
}
/** realloc(3) that never hands back a null pointer: resizes the
 * buffer at \a ptr to \a nb bytes and returns the (possibly moved)
 * buffer. If realloc() returns null, the failure is reported through
 * err(3) and the process exits with status 1. */
void *
erealloc( void *ptr, size_t nb )
{
    void *resized = realloc( ptr, nb );

    if( resized == NULL )
        err( 1, "unable to reallocate %zu bytes", nb );
    return resized;
}
#if 0 /* UNUSED */
/** Duplicate the C string \a s into memory obtained from emalloc(),
 * so allocation failure exits instead of returning null. A null
 * \a s yields a null result. The caller frees the copy. */
char *
estrdup( const char *s )
{
    if( !s )
        return 0;

    char *copy = emalloc( strlen( s ) + 1 );
    return strcpy( copy, s );
}
#endif
/* See one of the index files for license and other details. */
enum {
/** Ptrvecs start out with space for this many pointers. The number
 * is pretty much arbitrary; if you think all of your ptrvecs are
 * going to be extensive, feel free to bump this value up to a
 * bigger initial size to reduce the load on realloc(3). */
pv_initial_size = 8
};
#if 0 /* UNUSED */
/** Allocate a new ptrvec from the heap, initialize it as zero, and
* return a pointer to it. */
ptrvec *
pvnew(void)
{
ptrvec *p = emalloc( sizeof( *p ));
*p = (ptrvec){0};
return p;
}
#endif
/** Release the pointer table owned by \a pv (if any) and reset the
 * ptrvec to its empty state. The ptrvec structure itself is not
 * freed, which makes this the right call at the end of a function
 * that keeps a ptrvec on the stack; heap-based ptrvecs are disposed
 * of with pvdel(). A null \a pv is accepted and returned as is. */
ptrvec *
pvclear( ptrvec *pv )
{
    if( pv ) {
        free( pv->p );          /* free(3) ignores a null pointer */
        pv->p = 0;
        pv->len = 0;
        pv->sz = 0;
    }
    return pv;
}
/** Build a heap-allocated copy of the vector held in \a pv. The copy
 * has room for exactly the active pointers plus the terminating null;
 * only the table is duplicated, not what the pointers refer to. A
 * null \a pv, or one without storage, yields a zero-filled table.
 * The caller owns (and frees) the result. */
void **
pvdup( const ptrvec *pv )
{
    void **copy;

    if( !pv ) {
        /* Nothing to copy: hand back just a terminator. */
        copy = emalloc( sizeof *copy );
        copy[ 0 ] = 0;
        return copy;
    }

    const size_t nb = sizeof( void* ) * ( pv->len + 1 );
    copy = emalloc( nb );
    if( !pv->p )
        memset( copy, 0, nb );
    else
        memcpy( copy, pv->p, nb );
    return copy;
}
/** Finish building: return a right-sized, null-terminated copy of
 * the vector (see pvdup()) and then release the ptrvec's own storage
 * (see pvclear()) so the ptrvec can be reused. The caller owns the
 * returned table. */
void **
pvfinal( ptrvec *pv )
{
    void **result = pvdup( pv );

    pvclear( pv );
    return result;
}
#if 0 /* UNUSED */
/** Return a ptrvec and its pointer table to the heap. Once called,
 * the supplied pointer is <em>no longer valid.</em> The ptrvec is
 * reset by pvclear() before it is freed. A null \a pv is ignored. */
void
pvdel( ptrvec *pv )
{
    if( !pv )
        return;
    pvclear( pv );
    free( pv );
}
#endif
/** Force the supplied ptrvec to have room for exactly \a sz pointers
 * (terminator included).
 *
 * - A size of zero releases the storage, exactly like pvclear().
 * - A null \a pv is returned untouched.
 * - If the current length no longer fits, the vector is truncated to
 *   sz - 1 entries and re-terminated.
 * - When the table is allocated for the first time it is terminated
 *   immediately, so the vector is a valid (empty) null-terminated
 *   list even before the first pvadd().
 *
 * Exits via errx(3) if the byte size of the table would not fit in a
 * size_t, and via erealloc() if memory cannot be obtained. */
ptrvec *
pvsize( ptrvec *pv, size_t sz )
{
    if( !sz )
        return pvclear( pv );
    if( !pv )
        return 0;
    /* Guard the multiplication below against wrapping. */
    if( sz > SIZE_MAX / sizeof( *pv->p ))
        errx( 1, "ptrvec overflow" );

    const bool fresh = !pv->p;
    pv->p = erealloc( pv->p, sz * sizeof( *pv->p ));
    pv->sz = sz;
    if( pv->len >= pv->sz ) {
        pv->len = pv->sz - 1;
        pv->p[ pv->len ] = 0;
    }
    /* realloc(3) of a null pointer returns uninitialised memory;
       make sure an untouched vector still reads as empty. */
    if( fresh )
        pv->p[ 0 ] = 0;
    return pv;
}
/** Ensure that the supplied ptrvec has room for at least \a sz
 * pointers, reallocating if it does not. Unlike pvsize(), pvensure()
 * over-allocates so that repeated additions do not reallocate every
 * time: an empty ptrvec jumps to pv_initial_size, and afterwards the
 * table grows to 150% of its current size or, if that is still too
 * small, 150% of the requested size.
 *
 * Returns \a pv (null if \a pv is null). Exits via errx(3) when the
 * grown size cannot be represented. */
ptrvec *
pvensure( ptrvec *pv, size_t sz )
{
    if( !pv )
        return 0;
    if( !sz || sz <= pv->sz )
        return pv;
    if( !pv->sz && sz <= pv_initial_size )
        return pvsize( pv, pv_initial_size );

    /* x + x/2 gives the same result as x*3/2 for unsigned integers,
       but can only wrap when the result itself does not fit, which
       makes the overflow test below exact. */
    size_t newsz = pv->sz + pv->sz / 2;
    if( newsz < pv->sz || newsz < sz ) {
        newsz = sz + sz / 2;
        if( newsz < sz )
            errx( 1, "ptrvec overflow" );
    }
    /* The table is newsz pointers long; its byte size must fit too. */
    if( newsz > SIZE_MAX / sizeof( *pv->p ))
        errx( 1, "ptrvec overflow" );
    return pvsize( pv, newsz );
}
/** Append \a v to the vector managed by \a pv and re-terminate it.
 * Capacity is handled by pvensure(), so sz may jump by a lot, while
 * len always grows by exactly one. Returns \a pv. */
ptrvec *
pvadd( ptrvec *pv, void *v )
{
    /* One slot for the new pointer, one for the terminator. */
    pvensure( pv, pv->len + 2 );

    const size_t slot = pv->len;
    pv->p[ slot ] = v;
    pv->p[ slot + 1 ] = 0;
    pv->len = slot + 1;
    return pv;
}
/* See one of the index files for license and other details. */
enum {
/** The initial size allocation for a twine. Twines start off
 * empty (zero bytes in size), and when they grow, this is their
 * first size. The value is essentially arbitrary: a small buffer
 * that comfortably holds short strings without an immediate
 * second reallocation. */
tw_initial_size = 16
};
#if 0 /* UNUSED */
/** Create a new empty twine and return a pointer to it. This
* function never returns if it cannot allocate the requested
* memory. */
twine *
twnew(void)
{
twine *t = emalloc( sizeof( *t ));
*t = (twine){ 0 };
return t;
}
/** Release a twine obtained via twnew() together with its buffer.
 * Once you've called this, \a t is no longer valid. A null \a t is
 * ignored, matching pvdel() and free(3). */
void
twdel( twine *t )
{
    if( !t )
        return;
    twclear( t );
    free( t );
}
#endif
/** Give the twine's buffer back to the system and reset the twine to
 * its empty state; the structure itself stays valid (and zeroed).
 * Handy for twines that live on the stack, and a drastic but
 * effective way to drop the current contents when memory must be
 * returned. \a t must not be null. */
twine *
twclear( twine *t )
{
    free( t->p );               /* free(3) ignores a null pointer */
    t->p = 0;
    t->len = 0;
    t->sz = 0;
    return t;
}
/** Produce a plain C string holding a copy of what has been built in
 * the twine so far. The copy comes from the heap, is sized for
 * len + 1 bytes, and is owned by the caller. A twine without a
 * buffer yields an empty string. */
char *
twdup( const twine *t )
{
    char *copy = emalloc( t->len + 1 );
    const char *text = t->p ? t->p : "";

    strcpy( copy, text );
    return copy;
}
/** Finish building: return a right-sized C string copy of the twine
 * (see twdup()) and then release the twine's own buffer (see
 * twclear()) so it can be reused. The caller owns the result. */
char *
twfinal( twine *t )
{
    char *result = twdup( t );

    twclear( t );
    return result;
}
/** Resize the underlying buffer to exactly \a nb bytes (terminator
 * included). This function doesn't pad like the twadd*() functions
 * do, and it might even shrink the buffer depending on how this
 * system's realloc(3) behaves; the reasoning is that a caller with a
 * size in mind probably doesn't need to grow again soon.
 *
 * - A size of zero releases the buffer, exactly like twclear().
 * - If the current string no longer fits, it is truncated to nb - 1
 *   characters and re-terminated.
 * - When the buffer is allocated for the first time it is terminated
 *   immediately, so p is a valid (empty) C string even before the
 *   first character is added.
 *
 * Exits via erealloc() if memory cannot be obtained. */
twine *
twsize( twine *t, size_t nb )
{
    if( !nb )
        return twclear( t );

    const bool fresh = !t->p;
    t->p = erealloc( t->p, nb );
    t->sz = nb;
    if( t->len >= t->sz ) {
        t->len = t->sz - 1;
        t->p[ t->len ] = 0;
    }
    /* realloc(3) of a null pointer returns uninitialised memory;
       make sure an untouched twine still reads as "". The length is
       left alone because callers may already have set it. */
    if( fresh )
        t->p[ 0 ] = 0;
    return t;
}
#if 0 /* UNUSED */
/** Replace the contents of \a t with a copy of the null-terminated C
 * string \a z. Does not return if enough memory cannot be allocated.
 * No reserve space is added beyond tw_initial_size, because most of
 * the time twsetz() is called on twines that aren't going to be
 * modified afterwards.
 *
 * \a z is allowed to point into the twine's own buffer: in that case
 * it is duplicated first, because resizing the buffer may move or
 * free the bytes \a z refers to. */
twine *
twsetz( twine *t, const char *z )
{
    const size_t zlen = strlen( z );
    char *copy = 0;

    if( t->p ) {
        /* Compare as integers: relational comparison of pointers
           into different objects is not defined by the language. */
        const uintptr_t zb = (uintptr_t)z;
        const uintptr_t ze = zb + zlen;         /* address of the null */
        const uintptr_t tb = (uintptr_t)t->p;
        const uintptr_t te = tb + t->sz;        /* one past our buffer */
        const bool apart = zb >= te || ze < tb;
        if( !apart )
            copy = estrdup( z );
    }
    const char *src = copy ? copy : z;

    twsize( t, zlen + 1 < tw_initial_size ? tw_initial_size : zlen + 1 );
    t->len = zlen;
    strcpy( t->p, src );
    free( copy );
    return t;
}
/** Replace the contents of \a t with at most \a nb characters taken
 * from \a z. Copying stops early at a null in \a z (the remainder is
 * zero-filled, as strncpy(3) does), and the length of the twine is
 * set to \a nb. \a z does not have to be null terminated as long as
 * \a nb characters are readable.
 *
 * \a z is allowed to point into the twine's own buffer: in that case
 * the characters are duplicated first, because resizing the buffer
 * may move or free them. */
twine *
twset( twine *t, const char *z, size_t nb )
{
    char *copy = 0;

    if( t->p ) {
        /* Compare as integers: relational comparison of pointers
           into different objects is not defined by the language. */
        const uintptr_t zb = (uintptr_t)z;
        const uintptr_t tb = (uintptr_t)t->p;
        const uintptr_t te = tb + t->sz;        /* one past our buffer */
        const bool apart = zb >= te || zb + nb <= tb;
        if( !apart ) {
            /* Only the first nb characters may be read from z. */
            copy = emalloc( nb + 1 );
            strncpy( copy, z, nb );
            copy[ nb ] = 0;
        }
    }
    const char *src = copy ? copy : z;

    twsize( t, nb + 1 < tw_initial_size ? tw_initial_size : nb + 1 );
    t->len = nb;
    strncpy( t->p, src, nb );
    t->p[ nb ] = 0;
    free( copy );
    return t;
}
#endif
/** Grow a twine's buffer, if necessary, so that it holds at least
 * \a sz bytes. This only concerns itself with buffer size, not the
 * logical length, so be sure to count the terminating null in \a sz
 * yourself. A buffer smaller than tw_initial_size jumps straight to
 * that size; beyond that the buffer grows to 150% of its current
 * size or, if that is still too small, 150% of the request. The
 * 1.5X factor balances calling realloc too often against wasting
 * memory.
 *
 * Returns \a t. Exits via errx(3) when the grown size cannot be
 * represented in a size_t. */
static twine *
twensure( twine *t, size_t sz )
{
    if( !sz || sz <= t->sz )
        return t;
    if( sz <= tw_initial_size )
        return twsize( t, tw_initial_size );

    /* x + x/2 gives the same result as x*3/2 for unsigned integers,
       but can only wrap when the result itself does not fit, which
       makes the overflow test below exact. */
    size_t newsz = t->sz + t->sz / 2;
    if( newsz < t->sz || newsz < sz ) {
        newsz = sz + sz / 2;
        if( newsz < sz )
            errx( 1, "twine overflow" );
    }
    return twsize( t, newsz );
}
/** Append the single character \a c to the twine and re-terminate
 * it. The buffer is grown with padding when needed; see twensure().
 * As with the other functions, failing to get memory is fatal: in
 * this application there is no point in trying to recover from an
 * out of memory error. Returns \a t. */
twine *
twaddc( twine *t, char c )
{
    /* Room for the new character plus the terminating null. */
    twensure( t, t->len + 2 );

    t->p[ t->len ] = c;
    t->p[ t->len + 1 ] = 0;
    t->len++;
    return t;
}
#if 0 /* UNUSED */
/** Append the null-terminated C string \a z to the twine, growing
 * the buffer through twensure() as needed. Returns \a t. */
twine *
twaddz( twine *t, const char *z )
{
    const size_t extra = strlen( z );

    twensure( t, t->len + extra + 1 );
    char *tail = t->p + t->len;
    strcpy( tail, z );
    t->len += extra;
    return t;
}
/** Append the contents of the twine \a src to the twine \a dst and
 * re-terminate \a dst. Exactly src->len bytes are copied. An empty
 * \a src (whose buffer may be null) is accepted, and \a dst and
 * \a src may be the same twine. Returns \a dst. */
twine *
twadd( twine *dst, const twine *src )
{
    /* Capture the length first: when dst == src it changes below. */
    const size_t n = src->len;

    twensure( dst, dst->len + n + 1 );
    /* src->p is read only after twensure(), so a self-append sees the
       possibly relocated buffer; [0,n) and [len,len+n) never overlap. */
    if( n )
        memcpy( dst->p + dst->len, src->p, n );
    dst->len += n;
    dst->p[ dst->len ] = 0;
    return dst;
}
#endif
/** Append the code point \a c to the twine encoded as UTF-8.
 *
 * The encoder follows the original UTF-8 scheme of one to six bytes,
 * so every value up to 0x7fffffff (31 bits) is accepted; anything
 * larger is a fatal error reported through errx(3). No Unicode
 * validation is performed: surrogate values (0xd800-0xdfff) and
 * values above 0x10ffff, which current UTF-8 forbids, are encoded
 * like any other number. Returns \a t. */
twine *
twaddu( twine *t, uint32_t c )
{
    if( c <= 0x007f )                       /* 7 bits: 1 byte */
        twaddc( t, c );
    else if( c <= 0x07ff ) {                /* 11 bits: 2 bytes */
        twaddc( t, 0xc0 | ( c >> 6 ));
        twaddc( t, 0x80 | ( c & 0x3f ));
    } else if( c <= 0xffff ) {              /* 16 bits: 3 bytes */
        twaddc( t, 0xe0 | ( c >> 12 ));
        twaddc( t, 0x80 | ( c >> 6 & 0x3f ));
        twaddc( t, 0x80 | ( c & 0x3f ));
    } else if( c <= 0x1fffff ) {            /* 21 bits: 4 bytes */
        twaddc( t, 0xf0 | ( c >> 18 ));
        twaddc( t, 0x80 | ( c >> 12 & 0x3f ));
        twaddc( t, 0x80 | ( c >> 6 & 0x3f ));
        twaddc( t, 0x80 | ( c & 0x3f ));
    } else if( c <= 0x3ffffff ) {           /* 26 bits: 5 bytes */
        twaddc( t, 0xf8 | ( c >> 24 ));
        twaddc( t, 0x80 | ( c >> 18 & 0x3f ));
        twaddc( t, 0x80 | ( c >> 12 & 0x3f ));
        twaddc( t, 0x80 | ( c >> 6 & 0x3f ));
        twaddc( t, 0x80 | ( c & 0x3f ));
    } else if( c <= 0x7fffffff ) {          /* 31 bits: 6 bytes */
        twaddc( t, 0xfc | ( c >> 30 ));
        twaddc( t, 0x80 | ( c >> 24 & 0x3f ));
        twaddc( t, 0x80 | ( c >> 18 & 0x3f ));
        twaddc( t, 0x80 | ( c >> 12 & 0x3f ));
        twaddc( t, 0x80 | ( c >> 6 & 0x3f ));
        twaddc( t, 0x80 | ( c & 0x3f ));
    } else
        errx(1, "unicode code point cannot be >0x7fffffff" );
    return t;
}
/* See one of the index files for license and other details. */
/** An input stream paired with a line counter, so that errors can
 * say something useful about where they appeared. getch() bumps
 * #line when it reads a newline from the stream and ungetch() takes
 * it back when a newline is pushed back. Initialize this with a file
 * stream opened for reading, and set #line to 1. */
typedef struct ifile {
FILE *fp; /**< Input file stream */
size_t line; /**< Line number */
} ifile;
static jvalue *readvalue( ifile * );
/* Read one character from \a f. Builds without _REENTRANT use the
 * faster getc_unlocked(3), which skips stream locking; builds with
 * _REENTRANT use the regular getc(3). This is a bit of overkill:
 * once a stream has been handed to the JSON parser, this should be
 * the only code reading from it even in a multithreaded program.
 * But, still, let's try to play by the rules. */
inline static int
jgetc( FILE *f )
{
#ifndef _REENTRANT
    return getc_unlocked( f );
#else
    return getc( f );
#endif
}
/** Fetch the next character (or EOF) from the stream under \a f,
 * advancing the line counter in \a f whenever a newline is read so
 * that errors can always be reported with a line number. */
static int
getch( ifile *f )
{
    const int c = jgetc( f->fp );

    f->line += ( c == '\n' );
    return c;
}
/** Return a character back to the file stream under \a f, like
 * ungetc() would, but also manage its line counter: pushing back a
 * newline takes back the line that getch() counted. Though many
 * stdio implementations can handle more, there is no guarantee that
 * more than a single character can ever be pushed back onto the
 * stream.
 *
 * Returns what ungetc() returns: the character pushed back, or EOF
 * on failure. */
static int
ungetch( ifile *f, char c )
{
    if( c == '\n' )
        --f->line;
    /* Where plain char is signed, the byte 0xff arrives here as -1,
       which equals EOF; ungetc() refuses to push EOF back and the
       byte would silently vanish from the stream. Pass the value as
       an unsigned char, which is how stdio represents characters. */
    return ungetc( (unsigned char)c, f->fp );
}
/** Like getch(), but first discards any run of whitespace, so the
 * character returned is the first non-blank one, or EOF. Only the
 * four whitespace characters defined by JSON are skipped: space,
 * tab, newline and carriage return. */
static int
getchskip( ifile *f )
{
    for( ;; ) {
        const int c = getch( f );
        switch( c ) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            continue;           /* keep reading */
        default:
            return c;
        }
    }
}
/** Consume leading whitespace and leave the next non-whitespace
 * character in the stream, ready for reading. The value returned is
 * effectively a "peek" at what the next getch() will deliver; at end
 * of input EOF is returned and nothing is pushed back. */
static int
skipws( ifile *f )
{
    const int next = getchskip( f );

    if( next == EOF )
        return EOF;
    ungetch( f, next );
    return next;
}
/** Allocate a jvalue from the heap with every member zeroed and the
 * discriminator set to jnull. Does not return if the allocation
 * fails (see emalloc()). */
jvalue *
jnew(void)
{
    jvalue *fresh = emalloc( sizeof *fresh );

    *fresh = (jvalue){ .d = jnull };
    return fresh;
}
/** Free everything a jvalue owns, whether it is a whole tree or a
 * single value, while leaving \a j itself allocated and reset to
 * jnull. The name is always freed; arrays and objects have each
 * child destroyed with jdel() before their vector is freed; strings
 * and unparsed numbers free their text. Individual pointers are not
 * nulled one by one because the whole structure is zeroed at the
 * end. A null \a j is accepted and returned as is. */
jvalue *
jclear( jvalue *j )
{
    if( !j )
        return j;

    free( j->n );
    if( j->d == jarray || j->d == jobject ) {
        jvalue **kids = j->u.v;
        for( size_t i = 0; kids && kids[ i ]; ++i )
            jdel( kids[ i ] );
        free( kids );
    } else if( j->d == jstring || j->d == jnumber )
        free( j->u.s );

    *j = (jvalue){ .d = jnull };
    return j;
}
/** Destroy a jvalue completely: everything it contains is released
 * through jclear() and then \a j itself is freed. When this returns,
 * \a j is <em>no longer valid.</em> A null \a j is harmless. */
void
jdel( jvalue *j )
{
    jclear( j );
    free( j );
}
/** Report that the input ended before the value being read was
 * complete, and exit through errx(3) with status 1. Bad syntax and
 * truncated files are the usual triggers. No line number is given,
 * since the problem is the end of the file itself. It has its own
 * function because it is needed in so many places. */
static void
earlyeof(void)
{
    errx( 1, "%s", "premature EOF in JSON data" );
}
/** Report a parse error and exit through errx(3) with status 1.
 * Almost all of our errors end with the line number and a note that
 * the problem is in the JSON data, so that suffix is added here: the
 * caller supplies only a printf-style \a msg and its arguments.
 *
 * Variable arguments cannot simply be forwarded together with extra
 * ones, so the caller's message is formatted into a temporary buffer
 * first. vsnprintf(3) with a fixed 512-byte stack buffer is used in
 * preference to vasprintf(3) to keep this path off the heap; longer
 * messages are truncated. */
static void
ierr( const ifile *f, const char *msg, ... )
{
    char text[ 512 ];
    va_list args;

    va_start( args, msg );
    vsnprintf( text, sizeof text, msg, args );
    va_end( args );

    errx(1, "%s on line %zu in JSON data", text, f->line );
}
/** Consume one character from \a f and require it to be a double
 * quote. Returns true when it is; otherwise the problem is reported
 * through earlyeof() (end of input) or ierr() (any other character). */
static bool
expectdq( ifile *f )
{
    switch( getch( f )) {
    case '"':
        return true;
    case EOF:
        earlyeof();
        return false;
    default:
        ierr( f, "missing quote from string" );
        return false;
    }
}
/** Reads a JSON string that is wrapped with quotes from \a f, parsing
* all the various string escapes therein. Returns a C string (sans
* quotes) freshly allocated from the heap, or a null on error. When
* null is returned, a diagnostic will have been sent to the standard
* error stream. */
static char *
readstring( ifile *f )
{
if( !expectdq( f ))
return 0;
bool esc = false; /* the next character is escaped */
bool oops = false; /* a bad string was seen? */
unsigned int hex = 0; /* read this many chars as a hex
* Unicode code point */
unsigned int x = 0;
twine tw = (twine){ 0 };
while( !oops ) {
int c = getch( f );
if( c == EOF ) {
earlyeof();
oops = true;
} else if( hex ) {
if( isxdigit( c )) {
if( isdigit( c ))
x = 16 * x + c - '0';
else if( isupper( c ))
x = 16 * x + c - 'A' + 10;
else
x = 16 * x + c - 'a' + 10;
if( !--hex )
twaddu( &tw, x );
} else {
ierr( f, "expected hex digit" );
oops = true;
}
} else if( esc ) {
switch( c ) {
case '"': twaddc( &tw, '"' ); break;
case '/': twaddc( &tw, '/' ); break;
case '\\': twaddc( &tw, '\\' ); break;
case 'b': twaddc( &tw, '\b' ); break;
case 'f': twaddc( &tw, '\f' ); break;
case 'n': twaddc( &tw, '\n' ); break;
case 'r': twaddc( &tw, '\r' ); break;
case 't': twaddc( &tw, '\t' ); break;
case 'u':
x = 0;
hex = 4;
break;
default:
ierr( f, "unknown escape code '\\%c'", (char)c );
oops = true;
}
esc = 0;
} else if( c == '"' ) /* done parsing the string! bye! */
return twfinal( &tw );
else if( c == '\\' )
esc = 1;
else if( c >= ' ' )
twaddc( &tw, c );
else if( isspace( c )) {
ierr( f, "unescaped whitespace" );
oops = true;
} else {
ierr( f, "unknown byte (0x%02x)", c );
oops = true;