-
Notifications
You must be signed in to change notification settings - Fork 27
/
esl_json.c
1374 lines (1188 loc) · 59.6 KB
/
esl_json.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* esl_json : JSON data file parsing
*
* Inspired by Serge Zaitsev's Jasmine parser, https://github.com/zserge/jsmn
*
* Contents:
* 1. Full or incremental JSON parsing
* 2. ESL_JSON: a JSON parse tree
* 3. ESL_JSON_PARSER: precise state at each input byte
* 4. Accessing tokenized data
* 5. Debugging, development tools
* 6. Internal functions
* 7. Unit tests
* 8. Test driver
* 9. Example
*
* References:
* www.json.org
* tools.ietf.org/html/rfc8259
*/
#include <esl_config.h>
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include "easel.h"
#include "esl_buffer.h"
#include "esl_mem.h"
#include "esl_random.h"
#include "esl_stack.h"
#include "esl_json.h"
static int new_token(ESL_JSON_PARSER *parser, ESL_JSON *pi, enum esl_json_type_e type, esl_pos_t startpos);
static void add_dirty_unicode(ESL_RANDOMNESS *rng, char *b, int n, int *ret_nadd);
/*****************************************************************
* 1. Full or incremental JSON parsing
*****************************************************************/
/* Function:  esl_json_Parse()
 * Synopsis:  Parse a complete JSON data object
 * Incept:    SRE, Sun 29 Jul 2018 [IB 6165 Madrid-Boston]
 *
 * Purpose:   Given an open input buffer <bf>, read the next
 *            complete JSON data object from it. Return the
 *            parse tree thru <*ret_pi>.
 *
 *            Upon successful return, the buffer <bf>'s point is
 *            sitting precisely on the next byte following the closing
 *            brace of the JSON object.
 *
 *            The buffer is anchored at its starting offset for the
 *            duration of the parse; the anchor is raised again before
 *            returning, on success and on failure alike.
 *
 *            If <bf> holds nothing but whitespace (or nothing at all),
 *            no object is started; the call returns <eslOK> with an
 *            empty parse tree.
 *
 * Args:      bf     - open buffer for reading
 *            ret_pi - RETURN: JSON parse tree
 *
 * Returns:   <eslOK> on success, and <*ret_pi> points
 *            to the parse tree.
 *
 *            <eslEFORMAT> if the JSON data string is invalid,
 *            including the case where the input ends before the
 *            root object has been closed. <bf->errmsg> is set to a
 *            user-friendly error message indicating why. <*ret_pi>
 *            is <NULL>.
 *
 * Throws:    <eslEMEM> on allocation failure.
 *
 *            On these exceptions, <*ret_pi> is returned <NULL>.
 */
int
esl_json_Parse(ESL_BUFFER *bf, ESL_JSON **ret_pi)
{
  ESL_JSON_PARSER *parser   = esl_json_parser_Create();
  ESL_JSON        *pi       = esl_json_Create();
  char            *s        = NULL;
  esl_pos_t        n        = 0;
  esl_pos_t        pos0     = esl_buffer_GetOffset(bf);
  esl_pos_t        nused;
  int              anchored = 0;       // 1 once we hold an anchor at <pos0> that must be raised
  int              status   = eslOK;

  if (parser == NULL || pi == NULL) { status = eslEMEM; goto ERROR; }

  esl_buffer_SetAnchor(bf, pos0);
  anchored = 1;

  /* Feed the parser one buffer window at a time. PartialParse returns
   * eslOK when it consumed the whole window without finishing the
   * object, and eslEOD when the root object closed inside the window.
   */
  while (status == eslOK && esl_buffer_Get(bf, &s, &n) == eslOK)
    {
      status = esl_json_PartialParse(parser, pi, s, n, &nused, bf->errmsg);
      if (status != eslOK && status != eslEOD) goto ERROR;
      esl_buffer_Set(bf, s, nused);
    }

  /* The parser is back in eslJSON_OBJ_NONE only if no object was
   * started, or if the root object was closed. Any other state here
   * means the input ran out in the middle of an object.
   */
  if (parser->state != eslJSON_OBJ_NONE)
    {
      snprintf(bf->errmsg, eslERRBUFSIZE, "premature end of input at line %d pos %d. JSON object is incomplete", parser->linenum, parser->linepos);
      status = eslEFORMAT;
      goto ERROR;
    }

  esl_buffer_RaiseAnchor(bf, pos0);
  esl_json_parser_Destroy(parser);
  *ret_pi = pi;
  return eslOK;

 ERROR:
  if (anchored) esl_buffer_RaiseAnchor(bf, pos0);
  esl_json_parser_Destroy(parser);
  esl_json_Destroy(pi);
  *ret_pi = NULL;
  return status;
}
/* Function:  esl_json_PartialParse()
 * Synopsis:  Incremental parse of a chunk of JSON data string.
 * Incept:    SRE, Sun 29 Jul 2018 [IB 6165 Madrid-Boston]
 *
 * Purpose:   Parse a chunk of input JSON data string <s> of length <n>,
 *            adding incrementally to a parse tree <pi>. A <parser>
 *            keeps precise byte-by-byte state information, enabling
 *            parsing to stop and start across different chunks.
 *
 *            At the first chunk, caller provides a freshly created
 *            <ESL_JSON_PARSER> as <parser>. At subsequent chunks,
 *            caller provides the parser state from the previous call.
 *
 *            If a complete JSON object is finished in this chunk,
 *            return <eslEOD>, and <*ret_nused> is the number of bytes
 *            that were consumed (inclusive of the closing brace),
 *            <*ret_nused> $\leq$ <n>. <eslEOD> is returned even when
 *            the closing brace is the very last byte of the chunk.
 *
 *            Input bytes are examined as <unsigned char>, so bytes
 *            $\geq$ 0x80 (UTF-8) are handled portably, and an embedded
 *            NUL byte is never mistaken for a legal JSON character.
 *
 * Args:      parser    - parser state information from previous chunk
 *            pi        - incremental JSON parse tree - updated upon success
 *            s         - next chunk of JSON data byte array to parse. \0-termination isn't needed.
 *            n         - length of <s>
 *            ret_nused - RETURN: number of bytes consumed from <s>.
 *                        This is <n> on <eslOK>, $\leq$ <n> on <eslEOD>.
 *            errbuf    - OPTIONAL: <eslERRBUFSIZE> buffer for an error message, or <NULL>
 *
 * Returns:   <eslOK> on success, where the entire chunk was parsed
 *            without completing a JSON object, consuming all <n>
 *            bytes, so <*ret_nused> is <n>.  <parser> is updated to
 *            hold the parser state after the last byte of <s>. Parse
 *            tree <pi> is incrementally updated. Caller can pass
 *            <parser>, <pi> to parse the next chunk.
 *
 *            <eslEOD> on success, where a complete JSON data object
 *            ended in this chunk after consuming <*ret_nused> bytes,
 *            inclusive of closing brace. The <parser> state is
 *            <eslJSON_OBJ_NONE>, and its <pos>, <linenum>, and
 *            <linepos> are set to the byte immediately following the
 *            close brace, ready to parse another JSON object in the
 *            stream if it's a stream of concatenated objects.
 *            <pi> contains a complete JSON parse tree.
 *
 *            <eslEFORMAT> on an invalid JSON string:
 *            <errbuf> contains a detailed (line/cpos) error message.
 *            <*ret_nused> is not set in this case.
 *
 * Note:      NOTE(review): the status returned by new_token() is not
 *            checked here (nor was it originally); its failure modes
 *            are defined outside this function - confirm.
 */
int
esl_json_PartialParse(ESL_JSON_PARSER *parser, ESL_JSON *pi, const char *s, esl_pos_t n, esl_pos_t *ret_nused, char *errbuf)
{
  esl_pos_t            i;
  unsigned char        c;                // s[i] as unsigned char: the only safe argument type for <ctype.h> functions
  enum esl_json_type_e closed_value;
  int                  root_closed = 0;  // set to 1 when the root object's closing brace is consumed in this chunk

  for (i = 0; i < n; i++, parser->pos++)
    {
      c            = (unsigned char) s[i];
      closed_value = eslJSON_UNKNOWN;   // i.e. FALSE, we didn't close a value; changes to something if we do.

      switch (parser->state) {
      case eslJSON_OBJ_NONE:  // Only at very beginning of parse: initialize with root object
        if      (c == '{')      { parser->state = eslJSON_OBJ_OPEN; new_token(parser, pi, eslJSON_OBJECT, parser->pos); }
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). expected JSON object to start with {", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.1}
        break;

      case eslJSON_OBJ_OPEN:
        if      (c == '"')      { parser->state = eslJSON_KEY_OPEN; new_token(parser, pi, eslJSON_KEY, parser->pos+1); }  // pos+1 because not including the quote
        else if (c == '}')      closed_value = eslJSON_OBJECT;
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). expected JSON object key, or closing }", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.3}
        break;

      case eslJSON_OBJ_COMMA:
        if      (c == '"')      { parser->state = eslJSON_KEY_OPEN; new_token(parser, pi, eslJSON_KEY, parser->pos+1); }
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). expected JSON object key after comma", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.2}
        break;

      case eslJSON_OBJ_COLON:
      case eslJSON_ARR_OPEN:
      case eslJSON_ARR_COMMA:
        if      (c == '"')      { parser->state = eslJSON_STR_OPEN;    new_token(parser, pi, eslJSON_STRING,  parser->pos+1); }
        else if (c == '{')      { parser->state = eslJSON_OBJ_OPEN;    new_token(parser, pi, eslJSON_OBJECT,  parser->pos);   }
        else if (c == '[')      { parser->state = eslJSON_ARR_OPEN;    new_token(parser, pi, eslJSON_ARRAY,   parser->pos);   }
        else if (c == '-')      { parser->state = eslJSON_NUM_SIGN;    new_token(parser, pi, eslJSON_NUMBER,  parser->pos);   }
        else if (c == '0')      { parser->state = eslJSON_NUM_ZERO;    new_token(parser, pi, eslJSON_NUMBER,  parser->pos);   }
        else if (isdigit(c))    { parser->state = eslJSON_NUM_NONZERO; new_token(parser, pi, eslJSON_NUMBER,  parser->pos);   }
        else if (c == 't')      { parser->state = eslJSON_VAL_TRUE;    new_token(parser, pi, eslJSON_BOOLEAN, parser->pos);   }
        else if (c == 'f')      { parser->state = eslJSON_VAL_FALSE;   new_token(parser, pi, eslJSON_BOOLEAN, parser->pos);   }
        else if (c == 'n')      { parser->state = eslJSON_VAL_NULL;    new_token(parser, pi, eslJSON_NULL,    parser->pos);   }
        else if (c == ']' && parser->state == eslJSON_ARR_OPEN) closed_value = eslJSON_ARRAY;  // empty array [] is valid JSON (RFC 8259), just as {} is
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). expected JSON value", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.4}
        break;

      case eslJSON_STR_OPEN:
      case eslJSON_STR_CHAR:
      case eslJSON_STR_PROTECTED:
        if      (c == '\\')     parser->state = eslJSON_STR_BACKSLASH;
        else if (c == '"')      closed_value  = eslJSON_STRING;
        else if (! iscntrl(c))  parser->state = eslJSON_STR_CHAR;  // anything not forbidden is allowed: this will accept UTF-8 one byte at a time, though without validating that it's a valid UTF-8 byte sequence.
        else                    ESL_FAIL(eslEFORMAT, errbuf, "invalid control char at line %d pos %d. expected JSON string character", parser->linenum, parser->linepos);  // {jbad.5}  (don't try to print the control char itself)
        break;

      case eslJSON_KEY_OPEN:
      case eslJSON_KEY_CHAR:
      case eslJSON_KEY_PROTECTED:
        if      (c == '\\')     parser->state = eslJSON_KEY_BACKSLASH;
        else if (c == '"')      closed_value  = eslJSON_KEY;
        else if (! iscntrl(c))  parser->state = eslJSON_KEY_CHAR;
        else                    ESL_FAIL(eslEFORMAT, errbuf, "invalid control char at line %d pos %d. expected JSON key character", parser->linenum, parser->linepos);  // {jbad.6}  args must match the two %d's
        break;

      case eslJSON_STR_BACKSLASH:
        if      (c != '\0' && strchr("\"\\/bfnrt", c) != NULL) parser->state = eslJSON_STR_PROTECTED;  // c != '\0': strchr() would match the set's terminator
        else if (c == 'u')                                     parser->state = eslJSON_STR_UNICODE;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). After \\, valid JSON chars are \"\\/bfnrtu", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.7}
        break;

      case eslJSON_KEY_BACKSLASH:
        if      (c != '\0' && strchr("\"\\/bfnrt", c) != NULL) parser->state = eslJSON_KEY_PROTECTED;
        else if (c == 'u')                                     parser->state = eslJSON_KEY_UNICODE;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). After \\, valid JSON chars are \"\\/bfnrtu", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.8}
        break;

      case eslJSON_STR_UNICODE:   // \uXXXX: count four hex digits in <codelen>
        if (isxdigit(c)) parser->codelen++;
        else             ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). In JSON unicode, expected hex digit", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.9}
        if (parser->codelen == 4) { parser->state = eslJSON_STR_PROTECTED; parser->codelen = 0; }
        break;

      case eslJSON_KEY_UNICODE:
        if (isxdigit(c)) parser->codelen++;
        else             ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d). In JSON unicode, expected hex digit", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.10}
        if (parser->codelen == 4) { parser->state = eslJSON_KEY_PROTECTED; parser->codelen = 0; }
        break;

      case eslJSON_NUM_SIGN:
        if      (c == '0')    parser->state = eslJSON_NUM_ZERO;
        else if (isdigit(c))  parser->state = eslJSON_NUM_NONZERO;
        else                  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in number after leading sign of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.11}
        break;

      case eslJSON_NUM_ZERO:
        if      (c == '.')                         parser->state = eslJSON_NUM_POINT;
        else if (c == 'e' || c == 'E')             parser->state = eslJSON_NUM_EXP;
        else if (c == ',' || c == ']' || c == '}') closed_value  = eslJSON_NUMBER;
        else if (isspace(c))                       closed_value  = eslJSON_NUMBER;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after leading zero of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.12}
        break;

      case eslJSON_NUM_NONZERO:
      case eslJSON_NUM_LEADDIGIT:
        if      (isdigit(c))                       parser->state = eslJSON_NUM_LEADDIGIT;
        else if (c == '.')                         parser->state = eslJSON_NUM_POINT;
        else if (c == 'e' || c == 'E')             parser->state = eslJSON_NUM_EXP;
        else if (c == ',' || c == ']' || c == '}') closed_value  = eslJSON_NUMBER;
        else if (isspace(c))                       closed_value  = eslJSON_NUMBER;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after leading digit(s) of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.13}
        break;

      case eslJSON_NUM_POINT:
        if (isdigit(c)) parser->state = eslJSON_NUM_FRACDIGIT;
        else            ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after decimal point of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.14}
        break;

      case eslJSON_NUM_FRACDIGIT:
        if      (isdigit(c))                       parser->state = eslJSON_NUM_FRACDIGIT;
        else if (c == 'e' || c == 'E')             parser->state = eslJSON_NUM_EXP;
        else if (c == ',' || c == ']' || c == '}') closed_value  = eslJSON_NUMBER;
        else if (isspace(c))                       closed_value  = eslJSON_NUMBER;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in fractional part of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.15}
        break;

      case eslJSON_NUM_EXP:
        if      (isdigit(c))            parser->state = eslJSON_NUM_EXPDIGIT;
        else if (c == '+' || c == '-')  parser->state = eslJSON_NUM_EXPSIGN;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in exponent of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.16}
        break;

      case eslJSON_NUM_EXPSIGN:
        if (isdigit(c)) parser->state = eslJSON_NUM_EXPDIGIT;
        else            ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after exponent sign of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.17}
        break;

      case eslJSON_NUM_EXPDIGIT:
        if      (isdigit(c))                       parser->state = eslJSON_NUM_EXPDIGIT;
        else if (c == ',' || c == ']' || c == '}') closed_value  = eslJSON_NUMBER;
        else if (isspace(c))                       closed_value  = eslJSON_NUMBER;
        else    ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in exponent of JSON number", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.18}
        break;

      /* For true|false|null, <codelen> indexes the literal's expected
       * char; index 0 was already matched when the token was opened.
       */
      case eslJSON_VAL_TRUE:
        if (c != "true"[++parser->codelen])  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in JSON 'true'", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.19}
        if (parser->codelen == 3) { parser->codelen = 0; closed_value = eslJSON_BOOLEAN; }
        break;

      case eslJSON_VAL_FALSE:
        if (c != "false"[++parser->codelen]) ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in JSON 'false'", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.20}
        if (parser->codelen == 4) { parser->codelen = 0; closed_value = eslJSON_BOOLEAN; }
        break;

      case eslJSON_VAL_NULL:
        if (c != "null"[++parser->codelen])  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) in JSON 'null'", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.21}
        if (parser->codelen == 3) { parser->codelen = 0; closed_value = eslJSON_NULL; }
        break;

      case eslJSON_VAL_INOBJ:
        if      (c == ',')      parser->state = eslJSON_OBJ_COMMA;
        else if (c == '}')      closed_value  = eslJSON_OBJECT;
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after JSON object value", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.22}
        break;

      case eslJSON_VAL_INARR:
        if      (c == ',')      parser->state = eslJSON_ARR_COMMA;
        else if (c == ']')      closed_value  = eslJSON_ARRAY;
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after JSON array value", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.23}
        break;

      case eslJSON_STR_ASKEY:
        if      (c == ':')      parser->state = eslJSON_OBJ_COLON;
        else if (! isspace(c))  ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after JSON key", isprint(c) ? c : ' ', parser->linenum, parser->linepos);  // {jbad.24}
        break;

      default: esl_fatal("no such state");
      } // end of the big switch for parsing one character given curr state

      /* Solely for informative error messages, keep track of line number and position on line.
       * Advance counters to what byte i+1 will be.
       */
      if (c == '\n') { parser->linenum++; parser->linepos = 1; }
      else           { parser->linepos++; }

      /* For number values, we don't know that we've closed the value
       * until we see a non-value character: whitespace, comma, or
       * *another* close-value character ] or }. A ] or } means we're
       * closing two values, not just one: we close the number here,
       * then set <closed_value> again so that the
       * `if (closed_value != eslJSON_UNKNOWN)` block below closes the
       * enclosing obj/arr.
       */
      if (closed_value == eslJSON_NUMBER)
        {
          pi->tok[parser->curridx].endpos = parser->pos-1;   // number ended on the previous byte
          esl_stack_IPop(parser->pda, &(parser->curridx));   // back to the enclosing object/array
          closed_value = eslJSON_UNKNOWN;

          /* In the two failure cases below, <c> is a mismatched ] or }
           * (never a newline), and <linepos> was already advanced past
           * it just above: report linepos-1, the offending byte itself.
           */
          if (pi->tok[parser->curridx].type == eslJSON_OBJECT)
            {
              if      (c == ',')    parser->state = eslJSON_OBJ_COMMA;
              else if (c == '}')  { parser->state = eslJSON_VAL_INOBJ; closed_value = eslJSON_OBJECT; }
              else if (isspace(c))  parser->state = eslJSON_VAL_INOBJ;
              else ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after JSON number in key:value pair", isprint(c) ? c : ' ', parser->linenum, parser->linepos-1);  // {jbad.25}
            }
          else if (pi->tok[parser->curridx].type == eslJSON_ARRAY)
            {
              if      (c == ',')    parser->state = eslJSON_ARR_COMMA;
              else if (c == ']')  { parser->state = eslJSON_VAL_INARR; closed_value = eslJSON_ARRAY; }
              else if (isspace(c))  parser->state = eslJSON_VAL_INARR;
              else ESL_FAIL(eslEFORMAT, errbuf, "invalid char `%c` (line %d pos %d) after JSON number in array", isprint(c) ? c : ' ', parser->linenum, parser->linepos-1);  // {jbad.26}
            }
          else esl_fatal("doesn't happen");
        }

      /* For all other values but numbers (string, array, obj, true,
       * false, null) we know when we've properly closed the
       * value, on a character that we can consider to be part of
       * the value itself. Now to figure out what state we've just
       * moved to, when we close this value, we need to know
       * whether this value was an obj key, obj value, array
       * value, or the root object.
       */
      if (closed_value != eslJSON_UNKNOWN)
        {
          /* strings and keys exclude their closing quote; everything else includes its last byte */
          pi->tok[parser->curridx].endpos = ( (pi->tok[parser->curridx].type == eslJSON_STRING || pi->tok[parser->curridx].type == eslJSON_KEY) ? parser->pos-1 : parser->pos);

          if ( esl_stack_IPop(parser->pda, &(parser->curridx)) == eslEOD)
            { // if we have nothing to pop, we just closed the root object at i, parser->pos.
              // advance to next byte, and reinitialize state
              parser->curridx = -1;               // no tokens are open.
              parser->codelen = 0;
              parser->state   = eslJSON_OBJ_NONE;
              parser->pos++;
              i++;
              root_closed = 1;                    // remember it explicitly: i may now equal n
              break;                              // leaves the for loop, skipping its own i++/pos++
            }

          if      (closed_value == eslJSON_KEY)                      parser->state = eslJSON_STR_ASKEY;
          else if (pi->tok[parser->curridx].type == eslJSON_OBJECT)  parser->state = eslJSON_VAL_INOBJ;
          else if (pi->tok[parser->curridx].type == eslJSON_ARRAY)   parser->state = eslJSON_VAL_INARR;
        }
    } // end loop over chars in s[0..n-1] string.

  *ret_nused = i;
  return (root_closed ? eslEOD : eslOK);
}
/*****************************************************************
* 2. ESL_JSON : a JSON parse tree
*****************************************************************/
/* Function:  esl_json_Create()
 * Synopsis:  Create a new, empty JSON parse tree object
 * Incept:    SRE, Tue 31 Jul 2018 [Clint Mansell, Moon]
 *
 * Purpose:   Allocate an <ESL_JSON> with room for an initial 32
 *            tokens, none of them in use, and a redline of 65536
 *            tokens.
 *
 * Returns:   ptr to the new parse tree. Caller frees it with
 *            <esl_json_Destroy()>.
 *
 * Throws:    <NULL> on allocation failure.
 */
ESL_JSON *
esl_json_Create(void)
{
  ESL_JSON *tree = NULL;
  int       status;

  ESL_ALLOC(tree, sizeof(ESL_JSON));

  /* Put the struct in a consistent state before allocating the token
   * array, so the ERROR path can hand it straight to Destroy().
   */
  tree->tok     = NULL;
  tree->ntok    = 0;
  tree->nalloc  = 32;
  tree->redline = 65536;   // M ~= 2114 for HMMER profile parser; ~3.1M @48B/token. See HMMER h4_hmmfile.md if you change; xref SRE:H5/131.

  ESL_ALLOC(tree->tok, sizeof (ESL_JSON_TOK) * 32);
  return tree;

 ERROR:
  esl_json_Destroy(tree);
  return NULL;
}
/* Function:  esl_json_Grow()
 * Synopsis:  Double the allocation in a parse tree.
 *
 * Purpose:   Reallocate the token array of <pi> to hold twice as
 *            many tokens as it is currently allocated for.
 *
 * Returns:   <eslOK> on success; <pi->nalloc> has doubled.
 *
 * Throws:    the status set by <ESL_REALLOC()> on reallocation
 *            failure; <pi->nalloc> is left unchanged.
 */
int
esl_json_Grow(ESL_JSON *pi)
{
  size_t nbytes = sizeof(ESL_JSON_TOK) * pi->nalloc * 2;   // size of the doubled token array
  int    status;

  ESL_REALLOC(pi->tok, nbytes);
  pi->nalloc = pi->nalloc * 2;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  esl_json_Sizeof()
 * Synopsis:  Returns allocated size of a parse tree, in bytes
 *
 * Purpose:   The total counts the <ESL_JSON> struct itself plus all
 *            <pi->nalloc> allocated token slots, whether or not they
 *            are in use.
 */
size_t
esl_json_Sizeof(ESL_JSON *pi)
{
  return sizeof(ESL_JSON) + sizeof(ESL_JSON_TOK) * pi->nalloc;
}
/* Function:  esl_json_MinSizeof()
 * Synopsis:  Returns minimum size required for a parse tree, in bytes
 *
 * Purpose:   The total counts the <ESL_JSON> struct itself plus only
 *            the <pi->ntok> token slots that are actually in use.
 */
size_t
esl_json_MinSizeof(ESL_JSON *pi)
{
  return sizeof(ESL_JSON) + sizeof(ESL_JSON_TOK) * pi->ntok;
}
/* Function:  esl_json_Reuse()
 * Synopsis:  Reinitialize an existing parse tree for reuse.
 * Incept:    SRE, Sun 05 Aug 2018 [Clint Mansell, Welcome to Lunar Industries]
 *
 * Purpose:   Empty the parse tree <pi> so it can hold a new parse.
 *            If its token array has grown beyond <pi->redline>
 *            tokens, shrink the allocation back to the redline first.
 *
 * Returns:   <eslOK> on success; <pi->ntok> is 0.
 *
 * Throws:    the status set by <ESL_REALLOC()> if shrinking fails;
 *            <pi->ntok> is not reset in that case.
 */
int
esl_json_Reuse(ESL_JSON *pi)
{
  int status;

  /* Common case: allocation is within the redline, nothing to shrink. */
  if (pi->nalloc <= pi->redline)
    {
      pi->ntok = 0;
      return eslOK;
    }

  ESL_REALLOC(pi->tok, sizeof(ESL_JSON_TOK) * pi->redline);
  pi->nalloc = pi->redline;
  pi->ntok   = 0;
  return eslOK;

 ERROR:
  return status;
}
/* Function:  esl_json_Destroy()
 * Synopsis:  Free a parse tree.
 *
 * Purpose:   Free the token array of <pi>, then <pi> itself.
 *            Passing <NULL> is allowed, and does nothing.
 */
void
esl_json_Destroy(ESL_JSON *pi)
{
  if (pi == NULL) return;

  free(pi->tok);
  free(pi);
}
/*****************************************************************
* 3. ESL_JSON_PARSER : precise state at each input byte
*****************************************************************/
/* Function: esl_json_parser_Create()
* Synopsis: Create and initialize a new ESL_JSON_PARSER
* Incept: SRE, Tue 31 Jul 2018 [Clint Mansell, Moon]
*
* Throws: <NULL> on allocation failure.
*/
ESL_JSON_PARSER *
esl_json_parser_Create(void)
{
ESL_JSON_PARSER *parser = NULL;
int status;
ESL_ALLOC(parser, sizeof(ESL_JSON_PARSER));
if (( parser->pda = esl_stack_ICreate()) == NULL) { status = eslEMEM; goto ERROR; }
parser->pos = 0;
parser->linenum = 1;
parser->linepos = 1;
parser->state = eslJSON_OBJ_NONE;
parser->curridx = -1;
parser->codelen = 0;
return parser;
ERROR:
esl_json_parser_Destroy(parser);
return NULL;
}
/* Function:  esl_json_parser_Destroy()
 * Synopsis:  Frees an ESL_JSON_PARSER
 *
 * Purpose:   Destroy the parser's <pda> stack, then free <parser>
 *            itself. Passing <NULL> is allowed, and does nothing.
 */
void
esl_json_parser_Destroy(ESL_JSON_PARSER *parser)
{
  if (parser == NULL) return;

  esl_stack_Destroy(parser->pda);
  free(parser);
}
/*****************************************************************
* 4. Accessing tokenized data in ESL_JSON
*****************************************************************/
/* Function:  esl_json_GetMem()
 * Synopsis:  Get a pointer to the first byte of a token.
 *
 * Purpose:   Token <idx> of parse tree <pi> starts at
 *            <pi->tok[idx].startpos>, an offset in input coordinates.
 *            Translate that to a pointer into <bf->mem> by
 *            subtracting <bf->baseoffset>, the input offset that
 *            <bf->mem[0]> corresponds to.
 *
 *            The offset difference is computed first, so that no
 *            intermediate pointer is formed outside of <bf->mem>
 *            when <bf->baseoffset> is nonzero.
 *
 * Returns:   pointer to the token's first byte. The token is not
 *            \0-terminated; get its length from <esl_json_GetLen()>.
 */
char *
esl_json_GetMem(const ESL_JSON *pi, int idx, const ESL_BUFFER *bf)
{
  return bf->mem + (pi->tok[idx].startpos - bf->baseoffset);
}
esl_pos_t
esl_json_GetLen(const ESL_JSON *pi, int idx, const ESL_BUFFER *bf)
{
return pi->tok[idx].endpos - pi->tok[idx].startpos + 1;
}
/* Function:  esl_json_ReadInt()
 * Synopsis:  Read an integer from a valid JSON number token.
 * Incept:    SRE, Tue 14 Aug 2018
 *
 * Purpose:   Token <idx> of parse tree <pi> is a validated JSON
 *            number in input buffer <bf>. Convert it to an <int>
 *            and return that in <*ret_i>.
 *
 *            JSON numbers are a superset of the integers; only
 *            tokens matching <-?0 | -?[1-9][0-9]*> convert.
 *            The value is accumulated digit by digit, directly as
 *            a negative number when the token has a leading '-',
 *            with a range check before every step.
 *
 * Args:      pi     - JSON parse tree
 *            idx    - index of token in <pi>
 *            bf     - input buffer that <pi> is for
 *            ret_i  - RETURN: integer value
 *
 * Returns:   <eslOK> on success, and <*ret_i> contains the value.
 *
 *            <eslEFORMAT> if the complete token isn't a valid integer,
 *            and <*ret_i> is 0. <bf->errmsg> says why.
 *
 *            <eslERANGE> if the integer value overflows (underflows)
 *            INT_MAX (INT_MIN), and <*ret_i> is INT_MAX (INT_MIN).
 *
 * Throws:    (no abnormal error conditions)
 *
 * Xref:      Shares code with <esl_mem_strtoi()>. Less (and different)
 *            error checking, because we assume token has already been
 *            validated as a JSON number format, and we assume that
 *            the entire token consists of integer (no whitespace or
 *            trailing stuff), and it must be base 10.
 */
int
esl_json_ReadInt(const ESL_JSON *pi, int idx, ESL_BUFFER *bf, int *ret_i)
{
  const char *tokmem   = esl_json_GetMem(pi, idx, bf);
  esl_pos_t   toklen   = esl_json_GetLen(pi, idx, bf);
  esl_pos_t   k        = 0;
  int         acc      = 0;
  int         negative = 0;
  int         d;

  bf->errmsg[0] = '\0';

  if (tokmem[0] == '-') { negative = 1; k = 1; }

  /* No special check for a leading zero: the JSON parser has already
   * validated that the token isn't 00, 0a, or some such.
   */
  while (k < toklen)
    {
      if (! isdigit(tokmem[k]))   // only happens for .eE+-: components of a float
        {
          *ret_i = 0;
          ESL_FAIL(eslEFORMAT, bf->errmsg, "bad JSON integer format, contains nondigit");
        }
      d = tokmem[k] - '0';

      if (negative)
        {
          if (acc < (INT_MIN + d) / 10) { *ret_i = INT_MIN; return eslERANGE; }
          acc = acc * 10 - d;
        }
      else
        {
          if (acc > (INT_MAX - d) / 10) { *ret_i = INT_MAX; return eslERANGE; }
          acc = acc * 10 + d;
        }
      k++;
    }

  *ret_i = acc;
  return eslOK;
}
/* Function:  esl_json_ReadFloat()
 * Synopsis:  Read a float from a valid JSON number token.
 * Incept:    SRE, Tue 14 Aug 2018
 *
 * Purpose:   Token <idx> of parse tree <pi> is a validated JSON
 *            number in input buffer <bf>. Convert its decimal
 *            string representation to a float, and return the
 *            float in <*ret_x>.
 *
 *            Adapted from <esl_mem_strtof()>. Because the parser
 *            has already validated the token, and every JSON number
 *            is a valid floating-point decimal string, no error
 *            checking is done here and the whole token is converted.
 *            JSON has no representation for "NaN" or "infinity".
 *
 *            An overflowing representation (e.g. "1e999") gives
 *            +/-infinity; an underflowing one (e.g. "1e-999") gives
 *            0. Both still return <eslOK>.
 *
 *            Like <esl_mem_strtof()>, this conversion incurs a small
 *            roundoff error (usually within +/-1 ulp) that a strictly
 *            correct <strtof()> implementation does not. See
 *            <esl_mem.md> for discussion.
 *
 * Returns:   <eslOK>.
 */
int
esl_json_ReadFloat(const ESL_JSON *pi, int idx, ESL_BUFFER *bf, float *ret_x)
{
  esl_pos_t  toklen   = esl_json_GetLen(pi, idx, bf);
  char      *tok      = esl_json_GetMem(pi, idx, bf);
  int        k        = 0;      // index of the next unread byte of tok[]
  float      msign    = 1.0;    // sign of the mantissa
  float      mantissa = 0.0;
  float      place    = 0.1;    // weight of the next fractional digit
  float      esign    = 1.;     // sign of the exponent
  float      expo     = 0.;

  /* Parser already verified tok[0..toklen-1] is a valid JSON number,
   * so a stripped-down copy of esl_mem_strtof() suffices; no need to
   * check for inf|nan|infinity, for example. Large speed win.
   */
  if (tok[k] == '-') { msign = -1.0; k++; }

  /* integer digits */
  for (; k < toklen && isdigit(tok[k]); k++)
    mantissa = 10. * mantissa + (tok[k]-'0');

  /* optional fraction */
  if (k < toklen && tok[k] == '.')
    for (k++; k < toklen && isdigit(tok[k]); k++)
      {
        mantissa += (tok[k]-'0') * place;
        place    *= 0.1;         // this is a source of roundoff error.
      }

  /* optional exponent */
  if (k < toklen && (tok[k] == 'e' || tok[k] == 'E'))
    {
      k++;
      switch (tok[k]) {
      case '-': esign = -1.; k++; break;
      case '+': esign =  1.; k++; break;
      default:                    break;
      }

      for (; k < toklen && isdigit(tok[k]); k++)
        expo = 10.*expo + (tok[k]-'0');
      expo = expo * esign;

      /* renormalize mantissa into [1,10). (and another source of roundoff error) */
      if (isfinite(mantissa))
        while (mantissa >= 10.) { expo += 1.; mantissa /= 10.; }
      if (mantissa != 0.0)
        while (mantissa <  1.)  { expo -= 1.; mantissa *= 10.; }
    }
  ESL_DASSERT1(( k == toklen ));

  *ret_x = msign * mantissa * powf(10.,expo);  // range errors (over/underflow) aren't checked for; just let it go to +/-inf.
  return eslOK;
}
/*****************************************************************
* 5. Debugging, development tools
*****************************************************************/
/* Function:  esl_json_Validate()
 * Synopsis:  Validate a JSON parse tree structure
 * Incept:    SRE, Tue 31 Jul 2018 [Clint Mansell, Moon soundtrack]
 *
 * Purpose:   Validate internals of JSON parse tree <pi>. If optional
 *            <bf> is provided, do additional validation that
 *            substrings of the parsed input appear to match what the
 *            parse tree says they should be. If all seems ok, return
 *            <eslOK>. If bad, return <eslFAIL> and (if optional
 *            <errbuf> is provided), put an informative user-directed
 *            error message in <errbuf>.
 *
 *            For each token, the checks are:
 *              - start and end positions are nonnegative and ordered
 *                (a zero-length key/string has endpos = startpos-1);
 *              - the child linked list only visits valid token
 *                indices, terminates, has <nchild> members, and ends
 *                at <lastchild>;
 *              - if <bf> is provided: positions lie inside the buffer,
 *                and the bytes at those positions are consistent with
 *                the token type (brackets, quotes, number, literal).
 *
 * Args:      pi     - parse tree to validate
 *            bf     - optional - input buffer that <pi> corresponds to, or NULL
 *            errbuf - optional - informative error message on failure, or NULL
 *
 * Returns:   <eslOK> on success. <errbuf>, if it was provided, is an empty string.
 *
 *            <eslFAIL> on failure. <errbuf>, if it was provided, contains
 *            an informative error message.
 */
int
esl_json_Validate(const ESL_JSON *pi, const ESL_BUFFER *bf, char *errbuf)
{
  int           i,n;
  ESL_JSON_TOK *tok;
  int           cur, prv;     // token indices, following linked list of children
  esl_pos_t     pos1, pos2;   // start, end coords for a token, adjusted to bf->mem coords if <bf> is passed

  if (errbuf) errbuf[0] = '\0';

  for (i = 0; i < pi->ntok; i++)
    {
      tok  = &(pi->tok[i]);
      pos1 = (bf ? tok->startpos - bf->baseoffset : tok->startpos);   // bf->mem[0] = s[baseoffset] in original input coords for <s>
      pos2 = (bf ? tok->endpos   - bf->baseoffset : tok->endpos);

      if (pos1 < 0) ESL_FAIL(eslFAIL, errbuf, "bad start pos, tok %d", i);
      if (pos2 < 0) ESL_FAIL(eslFAIL, errbuf, "bad end pos, tok %d",   i);

      if ((tok->type == eslJSON_KEY || tok->type == eslJSON_STRING))
        { if (pos2 < pos1-1) ESL_FAIL(eslFAIL, errbuf, "bad end pos, string/key tok %d", i); }  // a zero-length string or key has endpos = startpos-1
      else
        { if (pos2 < pos1)   ESL_FAIL(eslFAIL, errbuf, "bad end pos, tok %d", i); }

      /* Integrity of child linked list.
       * A corrupted tree may hold an out-of-range index or a cycle in
       * the <nextsib> chain. Check each index before dereferencing it,
       * and stop after <ntok> steps: a list longer than the total
       * number of tokens can only arise from a cycle.
       */
      for (cur = tok->firstchild, n = 0, prv = -1; cur != -1; cur = pi->tok[cur].nextsib)
        {
          if (cur < 0 || cur >= pi->ntok) ESL_FAIL(eslFAIL, errbuf, "bad child links, tok %d", i);
          if (n >= pi->ntok)              ESL_FAIL(eslFAIL, errbuf, "bad child linked list for tok %d", i);
          n++;
          prv = cur;
        }
      if (tok->nchild >  0 && (tok->firstchild == -1 || tok->lastchild == -1)) ESL_FAIL(eslFAIL, errbuf, "bad child links, tok %d", i);
      if (tok->nchild == 0 && (tok->firstchild != -1 || tok->lastchild != -1)) ESL_FAIL(eslFAIL, errbuf, "tok %d shouldn't have child links", i);
      if (tok->nchild    != n)   ESL_FAIL(eslFAIL, errbuf, "bad number of children, tok %d", i);
      if (tok->lastchild != prv) ESL_FAIL(eslFAIL, errbuf, "bad child linked list for tok %d", i);

      /* optionally, if <bf> provided, partially validate each substring */
      if (bf)
        {
          if (pos1 >= bf->n) ESL_FAIL(eslFAIL, errbuf, "bad start pos, tok %d", i);
          if (pos2 >= bf->n) ESL_FAIL(eslFAIL, errbuf, "bad end pos, tok %d",   i);

          switch (tok->type) {
          case eslJSON_OBJECT:  if (bf->mem[pos1] != '{' || bf->mem[pos2] != '}') { ESL_FAIL(eslFAIL, errbuf, "object closing brackets missing, tok %d", i); } break;
          case eslJSON_ARRAY:   if (bf->mem[pos1] != '[' || bf->mem[pos2] != ']') { ESL_FAIL(eslFAIL, errbuf, "array closing brackets missing, tok %d",  i); } break;

          /* Key/string coords exclude the enclosing quotes, so the quotes sit at pos1-1 and pos2+1.
           * Make sure both of those are inside bf->mem before looking at them.
           */
          case eslJSON_KEY:     if (pos1 < 1 || pos2+1 >= bf->n ||
                                    bf->mem[pos1-1] != '"' || bf->mem[pos2+1] != '"') { ESL_FAIL(eslFAIL, errbuf, "key quotes missing, tok %d",    i); } break;
          case eslJSON_STRING:  if (pos1 < 1 || pos2+1 >= bf->n ||
                                    bf->mem[pos1-1] != '"' || bf->mem[pos2+1] != '"') { ESL_FAIL(eslFAIL, errbuf, "string quotes missing, tok %d", i); } break;

          case eslJSON_NUMBER:  if (! esl_mem_IsReal(bf->mem + pos1, pos2-pos1+1))           { ESL_FAIL(eslFAIL, errbuf, "number isn't a number, tok %d",   i); } break;
          case eslJSON_BOOLEAN: if (! esl_memstrcmp(bf->mem + pos1, pos2-pos1+1, "true") &&
                                    ! esl_memstrcmp(bf->mem + pos1, pos2-pos1+1, "false"))   { ESL_FAIL(eslFAIL, errbuf, "boolean isn't a boolean, tok %d", i); } break;
          case eslJSON_NULL:    if (! esl_memstrcmp(bf->mem + pos1, pos2-pos1+1, "null"))    { ESL_FAIL(eslFAIL, errbuf, "null isn't null, tok %d",         i); } break;
          default:              ESL_FAIL(eslFAIL, errbuf, "no such state type %d, tok %d", (int) tok->type, i);
          }
        }
    }
  return eslOK;
}
/* Function:  esl_json_DecodeType()
 * Synopsis:  Returns printable string, given <esl_json_type_e> code.
 *
 * Purpose:   Translate token type code <type> into a short lowercase
 *            name suitable for printing. The result is a string
 *            literal; the caller must not free or modify it.
 *
 * Returns:   the name for <type>, or "??" if <type> is not one of
 *            the codes handled here.
 */
char *
esl_json_DecodeType(enum esl_json_type_e type)
{
  char *name = "??";   // answer for any code not listed below

  switch (type) {
  case eslJSON_UNKNOWN: name = "unknown"; break;
  case eslJSON_OBJECT:  name = "object";  break;
  case eslJSON_ARRAY:   name = "array";   break;
  case eslJSON_KEY:     name = "key";     break;
  case eslJSON_STRING:  name = "string";  break;
  case eslJSON_NUMBER:  name = "number";  break;
  case eslJSON_BOOLEAN: name = "boolean"; break;
  case eslJSON_NULL:    name = "null";    break;
  default:                                break;
  }
  return name;
}
/* Function:  esl_json_Dump()
 * Synopsis:  Dump contents of an ESL_JSON parse tree
 *
 * Purpose:   Write a table to stream <fp> describing parse tree <pi>:
 *            a header line, followed by one line per token giving its
 *            index, type name, start/end positions, line number and
 *            position, child count, and first-child / last-child /
 *            next-sibling links.
 *
 * Returns:   <eslOK>.
 */
int
esl_json_Dump(FILE *fp, ESL_JSON *pi)
{
  int idx = 0;

  esl_dataheader(fp,
                 5,  "idx",
                 8,  "type",
                 8,  "startpos",
                 8,  "endpos",
                 8,  "linenum",
                 8,  "linepos",
                 8,  "nchild",
                 10, "firstchild",
                 10, "lastchild",
                 8,  "nextsib",
                 0);

  while (idx < pi->ntok)
    {
      /* start/end positions are narrowed to int for display */
      int startpos = (int) pi->tok[idx].startpos;
      int endpos   = (int) pi->tok[idx].endpos;

      fprintf(fp, "%-5d %8s %8d %8d %8d %8d %8d %10d %10d %8d\n",
              idx,
              esl_json_DecodeType(pi->tok[idx].type),
              startpos,
              endpos,
              pi->tok[idx].linenum,
              pi->tok[idx].linepos,
              pi->tok[idx].nchild,
              pi->tok[idx].firstchild,
              pi->tok[idx].lastchild,
              pi->tok[idx].nextsib);
      idx++;
    }
  return eslOK;
}
/* Function: esl_json_SampleDirty()
* Synopsis: Generate a lawful evil JSON string for parser testing
* Incept: SRE, Tue 31 Jul 2018 [Hildur Gudnadottir, Baer]
*
* Purpose: Generate a syntactically valid random JSON string using
* random number generator <rng>. Return it in <*ret_s> and
* its length in bytes in <*ret_n>.
*
* The JSON string is UTF-8 encoded. JSON spec allows
* string values to contain "any" Unicode character. There
* are a lot of UNICODE chars, so for testing purposes
* (where we don't really want the string to look like
* *utter* noise), we generate 3 of them: the 2-byte
* <\u00B5> character $\mu$, the 3-byte <\u221E> character
* $\infty$, and the 4-byte <\U00010083> glyph for 'horse'
* in Linear B. Note that renderer support for Unicode is
* spotty, especially in the high range where Linear B
* lives.
*
* The string is \0-terminated for convenience, but the
* Easel JSON parser works on byte arrays and does not
* require NUL string termination.
*
* <*ret_s> is allocated here; caller frees.
*
* Args: rng - random number generator
* ret_s - RETURN: generated JSON string <s>. Caller frees.
* ret_n - RETURN: length of <s>
*
* Returns: <eslOK> on success. <*ret_s> and <*ret_n> are the result.
*
* Throws: <eslEMEM> on allocation failure.
* <*ret_s> is <NULL>, <*ret_n> is 0.
*
* Note: Parameter choices here are arbitrary, no special reason for
* them other than to make reasonable-length strings that
* are likely to exercise lots of possible JSON syntax.
*/
int
esl_json_SampleDirty(ESL_RANDOMNESS *rng, char **ret_s, int *ret_n)
{
enum esl_json_state_e state = eslJSON_OBJ_NONE; // we start outside the root JSON object
ESL_STACK *pda = NULL; // keeps track of object and array internal nodes in progress
char *s = NULL; // string we're building
int n = 0; // current length of string
int nalloc = 256; // current allocation for string
int nbarrier = 10000; // this keeps string from blowing up infinitely. after n > nbarrier, create no new objects or arrays.
int roll; // random roll 0..99
int closedv; // eslJSON_UNKNOWN becomes a value (eslJSON_NUMBER, etc) when we close a value, and need to pop up to its parent to set state.
int nadd; // how many bytes got added for a random unicode char
int j; // counter over added bytes
int x; // when we pop from <pda>, values have to be ints, which we then cast to the <enum esl_json_state_e>
int status;
ESL_ALLOC(s, sizeof(char) * nalloc);
if ((pda = esl_stack_ICreate()) == NULL) { status = eslEMEM; goto ERROR; }
while (1) // each iteration, add 0..5 bytes to b. (0 when we close a number; 5 when we add a "false" value or a "\uxxxx" unicode)
{
if (n+4 >= nalloc) {
ESL_REALLOC(s, sizeof(char) * nalloc * 2); // make sure we can write to n..n+4 : up to 5 bytes.
nalloc *= 2;
}
roll = esl_rnd_Roll(rng, 100);
closedv = eslJSON_UNKNOWN; // when we close an Array or Object value, we have to do some bookkeeping, popping its parent off stack.
switch (state) { // state machine follows the JSON specification's generative grammar, adding random whitespace where it's allowed
case eslJSON_OBJ_NONE:
if (roll < 40) { s[n++] = " \t\n"[esl_rnd_Roll(rng, 3)]; }
else { state = eslJSON_OBJ_OPEN; s[n++] = '{'; esl_stack_IPush(pda, (int) eslJSON_UNKNOWN); } // UNKNOWN because root JSON object has no parent. When we pop this, we'll be done.
break;
case eslJSON_OBJ_OPEN:
if (roll < 40) { s[n++] = " \t\n"[esl_rnd_Roll(rng, 3)]; }
else if (roll < 95) { state = eslJSON_KEY_OPEN; s[n++] = '"'; }
else { closedv = eslJSON_OBJECT; s[n++] = '}'; }
break;
case eslJSON_OBJ_COMMA:
if (roll < 40) { s[n++] = " \t\n"[esl_rnd_Roll(rng, 3)]; }
else { state = eslJSON_KEY_OPEN; s[n++] = '"'; }
break;
case eslJSON_OBJ_COLON:
case eslJSON_ARR_OPEN:
case eslJSON_ARR_COMMA:
x = (int) (state == eslJSON_OBJ_COLON ? eslJSON_OBJECT : eslJSON_ARRAY); // hacky. if we don't choose to generate whitespace, we are going to push obj|arr onto pda. we have to make this decision now, before we change state.
if (roll < 40) { s[n++] = " \t\n"[esl_rnd_Roll(rng, 3)]; break; } // the break is so we don't push enclosing object to stack
else if (roll < 50 && n <= nbarrier) { state = eslJSON_OBJ_OPEN; s[n++] = '{'; } // checking maxdepth attempts to keep nesting from spinning out of control
else if (roll < 60 && n <= nbarrier) { state = eslJSON_ARR_OPEN; s[n++] = '['; }
else if (roll < 70) { state = eslJSON_STR_OPEN; s[n++] = '"'; }
else if (roll < 75) { state = eslJSON_NUM_SIGN; s[n++] = '-'; }
else if (roll < 80) { state = eslJSON_NUM_ZERO; s[n++] = '0'; }
else if (roll < 85) { state = eslJSON_NUM_NONZERO; s[n++] = "123456789"[esl_rnd_Roll(rng,9)]; }
else if (roll < 90) { closedv = eslJSON_BOOLEAN; s[n++] = 't'; s[n++] = 'r'; s[n++] = 'u'; s[n++] = 'e'; } // push all 4 chars of "true", and we don't need the eslJSON_VAL_TRUE state; we just close the value immediately.
else if (roll < 95) { closedv = eslJSON_BOOLEAN; s[n++] = 'f'; s[n++] = 'a'; s[n++] = 'l'; s[n++] = 's'; s[n++] = 'e'; } // ... ditto "false" and eslJSON_VAL_FALSE
else { closedv = eslJSON_NULL; s[n++] = 'n'; s[n++] = 'u'; s[n++] = 'l'; s[n++] = 'l'; } // ... ditto "null" and eslJSON_VAL_NULL. We can do this because we know we're allocated for up to 5 new bytes per iteration.
esl_stack_IPush(pda, x); // when we open a new value - i.e. on anything but whitespace - push parent object|array to stack
break;
case eslJSON_STR_OPEN:
case eslJSON_STR_CHAR:
case eslJSON_STR_PROTECTED:
if (roll < 5) { state = eslJSON_STR_BACKSLASH; s[n++] = '\\'; }
else if (roll < 20) { closedv = eslJSON_STRING; s[n++] = '"'; }
else { state = eslJSON_STR_CHAR; add_dirty_unicode(rng, s, n, &nadd); n += nadd; }
break;
case eslJSON_KEY_OPEN:
case eslJSON_KEY_CHAR:
case eslJSON_KEY_PROTECTED:
if (roll < 5) { state = eslJSON_KEY_BACKSLASH; s[n++] = '\\'; }
else if (roll < 20) { state = eslJSON_STR_ASKEY; s[n++] = '"'; }
else { state = eslJSON_KEY_CHAR; add_dirty_unicode(rng, s, n, &nadd); n += nadd; }
break;
case eslJSON_STR_BACKSLASH:
if (roll < 15) { state = eslJSON_STR_UNICODE; s[n++] = 'u'; }
else { state = eslJSON_STR_PROTECTED; s[n++] = "\"\\/bfnrt"[esl_rnd_Roll(rng, 8)]; }
break;
case eslJSON_KEY_BACKSLASH:
if (roll < 15) { state = eslJSON_KEY_UNICODE; s[n++] = 'u'; }
else { state = eslJSON_KEY_PROTECTED; s[n++] = "\"\\/bfnrt"[esl_rnd_Roll(rng, 8)]; }
break;
case eslJSON_STR_UNICODE:
state = eslJSON_STR_PROTECTED;
for (j = 0; j < 4; j++)
if (esl_rnd_Roll(rng, 2) == 0) s[n++] = "0123456789abcdef"[esl_rnd_Roll(rng, 16)]; // this will generate invalid unicode sequences too, but JSON parser spec simply says "four hexadecimal digits"
else s[n++] = "0123456789ABCDEF"[esl_rnd_Roll(rng, 16)]; // JSON ECMA-404 spec says either lower or upper case are ok
break;
case eslJSON_KEY_UNICODE:
state = eslJSON_KEY_PROTECTED;
for (j = 0; j < 4; j++)
if (esl_rnd_Roll(rng, 2) == 0) s[n++] = "0123456789abcdef"[esl_rnd_Roll(rng, 16)];
else s[n++] = "0123456789ABCDEF"[esl_rnd_Roll(rng, 16)];
break;
case eslJSON_NUM_SIGN:
if (roll < 10) { state = eslJSON_NUM_ZERO; s[n++] = '0'; }
else { state = eslJSON_NUM_NONZERO; s[n++] = "123456789"[esl_rnd_Roll(rng,9)]; }
break;
case eslJSON_NUM_ZERO:
if (roll < 20) { closedv = eslJSON_NUMBER; } // n did not advance!
else if (roll < 80) { state = eslJSON_NUM_POINT; s[n++] = '.'; }
else { state = eslJSON_NUM_EXP; s[n++] = "eE"[esl_rnd_Roll(rng, 2)]; }
break;
case eslJSON_NUM_NONZERO:
case eslJSON_NUM_LEADDIGIT:
if (roll < 50) { state = eslJSON_NUM_LEADDIGIT; s[n++] = "0123456789"[esl_rnd_Roll(rng,10)]; }
else if (roll < 75) { state = eslJSON_NUM_POINT; s[n++] = '.'; }
else { closedv = eslJSON_NUMBER; } // n did not advance
break;
case eslJSON_NUM_POINT:
state = eslJSON_NUM_FRACDIGIT; s[n++] = "0123456789"[esl_rnd_Roll(rng,10)];
break;
case eslJSON_NUM_FRACDIGIT:
if (roll < 50) { state = eslJSON_NUM_FRACDIGIT; s[n++] = "0123456789"[esl_rnd_Roll(rng,10)]; }
else if (roll < 75) { state = eslJSON_NUM_EXP; s[n++] = "eE"[esl_rnd_Roll(rng, 2)]; }
else closedv = eslJSON_NUMBER;
break;
case eslJSON_NUM_EXP:
if (roll < 60) { state = eslJSON_NUM_EXPDIGIT; s[n++] = "0123456789"[esl_rnd_Roll(rng,10)]; }
else { state = eslJSON_NUM_EXPSIGN; s[n++] = "+-"[esl_rnd_Roll(rng, 2)]; }
break;
case eslJSON_NUM_EXPSIGN:
state = eslJSON_NUM_EXPDIGIT; s[n++] = "0123456789"[esl_rnd_Roll(rng,10)];
break;
case eslJSON_NUM_EXPDIGIT:
if (roll < 20) { state = eslJSON_NUM_EXPDIGIT; s[n++] = "0123456789"[esl_rnd_Roll(rng,10)]; }
else { closedv = eslJSON_NUMBER; }
case eslJSON_VAL_TRUE:
case eslJSON_VAL_FALSE:
case eslJSON_VAL_NULL: // these don't occur, because we generate the whole value at once, not byte by byte
break;
case eslJSON_VAL_INOBJ:
if (roll < 30) { s[n++] = " \t\n"[esl_rnd_Roll(rng, 3)]; }
else if (roll < 85) { state = eslJSON_OBJ_COMMA; s[n++] = ','; }
else { closedv = eslJSON_OBJECT; s[n++] = '}'; }
break;
case eslJSON_VAL_INARR: