Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

c99 code, no jump tables, no gcc specifics #17

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
all: test example

test.c: test/test.c src/js0n.c
gcc -Wall -Wextra -Wno-unused-parameter -o test/test test/test.c src/js0n.c
gcc -std=c99 -Wall -Wextra -Werror -o test/test test/test.c src/js0n.c

test: test.c
@if ./test/test ; then \
Expand Down
254 changes: 144 additions & 110 deletions src/js0n.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,28 @@
// public domain or MIT license, contributions/improvements welcome via github at https://github.com/quartzjer/js0n

#include <string.h> // one strncmp() is used to do key comparison, and a strlen(key) if no len passed in

// gcc started warning about the init syntax used here; the warning is not helpful, so don't generate the spam. Suppressing the warning is really inconsistently supported across versions.
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic push
#include <assert.h>
#ifdef DEBUG
#include <stdio.h>
#endif
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Winitializer-overrides"
#pragma GCC diagnostic ignored "-Woverride-init"

// PUSH(i): used where a token opens at the current byte; cur+i is the first byte of interest
// (the call sites pass 0, or 1 to step past an opening quote).
// only at depth 1, track start pointers to match key/value:
//  - index == 0: record cur+i in val as the start of the value that CAP will return
//  - klen != 0 and index == 1: record cur+i in start as a candidate key for CAP to compare
//  - otherwise: decrement index, i.e. pass over this token
// Expands to a bare if statement that reads/writes the caller's locals depth, index, val, start, cur and klen.
#define PUSH(i) if(depth == 1) { if(!index) { val = cur+i; }else{ if(klen && index == 1) start = cur+i; else index--; } }

// CAP(i): used where a token closes; cur+i is treated as the last byte of the token.
// determine if key matches or value is complete (only acts at depth 1):
//  - if val is set and index == 0, the value is complete: store its length, (cur+i+1) - val,
//    in *vlen and return val from the enclosing function
//  - otherwise, if klen != 0 and a candidate key was recorded in start, compare the klen bytes at
//    start with key (the candidate's length is taken as cur-start, independent of i):
//    on a match index becomes 0 so the next PUSH records the value, on a mismatch index becomes 2
//    so the next PUSH only decrements it; start is cleared either way
// Expands to a bare if statement containing a return; relies on the caller's locals
// depth, val, index, vlen, cur, klen, start and key.
#define CAP(i) if(depth == 1) { if(val && !index) {*vlen = (size_t)((cur+i+1) - val); return val;}; if(klen && start) {index = (klen == (size_t)(cur-start) && strncmp(key,start,klen)==0) ? 0 : 2; start = 0;} }


// scanner states: selects how the byte at the current position is interpreted
enum state {
    S_STRUCT = 0, // between tokens: whitespace, separators, brackets, or the start of a token
    S_BARE   = 1, // inside a bare (unquoted) token, until whitespace or a separator ends it
    S_STRING = 2, // inside a quoted string
    S_UTF8   = 3, // inside a string, consuming continuation bytes of a multi-byte UTF-8 sequence
    S_ESC    = 4  // inside a string, on the byte right after a backslash
};

// non-zero when c lies in the closed interval [lo, hi]; c may be evaluated twice, so pass side-effect-free expressions
#define range(c, lo, hi) ((c) >= (lo) && (c) <= (hi))

// this makes a single pass across the json bytes, using each byte as an index into a jump table to build an index and transition state
const char *js0n(const char *key, size_t klen,
const char *json, size_t jlen, size_t *vlen)
Expand All @@ -27,54 +33,11 @@ const char *js0n(const char *key, size_t klen,
size_t index = 1;
int depth = 0;
int utf8_remain = 0;
static void *gostruct[] =
{
[0 ... 255] = &&l_bad,
['\t'] = &&l_loop, [' '] = &&l_loop, ['\r'] = &&l_loop, ['\n'] = &&l_loop,
['"'] = &&l_qup,
[':'] = &&l_loop,[','] = &&l_loop,
['['] = &&l_up, [']'] = &&l_down, // tracking [] and {} individually would allow fuller validation but is really messy
['{'] = &&l_up, ['}'] = &&l_down,
['-'] = &&l_bare, [48 ... 57] = &&l_bare, // 0-9
[65 ... 90] = &&l_bare, // A-Z
[97 ... 122] = &&l_bare // a-z
};
static void *gobare[] =
{
[0 ... 31] = &&l_bad,
[32 ... 126] = &&l_loop, // could be more pedantic/validation-checking
['\t'] = &&l_unbare, [' '] = &&l_unbare, ['\r'] = &&l_unbare, ['\n'] = &&l_unbare,
[','] = &&l_unbare, [']'] = &&l_unbare, ['}'] = &&l_unbare, [':'] = &&l_unbare,
[127 ... 255] = &&l_bad
};
static void *gostring[] =
{
[0 ... 31] = &&l_bad, [127] = &&l_bad,
[32 ... 126] = &&l_loop,
['\\'] = &&l_esc, ['"'] = &&l_qdown,
[128 ... 191] = &&l_bad,
[192 ... 223] = &&l_utf8_2,
[224 ... 239] = &&l_utf8_3,
[240 ... 247] = &&l_utf8_4,
[248 ... 255] = &&l_bad
};
static void *goutf8_continue[] =
{
[0 ... 127] = &&l_bad,
[128 ... 191] = &&l_utf_continue,
[192 ... 255] = &&l_bad
};
static void *goesc[] =
{
[0 ... 255] = &&l_bad,
['"'] = &&l_unesc, ['\\'] = &&l_unesc, ['/'] = &&l_unesc, ['b'] = &&l_unesc,
['f'] = &&l_unesc, ['n'] = &&l_unesc, ['r'] = &&l_unesc, ['t'] = &&l_unesc, ['u'] = &&l_unesc
};
void **go = gostruct;

enum state state = S_STRUCT;

if(!json || jlen <= 0 || !vlen) return 0;
*vlen = 0;

// no key is array mode, klen provides requested index
if(!key)
{
Expand All @@ -86,77 +49,148 @@ const char *js0n(const char *key, size_t klen,

for(start=cur=json,end=cur+jlen; cur<end; cur++)
{
goto *go[(unsigned char)*cur];
l_loop:;
}

if(depth) *vlen = jlen; // incomplete
return 0;

l_bad:
again:
switch (state) {
case S_STRUCT:
switch(*cur) {
case '\t':
case ' ':
case '\r':
case '\n':
case ':':
case ',':
continue;

case '"': goto l_qup;
case '[': goto l_up;
case ']': goto l_down;
case '{': goto l_up;
case '}': goto l_down;

case '-': goto l_bare;
default: {
if (range(*cur, '0', '9') ||
range(*cur, 'A', 'Z') ||
range(*cur, 'a', 'z'))
goto l_bare;
else
goto l_bad;
}
}
assert(0);
case S_BARE:
switch (*cur) {
case '\t':
case ' ':
case '\r':
case '\n':
case ',':
case ']': // correct? not [ ?
case '}': // correct? not { ?
case ':':
goto l_unbare;
default: {
// could be more pedantic/validation-checking
if (range(*cur, 32, 126))
continue;
goto l_bad;
}
}
assert(0);
case S_STRING:
if (*cur == '\\') {
state = S_ESC;
continue;
}
if (*cur == '"')
goto l_qdown;
if (range(*cur, 32, 126))
continue;
if ((*cur & 224) == 192) { // range(*cur, 192, 223))
state = S_UTF8;
utf8_remain = 1;
continue;
}
if ((*cur & 240) == 224) { // range(*cur, 224, 239)
state = S_UTF8;
utf8_remain = 2;
continue;
}
if ((*cur & 248) == 240) { // range(*cur, 240, 247)
state = S_UTF8;
utf8_remain = 3;
continue;
}
goto l_bad;
// XXX no utf8 outside strings?
case S_UTF8:
if ((*cur & 192) == 128) { // range(*cur, 128, 191)
if (!--utf8_remain)
state = S_STRING;
continue;
}
goto l_bad;
case S_ESC:
switch (*cur) {
case '"':
case '\\':
case '/':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case 'u':
state = S_STRING;
continue;
default:
goto l_bad;
}
}
assert (0);
l_bad:
*vlen = cur - json; // where error'd
#ifdef DEBUG
fprintf(stderr, "bad at >%.5s< state %d depth %d\n",
cur, state, depth);
#endif
return 0;
l_up:

l_up:
PUSH(0);
++depth;
goto l_loop;
continue;

l_down:
l_down:
--depth;
CAP(0);
goto l_loop;
continue;

l_qup:
l_qup:
PUSH(1);
go=gostring;
goto l_loop;
state = S_STRING;
continue;

l_qdown:
l_qdown:
CAP(-1);
go=gostruct;
goto l_loop;

l_esc:
go = goesc;
goto l_loop;

l_unesc:
go = gostring;
goto l_loop;

l_bare:
state = S_STRUCT;
continue;

l_bare:
PUSH(0);
go = gobare;
goto l_loop;
state = S_BARE;
continue;

l_unbare:
l_unbare:
CAP(-1);
go = gostruct;
goto *go[(unsigned char)*cur];

l_utf8_2:
go = goutf8_continue;
utf8_remain = 1;
goto l_loop;

l_utf8_3:
go = goutf8_continue;
utf8_remain = 2;
goto l_loop;
state = S_STRUCT;
goto again;

l_utf8_4:
go = goutf8_continue;
utf8_remain = 3;
goto l_loop;
}

l_utf_continue:
if (!--utf8_remain)
go=gostring;
goto l_loop;
if (depth) {
*vlen = jlen; // incomplete
}
return 0;

}

#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic pop
#endif
9 changes: 6 additions & 3 deletions test/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ int main(int argc, char **argv)
const char *ret;
size_t len, len2, jlen=0;
FILE *f;


(void) argc;
(void) argv;

fail_unless((f = fopen("./test/test.json","r")) != NULL);
while((len = fread(buff,1,1024,f)) > 0)
{
Expand All @@ -19,13 +22,13 @@ int main(int argc, char **argv)
jlen+=len;
}
fclose(f);

fail_unless((ret = js0n("test",0,json,jlen,&len)));
fail_unless(len);
fail_unless(strncmp("value",ret,len) == 0);

fail_unless((ret = js0n("foo",0,json,jlen,&len)));
fail_unless(len);
fail_unless(len == 13);
fail_unless(strncmp("b\\\"a and \\\\ r",ret,len) == 0);

fail_unless((ret = js0n("array",0,json,jlen,&len)));
Expand Down
2 changes: 1 addition & 1 deletion test/test.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
"obj":{"true":true},
"name":"value",
"value":"real"
}
}