From 552a6cff20c68fa79390bf2a4a17e9a5635ccd7e Mon Sep 17 00:00:00 2001
From: sushi
Date: Sat, 22 Jul 2023 03:30:25 -0400
Subject: [PATCH 1/2] Add crude type parsing for jai tokenizer

---
Reviewer note (text between "---" and the diff is not part of the commit):
the line structure of this series was rebuilt from a copy in which newlines
and runs of whitespace had been collapsed. Blank lines were re-derived from
the hunk line counts; leading indentation, in-line column alignment and the
line breaks inside the OPERATIONS list are best-effort and may not match the
target file byte-for-byte -- apply with --ignore-whitespace and verify.
The From: header carried no e-mail address in that copy; `git am` may reject
it until one is restored (plain `git apply` is unaffected).

 src/langs/jai.jai | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/src/langs/jai.jai b/src/langs/jai.jai
index 1c7a42b41..bbab57b7a 100644
--- a/src/langs/jai.jai
+++ b/src/langs/jai.jai
@@ -59,7 +59,12 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
     char := << t;
 
     if is_alpha(char) || char == #char "_" {
-        parse_identifier(tokenizer, *token);
+        if is_type_def(tokenizer) {
+            parse_identifier(tokenizer, *token);
+            token.type = .type_keyword;
+        } else {
+            parse_identifier(tokenizer, *token);
+        }
     } else if is_digit(char) {
         parse_number(tokenizer, *token);
     } else if char == {
@@ -80,10 +85,10 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
         case #char "%"; parse_percent (tokenizer, *token);
         case #char "@"; parse_note (tokenizer, *token);
         case #char "^"; parse_caret (tokenizer, *token);
+        case #char "."; parse_period (tokenizer, *token);
 
         case #char ";"; token.type = .punctuation; token.punctuation = .semicolon; t += 1;
         case #char ","; token.type = .punctuation; token.punctuation = .comma; t += 1;
-        case #char "."; token.type = .punctuation; token.punctuation = .period; t += 1;
         case #char "{"; token.type = .punctuation; token.punctuation = .l_brace; t += 1;
         case #char "}"; token.type = .punctuation; token.punctuation = .r_brace; t += 1;
         case #char "("; token.type = .punctuation; token.punctuation = .l_paren; t += 1;
@@ -103,6 +108,32 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
     return token;
 }
 
+is_type_def :: (using tokenizer: *Tokenizer) -> bool #expand {
+    before_prev, prev := last_tokens[0], last_tokens[1];
+    if prev.type == .operation && prev.operation == .asterisk {
+        // : *T
+        // -> *T
+        // ] *T
+        return (before_prev.type == .operation && before_prev.operation == .colon) ||
+               (before_prev.type == .operation && before_prev.operation == .arrow) ||
+               (before_prev.type == .punctuation && before_prev.punctuation == .r_bracket);
+    }
+    if prev.type == .operation && prev.operation == .colon {
+        // thing: T
+        return before_prev.type == .identifier;
+    }
+    if prev.type == .operation && prev.operation == .arrow {
+        // ) -> T
+        return before_prev.type == .punctuation && before_prev.punctuation == .r_paren;
+    }
+    if prev.type == .punctuation && prev.punctuation == .r_bracket {
+        // [..] T
+        // [N] T
+        return before_prev.type == .number || (before_prev.type == .operation && before_prev.operation == .period_range);
+    }
+    return false;
+}
+
 parse_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
     token.type = .identifier;
 
@@ -304,6 +335,21 @@ parse_caret :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
+parse_period :: (using tokenizer: *Tokenizer, token: *Token) {
+    token.type = .punctuation;
+    token.punctuation = .period;
+
+    t += 1;
+    if t >= max_t return;
+
+    if << t == {
+        case #char ".";
+            token.type = .operation;
+            token.operation = .period_range;
+            t += 1;
+    }
+}
+
 parse_note :: (using tokenizer: *Tokenizer, token: *Token) {
     token.type = .punctuation;
     token.punctuation = .note;
@@ -527,7 +573,7 @@ OPERATIONS :: string.[
     "percent", "percent_equal", "less_than", "double_less_than", "less_than_equal", "greater_than", "greater_than_equal",
     "minus", "minus_equal", "triple_dash", "asterisk", "asterisk_equal", "colon", "colon_equal", "double_colon", "slash",
     "plus", "plus_equal", "slash_equal", "ampersand", "double_ampersand", "ampersand_equal", "tilde", "unknown",
-    "caret", "caret_equal",
+    "caret", "caret_equal", "period_range",
 ];
 
 KEYWORDS :: string.[

From 214d1b6c377976eb7b308a57f25af131d099dfa6 Mon Sep 17 00:00:00 2001
From: sushi
Date: Sat, 22 Jul 2023 21:52:43 -0400
Subject: [PATCH 2/2] Improve the type tokenization

---
Reviewer note (text between "---" and the diff is not part of the commit):
amended relative to 214d1b6c. In the new "colon" branch of
highlight_jai_syntax the remembered token is now demoted to .identifier in
last_tokens as well (two added lines: a comment and the assignment).
Previously only the colour was reset through the local copy `prev`, so for
`) -> named: T` is_type_def() saw `.type_keyword :` instead of
`identifier :` and did not mark T as a type. Hunk counts, following hunk
offsets and the diffstat below were adjusted by +2; the post-image blob id
in the "index" line predates the amendment and is stale.
Whitespace caveats from the note on patch 1/2 apply here too.

 src/langs/jai.jai | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/langs/jai.jai b/src/langs/jai.jai
index bbab57b7a..7563c8731 100644
--- a/src/langs/jai.jai
+++ b/src/langs/jai.jai
@@ -19,7 +19,7 @@ highlight_jai_syntax :: (using buffer: *Buffer) {
         // Maybe retroactively highlight a function
         if token.type == .punctuation && token.punctuation == .l_paren {
             // Handle "func :: ("
-            before_prev, prev := last_tokens[0], last_tokens[1];
+            before_prev, prev := last_tokens[2], last_tokens[3];
             if prev.type == .identifier {
                 memset(colors.data + prev.start, xx Code_Color.FUNCTION, prev.len);
             } else if before_prev.type == .identifier && prev.type == .operation && prev.operation == .double_colon {
@@ -27,15 +27,27 @@ highlight_jai_syntax :: (using buffer: *Buffer) {
             }
         } else if token.type == .keyword && token.keyword == .kw_inline {
             // Handle "func :: inline"
-            before_prev, prev := last_tokens[0], last_tokens[1];
+            before_prev, prev := last_tokens[2], last_tokens[3];
             if before_prev.type == .identifier && prev.type == .operation && prev.operation == .double_colon {
                 memset(colors.data + before_prev.start, xx Code_Color.FUNCTION, before_prev.len);
             }
+        } else if token.type == .operation && token.operation == .colon {
+            // Handle ") -> named: s64 {"
+            before_prev, prev := last_tokens[2], last_tokens[3];
+            if before_prev.type == .operation && before_prev.operation == .arrow && prev.type == .type_keyword {
+                memset(colors.data + prev.start, xx COLOR_MAP[Token.Type.identifier], prev.len);
+                // prev is a copy: demote the remembered token too so is_type_def sees "identifier :" next
+                last_tokens[3].type = .identifier;
+            }
+        } else if token.type == .identifier && is_type_def(*tokenizer, token) {
+            token.type = .type_keyword;
         }
 
-        // Remember last 2 tokens
+        // Remember last 4 tokens
         last_tokens[0] = last_tokens[1];
-        last_tokens[1] = token;
+        last_tokens[1] = last_tokens[2];
+        last_tokens[2] = last_tokens[3];
+        last_tokens[3] = token;
 
         color := COLOR_MAP[token.type];
         memset(colors.data + token.start, xx color, token.len);
@@ -59,12 +71,7 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
     char := << t;
 
     if is_alpha(char) || char == #char "_" {
-        if is_type_def(tokenizer) {
-            parse_identifier(tokenizer, *token);
-            token.type = .type_keyword;
-        } else {
-            parse_identifier(tokenizer, *token);
-        }
+        parse_identifier(tokenizer, *token);
     } else if is_digit(char) {
         parse_number(tokenizer, *token);
     } else if char == {
@@ -108,12 +115,17 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
     return token;
 }
 
-is_type_def :: (using tokenizer: *Tokenizer) -> bool #expand {
-    before_prev, prev := last_tokens[0], last_tokens[1];
+is_type_def :: (using tokenizer: *Tokenizer, token: Token) -> bool #expand {
+    // Handle these scenarios:
+    // ": T", ": *T", "-> T", "-> *T", "] *T"
+    // "-> thing: T", "-> thing: *T"
+    // ignore for *thing: things {
+
+    is_for_identifier := (last_tokens[0].type == .keyword && last_tokens[0].keyword == .kw_for) || (last_tokens[1].type == .keyword && last_tokens[1].keyword == .kw_for);
+    if is_for_identifier return false;
+
+    before_prev, prev := last_tokens[2], last_tokens[3];
     if prev.type == .operation && prev.operation == .asterisk {
-        // : *T
-        // -> *T
-        // ] *T
         return (before_prev.type == .operation && before_prev.operation == .colon) ||
                (before_prev.type == .operation && before_prev.operation == .arrow) ||
                (before_prev.type == .punctuation && before_prev.punctuation == .r_bracket);
@@ -509,7 +521,7 @@ Tokenizer :: struct {
     start_t: *u8; // cursor when starting parsing new token
     t: *u8; // cursor
 
-    last_tokens: [2] Token; // to retroactively highlight functions
+    last_tokens: [4] Token; // to retroactively highlight functions
 }
 
 Token :: struct {