Add syntax highlighting in JS template strings #482

Open
wants to merge 1 commit into base: main
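A summary of the change, as read from the diff: JavaScript template literals (backtick strings) can embed expressions via ${...}. Previously parse_template_string_literal scanned the whole literal as a single string token, so embedded expressions were colored as string text. With this change the tokenizer records each ${...} body as a nested buffer region and returns those regions from tokenize_js, so the editor can highlight each span as JavaScript code. The region is emitted roughly like this (adapted from parse_template_string_literal below; start and end are byte offsets of the interpolation body):

    array_add(*regions, Buffer_Region.{ start = start, end = end, kind = .heredoc, lang = .Js });

To support this, the tokenizer state grows into a Js_Tokenizer that carries the collected regions, and the JS indentation path in buffer.jai switches from the generic C-like helper to a dedicated tokenize_js_for_indentation built on the same tokenizer.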
2 changes: 1 addition & 1 deletion src/buffer.jai
@@ -550,7 +550,7 @@ tokenize_for_indentation :: (buffer: *Buffer) -> [] Indentation_Token /* temp */
case .Cpp; return tokenize_c_like_lang_for_indentation(buffer, get_next_cpp_token);
case .Css; return tokenize_c_like_lang_for_indentation(buffer, get_next_css_token);
case .D; return tokenize_c_like_lang_for_indentation(buffer, get_next_d_token);
case .Js; return tokenize_c_like_lang_for_indentation(buffer, get_next_js_token);
case .Js; return tokenize_js_for_indentation(buffer);
case .Json; return tokenize_c_like_lang_for_indentation(buffer, get_next_json_token);
case .Glsl; return tokenize_c_like_lang_for_indentation(buffer, get_next_glsl_token);
case .Hlsl; return tokenize_c_like_lang_for_indentation(buffer, get_next_hlsl_token);
7 changes: 7 additions & 0 deletions src/langs/common.jai
@@ -122,6 +122,13 @@ is_white_space :: inline (char : u8) -> bool #no_aoc {
return cast,no_check(bool) result;
}

at_string :: (using tokenizer: *Tokenizer, a: string, $case_sensitive := true) -> bool {
if t + a.count > max_t return false;
b := string.{ count = a.count, data = t };
#if case_sensitive return equal(a, b);
else return equal_nocase(a, b);
}

tokenize_c_like_lang_for_indentation :: (buffer: Buffer, $get_next_token: (*Tokenizer) -> $Token) -> [] Indentation_Token /* temp */ {
tokens: [..] Indentation_Token;
tokens.allocator = temp;
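The at_string helper added above compares the next a.count bytes under the cursor against a literal without advancing t (optionally case-insensitively). A minimal usage sketch, mirroring the call site in parse_template_string_literal further down:

    // Peek for the start of a template-string interpolation without consuming it.
    if at_string(tokenizer, "${") {
        t += 2;  // step over the "${", then scan the embedded expression
    }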
145 changes: 117 additions & 28 deletions src/langs/js.jai
@@ -1,5 +1,5 @@
tokenize_js :: (using buffer: *Buffer, start_offset := -1, count := -1) -> [] Buffer_Region {
tokenizer := get_tokenizer(buffer, start_offset, count);
tokenizer := get_js_tokenizer(buffer, start_offset, count);

last_token: Token; // to retroactively highlight functions

@@ -21,15 +21,73 @@ tokenize_js :: (using buffer: *Buffer, start_offset := -1, count := -1) -> [] Bu
memset(tokens.data + token.start, xx token.type, token.len);
}

return .[];
return tokenizer.regions;
}

get_next_js_token :: get_next_token; // export for indent tokenization
tokenize_js_for_indentation :: (buffer: Buffer) -> [] Indentation_Token /* temp */ {
tokens: [..] Indentation_Token;
tokens.allocator = temp;

tokenizer := get_js_tokenizer(buffer);

// Allocate temporary space for tracking one previous token
tokenizer.prev_tokens[0] = New(Token,, temp);

while true {
src := get_next_token(*tokenizer);

token: Indentation_Token = ---;
token.start = src.start;
token.len = src.len;

if src.type == {
case .punctuation;
if src.punctuation == {
case .l_paren; token.type = .open; token.kind = .paren;
case .l_bracket; token.type = .open; token.kind = .bracket;
case .l_brace; token.type = .open; token.kind = .brace;

case .r_paren; token.type = .close; token.kind = .paren;
case .r_bracket; token.type = .close; token.kind = .bracket;
case .r_brace; token.type = .close; token.kind = .brace;

case; continue;
}

case .multiline_comment; token.type = .maybe_multiline;
case .eof; token.type = .eof; // to guarantee we always have indentation tokens
case; token.type = .unimportant;
}

array_add(*tokens, token);

if src.type == .eof break;
}

return tokens;
}

#scope_file

get_next_token :: (using tokenizer: *Tokenizer) -> Token {
eat_white_space(tokenizer);
get_js_tokenizer :: (using buffer: Buffer, start_offset := -1, count := -1) -> Js_Tokenizer {
tokenizer: Js_Tokenizer;

tokenizer.buf = to_string(bytes);
tokenizer.max_t = bytes.data + bytes.count;
tokenizer.t = bytes.data;

if start_offset >= 0 {
start_offset = clamp(start_offset, 0, bytes.count - 1);
count = clamp(count, 0, bytes.count - 1);
tokenizer.t += start_offset;
tokenizer.max_t = tokenizer.t + count;
}

return tokenizer;
}

get_next_token :: (using tokenizer: *Js_Tokenizer) -> Token {
eat_white_space(xx tokenizer);

token: Token;
token.start = cast(s32) (t - buf.data);
@@ -88,7 +146,7 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
return token;
}

parse_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
parse_identifier :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .identifier;

identifier_str := read_utf8_identifier_string(tokenizer);
@@ -100,7 +158,7 @@ parse_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_number :: (using tokenizer: *Tokenizer, token: *Token) {
parse_number :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .number;

start_char := t.*;
@@ -145,19 +203,19 @@ parse_number :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_colon :: (using tokenizer: *Tokenizer, token: *Token) {
parse_colon :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .colon;
t += 1;
}

parse_question :: (using tokenizer: *Tokenizer, token: *Token) {
parse_question :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .question;
t += 1;
}

parse_equal :: (using tokenizer: *Tokenizer, token: *Token) {
parse_equal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .equal;

@@ -169,7 +227,7 @@ parse_equal :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_minus :: (using tokenizer: *Tokenizer, token: *Token) {
parse_minus :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .minus;

@@ -191,7 +249,7 @@ parse_minus :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_plus :: (using tokenizer: *Tokenizer, token: *Token) {
parse_plus :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .plus;

@@ -208,7 +266,7 @@ parse_plus :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_asterisk :: (using tokenizer: *Tokenizer, token: *Token) {
parse_asterisk :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .asterisk;

@@ -222,7 +280,7 @@ parse_asterisk :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_less_than :: (using tokenizer: *Tokenizer, token: *Token) {
parse_less_than :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .less_than;

@@ -239,7 +297,7 @@ parse_less_than :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_greater_than :: (using tokenizer: *Tokenizer, token: *Token) {
parse_greater_than :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .greater_than;

@@ -253,7 +311,7 @@ parse_greater_than :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_bang :: (using tokenizer: *Tokenizer, token: *Token) {
parse_bang :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .bang;

@@ -267,7 +325,7 @@ parse_bang :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_double_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
parse_double_quote_string_literal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .string_literal;

escape_seen := false;
@@ -283,7 +341,7 @@ parse_double_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token
t += 1;
}

parse_single_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
parse_single_quote_string_literal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .string_literal;

escape_seen := false;
@@ -299,23 +357,47 @@ parse_single_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token
t += 1;
}

parse_template_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
parse_template_string_literal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .string_literal;

escape_seen := false;

t += 1;
while t < max_t {
if <<t == #char "`" && !escape_seen break;
escape_seen = !escape_seen && <<t == #char "\\";
if at_string(tokenizer, "${") {
l_braces_found := 0;
t += 2;

t_start := t;

while t < max_t {
if t.* == #char "{" {
l_braces_found += 1;
} else if t.* == #char "}" {
if l_braces_found == 0 break;
l_braces_found -= 1;
}

t += 1;
}

start := cast(s32) (t_start - buf.data);
end := cast(s32) (t - buf.data);

array_add(*regions, Buffer_Region.{ start = start, end = end, kind = .heredoc, lang = .Js });
} else {
if <<t == #char "`" && !escape_seen break;
escape_seen = !escape_seen && <<t == #char "\\";
}

t += 1;
}
if t >= max_t return;

t += 1;
}

parse_slash_or_comment :: (using tokenizer: *Tokenizer, token: *Token) {
parse_slash_or_comment :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .slash;

@@ -343,7 +425,7 @@ parse_slash_or_comment :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_ampersand :: (using tokenizer: *Tokenizer, token: *Token) {
parse_ampersand :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .ampersand;

@@ -360,7 +442,7 @@ parse_ampersand :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_pipe :: (using tokenizer: *Tokenizer, token: *Token) {
parse_pipe :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .pipe;

@@ -377,7 +459,7 @@ parse_pipe :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_percent :: (using tokenizer: *Tokenizer, token: *Token) {
parse_percent :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .percent;

@@ -391,7 +473,7 @@ parse_percent :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_caret :: (using tokenizer: *Tokenizer, token: *Token) {
parse_caret :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .operation;
token.operation = .caret;

@@ -405,7 +487,7 @@ parse_caret :: (using tokenizer: *Tokenizer, token: *Token) {
}
}

parse_private_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
parse_private_identifier :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .identifier;

t += 1;
@@ -414,7 +496,7 @@ parse_private_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
identifier_str := read_utf8_identifier_string(tokenizer);
}

parse_decorator :: (using tokenizer: *Tokenizer, token: *Token) {
parse_decorator :: (using tokenizer: *Js_Tokenizer, token: *Token) {
token.type = .identifier;

t += 1;
@@ -423,6 +505,13 @@ parse_decorator :: (using tokenizer: *Tokenizer, token: *Token) {
identifier_str := read_utf8_identifier_string(tokenizer);
}

Js_Tokenizer :: struct {
#as using base: Tokenizer;

regions: [..] Buffer_Region;
regions.allocator = temp;
}

Token :: struct {
start, len: s32;
type: Token_Type;
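For reference, the interpolation scan in parse_template_string_literal balances nested braces so that object literals or blocks inside ${...} do not end the region early. A standalone sketch of that logic (an illustration, not code from the PR; it returns the offset of the closing brace, or -1 for an unterminated literal):

    find_interpolation_end :: (s: string, start: s64) -> s64 {
        braces := 0;  // depth of nested "{" opened inside the interpolation
        i := start;   // index of the first byte after the "${"
        while i < s.count {
            if s[i] == #char "{" {
                braces += 1;
            } else if s[i] == #char "}" {
                if braces == 0 return i;  // this "}" closes the "${"
                braces -= 1;
            }
            i += 1;
        }
        return -1;  // ran off the end: unterminated template string
    }

For an input like `sum: ${ add({a: 1}, b) }`, the object literal's braces cancel each other out and the scan stops at the final } before the closing backtick, which is the span recorded as a .heredoc region with lang = .Js.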