diff --git a/src/buffer.jai b/src/buffer.jai
index f04fde3d..d0d095f3 100644
--- a/src/buffer.jai
+++ b/src/buffer.jai
@@ -550,7 +550,7 @@ tokenize_for_indentation :: (buffer: *Buffer) -> [] Indentation_Token /* temp */
         case .Cpp;  return tokenize_c_like_lang_for_indentation(buffer, get_next_cpp_token);
         case .Css;  return tokenize_c_like_lang_for_indentation(buffer, get_next_css_token);
         case .D;    return tokenize_c_like_lang_for_indentation(buffer, get_next_d_token);
-        case .Js;   return tokenize_c_like_lang_for_indentation(buffer, get_next_js_token);
+        case .Js;   return tokenize_js_for_indentation(buffer);
         case .Json; return tokenize_c_like_lang_for_indentation(buffer, get_next_json_token);
         case .Glsl; return tokenize_c_like_lang_for_indentation(buffer, get_next_glsl_token);
         case .Hlsl; return tokenize_c_like_lang_for_indentation(buffer, get_next_hlsl_token);
diff --git a/src/langs/common.jai b/src/langs/common.jai
index 0cd3fd78..c84d052d 100644
--- a/src/langs/common.jai
+++ b/src/langs/common.jai
@@ -122,6 +122,13 @@ is_white_space :: inline (char : u8) -> bool #no_aoc {
     return cast,no_check(bool) result;
 }
 
+at_string :: (using tokenizer: *Tokenizer, a: string, $case_sensitive := true) -> bool {
+    if t + a.count > max_t return false;
+    b := string.{ count = a.count, data = t };
+    #if case_sensitive return equal(a, b);
+    else               return equal_nocase(a, b);
+}
+
 tokenize_c_like_lang_for_indentation :: (buffer: Buffer, $get_next_token: (*Tokenizer) -> $Token) -> [] Indentation_Token /* temp */ {
     tokens: [..] Indentation_Token;
     tokens.allocator = temp;
diff --git a/src/langs/js.jai b/src/langs/js.jai
index 971aa2b1..2d323d5a 100644
--- a/src/langs/js.jai
+++ b/src/langs/js.jai
@@ -1,5 +1,5 @@
 tokenize_js :: (using buffer: *Buffer, start_offset := -1, count := -1) -> [] Buffer_Region {
-    tokenizer := get_tokenizer(buffer, start_offset, count);
+    tokenizer := get_js_tokenizer(buffer, start_offset, count);
 
     last_token: Token;  // to retroactively highlight functions
 
@@ -21,15 +21,73 @@ tokenize_js :: (using buffer: *Buffer, start_offset := -1, count := -1) -> [] Bu
         memset(tokens.data + token.start, xx token.type, token.len);
     }
 
-    return .[];
+    return tokenizer.regions;
 }
 
-get_next_js_token :: get_next_token;  // export for indent tokenization
+tokenize_js_for_indentation :: (buffer: Buffer) -> [] Indentation_Token /* temp */ {
+    tokens: [..] Indentation_Token;
+    tokens.allocator = temp;
+
+    tokenizer := get_js_tokenizer(buffer);
+
+    // Allocate temporary space for tracking one previous token
+    tokenizer.prev_tokens[0] = New(Token,, temp);
+
+    while true {
+        src := get_next_token(*tokenizer);
+
+        token: Indentation_Token = ---;
+        token.start = src.start;
+        token.len = src.len;
+
+        if src.type == {
+            case .punctuation;
+                if src.punctuation == {
+                    case .l_paren;   token.type = .open;  token.kind = .paren;
+                    case .l_bracket; token.type = .open;  token.kind = .bracket;
+                    case .l_brace;   token.type = .open;  token.kind = .brace;
+
+                    case .r_paren;   token.type = .close; token.kind = .paren;
+                    case .r_bracket; token.type = .close; token.kind = .bracket;
+                    case .r_brace;   token.type = .close; token.kind = .brace;
+
+                    case; continue;
+                }
+
+            case .multiline_comment; token.type = .maybe_multiline;
+            case .eof;               token.type = .eof;  // to guarantee we always have indentation tokens
+            case;                    token.type = .unimportant;
+        }
+
+        array_add(*tokens, token);
+
+        if src.type == .eof break;
+    }
+
+    return tokens;
+}
 
 #scope_file
 
-get_next_token :: (using tokenizer: *Tokenizer) -> Token {
-    eat_white_space(tokenizer);
+get_js_tokenizer :: (using buffer: Buffer, start_offset := -1, count := -1) -> Js_Tokenizer {
+    tokenizer: Js_Tokenizer;
+
+    tokenizer.buf   = to_string(bytes);
+    tokenizer.max_t = bytes.data + bytes.count;
+    tokenizer.t     = bytes.data;
+
+    if start_offset >= 0 {
+        start_offset = clamp(start_offset, 0, bytes.count - 1);
+        count        = clamp(count,        0, bytes.count - 1);
+        tokenizer.t += start_offset;
+        tokenizer.max_t = tokenizer.t + count;
+    }
+
+    return tokenizer;
+}
+
+get_next_token :: (using tokenizer: *Js_Tokenizer) -> Token {
+    eat_white_space(xx tokenizer);
 
     token: Token;
     token.start = cast(s32) (t - buf.data);
@@ -88,7 +146,7 @@ get_next_token :: (using tokenizer: *Tokenizer) -> Token {
     return token;
 }
 
-parse_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_identifier :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .identifier;
 
     identifier_str := read_utf8_identifier_string(tokenizer);
@@ -100,7 +158,7 @@ parse_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_number :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_number :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .number;
 
     start_char := t.*;
@@ -145,19 +203,19 @@ parse_number :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_colon :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_colon :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .colon;
     t += 1;
 }
 
-parse_question :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_question :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .question;
     t += 1;
 }
 
-parse_equal :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_equal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .equal;
 
@@ -169,7 +227,7 @@ parse_equal :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_minus :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_minus :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .minus;
 
@@ -191,7 +249,7 @@ parse_minus :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_plus :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_plus :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .plus;
 
@@ -208,7 +266,7 @@ parse_plus :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_asterisk :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_asterisk :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .asterisk;
 
@@ -222,7 +280,7 @@ parse_asterisk :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_less_than :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_less_than :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .less_than;
 
@@ -239,7 +297,7 @@ parse_less_than :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_greater_than :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_greater_than :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .greater_than;
 
@@ -253,7 +311,7 @@ parse_greater_than :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_bang :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_bang :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .bang;
 
@@ -267,7 +325,7 @@ parse_bang :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_double_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_double_quote_string_literal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .string_literal;
 
     escape_seen := false;
@@ -283,7 +341,7 @@ parse_double_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token
     t += 1;
 }
 
-parse_single_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_single_quote_string_literal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .string_literal;
 
     escape_seen := false;
@@ -299,15 +357,39 @@ parse_single_quote_string_literal :: (using tokenizer: *Tokenizer, token: *Token
     t += 1;
 }
 
-parse_template_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_template_string_literal :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .string_literal;
 
     escape_seen := false;
 
     t += 1;
     while t < max_t {
-        if <= max_t return;
@@ -315,7 +397,7 @@ parse_template_string_literal :: (using tokenizer: *Tokenizer, token: *Token) {
     t += 1;
 }
 
-parse_slash_or_comment :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_slash_or_comment :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .slash;
 
@@ -343,7 +425,7 @@ parse_slash_or_comment :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_ampersand :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_ampersand :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .ampersand;
 
@@ -360,7 +442,7 @@ parse_ampersand :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_pipe :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_pipe :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .pipe;
 
@@ -377,7 +459,7 @@ parse_pipe :: (using tokenizer: *Tokenizer, token: *Token) {
    }
 }
 
-parse_percent :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_percent :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .percent;
 
@@ -391,7 +473,7 @@ parse_percent :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_caret :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_caret :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .operation;
     token.operation = .caret;
 
@@ -405,7 +487,7 @@ parse_caret :: (using tokenizer: *Tokenizer, token: *Token) {
     }
 }
 
-parse_private_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_private_identifier :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .identifier;
     t += 1;
 
@@ -414,7 +496,7 @@ parse_private_identifier :: (using tokenizer: *Tokenizer, token: *Token) {
     identifier_str := read_utf8_identifier_string(tokenizer);
 }
 
-parse_decorator :: (using tokenizer: *Tokenizer, token: *Token) {
+parse_decorator :: (using tokenizer: *Js_Tokenizer, token: *Token) {
     token.type = .identifier;
     t += 1;
 
@@ -423,6 +505,13 @@ parse_decorator :: (using tokenizer: *Tokenizer, token: *Token) {
     identifier_str := read_utf8_identifier_string(tokenizer);
 }
 
+Js_Tokenizer :: struct {
+    #as using base: Tokenizer;
+
+    regions: [..] Buffer_Region;
+    regions.allocator = temp;
+}
+
 Token :: struct {
     start, len: s32;
     type: Token_Type;
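Note (not part of the patch above): a standalone sketch of how the new at_string helper added to src/langs/common.jai is meant to behave, i.e. peeking at the bytes under the tokenizer cursor without advancing it. Mini_Tokenizer, equal_nocase_ascii, char_to_lower, and the example strings are illustrative stand-ins, not names from the codebase; the real helper works on the shared Tokenizer and uses the codebase's own equal/equal_nocase routines.

#import "Basic";

// Minimal stand-in for the editor's Tokenizer; only the fields at_string touches.
Mini_Tokenizer :: struct {
    buf:   string;  // whole buffer being tokenized
    t:     *u8;     // current cursor
    max_t: *u8;     // one past the last byte
}

// Mirrors the helper from the patch: compare the next a.count bytes against a
// fixed string without moving the cursor.
at_string :: (using tokenizer: *Mini_Tokenizer, a: string, $case_sensitive := true) -> bool {
    if t + a.count > max_t return false;        // not enough bytes left in the buffer
    b := string.{ count = a.count, data = t };  // view over the bytes under the cursor
    #if case_sensitive return a == b;
    else               return equal_nocase_ascii(a, b);
}

// ASCII-only case-insensitive comparison, so the sketch stays self-contained.
equal_nocase_ascii :: (a: string, b: string) -> bool {
    if a.count != b.count return false;
    for 0..a.count-1 {
        if char_to_lower(a[it]) != char_to_lower(b[it]) return false;
    }
    return true;
}

char_to_lower :: inline (c: u8) -> u8 {
    if c >= #char "A" && c <= #char "Z" return c | 0x20;
    return c;
}

main :: () {
    src := "${name} + 1";
    tokenizer: Mini_Tokenizer;
    tokenizer.buf   = src;
    tokenizer.t     = src.data;
    tokenizer.max_t = src.data + src.count;

    print("%\n", at_string(*tokenizer, "${"));                               // true: the cursor sits on "${"
    print("%\n", at_string(*tokenizer, "${NAME}", case_sensitive = false));  // true: case-insensitive match
    print("%\n", at_string(*tokenizer, "`"));                                // false: no backtick at the cursor
    print("%\n", at_string(*tokenizer, "${name} + 1 and more"));             // false: would read past max_t
}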