Skip to content

Commit

Permalink
Reworked the tokenizer to make it work using regular expressions.
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderSLoburev committed Jan 6, 2024
1 parent f986cd8 commit 50d70c4
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 37 deletions.
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"cSpell.words": [
"paren"
]
}
Binary file modified build/tokenizer.beam
Binary file not shown.
113 changes: 78 additions & 35 deletions common/tokenizer.erl
Original file line number Diff line number Diff line change
Expand Up @@ -16,47 +16,90 @@
% ],


%% Consume a leading numeric literal (digits and '.') from the expression.
%% Returns {RemainingChars, NumberString, ConsumedLength}. A whitespace
%% terminator is consumed and counted in the length; any other terminating
%% character is left at the head of the remainder.
'_read_number_string'([], Acc, Count) ->
    {[], lists:reverse(Acc), Count};

'_read_number_string'([Char | Rest] = Chars, Acc, Count) ->
    IsNumeric = utils:is_digit(Char) orelse Char =:= $.,
    case IsNumeric of
        true ->
            '_read_number_string'(Rest, [Char | Acc], Count + 1);
        false ->
            case utils:is_whitespace(Char) of
                true -> {Rest, lists:reverse(Acc), Count + 1};
                false -> {Chars, lists:reverse(Acc), Count}
            end
    end.
% '_read_number_string'([], Buffer, Length) -> {[], lists:reverse(Buffer), Length};

% '_read_number_string'(Expression, Buffer, Length) ->
% [FirstChar | RestChars] = Expression,
% case utils:is_digit(FirstChar) or (FirstChar =:= $.) of
% true -> '_read_number_string'(RestChars, [FirstChar | Buffer], Length + 1);
% false ->
% case utils:is_whitespace(FirstChar) of
% true -> {RestChars, lists:reverse(Buffer), Length + 1};
% false -> {Expression, lists:reverse(Buffer), Length}
% end
% end.

%% Public entry point: extract a leading number token from Expr,
%% starting with an empty buffer and a zero consumed-length counter.
read_number_string(Expr) -> '_read_number_string'(Expr, [], 0).
% read_number_string(Expression) -> '_read_number_string'(Expression, [], 0).


%% Consume a leading run of operator characters (anything that is neither a
%% digit nor whitespace) from the expression. Returns
%% {RemainingChars, OperatorString, ConsumedLength}. A whitespace terminator
%% is consumed and counted; a digit terminator stays at the head of the
%% remainder.
'_read_operator_string'([], Acc, Count) ->
    {[], lists:reverse(Acc), Count};

'_read_operator_string'([Char | Rest] = Chars, Acc, Count) ->
    case utils:is_digit(Char) of
        true ->
            {Chars, lists:reverse(Acc), Count};
        false ->
            case utils:is_whitespace(Char) of
                true -> {Rest, lists:reverse(Acc), Count + 1};
                false -> '_read_operator_string'(Rest, [Char | Acc], Count + 1)
            end
    end.
% '_read_operator_string'(Expression, Buffer, Length) ->
% [FirstChar | RestChars] = Expression,
% case utils:is_digit(FirstChar) of
% true -> {Expression, lists:reverse(Buffer), Length};
% false ->
% case utils:is_whitespace(FirstChar) of
% true -> {RestChars, lists:reverse(Buffer), Length + 1};
% false -> '_read_operator_string'(RestChars, [FirstChar | Buffer], Length + 1)
% end
% end.

% read_operator_string(Expression) -> '_read_operator_string'(Expression, [], 0).


% '_tokenize'([], Tokens, _Position) -> Tokens;

%% Public entry point: extract a leading operator token from Expr,
%% starting with an empty buffer and a zero consumed-length counter.
read_operator_string(Expr) -> '_read_operator_string'(Expr, [], 0).
% '_tokenize'(Expression, Tokens, Position) ->
% {RestCharsAfterReadingNumber, NumberBuffer, LengthOfNumber} = read_number_string(Expression),
% case LengthOfNumber =/= 0 of
% true -> '_tokenize'(RestCharsAfterReadingNumber, [{NumberBuffer, Position} | Tokens], Position + LengthOfNumber);
% false ->
% {RestCharsAfterReadingOperator, OperatorBuffer, LengthOfOperator} = read_operator_string(Expression),
% '_tokenize'(RestCharsAfterReadingOperator, [{OperatorBuffer, Position} | Tokens], Position + LengthOfOperator)
% end.

% tokenize(Expression) -> lists:reverse('_tokenize'(Expression, [], 1)).

%% match(Expression, TokenSpec) -> {TokenType, TokenString}.
%%
%% TokenSpec = [{TokenType, TokenRegexp}], tried in order. Each regexp is
%% anchored with "^" by the caller, so only a match at the head of
%% Expression counts; the first spec that matches wins. Throws
%% {parse_error, Expression} when no spec matches.
%%
%% NOTE: the clauses of each function are grouped together here — the
%% previous layout interleaved match/2 clauses with '_tokenize'/3 clauses,
%% which is a compile error in Erlang (a function's clauses must be
%% consecutive). The underscore-prefixed `_Expression` was also being read
%% inside the throw; it is renamed to `Expression`.
match(Expression, []) -> throw({parse_error, Expression});

match(Expression, TokenSpec) ->
    [{TokenType, TokenRegexp} | RestTokenSpec] = TokenSpec,
    case re:run(Expression, TokenRegexp, [{capture, first, list}]) of
        {match, [Token]} -> {TokenType, Token};
        nomatch -> match(Expression, RestTokenSpec)
    end.


%% Legacy character-scanner tokenizer loop ('_tokenize'/3), superseded by the
%% regexp-driven '_tokenize'/4. Accumulates {TokenString, Position} pairs in
%% reverse order; the caller is expected to reverse the result.
'_tokenize'([], Tokens, _Position) -> Tokens;

'_tokenize'(Expression, Tokens, Position) ->
    {RestCharsAfterReadingNumber, NumberBuffer, LengthOfNumber} = read_number_string(Expression),
    case LengthOfNumber =/= 0 of
        true ->
            '_tokenize'(RestCharsAfterReadingNumber,
                        [{NumberBuffer, Position} | Tokens],
                        Position + LengthOfNumber);
        false ->
            {RestCharsAfterReadingOperator, OperatorBuffer, LengthOfOperator} = read_operator_string(Expression),
            '_tokenize'(RestCharsAfterReadingOperator,
                        [{OperatorBuffer, Position} | Tokens],
                        Position + LengthOfOperator)
    end.


%% Regexp-driven tokenizer loop. Repeatedly matches the head of the
%% expression against TokenSpec, accumulating {Type, Text, Position}
%% triples; 'spaces' tokens advance the position but are not emitted.
%% Returns the tokens in source order. Re-throws any parse error from
%% match/2 with the 1-based position folded into the message.
'_tokenize'([], Acc, _Pos, _Spec) -> lists:reverse(Acc);

'_tokenize'(Expr, Acc, Pos, Spec) ->
    {Type, Text} =
        try
            match(Expr, Spec)
        catch
            throw:{parse_error, Bad} ->
                throw({parse_error,
                       io_lib:format("Error while parsing an \"~s\" in position ~w", [Bad, Pos])})
        end,
    Len = length(Text),
    Remaining = string:slice(Expr, Len),
    case Type of
        spaces -> '_tokenize'(Remaining, Acc, Pos + Len, Spec);
        _ -> '_tokenize'(Remaining, [{Type, Text, Pos} | Acc], Pos + Len, Spec)
    end.

%% tokenize(Expression) -> [{TokenType, TokenText, Position}].
%%
%% Splits Expression into tokens using the anchored regexps below, tried in
%% order — so 'exponentiation' ("**") must precede 'multiplication' ("*"),
%% and 'integer_division' ("//") must precede 'division' ("/"). Runs of
%% whitespace match 'spaces' and are dropped by '_tokenize'/4. Positions are
%% 1-based character offsets. Throws {parse_error, Message} on input no
%% spec matches.
%%
%% Fixes relative to the previous revision:
%%   * removed the obsolete duplicate tokenize/1 (the pre-regexp version) —
%%     two definitions of tokenize/1 in one module is a compile error;
%%   * number regexp: the garbled "\.\'d+" alternative is corrected to
%%     "\.\d+" so fractions like ".5" tokenize;
%%   * paren regexps: bare "(" / ")" are invalid PCRE patterns (unbalanced
%%     group) and made re:run raise — they are now escaped.
tokenize(Expression) -> '_tokenize'(Expression, [], 1, [
    {spaces, "^\\s+"},
    %% Number: "12", "12.", "12.34" or ".5".
    {number, "^(?:\\d+(?:\\.\\d*)?|\\.\\d+)"},
    {identifier, "^[a-zA-Z_][a-zA-Z0-9_]*"},
    {plus, "^\\+"},
    {minus, "^\\-"},
    {exponentiation, "^\\*\\*"},
    {multiplication, "^\\*"},
    {integer_division, "^\\//"},
    {division, "^\\/"},
    %% Kept as the historical atom 'reminder' (sic — "remainder" intended)
    %% so existing pattern matches on the token type keep working.
    {reminder, "^%"},
    {left_paren, "^\\("},
    {right_paren, "^\\)"}
]).
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
-export([start/0]).


handle_token({Token, Position}, Stack) ->
handle_token({_TokenType, Token, Position}, Stack) ->
io:fwrite("Stack: ~p~n", [Stack]),
case utils:try_parse_number(Token) of
Value when is_number(Value) ->
Expand Down Expand Up @@ -73,7 +73,7 @@ handle_token({Token, Position}, Stack) ->

%% Tokenize Expression and evaluate the token stream as reverse Polish
%% notation, starting from an empty operand stack.
evaluate_rpn(Expression) ->
    Tokens = tokenizer:tokenize(Expression),
    %% Debug trace of the token stream.
    io:fwrite("Tokens: ~p~n", [Tokens]),
    '_evaluate_rpn'(Tokens, []).


Expand Down

0 comments on commit 50d70c4

Please sign in to comment.