From 50d70c4f970143d9fdb6e4790ac669dc5decb529 Mon Sep 17 00:00:00 2001 From: AlexanderSLoburev <121574459+AlexanderSLoburev@users.noreply.github.com> Date: Sat, 6 Jan 2024 13:38:25 +0000 Subject: [PATCH] Reworked the tokenizer to make it work on regular expressions. --- .vscode/settings.json | 5 + build/tokenizer.beam | Bin 1656 -> 1548 bytes common/tokenizer.erl | 113 ++++++++++++------ .../src/reverse_polish_notation_evaluator.erl | 4 +- 4 files changed, 85 insertions(+), 37 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..aa89941 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "cSpell.words": [ + "paren" + ] +} \ No newline at end of file diff --git a/build/tokenizer.beam b/build/tokenizer.beam index ecec711d6665f1b5d22475e4c9712e76dea4eb32..410f0b8d82ecd892eb609bfaa620a9d8c44f6afd 100644 GIT binary patch literal 1548 zcmaKseQXnD7{K4_+ppV)+-b%7b_}aq{3~5$1_Z>+_gt%ZMh& zhlELbwqe5KjM&Tx%$B?)ISO7*QZtvhJcO?&IM5Y4L~8g9F$?qL7}Nl8&P?lO3lg0; z)Ap)`BO#9%!o*B4PXhR85f{iLZ9;4Lo=~hcr+}ED+8R>NTSzL&3~_Dr8^xH@_qx`HivxH8UkZdgJ>|L*AAFk=u6L6#pr4`sp*56YcAT zJAEm4$&5#=G}~_JSa?X@Q5^mK#B0|U&K@88;{KV;rM(VcsqdS=c17zx-ZxS4{m3tc zC8r(lEbbrk4%ZES5?#JEbm&?Co3m;^s$Mgz-e2P_vYk079KPczzGbeaooOO8b4Lmxaj8M7nLO&3a*R}?l^n>(T#^M3@mGYnVcJV-Erq;)4l!v z>+bP&qr;Ug$_L%SC{^|d*AV8nVlCo_xAqFpM86Ef5}kk zy_M(k$Buc6*H-PBd6(@Ah!6U9{_wEutBG9;-uSg}@kGU@;~y+7ZyG%C^?-j>{ob4- zf0W&S@TFse_8e-Gj0oIUX@7#=T9+x!NlK(WA;rQSQp8AUk{*vYs!`heQh@f1_Mi5N z_MN_G@fwB(^8A0|;9gzH##A_TEO+>~YwOOR|1tP*_rgo*q?k zPY)oi67+FMRNvky!}d~t7f$ELr1n&2b|{5=tI`Ev5{|UPREtE7a1@3Td>>1OhT$OdHAW@W78J7h?Vfr@5O; literal 1656 zcmaJ>&ube;6yDj{wRU2w8SShs$99r!S7}0B>~*Ua6E|h-Sfwd(E7}A>FYC25mRFW{ z#qP>#3x+@__!#JEZ^opDLJy{YfiI!ZQz*@)r%)*L7z#b7Z)7WRelC2xr}w^@_r00X z`{au+=M-hAvs77JSxj%vD~j>}V39Q16k+h4h?y4)zu|@5tu+yQNg4-XQ*Q@Jn&e`! 
zE#gGz-85(?Mv!=opc$k?B5wO(6JNP}MU7%Djk^L(=ONqM2vU)B{JPNB{dOYE7dIX~ zDi(D_?5EM7oYf~j7bM<5qA_rWx1vV3ExaIHk4`s5>YexvO>Q=T zx^0meHmtk2#weby;Qh?q(G^}R*#%5(TCvKEV~1;4cP~^ChlM{Esg>~@tuEkhk^e6C zZ=xzMW3DapQhC?4vnoi~(J8NKc7aiDODz^=rp?f*Bp$I=p9-}=C?q=>{|uvBS?5!X z?qs)A^Dm0_IC8kkhTIG2AiJZQ|GMa~{NOd+-k;A~FKu@Z+FihT29p2V{~JAgs^s7W zC6A!TWqC%2{WkL^EXo)5^As8MnO5(=x>ci)J$%(k-m4%?uf5M%<%2w>U0khN;mXm$1#I^Aj~@BVK|l zDeY1_Gnhv6V=|Q)){buTn&kq6(W5~)w@UbuXBLKNpa;)ckwen5B z)+LC`I_x-aANydGa{v<{_n2A{sW0vAwv243Jt{(ftt}efy71F(ERt61dQ_MCIY8Dg z0Oa1|0QoLa`jrc4t&w)W6+k9<1UE8O+}xdDbLKwP}E5wlC;1dN}_ID7n{m^v%M(Z z>`%I6w&TNyj1w42-qB#3tw&p1Q8+u8TJa3TTTp0utr>v%`t}CyqRL;J(G8DNUgfIh@8es5Ef+NolnS_xd87C8D zl1z~?ViSj4B$voLWSYE7-X<=2pL|FzlLazQK31%kwV~Yjwj=kq0rK3n^N$r|bMX4# H6A%3l#nw%R diff --git a/common/tokenizer.erl b/common/tokenizer.erl index 7f68ac7..52cddad 100644 --- a/common/tokenizer.erl +++ b/common/tokenizer.erl @@ -16,47 +16,90 @@ % ], -'_read_number_string'([], Buffer, Length) -> {[], lists:reverse(Buffer), Length}; - -'_read_number_string'(Expression, Buffer, Length) -> - [FirstChar | RestChars] = Expression, - case utils:is_digit(FirstChar) or (FirstChar =:= $.) of - true -> '_read_number_string'(RestChars, [FirstChar | Buffer], Length + 1); - false -> - case utils:is_whitespace(FirstChar) of - true -> {RestChars, lists:reverse(Buffer), Length + 1}; - false -> {Expression, lists:reverse(Buffer), Length} - end - end. +% '_read_number_string'([], Buffer, Length) -> {[], lists:reverse(Buffer), Length}; + +% '_read_number_string'(Expression, Buffer, Length) -> +% [FirstChar | RestChars] = Expression, +% case utils:is_digit(FirstChar) or (FirstChar =:= $.) of +% true -> '_read_number_string'(RestChars, [FirstChar | Buffer], Length + 1); +% false -> +% case utils:is_whitespace(FirstChar) of +% true -> {RestChars, lists:reverse(Buffer), Length + 1}; +% false -> {Expression, lists:reverse(Buffer), Length} +% end +% end. -read_number_string(Expression) -> '_read_number_string'(Expression, [], 0). 
+% read_number_string(Expression) -> '_read_number_string'(Expression, [], 0). -'_read_operator_string'([], Buffer, Length) -> {[], lists:reverse(Buffer), Length}; +% '_read_operator_string'([], Buffer, Length) -> {[], lists:reverse(Buffer), Length}; -'_read_operator_string'(Expression, Buffer, Length) -> - [FirstChar | RestChars] = Expression, - case utils:is_digit(FirstChar) of - true -> {Expression, lists:reverse(Buffer), Length}; - false -> - case utils:is_whitespace(FirstChar) of - true -> {RestChars, lists:reverse(Buffer), Length + 1}; - false -> '_read_operator_string'(RestChars, [FirstChar | Buffer], Length + 1) - end - end. +% '_read_operator_string'(Expression, Buffer, Length) -> +% [FirstChar | RestChars] = Expression, +% case utils:is_digit(FirstChar) of +% true -> {Expression, lists:reverse(Buffer), Length}; +% false -> +% case utils:is_whitespace(FirstChar) of +% true -> {RestChars, lists:reverse(Buffer), Length + 1}; +% false -> '_read_operator_string'(RestChars, [FirstChar | Buffer], Length + 1) +% end +% end. + +% read_operator_string(Expression) -> '_read_operator_string'(Expression, [], 0). + + +% '_tokenize'([], Tokens, _Position) -> Tokens; -read_operator_string(Expression) -> '_read_operator_string'(Expression, [], 0). +% '_tokenize'(Expression, Tokens, Position) -> +% {RestCharsAfterReadingNumber, NumberBuffer, LengthOfNumber} = read_number_string(Expression), +% case LengthOfNumber =/= 0 of +% true -> '_tokenize'(RestCharsAfterReadingNumber, [{NumberBuffer, Position} | Tokens], Position + LengthOfNumber); +% false -> +% {RestCharsAfterReadingOperator, OperatorBuffer, LengthOfOperator} = read_operator_string(Expression), +% '_tokenize'(RestCharsAfterReadingOperator, [{OperatorBuffer, Position} | Tokens], Position + LengthOfOperator) +% end. +% tokenize(Expression) -> lists:reverse('_tokenize'(Expression, [], 1)). 
-'_tokenize'([], Tokens, _Position) -> Tokens; +% TOKEN_SPEC=[{TokenType, TokenRegexp}] +match(Expression, []) -> throw({parse_error, Expression}); -'_tokenize'(Expression, Tokens, Position) -> - {RestCharsAfterReadingNumber, NumberBuffer, LengthOfNumber} = read_number_string(Expression), - case LengthOfNumber =/= 0 of - true -> '_tokenize'(RestCharsAfterReadingNumber, [{NumberBuffer, Position} | Tokens], Position + LengthOfNumber); - false -> - {RestCharsAfterReadingOperator, OperatorBuffer, LengthOfOperator} = read_operator_string(Expression), - '_tokenize'(RestCharsAfterReadingOperator, [{OperatorBuffer, Position} | Tokens], Position + LengthOfOperator) - end. +match(Expression, TokenSpec) -> + [{TokenType, TokenRegexp} | RestTokenSpec] = TokenSpec, + case re:run(Expression, TokenRegexp, [{capture, first, list}]) of + {match, [Token]} -> {TokenType, Token}; + nomatch -> match(Expression, RestTokenSpec) + end. + + +'_tokenize'([], Tokens, _Position, _TokenSpec) -> lists:reverse(Tokens); + +'_tokenize'(Expression, Tokens, Position, TokenSpec) -> + {TokenType, Token} = try + match(Expression, TokenSpec) + catch + throw:{parse_error, Message} -> + throw({parse_error, io_lib:format("Error while parsing an \"~s\" in position ~w", [Message, Position])}) + end, + TokenLength = length(Token), + NextPosition = Position + TokenLength, + RestExpression = lists:nthtail(TokenLength, Expression), + case TokenType =:= spaces of + true -> '_tokenize'(RestExpression, Tokens, NextPosition, TokenSpec); + false -> '_tokenize'(RestExpression, [{TokenType, Token, Position} | Tokens], NextPosition, TokenSpec) + end. -tokenize(Expression) -> lists:reverse('_tokenize'(Expression, [], 1)). 
+tokenize(Expression) -> '_tokenize'(Expression, [], 1, [ + {spaces, "^\\s+"}, + {number, "^(?:\\d+(?:\\.\\d*)?|\\.\\d+)"}, + {identifier, "^[a-zA-Z_][a-zA-Z0-9_]*"}, + {plus, "^\\+"}, + {minus, "^\\-"}, + {exponentiation, "^\\*\\*"}, + {multiplication, "^\\*"}, + {integer_division, "^\\//"}, + {division, "^\\/"}, + {reminder, "^%"}, + {left_paren, "^\\("}, + {right_paren, "^\\)"} +]). \ No newline at end of file diff --git a/reverse_polish_notation_evaluator/src/reverse_polish_notation_evaluator.erl b/reverse_polish_notation_evaluator/src/reverse_polish_notation_evaluator.erl index fa78951..c83567a 100644 --- a/reverse_polish_notation_evaluator/src/reverse_polish_notation_evaluator.erl +++ b/reverse_polish_notation_evaluator/src/reverse_polish_notation_evaluator.erl @@ -2,7 +2,7 @@ -export([start/0]). -handle_token({Token, Position}, Stack) -> +handle_token({_TokenType, Token, Position}, Stack) -> io:fwrite("Stack: ~p~n", [Stack]), case utils:try_parse_number(Token) of Value when is_number(Value) -> @@ -73,7 +73,7 @@ handle_token({Token, Position}, Stack) -> evaluate_rpn(Expression) -> Tokens = tokenizer:tokenize(Expression), - % io:fwrite("Tokens: ~p~n", [Tokens]), + io:fwrite("Tokens: ~p~n", [Tokens]), '_evaluate_rpn'(Tokens, []).