From 7b9a09754162231c84bde41ee4b2bd355f59790d Mon Sep 17 00:00:00 2001 From: Jones Beach Date: Thu, 18 Apr 2024 15:39:19 -0400 Subject: [PATCH] Initial commit --- .github/workflows/rust.yml | 22 + .gitignore | 3 + Cargo.toml | 13 + LICENSE | 674 ++ LICENSE.LESSER | 165 + README.md | 95 + docs/supported.md | 200 + examples/api.py | 9 + examples/async/a.py | 14 + examples/async/b.py | 13 + examples/async/c.py | 9 + examples/async/d.py | 15 + examples/async/manual_test.py | 32 + examples/async/tasks.py | 25 + examples/builtins.py | 29 + examples/context_manager.py | 21 + examples/exceptions.py | 19 + examples/loop_perf.py | 7 + examples/meta.py | 28 + examples/new_method.py | 42 + examples/other.py | 8 + examples/repl.py | 7 + examples/test.py | 101 + examples/todo/descriptor_protocol.py | 26 + examples/todo/file.py | 3 + examples/todo/loop.py | 8 + examples/todo/member_access.py | 32 + examples/todo/monkey_patching.py | 11 + examples/todo/threads.py | 18 + src/bytecode_vm/compiler/mod.rs | 1277 +++ src/bytecode_vm/compiler/types.rs | 92 + src/bytecode_vm/indices.rs | 81 + src/bytecode_vm/interpreter.rs | 389 + src/bytecode_vm/mod.rs | 12 + src/bytecode_vm/opcode.rs | 110 + src/bytecode_vm/types.rs | 144 + src/bytecode_vm/vm/frame.rs | 50 + src/bytecode_vm/vm/mod.rs | 511 + src/bytecode_vm/vm/types.rs | 112 + src/core/container.rs | 63 + src/core/log.rs | 35 + src/core/mod.rs | 23 + src/core/stack.rs | 68 + src/crosscheck/adapters.rs | 91 + src/crosscheck/mod.rs | 7 + src/crosscheck/test_value.rs | 50 + src/crosscheck/traits.rs | 28 + src/domain/mod.rs | 7 + src/fixtures/call_stack/call_stack.py | 2 + src/fixtures/call_stack/other.py | 5 + src/fixtures/imports/other.py | 21 + src/fixtures/imports/regular_import.py | 4 + src/fixtures/imports/regular_import_b.py | 2 + src/fixtures/imports/relative/main_a.py | 4 + src/fixtures/imports/relative/main_b.py | 4 + src/fixtures/imports/relative/main_c.py | 4 + .../imports/relative/mypackage/mymodule.py | 2 + 
.../relative/mypackage/myothermodule.py | 4 + src/fixtures/imports/selective_import_a.py | 2 + src/fixtures/imports/selective_import_b.py | 3 + src/fixtures/imports/selective_import_c.py | 2 + src/fixtures/imports/selective_import_d.py | 2 + src/fixtures/imports/selective_import_e.py | 4 + src/fixtures/imports/selective_import_f.py | 3 + src/init/builder.rs | 111 + src/init/memphis.rs | 41 + src/init/mod.rs | 7 + src/init/repl.rs | 106 + src/lexer/mod.rs | 2520 +++++ src/lexer/types.rs | 150 + src/lib.rs | 19 + src/llvm_backend/mod.rs | 77 + src/main.rs | 32 + src/parser/mod.rs | 5670 +++++++++++ src/parser/static_analysis.rs | 137 + src/parser/types.rs | 655 ++ src/treewalk/call_stack.rs | 122 + src/treewalk/evaluators.rs | 188 + src/treewalk/execution_context.rs | 71 + src/treewalk/executor.rs | 223 + src/treewalk/interpreter.rs | 8530 +++++++++++++++++ src/treewalk/mod.rs | 22 + src/treewalk/module_loader.rs | 239 + src/treewalk/scope.rs | 153 + src/treewalk/scope_manager.rs | 248 + src/treewalk/state.rs | 222 + src/treewalk/type_registry.rs | 306 + src/treewalk/types/bool.rs | 53 + src/treewalk/types/builtins.rs | 351 + src/treewalk/types/bytearray.rs | 63 + src/treewalk/types/bytes.rs | 59 + src/treewalk/types/cell.rs | 34 + src/treewalk/types/class.rs | 362 + src/treewalk/types/classmethod.rs | 58 + src/treewalk/types/coroutine.rs | 182 + src/treewalk/types/descriptor.rs | 48 + src/treewalk/types/dict.rs | 276 + src/treewalk/types/dict_items.rs | 86 + src/treewalk/types/dict_keys.rs | 67 + src/treewalk/types/dict_values.rs | 67 + src/treewalk/types/exception.rs | 45 + src/treewalk/types/frozenset.rs | 117 + src/treewalk/types/function.rs | 270 + src/treewalk/types/generator.rs | 185 + src/treewalk/types/int.rs | 91 + src/treewalk/types/list.rs | 253 + src/treewalk/types/mapping_proxy.rs | 27 + src/treewalk/types/memoryview.rs | 38 + src/treewalk/types/method.rs | 59 + src/treewalk/types/mod.rs | 89 + src/treewalk/types/module.rs | 74 + 
src/treewalk/types/object.rs | 232 + src/treewalk/types/pausable/frame.rs | 46 + src/treewalk/types/pausable/mod.rs | 7 + .../types/pausable/pausable_context.rs | 100 + src/treewalk/types/pausable/pausable_trait.rs | 290 + src/treewalk/types/property.rs | 58 + src/treewalk/types/range.rs | 185 + src/treewalk/types/result.rs | 713 ++ src/treewalk/types/reversed.rs | 77 + src/treewalk/types/set.rs | 187 + src/treewalk/types/slice.rs | 185 + src/treewalk/types/staticmethod.rs | 47 + src/treewalk/types/str.rs | 93 + src/treewalk/types/super_type.rs | 70 + src/treewalk/types/traits.rs | 103 + src/treewalk/types/tuple.rs | 157 + src/treewalk/types/type.rs | 136 + src/treewalk/types/types.rs | 203 + src/treewalk/types/utils/dunder.rs | 78 + src/treewalk/types/utils/environment_frame.rs | 41 + src/treewalk/types/utils/mod.rs | 7 + src/treewalk/types/utils/resolved_args.rs | 158 + src/treewalk/types/zip.rs | 110 + src/types/cpython.rs | 201 + src/types/errors.rs | 293 + src/types/mod.rs | 3 + tests/checks.rs | 49 + tests/crosscheck_assignment.rs | 50 + tests/crosscheck_control_flow.rs | 39 + tests/crosscheck_expressions.rs | 67 + tests/crosscheck_function_call.rs | 32 + tests/crosscheck_method_call.rs | 25 + tests/integration.rs | 54 + tests/other_backends.rs | 76 + tests/repl.rs | 32 + 146 files changed, 31874 insertions(+) create mode 100644 .github/workflows/rust.yml create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 LICENSE.LESSER create mode 100644 README.md create mode 100644 docs/supported.md create mode 100644 examples/api.py create mode 100644 examples/async/a.py create mode 100644 examples/async/b.py create mode 100644 examples/async/c.py create mode 100644 examples/async/d.py create mode 100644 examples/async/manual_test.py create mode 100644 examples/async/tasks.py create mode 100644 examples/builtins.py create mode 100644 examples/context_manager.py create mode 100644 examples/exceptions.py create mode 
100644 examples/loop_perf.py create mode 100644 examples/meta.py create mode 100644 examples/new_method.py create mode 100644 examples/other.py create mode 100644 examples/repl.py create mode 100644 examples/test.py create mode 100644 examples/todo/descriptor_protocol.py create mode 100644 examples/todo/file.py create mode 100644 examples/todo/loop.py create mode 100644 examples/todo/member_access.py create mode 100644 examples/todo/monkey_patching.py create mode 100644 examples/todo/threads.py create mode 100644 src/bytecode_vm/compiler/mod.rs create mode 100644 src/bytecode_vm/compiler/types.rs create mode 100644 src/bytecode_vm/indices.rs create mode 100644 src/bytecode_vm/interpreter.rs create mode 100644 src/bytecode_vm/mod.rs create mode 100644 src/bytecode_vm/opcode.rs create mode 100644 src/bytecode_vm/types.rs create mode 100644 src/bytecode_vm/vm/frame.rs create mode 100644 src/bytecode_vm/vm/mod.rs create mode 100644 src/bytecode_vm/vm/types.rs create mode 100644 src/core/container.rs create mode 100644 src/core/log.rs create mode 100644 src/core/mod.rs create mode 100644 src/core/stack.rs create mode 100644 src/crosscheck/adapters.rs create mode 100644 src/crosscheck/mod.rs create mode 100644 src/crosscheck/test_value.rs create mode 100644 src/crosscheck/traits.rs create mode 100644 src/domain/mod.rs create mode 100644 src/fixtures/call_stack/call_stack.py create mode 100644 src/fixtures/call_stack/other.py create mode 100644 src/fixtures/imports/other.py create mode 100644 src/fixtures/imports/regular_import.py create mode 100644 src/fixtures/imports/regular_import_b.py create mode 100644 src/fixtures/imports/relative/main_a.py create mode 100644 src/fixtures/imports/relative/main_b.py create mode 100644 src/fixtures/imports/relative/main_c.py create mode 100644 src/fixtures/imports/relative/mypackage/mymodule.py create mode 100644 src/fixtures/imports/relative/mypackage/myothermodule.py create mode 100644 src/fixtures/imports/selective_import_a.py 
create mode 100644 src/fixtures/imports/selective_import_b.py create mode 100644 src/fixtures/imports/selective_import_c.py create mode 100644 src/fixtures/imports/selective_import_d.py create mode 100644 src/fixtures/imports/selective_import_e.py create mode 100644 src/fixtures/imports/selective_import_f.py create mode 100644 src/init/builder.rs create mode 100644 src/init/memphis.rs create mode 100644 src/init/mod.rs create mode 100644 src/init/repl.rs create mode 100644 src/lexer/mod.rs create mode 100644 src/lexer/types.rs create mode 100644 src/lib.rs create mode 100644 src/llvm_backend/mod.rs create mode 100644 src/main.rs create mode 100644 src/parser/mod.rs create mode 100644 src/parser/static_analysis.rs create mode 100644 src/parser/types.rs create mode 100644 src/treewalk/call_stack.rs create mode 100644 src/treewalk/evaluators.rs create mode 100644 src/treewalk/execution_context.rs create mode 100644 src/treewalk/executor.rs create mode 100644 src/treewalk/interpreter.rs create mode 100644 src/treewalk/mod.rs create mode 100644 src/treewalk/module_loader.rs create mode 100644 src/treewalk/scope.rs create mode 100644 src/treewalk/scope_manager.rs create mode 100644 src/treewalk/state.rs create mode 100644 src/treewalk/type_registry.rs create mode 100644 src/treewalk/types/bool.rs create mode 100644 src/treewalk/types/builtins.rs create mode 100644 src/treewalk/types/bytearray.rs create mode 100644 src/treewalk/types/bytes.rs create mode 100644 src/treewalk/types/cell.rs create mode 100644 src/treewalk/types/class.rs create mode 100644 src/treewalk/types/classmethod.rs create mode 100644 src/treewalk/types/coroutine.rs create mode 100644 src/treewalk/types/descriptor.rs create mode 100644 src/treewalk/types/dict.rs create mode 100644 src/treewalk/types/dict_items.rs create mode 100644 src/treewalk/types/dict_keys.rs create mode 100644 src/treewalk/types/dict_values.rs create mode 100644 src/treewalk/types/exception.rs create mode 100644 
src/treewalk/types/frozenset.rs create mode 100644 src/treewalk/types/function.rs create mode 100644 src/treewalk/types/generator.rs create mode 100644 src/treewalk/types/int.rs create mode 100644 src/treewalk/types/list.rs create mode 100644 src/treewalk/types/mapping_proxy.rs create mode 100644 src/treewalk/types/memoryview.rs create mode 100644 src/treewalk/types/method.rs create mode 100644 src/treewalk/types/mod.rs create mode 100644 src/treewalk/types/module.rs create mode 100644 src/treewalk/types/object.rs create mode 100644 src/treewalk/types/pausable/frame.rs create mode 100644 src/treewalk/types/pausable/mod.rs create mode 100644 src/treewalk/types/pausable/pausable_context.rs create mode 100644 src/treewalk/types/pausable/pausable_trait.rs create mode 100644 src/treewalk/types/property.rs create mode 100644 src/treewalk/types/range.rs create mode 100644 src/treewalk/types/result.rs create mode 100644 src/treewalk/types/reversed.rs create mode 100644 src/treewalk/types/set.rs create mode 100644 src/treewalk/types/slice.rs create mode 100644 src/treewalk/types/staticmethod.rs create mode 100644 src/treewalk/types/str.rs create mode 100644 src/treewalk/types/super_type.rs create mode 100644 src/treewalk/types/traits.rs create mode 100644 src/treewalk/types/tuple.rs create mode 100644 src/treewalk/types/type.rs create mode 100644 src/treewalk/types/types.rs create mode 100644 src/treewalk/types/utils/dunder.rs create mode 100644 src/treewalk/types/utils/environment_frame.rs create mode 100644 src/treewalk/types/utils/mod.rs create mode 100644 src/treewalk/types/utils/resolved_args.rs create mode 100644 src/treewalk/types/zip.rs create mode 100644 src/types/cpython.rs create mode 100644 src/types/errors.rs create mode 100644 src/types/mod.rs create mode 100644 tests/checks.rs create mode 100644 tests/crosscheck_assignment.rs create mode 100644 tests/crosscheck_control_flow.rs create mode 100644 tests/crosscheck_expressions.rs create mode 100644 
tests/crosscheck_function_call.rs create mode 100644 tests/crosscheck_method_call.rs create mode 100644 tests/integration.rs create mode 100644 tests/other_backends.rs create mode 100644 tests/repl.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 0000000..f89c46b --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Run tests + run: cargo test --verbose + - name: Run tests C stdlib enabled (no LLVM backend) + run: cargo test --features c_stdlib diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..419c2e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +target +Cargo.lock +__pycache__ diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..6ed4a02 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "memphis" +version = "0.1.0" +edition = "2021" +license = "LGPL-3.0-or-later" + +[dependencies] +pyo3 = { version = "0.20.3", optional = true } +inkwell = { version = "0.4.0", features = [ "llvm17-0" ], optional = true } + +[features] +c_stdlib = ["pyo3"] +llvm_backend = ["inkwell"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. 
+ + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/LICENSE.LESSER b/LICENSE.LESSER new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/LICENSE.LESSER @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. 
+ + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md new file mode 100644 index 0000000..011110d --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# memphis + +A Python interpreter written in Rust. This is intended as a learning exercise which could eventually become a drop-in replacement for `python`, but it is not there yet. + +## Supported Language Features +`memphis` contains at least basic support for all the following features. + +- Integer expressions +- String literals +- Boolean operators +- Comparison operators +- Variable assignment +- `print` builtin (and [some others](docs/supported.md)) +- Error handling +- Control flow (if-elif-else, while, for-in) +- Function definition and function calls +- Class definition, instantiation, and method calls +- Lexical scoping +- Module imports (absolute (same directory, package, and Python standard library) and relative) +- REPL +- Comments (single-line and multi-line) +- Floating point +- Negative numbers +- Stack traces +- Lists (and list comprehension) +- Tuples +- Sets (and set comprehension) +- Dictionaries (and dict comprehension) +- Ranges +- Generator functions +- Slices +- Inheritance +- Object creation and metaclasses +- async/await +- try/except/else/finally/raise +- Args and kwargs +- Closures +- Decorators +- Some modules from the Python standard library written in C + - This is behind the feature flag `c_stdlib` because it uses `pyo3` +- Context managers +- Compound assignment (`+=`) for integers +- f-strings (without escape characters) +- Class variables, class methods, and static methods +- Type hints (in that they are parsed and then ignored) + +### Notable Unsupported Features +- Assignment expressions (`:=`) +- `async with` and `async
for` +- Monkey patching +- Async generators +- Regular expressions +- Garbage collection +- Threading +- Match-case statements + +## Installation +```bash +cargo install --path . +``` +Use `memphis` as if it were `python`/`python3` and provide the path to a Python module. +```bash +memphis examples/test.py +``` +Or launch the REPL. +```bash +> memphis +memphis 0.1.0 REPL (Type 'exit()' to quit) +>>> +``` +## Benchmarking +To compare runtime, we can build in release mode and use the different engines. +```bash +cargo install --path . --all-features +hyperfine "memphis examples/loop_perf.py tw" "memphis examples/loop_perf.py vm" "memphis examples/loop_perf.py llvm" --warmup 5 +``` + +We require debug symbols to produce a flamegraph. +```bash +cargo install flamegraph +cargo build --all-features +sudo flamegraph -v -o tw.svg -- target/debug/memphis examples/loop_perf.py tw +sudo flamegraph -v -o vm.svg -- target/debug/memphis examples/loop_perf.py vm +sudo flamegraph -v -o llvm.svg -- target/debug/memphis examples/loop_perf.py llvm +``` + +## Local Development +```bash +cargo build +cargo test +cargo run examples/test.py +cargo run --features c_stdlib examples/test.py +``` +## License +Free use of this software is granted under the terms of the GNU Lesser General Public License (LGPL). For details see the files `LICENSE` and `LICENSE.LESSER` included with the source distribution. All copyrights are owned by their respective authors. diff --git a/docs/supported.md b/docs/supported.md new file mode 100644 index 0000000..0b6ca60 --- /dev/null +++ b/docs/supported.md @@ -0,0 +1,200 @@ +## Supported Python Features + +***GOTCHA***: "Support" here does not mean bug free. 
+ +### Builtins +|builtin|supported?| +|-|-| +|`abs`|✗| +|`aiter`|✗| +|`all`|✗| +|`anext`|✗| +|`any`|✗| +|`ascii`|✗| +|`bin`|✗| +|`bool`|✔| +|`breakpoint`|✗| +|`bytearray`|✔| +|`bytes`|✔| +|`callable`|✗| +|`chr`|✗| +|`classmethod`|✔| +|`compile`|✗| +|`complex`|✗| +|`delattr`|✗| +|`dict`|✔| +|`dir`|✗| +|`divmod`|✗| +|`enumerate`|✗| +|`eval`|✗| +|`exec`|✗| +|`filter`|✗| +|`float`|✗| +|`format`|✗| +|`frozenset`|✔| +|`getattr`|✔| +|`globals`|✔| +|`hasattr`|✗| +|`hash`|✗| +|`help`|✗| +|`hex`|✗| +|`id`|✗| +|`input`|✗| +|`int`|✔| +|`isinstance`|✔| +|`issubclass`|✔| +|`iter`|✔| +|`len`|✔| +|`list`|✔| +|`locals`|✗| +|`map`|✗| +|`max`|✗| +|`memoryview`|✔| +|`min`|✗| +|`next`|✔| +|`object`|✔| +|`oct`|✗| +|`open`|✗| +|`ord`|✗| +|`pow`|✗| +|`print`|✔| +|`property`|✔| +|`range`|✔| +|`repr`|✗| +|`reversed`|✔| +|`round`|✗| +|`set`|✔| +|`setattr`|✗| +|`slice`|✔| +|`sorted`|✗| +|`staticmethod`|✔| +|`str`|✔| +|`sum`|✗| +|`super`|✔| +|`tuple`|✔| +|`type`|✔| +|`vars`|✗| +|`zip`|✔| +|`__import__`|✗| + +[Python Reference](https://docs.python.org/3/library/functions.html) + +### Keywords +|keyword|supported?| +|-|-| +|`True`|✔| +|`None`|✔| +|`False`|✔| +|`and`|✔| +|`as`|✔| +|`assert`|✔| +|`async`|✔| +|`await`|✔| +|`break`|✔| +|`class`|✔| +|`continue`|✔| +|`def`|✔| +|`del`|✔| +|`elif`|✔| +|`else`|✔| +|`except`|✔| +|`finally`|✔| +|`for`|✔| +|`from`|✔| +|`global`|✔| +|`if`|✔| +|`import`|✔| +|`in`|✔| +|`is`|✔| +|`lambda`|✔| +|`nonlocal`|✔| +|`not`|✔| +|`or`|✔| +|`pass`|✔| +|`raise`|✔| +|`return`|✔| +|`try`|✔| +|`while`|✔| +|`with`|✔| +|`yield`|✔| + +[Python Reference](https://docs.python.org/3/reference/lexical_analysis.html#keywords) + +### Dunder Methods and Attributes +|method|supported?| +|-|-| +Object Creation and Destruction +\_\_new__(cls, [...])|✔ +\_\_init__(self, [...])|✔ +\_\_del__(self) +Representation +\_\_repr__(self) +\_\_str__(self) +\_\_format__(self, format_spec) +Comparison and Equality +\_\_eq__(self, other)|✔ +\_\_ne__(self, other)|✔ +\_\_lt__(self, other) +\_\_le__(self, 
other) +\_\_gt__(self, other) +\_\_ge__(self, other) +Numeric Operators +\_\_add__(self, other) +\_\_sub__(self, other) +\_\_mul__(self, other) +\_\_truediv__(self, other) +\_\_floordiv__(self, other) +\_\_mod__(self, other) +\_\_divmod__(self, other) +\_\_pow__(self, other[, modulo]) +\_\_lshift__(self, other) +\_\_rshift__(self, other) +\_\_and__(self, other) +\_\_or__(self, other) +\_\_xor__(self, other) +Unary Operators and Functions +\_\_neg__(self) +\_\_pos__(self) +\_\_abs__(self) +\_\_invert__(self) +Type Conversion +\_\_int__(self) +\_\_float__(self) +\_\_complex__(self) +\_\_bool__(self) +Container Types +\_\_len__(self) +\_\_getitem__(self, key) +\_\_setitem__(self, key, value) +\_\_delitem__(self, key) +\_\_iter__(self) +\_\_reversed__(self) +\_\_contains__(self, item)|✔ +Attribute Access +\_\_getattr__(self, name) +\_\_getattribute__(self, name) +\_\_setattr__(self, name, value) +\_\_delattr__(self, name) +Descriptors +\_\_get__(self, instance, owner) +\_\_set__(self, instance, value) +\_\_delete__(self, instance) +Callable Objects +\_\_call__(self, [...]) +Context Managers +\_\_enter__(self)|✔ +\_\_exit__(self, exc_type, exc_value, traceback)|✔ +Instance Creation and Destruction (for classes) +\_\_init_subclass__(cls) +\_\_instancecheck__(self, instance) +\_\_subclasscheck__(self, subclass) +Metaclass Methods +\_\_prepare__(cls, name, bases) +\_\_class_getitem__(cls, item) +Customizing Module Import +\_\_path__(self) +\_\_loader__(self) +\_\_package__(self) +\_\_spec__(self) + +This list is a subset of the full spec found at the [Python Reference](https://docs.python.org/3/reference/datamodel.html). diff --git a/examples/api.py b/examples/api.py new file mode 100644 index 0000000..2413cac --- /dev/null +++ b/examples/api.py @@ -0,0 +1,9 @@ +from flask import Flask + +app = Flask(__name__) + +@app.route('/') +def index(): + return 'Web App with Python Flask!' 
+ +app.run(host='0.0.0.0', port=81) diff --git a/examples/async/a.py b/examples/async/a.py new file mode 100644 index 0000000..48551d5 --- /dev/null +++ b/examples/async/a.py @@ -0,0 +1,14 @@ +import asyncio + +from tasks import task1, task2 + +async def main(): + task_1 = task1(5) + task_2 = task2(5) + + print(await task_1) + print(await task_2) + + return "Main Completed" + +print(asyncio.run(main())) diff --git a/examples/async/b.py b/examples/async/b.py new file mode 100644 index 0000000..5f49ea9 --- /dev/null +++ b/examples/async/b.py @@ -0,0 +1,13 @@ +import asyncio + +from tasks import task1, task2 + +async def main(): + task_1 = asyncio.create_task(task1(5)) + task_2 = asyncio.create_task(task2(5)) + + result_1 = await task_1 + print(result_1) + await task_2 + +asyncio.run(main()) diff --git a/examples/async/c.py b/examples/async/c.py new file mode 100644 index 0000000..e36646d --- /dev/null +++ b/examples/async/c.py @@ -0,0 +1,9 @@ +import asyncio + +from tasks import task1, task2 + +async def main(): + asyncio.create_task(task1(4)) + asyncio.create_task(task2(4)) + +asyncio.run(main()) diff --git a/examples/async/d.py b/examples/async/d.py new file mode 100644 index 0000000..0b15fb2 --- /dev/null +++ b/examples/async/d.py @@ -0,0 +1,15 @@ +import asyncio + +from tasks import task1, task2 + +async def main(**kwargs): + task_1 = task1(kwargs['count']) + task_2 = task2(kwargs['count']) + + print(await task_1) + print(await task_2) + + return "Main Completed" + +args = {'count': 5} +print(asyncio.run(main(**args))) diff --git a/examples/async/manual_test.py b/examples/async/manual_test.py new file mode 100644 index 0000000..0110875 --- /dev/null +++ b/examples/async/manual_test.py @@ -0,0 +1,32 @@ +import asyncio + +async def task1(n): + i = 0 + while i < n: + if i == 2: + print("TWO Task 1 - Step", i + 1) + await asyncio.sleep(3) + print("two") + else: + print("Task 1 - Step", i + 1) + await asyncio.sleep(1) + print("one") + i += 1 + return True + +async def 
task2(n): + i = 0 + while i < n: + print("Task 2 - Step", i + 1) + await asyncio.sleep(1) + i += 1 + return True + +async def main(): + task_1 = asyncio.create_task(task1(5)) + task_2 = asyncio.create_task(task2(5)) + + await task_1 + await task_2 + +asyncio.run(main()) diff --git a/examples/async/tasks.py b/examples/async/tasks.py new file mode 100644 index 0000000..5f4d82e --- /dev/null +++ b/examples/async/tasks.py @@ -0,0 +1,25 @@ +import asyncio + +async def task1(n): + i = 0 + while i < n: + if i == 2: + print("Iteration 2: Task 1 - Step", i + 1) + await asyncio.sleep(0.03) + print("END Iteration 2: Task 1") + else: + print("Task 1 - Step", i + 1) + await asyncio.sleep(0.01) + print("End Task 1") + i += 1 + + return "Task 1 Completed" + +async def task2(n): + i = 0 + while i < n: + print("Task 2 - Step", i + 1) + await asyncio.sleep(0.01) + i += 1 + + return "Task 2 Completed" diff --git a/examples/builtins.py b/examples/builtins.py new file mode 100644 index 0000000..b887bf1 --- /dev/null +++ b/examples/builtins.py @@ -0,0 +1,29 @@ +# This test is to verify that we treat the callable classes versus builtin functions correctly. 
+print(bool) +print(bytearray) +print(classmethod) +print(dict) +print(frozenset) +print(getattr) +print(globals) +print(int) +print(isinstance) +print(issubclass) +print(iter) +print(len) +print(list) +print(memoryview) +print(next) +print(object) +print(print) +print(property) +print(range) +print(reversed) +print(set) +print(slice) +print(staticmethod) +print(super) +print(str) +print(tuple) +print(type) +print(zip) diff --git a/examples/context_manager.py b/examples/context_manager.py new file mode 100644 index 0000000..0e1a0f4 --- /dev/null +++ b/examples/context_manager.py @@ -0,0 +1,21 @@ +class MyContextManager: + def __init__(self): + self.a = 0 + + def __enter__(self): + self.a = self.a + 1 + print("Enter the context") + return self + + def call(self): + self.a = self.a + 1 + print("In the context") + + def __exit__(self, exc_type, exc_value, traceback): + self.a = self.a + 1 + print("Exit the context") + +with MyContextManager() as cm: + cm.call() + +print("FINAL VALUE", cm.a) diff --git a/examples/exceptions.py b/examples/exceptions.py new file mode 100644 index 0000000..14a919b --- /dev/null +++ b/examples/exceptions.py @@ -0,0 +1,19 @@ +try: + # Attempting to open a file that does not exist, which should raise an IOError + #file = open('examples/test.py', 'r') + + # If the file existed, the following line would try to divide by zero, which would raise a ZeroDivisionError + result = 10 / 0 +except IOError: + print("An IOError occurred. File not found!") +except ZeroDivisionError: + print("A ZeroDivisionError occurred. Cannot divide by zero!") +except Exception as e: + # This will catch any other exceptions that are not caught by the specific except blocks above. + print(f"An unexpected error occurred: {e}") +else: + # This block will execute only if no exceptions are raised in the try block. + print("Operation successful.") +finally: + # This block will execute no matter what, even if exceptions are raised. 
+ print("The 'try except' block is finished.") diff --git a/examples/loop_perf.py b/examples/loop_perf.py new file mode 100644 index 0000000..d02009f --- /dev/null +++ b/examples/loop_perf.py @@ -0,0 +1,7 @@ +n = 10000 +i = 0 + +while i < n: + i = i + 1 + +print("Done") diff --git a/examples/meta.py b/examples/meta.py new file mode 100644 index 0000000..984394e --- /dev/null +++ b/examples/meta.py @@ -0,0 +1,28 @@ +# This test is supported, but I'm not testing it as part of integration tests +# because there are some internal properties as part of the namespace that +# we do not yet support. +# tldr the test works but the stdout doesn't match exactly +class InterfaceMeta(type): + def __new__(mcls, name, bases, namespace, **kwargs): + print('------------') + print('mcls', mcls) + print('name', name) + print('bases', bases) + print('namespace', namespace) + new_cls = super().__new__(mcls, name, bases, namespace) + return new_cls + + def run(cls): + return 5 + +class BaseInterface(metaclass=InterfaceMeta): + pass + +class ConcreteImplementation(BaseInterface): + pass + +class IncompleteImplementation(BaseInterface): + pass + +a = ConcreteImplementation.run() +print(a) diff --git a/examples/new_method.py b/examples/new_method.py new file mode 100644 index 0000000..4392128 --- /dev/null +++ b/examples/new_method.py @@ -0,0 +1,42 @@ +# This class shares a single instance under the hood and the __init__ method will only run +# for the first time. +class SingletonA: + _instance = None + _initialized = False + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self, data): + if not self._initialized: + self.data = data + self._initialized = True + +# This class shares a single instance under the hood, but the __init__ method will run for +# each instance. 
+class SingletonB: + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self, data): + self.data = data + +singleton1 = SingletonA("First") +singleton2 = SingletonA("Second") + +print(singleton1.data) # Output: First +print(singleton2.data) # Output: First +print(singleton1 is singleton2) # Output: True + +singleton1 = SingletonB("First") +singleton2 = SingletonB("Second") + +print(singleton1.data) # Output: Second +print(singleton2.data) # Output: Second +print(singleton1 is singleton2) # Output: True diff --git a/examples/other.py b/examples/other.py new file mode 100644 index 0000000..c55bfe6 --- /dev/null +++ b/examples/other.py @@ -0,0 +1,8 @@ +print("being imported") +def something(): + print("something") + third() + +def third(): + #fourth() + print("third") diff --git a/examples/repl.py b/examples/repl.py new file mode 100644 index 0000000..d545d9e --- /dev/null +++ b/examples/repl.py @@ -0,0 +1,7 @@ +x = 4 +def a(): + print("A") + print("B") + +a() +exit() diff --git a/examples/test.py b/examples/test.py new file mode 100644 index 0000000..40dd207 --- /dev/null +++ b/examples/test.py @@ -0,0 +1,101 @@ +print("Hello, World!") + +def add(a, b): + return a + b + +print(add(add(4, 5), add(4*4, 5+5))) + +x = False +print(4 >= 4) +print(x or True) +print(True and x) + +y = 6 +if y > 5: + print("y is greater than 5") + +z = 0 +while z <= 10: + print(z) + z = z + 1 + +class Person: + def __init__(self, name): + self.name = name + self.age = 0 + + def greet(self): + print("Hello, my name is:", self.name) + +p = Person("John") +p.greet() + +import other +# import it a second time to visual confirm module caching is working +# TODO find a better way to test this +import other +other.something() + +# Things to test on iterables: +# 1) literal +# 2) builtin +# 3) builtin from different type +# 4) index access +# 5) for-in loop +# 6) type builtin +# 7) 
equality +# 8) list/set comprehension +print([1,2.1]) +print(list([2,3,4,5])) +a = [1,2,3] +print(a) +print(a[0]) +for i in a: + print(i) +print(list(range(5,10))) + +# Sets +b = {1,2} +print(b) +b = set({2,3}) +for i in b: + print(i) + +# Tuples +c = (1, 2) +print(c) +for i in c: + print(i) +print(c[0]) +print(tuple(range(4))) + +# Ranges +d = range(5) +print(d) +e = range(4, 10) +print(e) +f = range(2,12,2) +print(f) + +try: + print(4/0) +except: + print("Caught an error") +finally: + print("After the error") + +def test_kwargs(**kwargs): + print(kwargs['a']) + print(kwargs['b']) + +test_kwargs(a=1, b=2) +test_kwargs(**{'a': 1, 'b': 2}) +args = {'a': 1, 'b': 2} +test_kwargs(**args) + +for k, v in args.items(): + print(k, v) + +print(slice(2)) +print(slice(2,3)) +print(slice(2,3,4)) diff --git a/examples/todo/descriptor_protocol.py b/examples/todo/descriptor_protocol.py new file mode 100644 index 0000000..99254ab --- /dev/null +++ b/examples/todo/descriptor_protocol.py @@ -0,0 +1,26 @@ +class MyDescriptor: + def __get__(self, instance, owner): + return 4 * instance.val + + #def __set__(self, instance, value): + # ... + + #def __delete__(self, instance): + # ... + +class MyClass: + attribute = MyDescriptor() + + def __init__(self): + self.val = 11 + +a = MyClass() +print(a.attribute) + +a.attribute = 33 +print(a.attribute) + +del a.attribute +print(a.attribute) +del a.attribute +print(a.attribute) diff --git a/examples/todo/file.py b/examples/todo/file.py new file mode 100644 index 0000000..c8c3c1e --- /dev/null +++ b/examples/todo/file.py @@ -0,0 +1,3 @@ +# This file is also not tested. I may support this in the future. +with open("examples/async/tasks.py", "r") as f: + print(f.read()) diff --git a/examples/todo/loop.py b/examples/todo/loop.py new file mode 100644 index 0000000..f944764 --- /dev/null +++ b/examples/todo/loop.py @@ -0,0 +1,8 @@ +# Another unused test! +# This does not do integer division or the stack trace line numbers correctly. 
+a = 4 +while a > -5: + print(10 / a) + a = a - 1 + +print("Done!") diff --git a/examples/todo/member_access.py b/examples/todo/member_access.py new file mode 100644 index 0000000..3f139bb --- /dev/null +++ b/examples/todo/member_access.py @@ -0,0 +1,32 @@ +def print_err(a): + try: + print(a()) + except AttributeError as e: + print(type(e)) + except Exception as e: + print(e) + +class Foo: + class_attr = 6 + + def __init__(self): + self.attr = 4 + + def func(self): + return self.attr + +foo = Foo() +print(foo.attr) +print(foo.func) +print_err(lambda: Foo.attr) +print(Foo.func) +print(foo.class_attr) +print(Foo.class_attr) + +#print(type([].__doc__)) +print([].append) +#print(type(list.__doc__)) +print(list.append) + +print(type(list.__dict__)) +print(type(type.__dict__)) diff --git a/examples/todo/monkey_patching.py b/examples/todo/monkey_patching.py new file mode 100644 index 0000000..5127ed4 --- /dev/null +++ b/examples/todo/monkey_patching.py @@ -0,0 +1,11 @@ +class MyClass: + pass + +def my_method(self): + return "Hello from the method" + +# Binding a method to an instance at runtime +instance = MyClass() +import types +instance.my_method = types.MethodType(my_method, instance) +print(instance.my_method()) # Now 'my_method' is bound to 'instance' diff --git a/examples/todo/threads.py b/examples/todo/threads.py new file mode 100644 index 0000000..b1ff22f --- /dev/null +++ b/examples/todo/threads.py @@ -0,0 +1,18 @@ +import threading +import time + +def print_numbers(thread_name, count): + for i in range(1, count + 1): + print(f"{thread_name} prints {i}") + time.sleep(1) + +thread1 = threading.Thread(target=print_numbers, args=("Thread-1", 5,)) +thread2 = threading.Thread(target=print_numbers, args=("Thread-2", 5,)) + +thread1.start() +thread2.start() + +thread1.join() +thread2.join() + +print("Both threads have finished execution.") diff --git a/src/bytecode_vm/compiler/mod.rs b/src/bytecode_vm/compiler/mod.rs new file mode 100644 index 0000000..862c810 --- 
/dev/null +++ b/src/bytecode_vm/compiler/mod.rs @@ -0,0 +1,1277 @@ +use std::collections::HashMap; + +pub mod types; + +use crate::{ + bytecode_vm::{types::CompilerError, Opcode}, + core::Stack, + domain::Context, + parser::{ + types::{ + BinOp, Block, ConditionalBlock, Expr, ParsedArgDefinitions, ParsedArguments, Statement, + UnaryOp, + }, + Parser, + }, + types::errors::MemphisError, +}; + +use self::types::{Bytecode, BytecodeNameMap, CodeObject, CompiledProgram, Constant}; + +use super::indices::{BytecodeIndex, ConstantIndex, Index}; + +pub struct Compiler { + /// Variables defined in global scope, this maps their name to an index for use by the VM. + /// Variables defined in the local scope will be mapped inside of a [`CodeObject`]. + name_map: BytecodeNameMap, + + /// Constants discovered during compilation. These will be compiled into the + /// [`CompiledProgram`] which is handed off to the VM. + constant_pool: Vec, + + /// Keep a reference to the code object being constructed so we can associate things with it, + /// (i.e. variable names). 
+ code_stack: Vec, + + context_stack: Stack, +} + +impl Compiler { + pub fn new() -> Self { + Self { + name_map: HashMap::new(), + constant_pool: vec![], + code_stack: vec![], + context_stack: Stack::with_initial(Context::Global), + } + } + + pub fn compile(&mut self, parser: &mut Parser) -> Result { + let parsed_program = parser.parse().map_err(MemphisError::Parser)?; + + let mut bytecode = vec![]; + for stmt in parsed_program.iter() { + let opcodes = self.compile_stmt(stmt).map_err(MemphisError::Compiler)?; + bytecode.extend(opcodes); + } + bytecode.push(Opcode::Halt); + + let code = CodeObject { + name: "__main__".into(), + bytecode, + arg_count: 0, + varnames: vec![], + }; + + Ok(CompiledProgram::new( + code, + self.constant_pool.clone(), + self.name_map.clone(), + )) + } + + fn compile_block(&mut self, block: &Block) -> Result { + let mut opcodes = vec![]; + for stmt in block.statements.iter() { + opcodes.extend(self.compile_stmt(stmt)?); + } + Ok(opcodes) + } + + fn compile_return(&mut self, expr: &[Expr]) -> Result { + if expr.len() > 1 { + unimplemented!("Multiple return values not yet supported in the bytecode VM.") + } + + let mut opcodes = vec![]; + opcodes.extend(self.compile_expr(&expr[0])?); + opcodes.push(Opcode::ReturnValue); + Ok(opcodes) + } + + fn compile_store(&mut self, name: &str) -> Opcode { + let index = self.get_or_set_var_index(name); + match self.read_context() { + Context::Global => Opcode::StoreGlobal(index), + Context::Local => Opcode::StoreFast(index), + } + } + + fn compile_assignment(&mut self, left: &Expr, right: &Expr) -> Result { + let mut opcodes = vec![]; + + match left { + Expr::Variable(name) => { + opcodes.extend(self.compile_expr(right)?); + let opcode = self.compile_store(name); + opcodes.push(opcode); + Ok(opcodes) + } + Expr::MemberAccess { object, field } => { + // Push the object onto the stack + opcodes.extend(self.compile_expr(object)?); + // Push the value to be assigned onto the stack + 
opcodes.extend(self.compile_expr(right)?); + let attr_index = + self.get_or_set_constant_index(Constant::String(field.to_string())); + // TODO this should probably use the name map instead of the constant index + opcodes.push(Opcode::SetAttr(attr_index)); + Ok(opcodes) + } + Expr::IndexAccess { .. } => { + unimplemented!("Index access assignment not yet supported in bytecode VM."); + } + _ => Err(CompilerError::SyntaxError( + "cannot assign to that expression type here.".into(), + )), + } + } + + fn compile_while_loop( + &mut self, + condition: &Expr, + body: &Block, + ) -> Result { + let mut opcodes = vec![]; + let condition_start = opcodes.len(); + opcodes.extend(self.compile_expr(condition)?); + + // Temporary offset, we will change this once we know the length of the loop body + let jump_if_false_placeholder = opcodes.len(); + opcodes.push(Opcode::Placeholder); + + opcodes.extend(self.compile_block(body)?); + + // Unconditional jump back to the start of the condition + // We must mark these as isize because we are doing subtraction with potential overflow + let jump_back_offset = condition_start as isize - opcodes.len() as isize - 1; + opcodes.push(Opcode::Jump(jump_back_offset)); + + // Update the JUMP_IF_FALSE offset now that we know the length of the loop body + let jump_if_false_offset = opcodes.len() as isize - jump_if_false_placeholder as isize - 1; + opcodes[jump_if_false_placeholder] = Opcode::JumpIfFalse(jump_if_false_offset); + + Ok(opcodes) + } + + fn compile_if_else( + &mut self, + if_part: &ConditionalBlock, + elif_parts: &[ConditionalBlock], + else_part: &Option, + ) -> Result { + if !elif_parts.is_empty() { + unreachable!("elif not yet supported in the bytecode VM.") + } + + let mut opcodes = vec![]; + opcodes.extend(self.compile_expr(&if_part.condition)?); + + // Temporary offset, we will change this once we know the length of the if condition body + let jump_if_false_placeholder = opcodes.len(); + opcodes.push(Opcode::Placeholder); + + 
opcodes.extend(self.compile_block(&if_part.block)?); + if let Some(else_part) = else_part { + let jump_else_placeholder = opcodes.len(); + opcodes.push(Opcode::Placeholder); + + let jump_if_false_offset = + opcodes.len() as isize - jump_if_false_placeholder as isize - 1; + opcodes[jump_if_false_placeholder] = Opcode::JumpIfFalse(jump_if_false_offset); + + opcodes.extend(self.compile_block(else_part)?); + let jump_else_offset = opcodes.len() as isize - jump_else_placeholder as isize - 1; + opcodes[jump_else_placeholder] = Opcode::Jump(jump_else_offset); + } else { + let jump_if_false_offset = + opcodes.len() as isize - jump_if_false_placeholder as isize - 1; + opcodes[jump_if_false_placeholder] = Opcode::JumpIfFalse(jump_if_false_offset); + } + + Ok(opcodes) + } + + fn compile_function_definition( + &mut self, + name: &str, + args: &ParsedArgDefinitions, + body: &Block, + decorators: &[Expr], + is_async: &bool, + ) -> Result { + if !decorators.is_empty() || *is_async { + unimplemented!( + "Decorators and async functions are not yet supported in the bytecode VM." 
+ ) + } + + self.context_stack.push(Context::Local); + + let mut varnames = vec![]; + for param in args.args.iter() { + varnames.push(param.arg.clone()); + } + + let code_object = CodeObject { + name: name.to_string(), + bytecode: vec![], + arg_count: args.args.len(), + varnames, + }; + + self.code_stack.push(code_object); + let bytecode = self.compile_block(body)?; + + let mut code = self.code_stack.pop().unwrap(); + self.context_stack.pop(); + + code.bytecode = bytecode; + let code_index = self.get_or_set_constant_index(Constant::Code(code)); + let name_index = self.get_or_set_constant_index(Constant::String(name.to_string())); + + Ok(vec![ + Opcode::LoadConst(code_index), + Opcode::LoadConst(name_index), + Opcode::MakeFunction, + self.compile_store(name), + ]) + } + + fn compile_class_definition( + &mut self, + name: &str, + parents: &[Expr], + metaclass: &Option, + body: &Block, + ) -> Result { + if !parents.is_empty() { + unimplemented!("Inheritance not yet supported in the bytecode VM.") + } + if metaclass.is_some() { + unimplemented!("Metaclasses are not yet supported in the bytecode VM.") + } + + let code_object = CodeObject { + name: format!("", name), + bytecode: vec![], + arg_count: 0, + varnames: vec![], + }; + + self.context_stack.push(Context::Local); + self.code_stack.push(code_object); + + let class_body = self.compile_block(body)?; + + let mut code = self.code_stack.pop().unwrap(); + self.context_stack.pop(); + + code.bytecode = class_body; + code.bytecode.push(Opcode::EndClass); + + let mut bytecode = vec![Opcode::LoadBuildClass]; + let code_index = self.get_or_set_constant_index(Constant::Code(code)); + bytecode.push(Opcode::LoadConst(code_index)); + let name_index = self.get_or_set_constant_index(Constant::String(name.to_string())); + bytecode.push(Opcode::LoadConst(name_index)); + + // the 2 here refers to the name of the class and the class body + // once we support base classes, it will become 3 + bytecode.push(Opcode::PopAndCall(2)); + + let 
_ = self.get_or_set_var_index(name); + + Ok(bytecode) + } + + fn compile_string_literal(&mut self, value: &str) -> Result { + let index = self.get_or_set_constant_index(Constant::String(value.to_string())); + Ok(vec![Opcode::LoadConst(index)]) + } + + fn compile_variable(&mut self, name: &str) -> Result { + if let Some(index) = self.get_var_index(name) { + let opcode = match self.read_context() { + Context::Local => Opcode::LoadFast(index), + Context::Global => Opcode::LoadGlobal(index), + }; + Ok(vec![opcode]) + } else { + Err(CompilerError::NameError(format!( + "name '{}' is not defined", + name + ))) + } + } + + fn compile_unary_operation( + &mut self, + op: &UnaryOp, + right: &Expr, + ) -> Result { + let mut opcodes = Vec::new(); + opcodes.extend(self.compile_expr(right)?); + let opcode = match op { + UnaryOp::Minus => Some(Opcode::UnaryNegative), + // this acts as a no-op. can be overridden with __pos__ for custom classes + UnaryOp::Plus => None, + UnaryOp::Not => Some(Opcode::UnaryNot), + UnaryOp::BitwiseNot => Some(Opcode::UnaryInvert), + _ => unimplemented!( + "{}", + format!( + "Binary operation '{:?}' not yet supported in the bytecode VM.", + op + ) + ), + }; + if let Some(opcode) = opcode { + opcodes.push(opcode); + } + Ok(opcodes) + } + + fn compile_binary_operation( + &mut self, + left: &Expr, + op: &BinOp, + right: &Expr, + ) -> Result { + let mut opcodes = Vec::new(); + opcodes.extend(self.compile_expr(left)?); + opcodes.extend(self.compile_expr(right)?); + let opcode = match op { + BinOp::Add => Opcode::Iadd, + BinOp::Sub => Opcode::Isub, + BinOp::Mul => Opcode::Imul, + BinOp::Div => Opcode::Idiv, + BinOp::LessThan => Opcode::LessThan, + BinOp::GreaterThan => Opcode::GreaterThan, + _ => unimplemented!( + "{}", + format!( + "Binary operation '{:?}' not yet supported in the bytecode VM.", + op + ) + ), + }; + opcodes.push(opcode); + Ok(opcodes) + } + + fn compile_function_call( + &mut self, + name: &str, + args: &ParsedArguments, + callee: &Option>, + 
) -> Result { + if callee.is_some() { + unimplemented!("Callees for function calls not yet supported in the bytecode VM.") + } + + if name == "print" { + if args.args[0].as_string().is_none() { + unimplemented!("Non-string args not yet supported for print in the bytecode VM.") + } + let index = self.get_or_set_constant_index(Constant::String( + args.args[0].as_string().unwrap().to_owned(), + )); + Ok(vec![Opcode::PrintConst(index)]) + } else { + let index = self.get_var_index(name); + if index.is_none() { + unimplemented!( + "{}", + format!("Function '{}' not yet supported in the bytecode VM.", name) + ) + } + let index = index.unwrap(); + let mut opcodes = vec![]; + // We push the args onto the stack in reverse call order so that we will pop + // them off in call order. + for arg in args.args.iter().rev() { + opcodes.extend(self.compile_expr(arg)?); + } + opcodes.push(Opcode::Call(index)); + Ok(opcodes) + } + } + + fn compile_method_call( + &mut self, + object: &Expr, + name: &str, + _args: &ParsedArguments, + ) -> Result { + let mut bytecode = vec![]; + bytecode.extend(self.compile_expr(object)?); + let attr_index = self.get_or_set_constant_index(Constant::String(name.to_string())); + // TODO this should probably use the name map instead of constant index + bytecode.push(Opcode::LoadAttr(attr_index)); + bytecode.push(Opcode::CallMethod(0)); + Ok(bytecode) + } + + fn compile_expr(&mut self, expr: &Expr) -> Result { + match expr { + Expr::NoOp => Ok(vec![]), + Expr::None => { + let index = self.get_or_set_constant_index(Constant::None); + Ok(vec![Opcode::LoadConst(index)]) + } + Expr::Boolean(value) => { + let index = self.get_or_set_constant_index(Constant::Boolean(*value)); + Ok(vec![Opcode::LoadConst(index)]) + } + Expr::Integer(value) => Ok(vec![Opcode::Push(*value)]), + Expr::StringLiteral(value) => self.compile_string_literal(value), + Expr::Variable(name) => self.compile_variable(name), + Expr::UnaryOperation { op, right } => self.compile_unary_operation(op, 
right), + Expr::BinaryOperation { left, op, right } => { + self.compile_binary_operation(left, op, right) + } + Expr::FunctionCall { name, args, callee } => { + self.compile_function_call(name, args, callee) + } + Expr::MethodCall { object, name, args } => self.compile_method_call(object, name, args), + _ => unimplemented!("Expression type {:?} not implemented for bytecode VM", expr), + } + } + + fn compile_stmt(&mut self, stmt: &Statement) -> Result { + match stmt { + Statement::Pass => Ok(vec![]), + Statement::Expression(expr) => self.compile_expr(expr), + Statement::Return(expr) => self.compile_return(expr), + Statement::Assignment { left, right } => self.compile_assignment(left, right), + Statement::WhileLoop { condition, body } => self.compile_while_loop(condition, body), + Statement::IfElse { + if_part, + elif_parts, + else_part, + } => self.compile_if_else(if_part, elif_parts, else_part), + Statement::FunctionDef { + name, + args, + body, + decorators, + is_async, + } => self.compile_function_definition(name, args, body, decorators, is_async), + Statement::ClassDef { + name, + parents, + metaclass, + body, + } => self.compile_class_definition(name, parents, metaclass, body), + _ => unimplemented!("Statement type {:?} not implemented for bytecode VM", stmt), + } + } + + fn get_or_set_var_index(&mut self, name: &str) -> BytecodeIndex { + match self.read_context() { + Context::Global => { + if let Some(index) = self.name_map.get(name) { + *index + } else { + let next_index = Index::new(self.name_map.len()); + self.name_map.insert(name.into(), next_index); + next_index + } + } + Context::Local => { + if let Some(code) = self.code_stack.last_mut() { + let next_index = Index::new(code.varnames.len()); + code.varnames.push(name.to_string()); + next_index + } else { + panic!("Not in local scope"); + } + } + } + } + + pub fn get_var_index(&self, name: &str) -> Option { + match self.read_context() { + Context::Global => self.name_map.get(name).copied(), + 
Context::Local => { + if let Some(code) = self.code_stack.last() { + if let Some(index) = find_index(&code.varnames, &name.to_string()) { + return Some(Index::new(index)); + } + } + + None + } + } + } + + fn get_or_set_constant_index(&mut self, value: Constant) -> ConstantIndex { + if let Some(index) = find_index(&self.constant_pool, &value) { + Index::new(index) + } else { + let next_index = self.constant_pool.len(); + self.constant_pool.push(value); + Index::new(next_index) + } + } + + /// This assumes we always have a context stack. + fn read_context(&self) -> Context { + self.context_stack.top().expect("failed to find context") + } +} + +fn find_index(vec: &[T], query: &T) -> Option { + vec.iter().enumerate().find_map( + |(index, value)| { + if value == query { + Some(index) + } else { + None + } + }, + ) +} + +#[cfg(test)] +mod bytecode_tests { + use super::*; + + use crate::parser::types::ParsedArguments; + + fn init_compiler() -> Compiler { + Compiler::new() + } + + #[test] + fn expression() { + let mut compiler = init_compiler(); + let expr = Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::Mul, + right: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Add, + right: Box::new(Expr::Integer(3)), + }), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::Push(4), + Opcode::Push(2), + Opcode::Push(3), + Opcode::Iadd, + Opcode::Imul, + ] + ); + } + } + } + + #[test] + fn binary_operations() { + let mut compiler = init_compiler(); + let expr = Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::LessThan, + right: Box::new(Expr::Integer(5)), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![Opcode::Push(4), Opcode::Push(5), Opcode::LessThan,] + ); + } + } + + let mut compiler = 
init_compiler(); + let expr = Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(5)), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![Opcode::Push(4), Opcode::Push(5), Opcode::GreaterThan,] + ); + } + } + } + + #[test] + fn unary_operations() { + let mut compiler = init_compiler(); + let expr = Expr::UnaryOperation { + op: UnaryOp::Minus, + right: Box::new(Expr::Integer(4)), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!(bytecode, vec![Opcode::Push(4), Opcode::UnaryNegative]); + } + } + + let mut compiler = init_compiler(); + let expr = Expr::UnaryOperation { + op: UnaryOp::Plus, + right: Box::new(Expr::Integer(4)), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!(bytecode, vec![Opcode::Push(4)]); + } + } + + let mut compiler = init_compiler(); + let expr = Expr::UnaryOperation { + op: UnaryOp::Not, + right: Box::new(Expr::Boolean(false)), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![Opcode::LoadConst(Index::new(0)), Opcode::UnaryNot] + ); + } + } + + let mut compiler = init_compiler(); + let expr = Expr::UnaryOperation { + op: UnaryOp::BitwiseNot, + right: Box::new(Expr::Integer(4)), + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!(bytecode, vec![Opcode::Push(4), Opcode::UnaryInvert]); + } + } + } + + #[test] + fn assignment() { + let mut compiler = init_compiler(); + let stmt = Statement::Assignment { + left: Expr::Variable("var".into()), + right: Expr::BinaryOperation { + left: Box::new(Expr::Integer(5)), + op: BinOp::Sub, + right: Box::new(Expr::Integer(2)), + 
}, + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::Push(5), + Opcode::Push(2), + Opcode::Isub, + Opcode::StoreGlobal(Index::new(0)), + ] + ); + } + } + + let mut compiler = init_compiler(); + let stmt = Statement::Assignment { + left: Expr::Variable("var".into()), + right: Expr::StringLiteral("Hello World".into()), + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::LoadConst(Index::new(0)), + Opcode::StoreGlobal(Index::new(0)), + ] + ); + } + } + + let mut compiler = init_compiler(); + let stmt = Statement::Assignment { + left: Expr::Variable("var".into()), + right: Expr::None, + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::LoadConst(Index::new(0)), + Opcode::StoreGlobal(Index::new(0)), + ] + ); + } + } + + let mut compiler = init_compiler(); + compiler.get_or_set_var_index("a"); + let stmt = Statement::Expression(Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Add, + right: Box::new(Expr::Variable("a".into())), + }); + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::Push(2), + Opcode::LoadGlobal(Index::new(0)), + Opcode::Iadd, + ] + ); + } + } + + let mut compiler = init_compiler(); + let stmt = Statement::Expression(Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Add, + right: Box::new(Expr::Variable("b".into())), + }); + + match compiler.compile_stmt(&stmt) { + Err(e) => { + assert_eq!( + e, + CompilerError::NameError("name 'b' is not defined".into()) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn member_access() { + let mut compiler = init_compiler(); + 
compiler.get_or_set_var_index("foo"); + let stmt = Statement::Assignment { + left: Expr::MemberAccess { + object: Box::new(Expr::Variable("foo".into())), + field: "x".into(), + }, + right: Expr::Integer(4), + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::LoadGlobal(Index::new(0)), + Opcode::Push(4), + Opcode::SetAttr(Index::new(0)) + ] + ); + } + } + } + + #[test] + fn while_loop() { + let mut compiler = init_compiler(); + let stmt = Statement::WhileLoop { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::LessThan, + right: Box::new(Expr::Integer(5)), + }, + body: Block::new(vec![]), + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::Push(4), + Opcode::Push(5), + Opcode::LessThan, + Opcode::JumpIfFalse(1), + Opcode::Jump(-5), + ] + ); + } + } + } + + #[test] + fn if_else() { + let mut compiler = init_compiler(); + let stmt = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::LessThan, + right: Box::new(Expr::Integer(5)), + }, + block: Block::new(vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(-1), + }]), + }, + elif_parts: vec![], + else_part: None, + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::Push(4), + Opcode::Push(5), + Opcode::LessThan, + Opcode::JumpIfFalse(2), + Opcode::Push(-1), + Opcode::StoreGlobal(Index::new(0)), + ] + ); + } + } + + let mut compiler = init_compiler(); + let stmt = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::LessThan, + right: Box::new(Expr::Integer(5)), + }, + 
block: Block::new(vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(-3), + }]), + }, + elif_parts: vec![], + else_part: Some(Block::new(vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(3), + }])), + }; + + match compiler.compile_stmt(&stmt) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::Push(4), + Opcode::Push(5), + Opcode::LessThan, + Opcode::JumpIfFalse(3), + Opcode::Push(-3), + Opcode::StoreGlobal(Index::new(0)), + Opcode::Jump(2), + Opcode::Push(3), + Opcode::StoreGlobal(Index::new(0)), + ] + ); + } + } + } + + #[test] + fn function_call() { + let mut compiler = init_compiler(); + compiler.get_or_set_var_index("a"); + compiler.get_or_set_var_index("b"); + compiler.get_or_set_var_index("foo"); + let expr = Expr::FunctionCall { + name: "foo".into(), + args: ParsedArguments { + args: vec![Expr::Variable("a".into()), Expr::Variable("b".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }; + + match compiler.compile_expr(&expr) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(bytecode) => { + assert_eq!( + bytecode, + vec![ + Opcode::LoadGlobal(Index::new(1)), + Opcode::LoadGlobal(Index::new(0)), + Opcode::Call(Index::new(2)), + ] + ); + } + } + } +} + +#[cfg(test)] +mod compiler_state_tests { + use super::*; + + use crate::{bytecode_vm::VmInterpreter, init::Builder}; + + fn init_interpreter(text: &str) -> (Parser, VmInterpreter) { + let (parser, interpreter) = Builder::new().text(text).build_vm_expl(); + + (parser, interpreter) + } + + fn name_map(program: &CompiledProgram, name: &str) -> usize { + *program.name_map.get(name).cloned().unwrap() + } + + #[test] + fn function_definition_with_parameters() { + let text = r#" +def foo(a, b): + a + b +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => 
panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadConst(Index::new(0)), + Opcode::LoadConst(Index::new(1)), + Opcode::MakeFunction, + Opcode::StoreGlobal(Index::new(0)), + Opcode::Halt, + ] + ); + assert_eq!( + program.constant_pool[0], + Constant::Code(CodeObject { + name: "foo".into(), + bytecode: vec![ + Opcode::LoadFast(Index::new(0)), + Opcode::LoadFast(Index::new(1)), + Opcode::Iadd, + ], + arg_count: 2, + varnames: vec!["a".into(), "b".into()], + }) + ); + assert_eq!(program.constant_pool[1], Constant::String("foo".into())); + assert_eq!(name_map(&program, "foo"), 0); + } + } + } + + #[test] + fn function_definition_with_local_var() { + let text = r#" +def foo(): + c = 10 +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadConst(Index::new(0)), + Opcode::LoadConst(Index::new(1)), + Opcode::MakeFunction, + Opcode::StoreGlobal(Index::new(0)), + Opcode::Halt, + ] + ); + assert_eq!( + program.constant_pool[0], + Constant::Code(CodeObject { + name: "foo".into(), + bytecode: vec![Opcode::Push(10), Opcode::StoreFast(Index::new(0))], + arg_count: 0, + varnames: vec!["c".into()], + }) + ); + assert_eq!(program.constant_pool[1], Constant::String("foo".into())); + assert_eq!(name_map(&program, "foo"), 0); + } + } + } + + #[test] + fn function_definition_with_local_var_and_return() { + let text = r#" +def foo(): + c = 10 + return c +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadConst(Index::new(0)), + Opcode::LoadConst(Index::new(1)), + Opcode::MakeFunction, + Opcode::StoreGlobal(Index::new(0)), + Opcode::Halt, + ] + ); + 
assert_eq!( + program.constant_pool[0], + Constant::Code(CodeObject { + name: "foo".into(), + bytecode: vec![ + Opcode::Push(10), + Opcode::StoreFast(Index::new(0)), + Opcode::LoadFast(Index::new(0)), + Opcode::ReturnValue + ], + arg_count: 0, + varnames: vec!["c".into()], + }) + ); + assert_eq!(program.constant_pool[1], Constant::String("foo".into())); + assert_eq!(name_map(&program, "foo"), 0); + } + } + } + + #[test] + fn function_definition_with_two_calls_and_no_return() { + let text = r#" +def hello(): + print("Hello") + +def world(): + print("World") + +hello() +world() +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadConst(Index::new(1)), + Opcode::LoadConst(Index::new(2)), + Opcode::MakeFunction, + Opcode::StoreGlobal(Index::new(0)), + Opcode::LoadConst(Index::new(4)), + Opcode::LoadConst(Index::new(5)), + Opcode::MakeFunction, + Opcode::StoreGlobal(Index::new(1)), + Opcode::Call(Index::new(0)), + Opcode::Call(Index::new(1)), + Opcode::Halt, + ] + ); + assert_eq!(program.constant_pool.len(), 6); + assert_eq!(program.constant_pool[0], Constant::String("Hello".into())); + assert_eq!( + program.constant_pool[1], + Constant::Code(CodeObject { + name: "hello".into(), + bytecode: vec![Opcode::PrintConst(Index::new(0)),], + arg_count: 0, + varnames: vec![], + }) + ); + assert_eq!(program.constant_pool[2], Constant::String("hello".into())); + assert_eq!(program.constant_pool[3], Constant::String("World".into())); + assert_eq!( + program.constant_pool[4], + Constant::Code(CodeObject { + name: "world".into(), + bytecode: vec![Opcode::PrintConst(Index::new(3)),], + arg_count: 0, + varnames: vec![], + }) + ); + assert_eq!(program.constant_pool[5], Constant::String("world".into())); + assert_eq!(name_map(&program, "hello"), 0); + assert_eq!(name_map(&program, "world"), 1); + } + } 
+ } + + #[test] + fn class_definition() { + let text = r#" +class Foo: + def bar(self): + return 99 +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!(program.constant_pool.len(), 4); + let Some(Constant::Code(code)) = program.constant_pool.get(0) else { + panic!() + }; + assert_eq!( + code, + &CodeObject { + name: "bar".into(), + bytecode: vec![Opcode::Push(99), Opcode::ReturnValue,], + arg_count: 1, + varnames: vec!["self".into()], + } + ); + assert_eq!(program.constant_pool[1], Constant::String("bar".into())); + let Some(Constant::Code(code)) = program.constant_pool.get(2) else { + panic!() + }; + assert_eq!( + code.bytecode, + vec![ + Opcode::LoadConst(Index::new(0)), + Opcode::LoadConst(Index::new(1)), + Opcode::MakeFunction, + Opcode::StoreFast(Index::new(0)), + Opcode::EndClass, + ] + ); + assert_eq!(code.varnames.len(), 1); + assert_eq!(code.varnames[0], "bar"); + assert_eq!(program.constant_pool[3], Constant::String("Foo".into())); + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadBuildClass, + Opcode::LoadConst(Index::new(2)), + Opcode::LoadConst(Index::new(3)), + Opcode::PopAndCall(2), + Opcode::Halt, + ] + ); + assert_eq!(program.name_map.len(), 1); + assert_eq!(name_map(&program, "Foo"), 0); + } + } + } + + #[test] + fn class_instantiation() { + let text = r#" +class Foo: + def bar(): + return 99 + +f = Foo() +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadBuildClass, + Opcode::LoadConst(Index::new(2)), + Opcode::LoadConst(Index::new(3)), + Opcode::PopAndCall(2), + Opcode::Call(Index::new(0)), + Opcode::StoreGlobal(Index::new(1)), + Opcode::Halt, + ] + ); + assert_eq!(program.name_map.len(), 2); + 
assert_eq!(name_map(&program, "Foo"), 0); + assert_eq!(name_map(&program, "f"), 1); + } + } + } + + #[test] + fn class_instantiation_and_method_call() { + let text = r#" +class Foo: + def bar(self): + return 99 + +f = Foo() +b = f.bar() +"#; + let (mut parser, mut interpreter) = init_interpreter(text); + + match interpreter.compile(&mut parser) { + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(program) => { + assert_eq!( + program.code.bytecode, + vec![ + Opcode::LoadBuildClass, + Opcode::LoadConst(Index::new(2)), + Opcode::LoadConst(Index::new(3)), + Opcode::PopAndCall(2), + Opcode::Call(Index::new(0)), + Opcode::StoreGlobal(Index::new(1)), + Opcode::LoadGlobal(Index::new(1)), + Opcode::LoadAttr(Index::new(1)), + Opcode::CallMethod(0), + Opcode::StoreGlobal(Index::new(2)), + Opcode::Halt, + ] + ); + assert_eq!(program.name_map.len(), 3); + assert_eq!(name_map(&program, "Foo"), 0); + assert_eq!(name_map(&program, "f"), 1); + assert_eq!(name_map(&program, "b"), 2); + assert_eq!(program.constant_pool.len(), 4); + let Some(Constant::Code(_)) = program.constant_pool.get(0) else { + panic!() + }; + assert_eq!(program.constant_pool[1], Constant::String("bar".into())); + let Some(Constant::Code(_)) = program.constant_pool.get(2) else { + panic!() + }; + assert_eq!(program.constant_pool[3], Constant::String("Foo".into())); + } + } + } +} diff --git a/src/bytecode_vm/compiler/types.rs b/src/bytecode_vm/compiler/types.rs new file mode 100644 index 0000000..c24c9f8 --- /dev/null +++ b/src/bytecode_vm/compiler/types.rs @@ -0,0 +1,92 @@ +use std::collections::HashMap; +use std::fmt::{Display, Error, Formatter}; + +use crate::bytecode_vm::indices::BytecodeIndex; +use crate::bytecode_vm::opcode::Opcode; + +pub type Bytecode = Vec; +pub type BytecodeNameMap = HashMap; + +/// The values which are passed to the VM are a subset of the types of [`Value`]. 
+#[derive(Debug, PartialEq, Clone)] +pub enum Constant { + None, + Boolean(bool), + String(String), + Code(CodeObject), +} + +impl Display for Constant { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + Constant::None => write!(f, "None"), + Constant::Boolean(i) => write!(f, "{}", i), + Constant::String(i) => write!(f, "{}", i), + Constant::Code(i) => write!(f, "{}", i), + } + } +} + +pub struct CompiledProgram { + pub code: CodeObject, + pub constant_pool: Vec, + pub name_map: BytecodeNameMap, +} + +impl CompiledProgram { + pub fn new(code: CodeObject, constant_pool: Vec, name_map: BytecodeNameMap) -> Self { + Self { + code, + constant_pool, + name_map, + } + } +} + +impl Display for CompiledProgram { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + writeln!(f, "\nnames:")?; + for (name, index) in self.name_map.iter() { + writeln!(f, "{}: {:?}", name, index)?; + } + + writeln!(f, "\nconstants:")?; + for (index, constant) in self.constant_pool.iter().enumerate() { + writeln!(f, "{}: {}", index, constant)?; + } + + for constant in self.constant_pool.iter() { + if let Constant::Code(code) = constant { + writeln!(f, "\n{}:", code.name)?; + for (index, opcode) in code.bytecode.iter().enumerate() { + writeln!(f, "{}: {}", index, opcode)?; + } + } + } + + writeln!(f, "\n{}:", self.code.name)?; + for (index, opcode) in self.code.bytecode.iter().enumerate() { + writeln!(f, "{}: {}", index, opcode)?; + } + + Ok(()) + } +} + +/// Represents the bytecode and associated metadata for a block of Python code. It's a compiled +/// version of the source code, containing instructions that the VM can execute. This is immutable +/// and does not know about the context in which it is executed, meaning it doesn't hold references +/// to the global or local variables it operates on. 
+#[derive(Clone, PartialEq, Debug)] +pub struct CodeObject { + pub name: String, + pub bytecode: Bytecode, + pub arg_count: usize, + pub varnames: Vec, +} + +impl Display for CodeObject { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "", self.name) + } +} diff --git a/src/bytecode_vm/indices.rs b/src/bytecode_vm/indices.rs new file mode 100644 index 0000000..b44937a --- /dev/null +++ b/src/bytecode_vm/indices.rs @@ -0,0 +1,81 @@ +use std::any; +use std::fmt::{Debug, Display, Error, Formatter}; +use std::marker::PhantomData; +use std::ops::Deref; + +/// An unsigned integer wrapper which provides type safety. This is particularly useful when +/// dealing with indices used across the bytecode compiler and the VM as common integer values such +/// as 0, 1, etc, can be interpreted many different ways. +#[derive(Copy, Clone, PartialEq, Hash, Eq)] +pub struct Index { + value: usize, + _marker: PhantomData, +} + +impl Index { + pub fn new(value: usize) -> Self { + Self { + value, + _marker: PhantomData, + } + } +} + +impl Deref for Index { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl Display for Index { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "{}", self.value) + } +} + +impl Debug for Index { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let full_type_name = any::type_name::(); + let type_name = full_type_name.rsplit("::").next().unwrap(); + write!(f, "{}({})", type_name, self.value) + } +} + +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +pub struct BytecodeMarker; +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +pub struct GlobalStoreMarker; +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +pub struct ObjectTableMarker; +#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)] +pub struct ConstantMarker; + +pub type BytecodeIndex = Index; +pub type GlobalStoreIndex = Index; +pub type ObjectTableIndex = Index; +pub type ConstantIndex = Index; + 
+#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_debug_output() { + let index: BytecodeIndex = Index::new(4); + assert_eq!(format!("{:?}", index), "BytecodeMarker(4)".to_string()) + } + + #[test] + fn test_display_output() { + let index: BytecodeIndex = Index::new(4); + assert_eq!(format!("{}", index), "4".to_string()) + } + + #[test] + fn test_dereference() { + let index: BytecodeIndex = Index::new(4); + assert_eq!(*index, 4) + } +} diff --git a/src/bytecode_vm/interpreter.rs b/src/bytecode_vm/interpreter.rs new file mode 100644 index 0000000..fc87d5a --- /dev/null +++ b/src/bytecode_vm/interpreter.rs @@ -0,0 +1,389 @@ +use crate::{ + bytecode_vm::{types::Value, Compiler, VirtualMachine}, + core::{log, InterpreterEntrypoint, LogLevel}, + parser::Parser, + types::errors::MemphisError, +}; + +use super::compiler::types::CompiledProgram; + +pub struct VmInterpreter { + compiler: Compiler, + // TODO this shouldn't need to be public, we're using it inside a few tests right now + pub vm: VirtualMachine, +} + +impl VmInterpreter { + pub fn new() -> Self { + Self { + compiler: Compiler::new(), + vm: VirtualMachine::new(), + } + } + + pub fn take(&mut self, name: &str) -> Option { + if let Some(bytecode_index) = self.compiler.get_var_index(name) { + let reference = self.vm.load_global(bytecode_index)?; + return Some(self.vm.take(reference)); + } + + None + } + + pub fn compile(&mut self, parser: &mut Parser) -> Result { + self.compiler.compile(parser) + } +} + +impl Default for VmInterpreter { + fn default() -> Self { + Self::new() + } +} + +impl InterpreterEntrypoint for VmInterpreter { + type Return = Value; + + fn run(&mut self, parser: &mut Parser) -> Result { + let program = self.compile(parser)?; + log(LogLevel::Trace, || format!("{}", program)); + self.vm.load(program); + self.vm.run_loop().map_err(MemphisError::Vm) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::{bytecode_vm::vm::types::Object, init::Builder}; + + fn init(text: 
&str) -> (Parser, VmInterpreter) { + let (parser, interpreter) = Builder::new().text(text).build_vm_expl(); + + (parser, interpreter) + } + + fn take_obj_attr(interpreter: &mut VmInterpreter, object: Object, attr: &str) -> Value { + interpreter.vm.take( + object + .read(attr.into(), |reference| { + interpreter.vm.dereference(reference) + }) + .unwrap(), + ) + } + + #[test] + fn expression() { + let text = "4 * (2 + 3)"; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(result) => { + assert_eq!(result, Value::Integer(20)); + } + } + + let text = "4 < 5"; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(result) => { + assert_eq!(result, Value::Boolean(true)); + } + } + + let text = "4 > 5"; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(result) => { + assert_eq!(result, Value::Boolean(false)); + } + } + } + + #[test] + fn assignment() { + let text = r#" +a = 5 - 3 +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.take("a"), Some(Value::Integer(2))); + } + } + + let text = r#" +a = "Hello World" +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.take("a"), + Some(Value::String("Hello World".into())) + ); + } + } + + let text = r#" +a = 5 - 3 +b = 10 +c = None +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.take("a"), Some(Value::Integer(2))); + assert_eq!(interpreter.take("b"), 
Some(Value::Integer(10))); + assert_eq!(interpreter.take("c"), Some(Value::None)); + } + } + + let text = r#" +a = 5 - 3 +b = 10 + a +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.take("a"), Some(Value::Integer(2))); + assert_eq!(interpreter.take("b"), Some(Value::Integer(12))); + } + } + } + + #[test] + fn while_loop() { + let text = r#" +i = 0 +n = 4 +while i < n: + i = i + 1 +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.take("i"), Some(Value::Integer(4))); + } + } + } + + #[test] + fn function_call_with_parameters() { + let text = r#" +def foo(a, b): + return a + b + +c = foo(2, 9) +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.take("c"), Some(Value::Integer(11))); + } + } + } + + #[test] + fn function_call_with_local_var() { + let text = r#" +def foo(a, b): + c = 9 + return a + b + c + +d = foo(2, 9) +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.take("d"), Some(Value::Integer(20))); + } + } + } + + #[test] + fn function_call_with_no_return() { + let text = r#" +def hello(): + print("Hello") + +def world(): + print("World") + +hello() +world() +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => {} + } + } + + #[test] + fn class_definition() { + let text = r#" +class Foo: + def bar(): + return 4 +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter 
error: {:?}", e), + Ok(_) => { + let Some(Value::Class(class)) = interpreter.take("Foo") else { + panic!("Did not find class Foo") + }; + assert_eq!(class.name, "Foo"); + let Value::Function(ref function) = + *interpreter.vm.dereference(class.read("bar").unwrap()) + else { + panic!("Did not find function bar") + }; + assert_eq!(function.name, "bar"); + } + } + } + + #[test] + fn class_instantiation() { + let text = r#" +class Foo: + def bar(): + return 4 + +f = Foo() +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let Some(Value::Object(object)) = interpreter.take("f") else { + panic!("Did not find object f") + }; + assert_eq!( + interpreter + .vm + .dereference(object.class_ref()) + .as_class() + .name, + "Foo" + ); + } + } + } + + #[test] + fn class_with_method_call() { + let text = r#" +class Foo: + def bar(self): + return 4 + +f = Foo() +b = f.bar() +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let Some(Value::Integer(b)) = interpreter.take("b") else { + panic!("Did not find object f") + }; + assert_eq!(b, 4); + } + } + } + + #[test] + fn class_with_member_access() { + let text = r#" +class Foo: + pass + +f = Foo() +f.x = 4 +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let Some(Value::Object(f)) = interpreter.take("f") else { + panic!("Did not find object f") + }; + assert_eq!(take_obj_attr(&mut interpreter, f, "x"), Value::Integer(4)); + } + } + } + + #[test] + fn class_with_bound_method() { + let text = r#" +class Foo: + def bar(self): + self.x = 4 + +f = Foo() +f.bar() +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) 
=> { + let Some(Value::Object(f)) = interpreter.take("f") else { + panic!("Did not find object f") + }; + assert_eq!(take_obj_attr(&mut interpreter, f, "x"), Value::Integer(4)); + } + } + } + + #[test] + fn class_instantiation_with_constructor() { + let text = r#" +class Foo: + def __init__(self): + self.x = 44 + +f = Foo() +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let Some(Value::Object(f)) = interpreter.take("f") else { + panic!("Did not find object f") + }; + assert_eq!(take_obj_attr(&mut interpreter, f, "x"), Value::Integer(44)); + } + } + } +} diff --git a/src/bytecode_vm/mod.rs b/src/bytecode_vm/mod.rs new file mode 100644 index 0000000..12765f0 --- /dev/null +++ b/src/bytecode_vm/mod.rs @@ -0,0 +1,12 @@ +pub mod compiler; +pub mod indices; +mod interpreter; +mod opcode; +pub mod types; +#[allow(clippy::module_inception)] +pub mod vm; + +use compiler::Compiler; +pub use interpreter::VmInterpreter; +use opcode::Opcode; +use vm::VirtualMachine; diff --git a/src/bytecode_vm/opcode.rs b/src/bytecode_vm/opcode.rs new file mode 100644 index 0000000..313b009 --- /dev/null +++ b/src/bytecode_vm/opcode.rs @@ -0,0 +1,110 @@ +use std::fmt::{Display, Error, Formatter}; + +use super::indices::{BytecodeIndex, ConstantIndex}; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Opcode { + /// Treat the top two values on the stack as integers, add them together, and push their sum + /// back onto the stack. + Iadd, + /// Treat the top two values on the stack as integers, subtract them from each other, and push + /// their difference back onto the stack. + Isub, + /// Integer Multiply + Imul, + /// Integer Divide + Idiv, + /// Compare two values on the stack and push a boolean result back onto the stack based on + /// whether the first value is less than the second value. 
+ LessThan, + /// Compare two values on the stack and push a boolean result back onto the stack based on + /// whether the first value is greater than the second value. + GreaterThan, + /// Implements STACK[-1] = -STACK[-1]. + UnaryNegative, + /// Implements STACK[-1] = not STACK[-1]. + UnaryNot, + /// Implements STACK[-1] = ~STACK[-1]. + UnaryInvert, + /// Push an integer value onto the stack. This is in preparation for another instruction. + Push(i64), + /// Push the value found at the specified index in the constant pool onto the stack. + LoadConst(ConstantIndex), + /// Write the top value of the stack into the local variable indicated by the specified index. + StoreFast(BytecodeIndex), + /// Write the top value of the stack into the global variable indicated by the specified index. + StoreGlobal(BytecodeIndex), + /// Read the local variable indicated by the specified index and push the value onto the stack. + LoadFast(BytecodeIndex), + /// Read the global variable indicated by the specified index and push the value onto the stack. + LoadGlobal(BytecodeIndex), + /// Pop an object off the stack, find the attribute name specified by the given index, look up + /// the attribute with that name off the object, and push it onto the stack. + LoadAttr(ConstantIndex), + /// Pop a value and object off the stack and set the attribute of the object to that value. The + /// attribute name is specified by the given index. + SetAttr(ConstantIndex), + /// Pushes `__build_class__` onto the stack. It is later called by the VM to construct a class, + /// NOT instantiate an object of that class. This is directly inspired by how CPython does it. + LoadBuildClass, + /// Unconditional jump to an offset. This is signed because you can jump in reverse. + Jump(isize), + /// Conditional jump to an offset based on the value on the top of the stack. This is signed + /// because you can jump in reverse. 
+ JumpIfFalse(isize), + /// Create a function object from a code object, encapsulating the information needed to call + /// the function later. + MakeFunction, + /// Call the function specified by the index. + Call(BytecodeIndex), + /// Call the function from the top of the stack with the specified number of arguments. + PopAndCall(usize), + /// Call the method from the top of the stack with the specified number of arguments. + CallMethod(usize), + /// Return the value on the stack to the caller. + ReturnValue, + /// Print a constant from the pool. + PrintConst(ConstantIndex), + /// Indicate that we have reached the end of a class body definition. + EndClass, + /// Stop the VM + Halt, + /// Used internally to the compiler when constructing jump offsets. + Placeholder, +} + +impl Display for Opcode { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + Opcode::Iadd => write!(f, "IADD"), + Opcode::Isub => write!(f, "ISUB"), + Opcode::Imul => write!(f, "IMUL"), + Opcode::Idiv => write!(f, "IDIV"), + Opcode::LessThan => write!(f, "LESS_THAN"), + Opcode::GreaterThan => write!(f, "GREATER_THAN"), + Opcode::UnaryNegative => write!(f, "UNARY_NEGATIVE"), + Opcode::UnaryNot => write!(f, "UNARY_NOT"), + Opcode::UnaryInvert => write!(f, "UNARY_INVERT"), + Opcode::Push(i) => write!(f, "PUSH {}", i), + Opcode::LoadConst(i) => write!(f, "LOAD_CONST {}", i), + Opcode::StoreFast(i) => write!(f, "STORE_FAST {}", i), + Opcode::StoreGlobal(i) => write!(f, "STORE_GLOBAL {}", i), + Opcode::LoadFast(i) => write!(f, "LOAD_FAST {}", i), + Opcode::LoadGlobal(i) => write!(f, "LOAD_GLOBAL {}", i), + Opcode::LoadAttr(i) => write!(f, "LOAD_ATTR {}", i), + Opcode::SetAttr(i) => write!(f, "SET_ATTR {}", i), + Opcode::LoadBuildClass => write!(f, "LOAD_BUILD_CLASS"), + Opcode::Jump(i) => write!(f, "JUMP {}", i), + Opcode::JumpIfFalse(i) => write!(f, "JUMP_IF_FALSE {}", i), + Opcode::MakeFunction => write!(f, "MAKE_FUNCTION"), + Opcode::Call(i) => write!(f, "CALL {}", i), + 
Opcode::PopAndCall(i) => write!(f, "POP_AND_CALL {}", i), + Opcode::CallMethod(i) => write!(f, "CALL_METHOD {}", i), + Opcode::ReturnValue => write!(f, "RETURN_VALUE"), + Opcode::PrintConst(i) => write!(f, "PRINT_CONST {}", i), + Opcode::EndClass => write!(f, "END_CLASS"), + Opcode::Halt => write!(f, "HALT"), + Opcode::Placeholder => unreachable!(), + } + } +} diff --git a/src/bytecode_vm/types.rs b/src/bytecode_vm/types.rs new file mode 100644 index 0000000..3ff0a43 --- /dev/null +++ b/src/bytecode_vm/types.rs @@ -0,0 +1,144 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::core::Voidable; + +use super::compiler::types::{CodeObject, Constant}; +use super::vm::types::{Class, FunctionObject, Method, Object, Reference}; + +#[derive(Clone, PartialEq, Debug)] +pub enum Value { + Void, + None, + Integer(i64), + String(String), + Boolean(bool), + Class(Class), + Object(Object), + Code(CodeObject), + Function(FunctionObject), + Method(Method), + BuiltinFunction, +} + +impl Default for Value { + fn default() -> Self { + Self::Void + } +} + +impl Voidable for Value { + fn is_void(&self) -> bool { + matches!(self, Value::Void) + } +} + +impl From for Value { + fn from(value: Reference) -> Self { + match value { + Reference::Void => Value::Void, + Reference::Int(i) => Value::Integer(i), + Reference::Bool(i) => Value::Boolean(i), + // These require a lookup using VM state and must be converted before this function. 
+ Reference::ObjectRef(_) | Reference::ConstantRef(_) => unreachable!(), + } + } +} + +impl From<&Constant> for Value { + fn from(value: &Constant) -> Self { + match value { + Constant::None => Value::None, + Constant::Boolean(i) => Value::Boolean(*i), + Constant::String(i) => Value::String(i.to_string()), + Constant::Code(i) => Value::Code(i.clone()), + } + } +} + +impl Display for Value { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + Value::Void => unreachable!(), + Value::None => write!(f, "None"), + Value::Integer(i) => write!(f, "{}", i), + Value::String(i) => write!(f, "{}", i), + Value::Boolean(i) => write!(f, "{}", i), + Value::Code(i) => write!(f, "{}", i), + _ => unimplemented!("Type {:?} unimplemented in the bytecode VM.", self), + } + } +} + +impl Value { + pub fn as_integer(&self) -> i64 { + match self { + Value::Integer(i) => *i, + _ => panic!("expected integer"), + } + } + + pub fn as_boolean(&self) -> bool { + match self { + Value::Boolean(i) => *i, + _ => panic!("expected boolean"), + } + } + + pub fn as_string(&self) -> &str { + match self { + Value::String(i) => i, + _ => panic!("expected string"), + } + } + + pub fn as_code(&self) -> &CodeObject { + match self { + Value::Code(i) => i, + _ => panic!("expected code"), + } + } + + pub fn as_function(&self) -> &FunctionObject { + match self { + Value::Function(i) => i, + _ => panic!("expected method"), + } + } + + pub fn as_method(&self) -> &Method { + match self { + Value::Method(i) => i, + _ => panic!("expected method"), + } + } + + pub fn as_object(&self) -> &Object { + match self { + Value::Object(i) => i, + _ => panic!("expected object"), + } + } + + pub fn as_class(&self) -> &Class { + match self { + Value::Class(i) => i, + _ => panic!("expected object"), + } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum VmError { + StackUnderflow, + StackOverflow, + VariableNotFound, + RuntimeError, +} + +#[allow(clippy::enum_variant_names)] +#[derive(Clone, PartialEq, 
Debug)]
pub enum CompilerError {
    NameError(String),
    SyntaxError(String),
    RuntimeError,
}

// ---------------------------------------------------------------------------------------------
// Patch boundary, as recorded in the original patch:
//   diff --git a/src/bytecode_vm/vm/frame.rs b/src/bytecode_vm/vm/frame.rs
//   new file mode 100644
//   index 0000000..8f78aa4
//   --- /dev/null
//   +++ b/src/bytecode_vm/vm/frame.rs
//   @@ -0,0 +1,50 @@
// ---------------------------------------------------------------------------------------------

use std::collections::HashMap;

use crate::bytecode_vm::opcode::Opcode;

use super::types::{FunctionObject, Namespace, Reference};

/// One activation of a function: its code, its position in that code, and its local variables.
#[derive(Debug)]
pub struct Frame {
    /// The function (and therefore the bytecode) this frame executes.
    pub function: FunctionObject,

    /// The program counter indicating the current point of execution for the immutable block of
    /// bytecode held by this [`Frame`].
    pub pc: usize,

    /// Analogous to CPython's co_varnames, which is the names of the local variables beginning
    /// with the function arguments.
    pub varnames: Vec<String>,

    /// The stack which holds all the local variables themselves, beginning with the function
    /// arguments.
    pub locals: Vec<Reference>,
}

impl Frame {
    /// Create a frame positioned at the first instruction of `function`. The call arguments
    /// become the initial local variables.
    pub fn new(function: FunctionObject, args: Vec<Reference>) -> Self {
        let varnames = function.code.varnames.clone();
        Frame {
            function,
            pc: 0,
            varnames,
            locals: args,
        }
    }

    /// Return the instruction at the program counter.
    ///
    /// # Panics
    ///
    /// Panics if the program counter is outside the bytecode; check [`Frame::is_finished`] first.
    pub fn get_inst(&self) -> Opcode {
        self.function.code.bytecode[self.pc]
    }

    /// Whether the program counter has reached the end of the bytecode.
    pub fn is_finished(&self) -> bool {
        self.pc == self.function.code.bytecode.len()
    }

    /// Build a name-to-value map of the local variables stored so far.
    ///
    /// `locals` starts out holding only the call arguments and grows as locals are stored, so it
    /// can be shorter than `varnames`. Names without a stored value are omitted; indexing
    /// `locals` by every varname position, as before, panicked in that case.
    pub fn namespace(&self) -> Namespace {
        let mut namespace = HashMap::new();
        for (varname, value) in self.varnames.iter().zip(&self.locals) {
            namespace.insert(varname.to_owned(), *value);
        }
        namespace
    }
}

// ---------------------------------------------------------------------------------------------
// Patch boundary, as recorded in the original patch:
//   diff --git a/src/bytecode_vm/vm/mod.rs b/src/bytecode_vm/vm/mod.rs
//   new file mode 100644
//   index 0000000..79ea034
//   --- /dev/null
//   +++ b/src/bytecode_vm/vm/mod.rs
//   @@ -0,0 +1,511 @@
// ---------------------------------------------------------------------------------------------

use std::{borrow::Cow, collections::HashMap, mem};

use crate::{
    bytecode_vm::{
        compiler::types::CompiledProgram,
        types::{Value, VmError},
        Opcode,
    },
    core::{log, LogLevel, Stack},
    treewalk::types::utils::Dunder,
};

mod frame;
pub mod types;

use self::types::{Class, FunctionObject,
Method, Object}; +use self::{frame::Frame, types::Reference}; + +use super::{ + compiler::types::{BytecodeNameMap, Constant}, + indices::{BytecodeIndex, ConstantIndex, GlobalStoreIndex, Index, ObjectTableIndex}, +}; + +pub struct VirtualMachine { + /// All code which is executed lives inside a [`Frame`] on this call stack. + call_stack: Vec, + + /// Constants handed to us by the compiler as part of the [`CompiledProgram`]. + constant_pool: Vec, + + /// The indices used by the compiled bytecode will not change at runtime, but they can point to + /// different names over the course of the execution of the program. + name_map: BytecodeNameMap, + + /// Just like its name says, we need a map to translate from the indices found in the compiled + /// bytecode to that variable's location in the global store. + index_map: HashMap, + + /// The runtime mapping of global variables to their values. + global_store: Vec, + + /// This is kind of similar to the heap. When an object is created, it will live here and a + /// reference to it will be placed on the stack. Objects here can be from any function context. + /// This store retains ownership of the Rust objects throughout the runtime. When non-primitive + /// objects are pushed onto the stack, a reference is used so as to not take ownership of the + /// objects. + object_table: Vec, + + /// We must keep a stack of class definitions that we have begun so that when they finish, we + /// know with which name to associate the namespace. Each entry is the name of a class whose + /// body is currently being executed. 
+ class_stack: Stack, +} + +impl VirtualMachine { + pub fn new() -> Self { + Self { + call_stack: vec![], + constant_pool: vec![], + name_map: HashMap::new(), + index_map: HashMap::new(), + global_store: vec![], + object_table: vec![], + class_stack: Stack::default(), + } + } + + pub fn load(&mut self, program: CompiledProgram) { + log(LogLevel::Debug, || format!("{}", program)); + self.constant_pool = program.constant_pool; + self.name_map = program.name_map; + + let function = FunctionObject::new(program.code.name.clone(), program.code); + + let new_frame = Frame::new(function, vec![]); + self.call_stack.push(new_frame); + } + + pub fn read_constant(&self, index: ConstantIndex) -> Option { + self.constant_pool.get(*index).map(|c| c.into()) + } + + fn update_fn(&mut self, index: ObjectTableIndex, function: F) + where + F: FnOnce(&mut Value), + { + if let Some(object_value) = self.object_table.get_mut(*index) { + function(object_value) + } + } + + fn store_global_by_name(&mut self, name: &str, value: Reference) { + let bytecode_index = self.name_map.get(name).unwrap(); + self.store_global(*bytecode_index, value); + } + + fn store_global(&mut self, bytecode_index: BytecodeIndex, value: Reference) { + let global_store_index = if let Some(index) = self.index_map.get(&bytecode_index) { + *index + } else { + let next_index = Index::new(self.global_store.len()); + self.index_map.insert(bytecode_index, next_index); + next_index + }; + + if self.global_store.len() == *global_store_index { + self.global_store.push(value); + } else { + self.global_store[*global_store_index] = value; + } + } + + fn store_local(&mut self, index: BytecodeIndex, value: Reference) { + let frame_index = self.call_stack.len().checked_sub(1).unwrap(); + if self.call_stack[frame_index].locals.len() == *index { + self.call_stack[frame_index].locals.push(value); + } else { + self.call_stack[frame_index].locals[*index] = value; + } + } + + pub fn load_global(&self, bytecode_index: BytecodeIndex) -> 
Option { + let global_store_index = self.index_map[&bytecode_index]; + self.global_store.get(*global_store_index).copied() + } + + fn pop(&mut self) -> Result { + if let Some(frame) = self.call_stack.last_mut() { + if let Some(value) = frame.locals.pop() { + return Ok(value); + } + } + + Err(VmError::StackUnderflow) + } + + fn push(&mut self, value: Reference) -> Result<(), VmError> { + if let Some(frame) = self.call_stack.last_mut() { + frame.locals.push(value); + } + + Ok(()) + } + + fn return_val(&mut self) -> Value { + if let Some(frame) = self.call_stack.last() { + if let Some(value) = frame.locals.last() { + return self.take(*value); + } + } + + Value::Void + } + + /// This does not kick off a separate loop; instead, `run_loop` continues execution with the + /// new frame. + fn execute_function(&mut self, function: FunctionObject, args: Vec) { + let frame = Frame::new(function, args); + self.call_stack.push(frame); + } + + fn execute_method(&mut self, method: Method, args: Vec) { + let mut bound_args = vec![method.receiver]; + bound_args.extend(args); + + self.execute_function(method.function.clone(), bound_args); + } + + /// Extract primitives and resolve any references to a [`Value`]. A [`Cow`] is returned to make + /// it difficult to accidentally mutate an object. All modifications should occur through VM + /// instructions. + pub fn dereference(&self, reference: Reference) -> Cow<'_, Value> { + match reference { + Reference::ObjectRef(index) => Cow::Borrowed(self.object_table.get(*index).unwrap()), + Reference::ConstantRef(index) => Cow::Owned(self.read_constant(index).unwrap()), + // convert primitives directly + _ => Cow::Owned(reference.into()), + } + } + + /// Convert a [`Reference`] to a [`Value`] taking full ownership. This will remove any objects + /// from the VM's management and should only be used at the end of execution (i.e. the final + /// return value, in tests, etc). 
+ pub fn take(&mut self, reference: Reference) -> Value { + match reference { + Reference::ObjectRef(index) => { + mem::replace(&mut self.object_table[*index], Value::Void) + } + Reference::ConstantRef(index) => self.constant_pool.get(*index).unwrap().into(), + _ => reference.into(), + } + } + + /// Primitives are stored inline on the stack; all other values are moved into the object table + /// and referenced by their index. + fn create(&mut self, value: Value) -> Reference { + match value { + Value::Integer(_) | Value::Boolean(_) | Value::Void => value.into(), + _ => { + let index = Index::new(self.object_table.len()); + self.object_table.push(value); + Reference::ObjectRef(index) + } + } + } + + pub fn run_loop(&mut self) -> Result<Value, VmError> { + while let Some(current_frame_index) = self.call_stack.len().checked_sub(1) { + let opcode = self.call_stack[current_frame_index].get_inst(); + + log(LogLevel::Debug, || { + format!("Frame ({}) Opcode: {:?}", current_frame_index, opcode) + }); + match opcode { + Opcode::Iadd => { + let reference = self.pop()?; + let b = self.dereference(reference).as_integer(); + let reference = self.pop()?; + let a = self.dereference(reference).as_integer(); + self.push(Reference::Int(a + b))?; + } + Opcode::Isub => { + let reference = self.pop()?; + let b = self.dereference(reference).as_integer(); + let reference = self.pop()?; + let a = self.dereference(reference).as_integer(); + self.push(Reference::Int(a - b))?; + } + Opcode::Imul => { + let reference = self.pop()?; + let b = self.dereference(reference).as_integer(); + let reference = self.pop()?; + let a = self.dereference(reference).as_integer(); + self.push(Reference::Int(a * b))?; + } + Opcode::Idiv => { + let reference = self.pop()?; + let b = self.dereference(reference).as_integer(); + let reference = self.pop()?; + let a = self.dereference(reference).as_integer(); + // A zero divisor (or `i64::MIN / -1`) in the guest program must surface as a VM + // error rather than abort the host with a Rust panic. + let quotient = a.checked_div(b).ok_or(VmError::RuntimeError)?; + self.push(Reference::Int(quotient))?; + } + Opcode::LessThan => { + let reference = self.pop()?; + let right = 
self.dereference(reference).as_integer(); + let reference = self.pop()?; + let left = self.dereference(reference).as_integer(); + self.push(Reference::Bool(left < right))?; + } + Opcode::GreaterThan => { + let reference = self.pop()?; + let right = self.dereference(reference).as_integer(); + let reference = self.pop()?; + let left = self.dereference(reference).as_integer(); + self.push(Reference::Bool(left > right))?; + } + Opcode::UnaryNegative => { + let reference = self.pop()?; + let right = self.dereference(reference).as_integer(); + self.push(Reference::Int(-right))?; + } + Opcode::UnaryNot => { + let reference = self.pop()?; + let right = self.dereference(reference).as_boolean(); + self.push(Reference::Bool(!right))?; + } + Opcode::UnaryInvert => { + let reference = self.pop()?; + let right = self.dereference(reference).as_integer(); + self.push(Reference::Int(!right))?; + } + Opcode::Push(val) => self.push(Reference::Int(val))?, + Opcode::LoadConst(index) => { + self.push(Reference::ConstantRef(index))?; + } + Opcode::StoreFast(bytecode_index) => { + let reference = self.pop()?; + self.store_local(bytecode_index, reference); + } + Opcode::StoreGlobal(bytecode_index) => { + let reference = self.pop()?; + self.store_global(bytecode_index, reference); + } + Opcode::LoadFast(bytecode_index) => { + if let Some(frame) = self.call_stack.last() { + let value = frame.locals[*bytecode_index]; + self.push(value)?; + } + } + Opcode::LoadGlobal(bytecode_index) => { + let reference = self.load_global(bytecode_index).unwrap(); + self.push(reference)?; + } + Opcode::LoadAttr(attr_index) => { + let reference = self.pop()?; + let object = self.dereference(reference); + let name = self.read_constant(attr_index).unwrap(); + let attr = object + .as_object() + .read(name.as_string(), |reference| self.dereference(reference)) + .unwrap(); + let attr_val = self.dereference(attr); + let bound_attr = if let Value::Function(ref function) = *attr_val { + 
self.create(Value::Method(Method::new(reference, function.clone()))) + } else { + attr + }; + self.push(bound_attr)?; + } + Opcode::SetAttr(attr_index) => { + let value = self.pop()?; + let Reference::ObjectRef(obj_index) = self.pop()? else { + panic!() + }; + + let name = self.read_constant(attr_index).unwrap(); + self.update_fn(obj_index, |object_value| { + let Value::Object(object) = object_value else { + panic!() + }; + object.write(name.as_string(), value); + }); + } + Opcode::LoadBuildClass => { + let reference = self.create(Value::BuiltinFunction); + self.push(reference)?; + } + Opcode::Jump(offset) => { + let new_pc = + (self.call_stack[current_frame_index].pc as isize + offset) as usize; + self.call_stack[current_frame_index].pc = new_pc; + } + Opcode::JumpIfFalse(offset) => { + let reference = self.pop()?; + let condition = self.dereference(reference).as_boolean(); + if !condition { + let new_pc = + (self.call_stack[current_frame_index].pc as isize + offset) as usize; + self.call_stack[current_frame_index].pc = new_pc; + } + } + Opcode::PrintConst(index) => { + let value = self.read_constant(index).unwrap(); + println!("{}", value); + } + Opcode::MakeFunction => { + let reference = self.pop()?; + let name = self.dereference(reference).as_string().to_string(); + let reference = self.pop()?; + let code = self.dereference(reference).as_code().clone(); + let function = FunctionObject::new(name, code); + let reference = self.create(Value::Function(function)); + self.push(reference)?; + } + Opcode::Call(bytecode_index) => { + let reference = self + .load_global(bytecode_index) + .ok_or(VmError::VariableNotFound)?; + let value = self.dereference(reference); + match *value { + Value::Function(ref function) => { + let function = function.clone(); + let mut args = vec![]; + for _ in 0..function.code.arg_count { + args.push(self.pop()?); + } + self.execute_function(function, args); + } + Value::Class(ref class) => { + let init_method = 
class.read(Dunder::Init.value()); + let object = Object::new(reference); + let reference = self.create(Value::Object(object)); + self.push(reference)?; + + if let Some(init_method) = init_method { + let init = self.dereference(init_method).as_function().clone(); + // Subtract one because of `self` + let args = (0..init.code.arg_count - 1) + .map(|_| self.pop()) + .collect::, _>>()?; + let method = Method::new(reference, init); + self.execute_method(method, args); + } + } + _ => panic!("not callable!"), + } + } + Opcode::PopAndCall(argc) => { + let args = (0..argc) + .map(|_| self.pop()) + .collect::, _>>()?; + let reference = self.pop()?; + let func = self.dereference(reference); + match *func { + // this is the placeholder for __build_class__ at the moment + Value::BuiltinFunction => { + self.build_class(args); + } + _ => unimplemented!(), + }; + } + Opcode::CallMethod(_) => { + let reference = self.pop()?; + let method = self.dereference(reference); + self.execute_method(method.as_method().clone(), vec![]); + } + Opcode::ReturnValue => { + let return_value = self.pop().unwrap_or(Reference::Void); + + // Exit the loop if there are no more frames + if self.call_stack.is_empty() { + break; + } + + self.call_stack.pop(); + // Push the return value to the caller's frame + self.push(return_value)?; + + // Because we have already manipulated the call stack, we can skip the + // end-of-iteration checks and handling. + continue; + } + Opcode::EndClass => { + // Grab the frame before it gets popped off the call stack below. Its local are + // the class namespace for the class we just finished defining. 
+ let frame = self.call_stack.last().unwrap(); + + let name = self.class_stack.pop().expect("Failed to get class name"); + let class = Class::new(name.clone(), frame.namespace()); + + let reference = self.create(Value::Class(class)); + // TODO this is always treated as a global, need to support nested classes + self.store_global_by_name(&name, reference); + } + Opcode::Halt => break, + // This is an internal error that indicates a jump offset was not properly set + // by the compiler. This opcode should not leak into the VM. + Opcode::Placeholder => return Err(VmError::RuntimeError), + } + + // Increment PC for all instructions. A select few may skip this step by calling + // `continue` above but this is not recommended. + self.call_stack[current_frame_index].pc += 1; + + // Handle functions that complete without explicit return + if self.call_stack[current_frame_index].is_finished() { + self.call_stack.pop(); + } + } + + Ok(self.return_val()) + } + + /// This is intended to be functionally equivalent to `__build_class__` in CPython. + fn build_class(&mut self, args: Vec) { + let name = self.dereference(args[0]).as_string().to_string(); + let code = self.dereference(args[1]).as_code().clone(); + let function = FunctionObject::new(name.clone(), code); + + self.class_stack.push(name); + self.execute_function(function, vec![]); + } +} + +impl Default for VirtualMachine { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::{ + bytecode_vm::VmInterpreter, core::InterpreterEntrypoint, init::Builder, parser::Parser, + }; + + fn init(text: &str) -> (Parser, VmInterpreter) { + let (parser, interpreter) = Builder::new().text(text).build_vm_expl(); + + (parser, interpreter) + } + + #[test] + /// We're testing for basic memory-efficiency here. The original implementation created + /// unnecessary copies of the object. 
+ fn object_store_duplicates() { + let text = r#" +class Foo: + def __init__(self): + self.x = 44 + +f = Foo() +"#; + let (mut parser, mut interpreter) = init(text); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let objects: Vec<&Value> = interpreter + .vm + .object_table + .iter() + .filter(|object| matches!(object, Value::Object(_))) + .collect(); + assert_eq!(objects.len(), 1); + } + } + } +} diff --git a/src/bytecode_vm/vm/types.rs b/src/bytecode_vm/vm/types.rs new file mode 100644 index 0000000..6bee605 --- /dev/null +++ b/src/bytecode_vm/vm/types.rs @@ -0,0 +1,112 @@ +use std::{borrow::Cow, collections::HashMap}; + +use crate::bytecode_vm::{ + compiler::types::CodeObject, + indices::{ConstantIndex, ObjectTableIndex}, + types::Value, +}; + +pub type Namespace = HashMap; + +/// Primitive values live directly on the stack. +/// [`Reference::ObjectRef`] items reference an object in the object table. +/// [`Reference::ConstantRef`] items reference an immutable object in the constant pool. 
+#[derive(Debug, PartialEq, Clone, Copy)] +pub enum Reference { + Void, + Int(i64), + Bool(bool), + ObjectRef(ObjectTableIndex), + ConstantRef(ConstantIndex), +} + +impl From for Reference { + fn from(value: Value) -> Self { + match value { + Value::Void => Reference::Void, + Value::Integer(i) => Reference::Int(i), + Value::Boolean(i) => Reference::Bool(i), + _ => unimplemented!(), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Class { + pub name: String, + namespace: Namespace, +} + +impl Class { + pub fn new(name: String, namespace: Namespace) -> Self { + Self { name, namespace } + } + + pub fn read(&self, name: &str) -> Option { + self.namespace.get(name).cloned() + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Object { + class: Reference, + namespace: Namespace, +} + +impl<'a> Object { + pub fn new(class: Reference) -> Self { + Self { + class, + namespace: HashMap::new(), + } + } + + #[cfg(test)] + pub fn class_ref(&self) -> Reference { + self.class + } + + pub fn read(&self, name: &str, deref: T) -> Option + where + T: FnOnce(Reference) -> Cow<'a, Value>, + { + if let Some(result) = self.namespace.get(name) { + return Some(*result); + } + + let class = deref(self.class); + class.as_class().namespace.get(name).cloned() + } + + pub fn write(&mut self, name: &str, value: Reference) { + self.namespace.insert(name.to_string(), value); + } +} + +/// This encapsulates a [`CodeObject`] along with the execution environment in which the function +/// was defined (global variables, closure/free variables). This is what gets created when you +/// define a function in Python. This is not bound to any particular instance of a class when +/// defined at the class level. 
+#[derive(Clone, PartialEq, Debug)] +pub struct FunctionObject { + pub name: String, + pub code: CodeObject, +} + +impl FunctionObject { + pub fn new(name: String, code: CodeObject) -> Self { + Self { name, code } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Method { + pub receiver: Reference, + pub function: FunctionObject, +} + +impl Method { + pub fn new(receiver: Reference, function: FunctionObject) -> Self { + Self { receiver, function } + } +} diff --git a/src/core/container.rs b/src/core/container.rs new file mode 100644 index 0000000..b9b0fe3 --- /dev/null +++ b/src/core/container.rs @@ -0,0 +1,63 @@ +use std::{ + cell::RefCell, + fmt::{Error, Formatter, Pointer}, + ops::Deref, + rc::Rc, +}; + +#[derive(Debug, PartialEq)] +pub struct Container(Rc>); + +impl Container { + pub fn new(value: T) -> Self { + Container(Rc::new(RefCell::new(value))) + } + + /// This compares that two `Container` objects have the same identity, meaning that they point + /// to the same place in memory. + /// + /// The `==` operator uses the derived `PartialEq` trait to check whether two `Container` + /// objects have the same values `T`. + pub fn same_identity(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.0, &other.0) + } +} + +/// When we print the pointer value of a `Container` we are usually interested in the pointer +/// value of the `Rc<_>`. +impl Pointer for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "{:p}", self.0) + } +} + +impl Deref for Container { + type Target = RefCell; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// The intention when calling `clone()` on a `Container` is that the reference count to this +/// object should be increased but the inner `T` should not be cloned. We implement the `clone` +/// method ourselves, rather than deriving it, so that the `Clone` trait is not required to be +/// implemented on type `T`. 
+impl Clone for Container { + fn clone(&self) -> Self { + Container(self.0.clone()) + } +} + +/// Provide an alternative way to initialize a `Container`. +pub trait Storable { + fn store(self) -> Container + where + Self: Sized; +} + +impl Storable for i64 { + fn store(self) -> Container { + Container::new(self) + } +} diff --git a/src/core/log.rs b/src/core/log.rs new file mode 100644 index 0000000..7347940 --- /dev/null +++ b/src/core/log.rs @@ -0,0 +1,35 @@ +/// These are each allowed to be dead code in case the code base happens to not have any at that +/// level at the moment. +#[derive(Debug, PartialEq, PartialOrd)] +pub enum LogLevel { + #[allow(dead_code)] + Error, + #[allow(dead_code)] + Warn, + #[allow(dead_code)] + Info, + #[allow(dead_code)] + Debug, + #[allow(dead_code)] + Trace, +} + +static CURRENT_LOG_LEVEL: LogLevel = LogLevel::Info; + +/// Accept a closure to avoid calling the construction of the formatted strings until necessary +/// (i.e. we know that our logging level asks for it). +pub fn log String>(level: LogLevel, message_fn: F) { + if level <= CURRENT_LOG_LEVEL { + let message = message_fn(); + println!("[{:?}] {}", level, message); + } +} + +/// Sometimes we want to log something that does not return a string but we still want to respect +/// our logging level. +#[allow(dead_code)] +pub fn log_impure(level: LogLevel, message_fn: F) { + if level <= CURRENT_LOG_LEVEL { + message_fn(); + } +} diff --git a/src/core/mod.rs b/src/core/mod.rs new file mode 100644 index 0000000..e178773 --- /dev/null +++ b/src/core/mod.rs @@ -0,0 +1,23 @@ +use std::fmt::Display; + +mod container; +mod log; +mod stack; + +pub use container::{Container, Storable}; +#[allow(unused_imports)] +pub use log::{log, log_impure, LogLevel}; +pub use stack::Stack; + +use crate::{parser::Parser, types::errors::MemphisError}; + +/// Return types which Void are used internally, but should never be displayed to the developer. 
+pub trait Voidable { + fn is_void(&self) -> bool; +} + +pub trait InterpreterEntrypoint { + type Return: Display + Voidable; + + fn run(&mut self, parser: &mut Parser) -> Result; +} diff --git a/src/core/stack.rs b/src/core/stack.rs new file mode 100644 index 0000000..5f71668 --- /dev/null +++ b/src/core/stack.rs @@ -0,0 +1,68 @@ +use std::sync::RwLock; + +pub struct Stack { + elements: RwLock>, +} + +impl Stack +where + T: Clone, +{ + pub fn new(items: Vec) -> Self { + Self { + elements: RwLock::new(items), + } + } + + pub fn with_initial(item: T) -> Self { + Self::new(vec![item]) + } + + pub fn default() -> Self { + Self::new(vec![]) + } + + /// It is safe to call `unwrap()` after `write()` in our single-threaded context. + /// + /// If this is used in a multi-threaded context in the future, we will need to consider lock + /// poisoning, which happens when a panic occurs in a thread while it holds a lock. + pub fn push(&self, item: T) { + self.elements.write().unwrap().push(item); + } + + pub fn pop(&self) -> Option { + self.elements.write().unwrap().pop() + } + + /// Return the top element of the stack. If a `Container` is used, this can still be used in + /// mutable contexts because of its interior mutability. + pub fn top(&self) -> Option { + self.elements.read().unwrap().last().cloned() + } + + /// Perform a mutable operation `F` on the top element while maintaining the safety of the + /// `RwLockWriteGuard`. + pub fn with_top_mut(&self, f: F) -> Option + where + F: FnOnce(&mut T) -> R, + { + let mut guard = self.elements.write().unwrap(); + guard.last_mut().map(f) + } + + /// We call clone to avoid holding a lock for a long time. + /// + /// We must guarantee `DoubleEndedIterator` is called so that this iterator may be used in + /// reverse. + pub fn iter(&self) -> impl DoubleEndedIterator { + self.elements.read().unwrap().clone().into_iter() + } + + /// We call clone to avoid holding a lock for a long time. 
+ /// + /// We must guarantee `DoubleEndedIterator` is called so that this iterator may be used in + /// reverse. + pub fn iter_mut(&self) -> impl DoubleEndedIterator { + self.elements.write().unwrap().clone().into_iter() + } +} diff --git a/src/crosscheck/adapters.rs b/src/crosscheck/adapters.rs new file mode 100644 index 0000000..52a53a2 --- /dev/null +++ b/src/crosscheck/adapters.rs @@ -0,0 +1,91 @@ +use crate::core::InterpreterEntrypoint; +use crate::init::Builder; + +use super::test_value::TestValue; +use super::traits::InterpreterTest; + +pub struct BytecodeVmAdapter; + +impl InterpreterTest for BytecodeVmAdapter { + fn execute(&self, code: &str) -> TestValue { + let (mut parser, mut interpreter) = Builder::new().text(code).build_vm_expl(); + + match interpreter.run(&mut parser) { + Ok(r) => r.into(), + Err(e) => panic!("{}", e), + } + } + + fn execute_and_return(&self, code: &str, var: &str) -> TestValue { + let (mut parser, mut interpreter) = Builder::new().text(code).build_vm_expl(); + + match interpreter.run(&mut parser) { + Ok(_) => interpreter + .take(var) + .unwrap_or_else(|| panic!("Variable {} not found", var)) + .into(), + Err(e) => panic!("{}", e), + } + } + + fn execute_and_return_vars(&self, code: &str, vars: Vec<&str>) -> Vec { + let (mut parser, mut interpreter) = Builder::new().text(code).build_vm_expl(); + + match interpreter.run(&mut parser) { + Ok(_) => vars + .iter() + .map(|var| { + interpreter + .take(var) + .unwrap_or_else(|| panic!("Variable {} not found", var)) + .into() + }) + .collect(), + Err(e) => panic!("{}", e), + } + } +} + +pub struct TreewalkAdapter; + +impl InterpreterTest for TreewalkAdapter { + fn execute(&self, code: &str) -> TestValue { + let (mut parser, mut interpreter) = Builder::new().text(code).build_treewalk_expl(); + + match interpreter.run(&mut parser) { + Ok(r) => r.into(), + Err(e) => panic!("{}", e), + } + } + + fn execute_and_return(&self, code: &str, var: &str) -> TestValue { + let (mut parser, mut 
interpreter) = Builder::new().text(code).build_treewalk_expl(); + + match interpreter.run(&mut parser) { + Ok(_) => interpreter + .state + .read(var) + .unwrap_or_else(|| panic!("Variable {} not found", var)) + .into(), + Err(e) => panic!("{}", e), + } + } + + fn execute_and_return_vars(&self, code: &str, vars: Vec<&str>) -> Vec { + let (mut parser, mut interpreter) = Builder::new().text(code).build_treewalk_expl(); + + match interpreter.run(&mut parser) { + Ok(_) => vars + .iter() + .map(|var| { + interpreter + .state + .read(var) + .unwrap_or_else(|| panic!("Variable {} not found", var)) + .into() + }) + .collect(), + Err(e) => panic!("{}", e), + } + } +} diff --git a/src/crosscheck/mod.rs b/src/crosscheck/mod.rs new file mode 100644 index 0000000..bd55c62 --- /dev/null +++ b/src/crosscheck/mod.rs @@ -0,0 +1,7 @@ +mod adapters; +mod test_value; +mod traits; + +pub use adapters::{BytecodeVmAdapter, TreewalkAdapter}; +pub use test_value::TestValue; +pub use traits::InterpreterTest; diff --git a/src/crosscheck/test_value.rs b/src/crosscheck/test_value.rs new file mode 100644 index 0000000..a9fcfca --- /dev/null +++ b/src/crosscheck/test_value.rs @@ -0,0 +1,50 @@ +use crate::bytecode_vm::types::Value; +use crate::treewalk::types::ExprResult; + +/// A common implementation to represent the return value of a Python expression for use in +/// crosscheck tests. This frees each engine up to implement their return values as they like, +/// provided the [`From`] trait is implemented. 
+#[derive(Clone, Debug, PartialEq)] +pub enum TestValue { + Void, + None, + Integer(i64), + String(String), + Boolean(bool), +} + +impl From for TestValue { + fn from(value: Value) -> Self { + match value { + Value::Void => TestValue::Void, + Value::None => TestValue::None, + Value::Integer(val) => TestValue::Integer(val), + Value::String(val) => TestValue::String(val), + Value::Boolean(val) => TestValue::Boolean(val), + _ => unimplemented!( + "Conversion to TestValue not implemented for type {:?}", + value + ), + } + } +} + +impl From for TestValue { + fn from(value: ExprResult) -> Self { + match value { + ExprResult::Void => TestValue::Void, + ExprResult::None => TestValue::None, + ExprResult::Integer(_) => { + TestValue::Integer(value.as_integer_val().expect("failed to get integer")) + } + ExprResult::String(_) => { + TestValue::String(value.as_string().expect("failed to get string")) + } + ExprResult::Boolean(val) => TestValue::Boolean(val), + _ => unimplemented!( + "Conversion to TestValue not implemented for type '{}'", + value.get_type() + ), + } + } +} diff --git a/src/crosscheck/traits.rs b/src/crosscheck/traits.rs new file mode 100644 index 0000000..8dff023 --- /dev/null +++ b/src/crosscheck/traits.rs @@ -0,0 +1,28 @@ +use std::collections::HashMap; + +use super::TestValue; + +/// The primary crosscheck interface, allowing us to run test Python code through multiple +/// interpreter engines and compare the output. 
+pub trait InterpreterTest { + fn execute(&self, code: &str) -> TestValue; + fn execute_and_return(&self, code: &str, var: &str) -> TestValue; + fn execute_and_return_vars(&self, code: &str, vars: Vec<&str>) -> Vec; + + fn assert_expr_expected(&self, code: &str, expected: TestValue) { + let result = self.execute(code); + assert_eq!(result, expected); + } + + fn assert_var_expected(&self, code: &str, var: &str, expected: TestValue) { + let result = self.execute_and_return(code, var); + assert_eq!(result, expected); + } + + fn assert_vars_expected(&self, code: &str, vars: HashMap<&str, TestValue>) { + let result = self.execute_and_return_vars(code, vars.keys().cloned().collect()); + for (index, var) in vars.iter().enumerate() { + assert_eq!(&result[index], var.1); + } + } +} diff --git a/src/domain/mod.rs b/src/domain/mod.rs new file mode 100644 index 0000000..4f92cf0 --- /dev/null +++ b/src/domain/mod.rs @@ -0,0 +1,7 @@ +/// Python's scoping rules rely on determining whether the current context is global (sometimes +/// known as module scope) or local (sometimes known as function scope). 
+#[derive(Clone, Debug)] +pub enum Context { + Global, + Local, +} diff --git a/src/fixtures/call_stack/call_stack.py b/src/fixtures/call_stack/call_stack.py new file mode 100644 index 0000000..4ffd5f1 --- /dev/null +++ b/src/fixtures/call_stack/call_stack.py @@ -0,0 +1,2 @@ +import other +other.middle_call() diff --git a/src/fixtures/call_stack/other.py b/src/fixtures/call_stack/other.py new file mode 100644 index 0000000..0ade3a3 --- /dev/null +++ b/src/fixtures/call_stack/other.py @@ -0,0 +1,5 @@ +def middle_call(): + last_call() + +def last_call(): + unknown() diff --git a/src/fixtures/imports/other.py b/src/fixtures/imports/other.py new file mode 100644 index 0000000..1e7164b --- /dev/null +++ b/src/fixtures/imports/other.py @@ -0,0 +1,21 @@ +def something(): + return 5 + +def something_else(): + return 6 + +def something_third(): + return 7 + +def something_fourth(): + return 8 + +def second_call(): + return something_else() + +def kwargs_call(**kwargs): + return kwargs['a'] + +class OtherClass: + def run(self): + return something_fourth() diff --git a/src/fixtures/imports/regular_import.py b/src/fixtures/imports/regular_import.py new file mode 100644 index 0000000..e3a6c33 --- /dev/null +++ b/src/fixtures/imports/regular_import.py @@ -0,0 +1,4 @@ +import other +x = other.something() +y = other.second_call() +z = other.OtherClass diff --git a/src/fixtures/imports/regular_import_b.py b/src/fixtures/imports/regular_import_b.py new file mode 100644 index 0000000..e7ba8a0 --- /dev/null +++ b/src/fixtures/imports/regular_import_b.py @@ -0,0 +1,2 @@ +import other +y = other.kwargs_call(a=7) diff --git a/src/fixtures/imports/relative/main_a.py b/src/fixtures/imports/relative/main_a.py new file mode 100644 index 0000000..da0226d --- /dev/null +++ b/src/fixtures/imports/relative/main_a.py @@ -0,0 +1,4 @@ +from mypackage.myothermodule import add + +x = add('1', '1') +print(x) diff --git a/src/fixtures/imports/relative/main_b.py b/src/fixtures/imports/relative/main_b.py 
new file mode 100644 index 0000000..dca94f9 --- /dev/null +++ b/src/fixtures/imports/relative/main_b.py @@ -0,0 +1,4 @@ +import mypackage.myothermodule + +x = mypackage.myothermodule.add('1', '1') +print(x) diff --git a/src/fixtures/imports/relative/main_c.py b/src/fixtures/imports/relative/main_c.py new file mode 100644 index 0000000..fe2a964 --- /dev/null +++ b/src/fixtures/imports/relative/main_c.py @@ -0,0 +1,4 @@ +import mypackage.myothermodule as aliased_module + +x = aliased_module.add('1', '1') +print(x) diff --git a/src/fixtures/imports/relative/mypackage/mymodule.py b/src/fixtures/imports/relative/mypackage/mymodule.py new file mode 100644 index 0000000..5630f41 --- /dev/null +++ b/src/fixtures/imports/relative/mypackage/mymodule.py @@ -0,0 +1,2 @@ +def as_int(a): + return int(a) diff --git a/src/fixtures/imports/relative/mypackage/myothermodule.py b/src/fixtures/imports/relative/mypackage/myothermodule.py new file mode 100644 index 0000000..2adae93 --- /dev/null +++ b/src/fixtures/imports/relative/mypackage/myothermodule.py @@ -0,0 +1,4 @@ +from .mymodule import as_int + +def add(a, b): + return as_int(a) + as_int(b) diff --git a/src/fixtures/imports/selective_import_a.py b/src/fixtures/imports/selective_import_a.py new file mode 100644 index 0000000..bef914c --- /dev/null +++ b/src/fixtures/imports/selective_import_a.py @@ -0,0 +1,2 @@ +from other import something +x = something() diff --git a/src/fixtures/imports/selective_import_b.py b/src/fixtures/imports/selective_import_b.py new file mode 100644 index 0000000..8efb919 --- /dev/null +++ b/src/fixtures/imports/selective_import_b.py @@ -0,0 +1,3 @@ +from other import something, something_else, second_call +y = something_else() +z = second_call() diff --git a/src/fixtures/imports/selective_import_c.py b/src/fixtures/imports/selective_import_c.py new file mode 100644 index 0000000..183568c --- /dev/null +++ b/src/fixtures/imports/selective_import_c.py @@ -0,0 +1,2 @@ +from other import something, 
something_else +y = something_third() diff --git a/src/fixtures/imports/selective_import_d.py b/src/fixtures/imports/selective_import_d.py new file mode 100644 index 0000000..c4ce5fa --- /dev/null +++ b/src/fixtures/imports/selective_import_d.py @@ -0,0 +1,2 @@ +from other import * +z = something_fourth() diff --git a/src/fixtures/imports/selective_import_e.py b/src/fixtures/imports/selective_import_e.py new file mode 100644 index 0000000..33a93ec --- /dev/null +++ b/src/fixtures/imports/selective_import_e.py @@ -0,0 +1,4 @@ +from other import OtherClass +o = OtherClass() +x = o.run() +print(x) diff --git a/src/fixtures/imports/selective_import_f.py b/src/fixtures/imports/selective_import_f.py new file mode 100644 index 0000000..8da6a2b --- /dev/null +++ b/src/fixtures/imports/selective_import_f.py @@ -0,0 +1,3 @@ +from other import something_else as alias_name, second_call +y = alias_name() +z = second_call() diff --git a/src/init/builder.rs b/src/init/builder.rs new file mode 100644 index 0000000..1876fdf --- /dev/null +++ b/src/init/builder.rs @@ -0,0 +1,111 @@ +use std::path::PathBuf; +use std::process; + +use crate::bytecode_vm::VmInterpreter; +use crate::core::{Container, InterpreterEntrypoint}; +use crate::lexer::Lexer; +use crate::parser::Parser; +use crate::treewalk::{Interpreter, LoadedModule, StackFrame, State}; + +pub struct Builder { + text: Option, + state: Option>, +} + +impl Default for Builder { + fn default() -> Self { + Self::new() + } +} + +impl Builder { + pub fn new() -> Self { + Builder { + text: None, + state: None, + } + } + + pub fn path(&mut self, filename: &str) -> &mut Self { + self.init_state(); + let input = match self + .state + .clone() + .unwrap() + .load_root(PathBuf::from(filename)) + { + Some(c) => c, + None => { + eprintln!("Error reading file {}", filename); + process::exit(1); + } + }; + self.text = input.text(); + self.state + .clone() + .unwrap() + .push_context(StackFrame::new_root(input.path())); + self + } + + pub fn 
text(&mut self, text: &str) -> &mut Self { + self.init_state(); + + // hmm this shouldn't be necessary, especially for VM runs + let stack_frame = StackFrame::new_module(LoadedModule::new_virtual(text)); + self.state.clone().unwrap().push_context(stack_frame); + self.text = Some(text.into()); + self + } + + pub fn module(&mut self, module: LoadedModule) -> &mut Self { + self.init_state(); + let stack_frame = StackFrame::new_module(module.clone()); + self.state.clone().unwrap().push_context(stack_frame); + self.text = module.text(); + self + } + + pub fn state(&mut self, state: Container) -> &mut Self { + if self.state.is_some() { + panic!("State already set! Must call `state` before `text` or `path`."); + } + self.state = Some(state); + self + } + + fn init_state(&mut self) { + self.state = match self.state.clone() { + Some(s) => Some(s), + None => Some(Container::new(State::new())), + }; + } + + pub fn parser(&mut self) -> Parser { + if self.state.is_none() { + panic!("State never set! Did you forget to call `text` or `path`?"); + } + if self.text.is_none() { + panic!("Text never set! Did you forget to call `text` or `path`?"); + } + let lexer = Lexer::new(&self.text.clone().unwrap()); + Parser::new(lexer.tokens(), self.state.clone().unwrap()) + } + + pub fn build(&mut self) -> (Parser, impl InterpreterEntrypoint) { + (self.parser(), Interpreter::new(self.state.clone().unwrap())) + } + + pub fn build_vm(&mut self) -> (Parser, impl InterpreterEntrypoint) { + (self.parser(), VmInterpreter::new()) + } + + pub fn build_treewalk_expl(&mut self) -> (Parser, Interpreter) { + (self.parser(), Interpreter::new(self.state.clone().unwrap())) + } + + /// When we test the [`VmInterpreter`] we know what type it will be. 
+ pub fn build_vm_expl(&mut self) -> (Parser, VmInterpreter) { + (self.parser(), VmInterpreter::new()) + } +} diff --git a/src/init/memphis.rs b/src/init/memphis.rs new file mode 100644 index 0000000..1610ca7 --- /dev/null +++ b/src/init/memphis.rs @@ -0,0 +1,41 @@ +use std::process; + +use super::Builder as MemphisBuilder; +#[cfg(feature = "llvm_backend")] +use crate::llvm_backend::compile_ast_to_llvm; +use crate::{core::InterpreterEntrypoint, Engine}; + +pub struct Memphis; + +impl Memphis { + pub fn start(filepath: &str, engine: Engine) { + match engine { + Engine::TreeWalk => { + let (mut parser, mut interpreter) = MemphisBuilder::new().path(filepath).build(); + + match interpreter.run(&mut parser) { + Ok(_) => {} + Err(err) => { + eprintln!("{}", err); + process::exit(1); + } + } + } + Engine::BytecodeVm => { + let (mut parser, mut interpreter) = MemphisBuilder::new().path(filepath).build_vm(); + + match interpreter.run(&mut parser) { + Ok(_) => {} + Err(err) => { + eprintln!("{}", err); + process::exit(1); + } + } + } + #[cfg(feature = "llvm_backend")] + Engine::LlvmBackend => { + compile_ast_to_llvm(); + } + } + } +} diff --git a/src/init/mod.rs b/src/init/mod.rs new file mode 100644 index 0000000..8e79f96 --- /dev/null +++ b/src/init/mod.rs @@ -0,0 +1,7 @@ +mod builder; +mod memphis; +mod repl; + +pub use builder::Builder; +pub use memphis::Memphis; +pub use repl::Repl; diff --git a/src/init/repl.rs b/src/init/repl.rs new file mode 100644 index 0000000..7231e71 --- /dev/null +++ b/src/init/repl.rs @@ -0,0 +1,106 @@ +use std::io; +use std::io::Write; +use std::process; + +use crate::core::{Container, InterpreterEntrypoint, Voidable}; +use crate::init::Builder; +use crate::lexer::Lexer; +use crate::parser::Parser; +use crate::treewalk::State; +use crate::types::errors::MemphisError; + +pub struct Repl { + in_block: bool, + errors: Vec, +} + +impl Default for Repl { + fn default() -> Self { + Self::new() + } +} + +impl Repl { + pub fn new() -> Self { + Repl { 
+ // this may need to become a state for a FSM, but this seems to be + // working fine for now + in_block: false, + errors: vec![], + } + } + + fn marker(&mut self) -> String { + if !self.in_block { + ">>>".to_string() + } else { + "...".to_string() + } + } + + pub fn run(&mut self) { + println!( + "memphis {} REPL (Type 'exit()' to quit)", + env!("CARGO_PKG_VERSION") + ); + + let state = Container::new(State::new()); + + let (_, mut interpreter) = Builder::new().state(state.clone()).text("").build(); + let mut input = String::new(); + + loop { + print!("{} ", self.marker()); + io::stdout().flush().expect("Failed to flush stdout"); + + let mut line = String::new(); + io::stdin() + .read_line(&mut line) + .expect("Failed to read line"); + + if line.trim_end() == "exit()" { + println!("Exiting..."); + + let error_code = match self.errors.len() { + 0 => 0, + _ => 1, + }; + process::exit(error_code); + } + + input.push_str(&line); + + if self.should_interpret(&input) { + let lexer = Lexer::new(&input); + let mut parser = Parser::new(lexer.tokens(), state.clone()); + match interpreter.run(&mut parser) { + Ok(i) => { + if !i.is_void() { + println!("{}", i); + } + } + Err(err) => { + self.errors.push(err.clone()); + eprintln!("{}", err); + } + } + + input.clear(); + self.in_block = false; + } else { + self.in_block = true; + } + } + } + + fn should_interpret(&mut self, input: &str) -> bool { + let last_two = &input[input.len() - 2..]; + if !self.in_block { + // The start of blocks always begin with : and a newline + last_two != ":\n" + } else { + // The end of blocks are indicated by an empty line + last_two == "\n\n" + } + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..79fa944 --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,2520 @@ +use std::{iter::Peekable, str::Chars}; + +pub mod types; + +use self::types::{MultilineString, Token}; +use crate::{ + core::{log, LogLevel}, + types::errors::LexerError, +}; + +pub struct Lexer { + 
tokens: Vec, + error: Option, + + /// When `None`, we are not inside a multiline string. When `Some`, contains the character we + /// must see to end the multiline string. If a multiline string is not assigned to a variable, + /// it can act as a multiline comment. + multiline_string: Option, + + /// How many nested data structures deep are we? Nested data structures here are those beginning + /// with {, [, or (. + multiline_context: usize, + + /// Internal `Lexer` state indicating whether we are tokenizing an expression between `{..}` in + /// an f-string. + in_f_string_expr: bool, +} + +impl Lexer { + pub fn new(input: &str) -> Self { + let mut lexer = Lexer { + tokens: Vec::new(), + error: None, + multiline_string: None, + multiline_context: 0, + in_f_string_expr: false, + }; + let _ = lexer.tokenize(input); + + lexer + } + + pub fn tokens(&self) -> Vec { + self.tokens.clone() + } + + /// Since we tokenize one line at a time, we must consider whether we are inside a multiline + /// string or not when choosing to emit a `Newline` token or insert a `\n` char into ongoing + /// multiline string literal. 
+ fn emit_newline(&mut self) { + match self.multiline_string { + None => { + self.tokens.push(Token::Newline); + } + Some(ref mut s) => { + s.literal.push('\n'); + } + } + } + + fn check_in_block(&self) -> bool { + self.multiline_context == 0 && self.multiline_string.is_none() + } + + fn tokenize(&mut self, input: &str) -> Result, LexerError> { + self.tokens.clear(); + + let mut indentation_stack: Vec = vec![0]; + + for line in input.lines() { + if line.is_empty() { + self.emit_newline(); + continue; + } + + let mut chars = line.chars().peekable(); + + let mut spaces = 0; + while let Some(&c) = chars.peek() { + if c == ' ' { + spaces += 1; + chars.next(); + } else { + break; + } + } + + if self.check_in_block() { + if spaces > *indentation_stack.last().unwrap() { + indentation_stack.push(spaces); + self.tokens.push(Token::Indent); + } else { + while spaces < *indentation_stack.last().unwrap() { + indentation_stack.pop(); + self.tokens.push(Token::Dedent); + } + } + } + + self.tokenize_line(line.trim_start())?; + self.emit_newline(); + } + if self.tokens.last() == Some(&Token::Newline) { + self.tokens.remove(self.tokens.len() - 1); + } + + while indentation_stack.len() > 1 { + indentation_stack.pop(); + self.tokens.push(Token::Dedent); + } + + self.tokens.push(Token::Eof); + + Ok(self.tokens.clone()) + } + + fn tokenize_binary_literal(&self, chars: &mut Peekable) -> Token { + let mut literal = String::from("0b"); + + while let Some(&c) = chars.peek() { + if c.is_digit(2) { + literal.push(c); + chars.next(); + } else { + break; + } + } + + Token::BinaryLiteral(literal) + } + + fn tokenize_octal_literal(&self, chars: &mut Peekable) -> Token { + let mut literal = String::from("0o"); + + while let Some(&c) = chars.peek() { + if c.is_digit(8) { + literal.push(c); + chars.next(); + } else { + break; + } + } + + Token::OctalLiteral(literal) + } + + fn tokenize_hex_literal(&self, chars: &mut Peekable) -> Token { + let mut literal = String::from("0x"); + + while let Some(&c) 
= chars.peek() { + if c.is_ascii_hexdigit() { + literal.push(c); + chars.next(); + } else { + break; + } + } + + Token::HexLiteral(literal) + } + + fn parse_literal_in_f_string(&mut self, chars: &mut Peekable) { + let mut literal = String::new(); + while let Some(&c) = chars.peek() { + if c == '{' { + if !literal.is_empty() { + self.tokens.push(Token::StringLiteral(literal)); + } + chars.next(); + self.tokens.push(Token::LBrace); + self.in_f_string_expr = true; + break; + } else if c == '"' { + if !literal.is_empty() { + self.tokens.push(Token::StringLiteral(literal)); + } + chars.next(); + self.tokens.push(Token::FStringEnd); + break; + } else { + literal.push(c); + chars.next(); + } + } + } + + pub fn tokenize_line(&mut self, input: &str) -> Result<(), LexerError> { + let mut chars = input.chars().peekable(); + + while let Some(&c) = chars.peek() { + log(LogLevel::Trace, || format!("char: {}", c)); + if c == '#' { + // Comments cause the rest of the line to be ignored + break; + } else if let Some(string) = &self.multiline_string { + if c == string.end_char + && chars.clone().nth(1) == Some(string.end_char) + && chars.clone().nth(2) == Some(string.end_char) + { + chars.next(); + chars.next(); + chars.next(); + if string.raw { + self.tokens + .push(Token::RawStringLiteral(string.literal.clone())); + } else { + self.tokens + .push(Token::StringLiteral(string.literal.clone())); + } + self.multiline_string = None; + } else { + chars.next(); + if let Some(ref mut s) = self.multiline_string { + s.literal.push(c); + } else { + panic!("Expected a raw string literal, but found None"); + } + } + } else if (c == '"' + && chars.clone().nth(1) == Some('"') + && chars.clone().nth(2) == Some('"')) + || (c == '\'' + && chars.clone().nth(1) == Some('\'') + && chars.clone().nth(2) == Some('\'')) + { + self.multiline_string = Some(MultilineString::new(false, c)); + chars.next(); + chars.next(); + chars.next(); + } else if (c == 'r' + && (chars.clone().nth(1) == Some('"') + && 
chars.clone().nth(2) == Some('"') + && chars.clone().nth(3) == Some('"'))) + || (chars.clone().nth(1) == Some('\'') + && chars.clone().nth(2) == Some('\'') + && chars.clone().nth(3) == Some('\'')) + { + self.multiline_string = + Some(MultilineString::new(true, chars.clone().nth(1).unwrap())); + chars.next(); + chars.next(); + chars.next(); + chars.next(); + } else if c == '.' + && chars.clone().nth(1) == Some('.') + && chars.clone().nth(2) == Some('.') + { + chars.next(); + chars.next(); + chars.next(); + self.tokens.push(Token::Ellipsis); + } else if c.is_whitespace() { + chars.next(); + } else if self.in_f_string_expr && c == '}' { + chars.next(); + self.tokens.push(Token::RBrace); + self.in_f_string_expr = false; + self.parse_literal_in_f_string(&mut chars); + } else if matches!(c, 'f' | 'F') && chars.clone().nth(1) == Some('"') { + chars.next(); + chars.next(); + self.tokens.push(Token::FStringStart); + self.parse_literal_in_f_string(&mut chars); + } else if c == 'b' && chars.clone().nth(1) == Some('\'') { + chars.next(); + chars.next(); + let mut literal = String::new(); + while let Some(&c) = chars.peek() { + if c == '\'' { + chars.next(); + break; + } else { + literal.push(c); + chars.next(); + } + } + self.tokens.push(Token::ByteStringLiteral(literal)); + } else if c == '0' && matches!(chars.clone().nth(1), Some('b' | 'B')) { + chars.next(); + chars.next(); + self.tokens.push(self.tokenize_binary_literal(&mut chars)); + } else if c == '0' && matches!(chars.clone().nth(1), Some('o' | 'O')) { + chars.next(); + chars.next(); + self.tokens.push(self.tokenize_octal_literal(&mut chars)); + } else if c == '0' && matches!(chars.clone().nth(1), Some('x' | 'X')) { + chars.next(); + chars.next(); + self.tokens.push(self.tokenize_hex_literal(&mut chars)); + } else if matches!(c, 'r' | 'R') && chars.clone().nth(1) == Some('"') { + chars.next(); + chars.next(); + let mut literal = String::new(); + while let Some(&c) = chars.peek() { + if c == '"' { + chars.next(); + 
break; + } else { + literal.push(c); + chars.next(); + } + } + self.tokens.push(Token::RawStringLiteral(literal)); + } else if c == '"' { + let mut literal = String::new(); + chars.next(); + while let Some(&c) = chars.peek() { + if c == '"' { + chars.next(); + break; + } else { + literal.push(c); + chars.next(); + } + } + self.tokens.push(Token::StringLiteral(literal)); + } else if c == '\'' { + let mut literal = String::new(); + chars.next(); + while let Some(&c) = chars.peek() { + if c == '\'' { + chars.next(); + break; + } else { + literal.push(c); + chars.next(); + } + } + self.tokens.push(Token::StringLiteral(literal)); + } else if c.is_alphabetic() || c == '_' { + let mut identifier = String::new(); + while let Some(&c) = chars.peek() { + if c.is_alphanumeric() || c == '_' { + identifier.push(c); + chars.next(); + } else { + break; + } + } + + let token = match identifier.as_str() { + "def" => Token::Def, + "del" => Token::Del, + "lambda" => Token::Lambda, + "if" => Token::If, + "elif" => Token::Elif, + "else" => Token::Else, + "while" => Token::While, + "for" => Token::For, + "in" => Token::In, + "is" => Token::Is, + "return" => Token::Return, + "yield" => Token::Yield, + "pass" => Token::Pass, + "and" => Token::And, + "or" => Token::Or, + "not" => Token::Not, + "class" => Token::Class, + "try" => Token::Try, + "except" => Token::Except, + "finally" => Token::Finally, + "raise" => Token::Raise, + "from" => Token::From, + "as" => Token::As, + "with" => Token::With, + "import" => Token::Import, + "assert" => Token::Assert, + "None" => Token::None, + "Ellipsis" => Token::Ellipsis, + "NotImplemented" => Token::NotImplemented, + "True" => Token::BooleanLiteral(true), + "False" => Token::BooleanLiteral(false), + "async" => Token::Async, + "await" => Token::Await, + "continue" => Token::Continue, + "break" => Token::Break, + "nonlocal" => Token::Nonlocal, + "global" => Token::Global, + _ => Token::Identifier(identifier), + }; + self.tokens.push(token); + } else if 
c.is_ascii_digit() { + let mut value = String::new(); + let mut is_scientific = false; + while let Some(&c) = chars.peek() { + if matches!(c, 'e' | 'E') { + is_scientific = true; + } + + if c.is_ascii_digit() + || matches!(c, '.' | 'e' | 'E') + // We should only see a dash char '-' if we know we are in scientific + // notation. + || (is_scientific && c == '-') + { + value.push(c); + chars.next(); + } else { + break; + } + } + + if value.contains('.') || value.contains('e') || value.contains('E') { + self.tokens + .push(Token::FloatingPoint(value.parse().unwrap())); + } else { + self.tokens.push(Token::Integer(value.parse().unwrap())); + } + } else if matches!(c, '+' | '-' | '*' | '/' | '&' | '^' | '|' | '%' | '@') + && chars.clone().nth(1) == Some('=') + { + chars.next(); + chars.next(); + let token = match c { + '+' => Token::PlusEquals, + '-' => Token::MinusEquals, + '*' => Token::AsteriskEquals, + '/' => Token::SlashEquals, + '&' => Token::BitwiseAndEquals, + '^' => Token::BitwiseXorEquals, + '|' => Token::BitwiseOrEquals, + '%' => Token::ModEquals, + '@' => Token::MatMulEquals, + _ => unreachable!(), + }; + self.tokens.push(token); + } else if matches!(c, '/' | '*' | '<' | '>') + // Look for a second character in a row which is the same + && chars.clone().nth(1) == Some(c) + && chars.clone().nth(2) == Some('=') + { + let mut identifier = String::new(); + while let Some(&c) = chars.peek() { + identifier.push(c); + chars.next(); + + if c == '=' { + break; + } + } + + let token = match identifier.as_str() { + "//=" => Token::DoubleSlashEquals, + "**=" => Token::ExpoEquals, + "<<=" => Token::LeftShiftEquals, + ">>=" => Token::RightShiftEquals, + _ => unreachable!(), + }; + self.tokens.push(token); + } else if c == '-' && chars.clone().nth(1) == Some('>') { + chars.next(); + chars.next(); + self.tokens.push(Token::ReturnTypeArrow); + } else if matches!(c, '=' | '!' 
| '<' | '>') { + let mut operator = String::new(); + while let Some(&c) = chars.peek() { + if matches!(c, '=' | '!' | '<' | '>') { + operator.push(c); + chars.next(); + } else { + break; + } + } + + let token = match operator.as_str() { + "==" => Token::Equal, + "!=" => Token::NotEqual, + "<" => Token::LessThan, + ">" => Token::GreaterThan, + "<=" => Token::LessThanOrEqual, + ">=" => Token::GreaterThanOrEqual, + "=" => Token::Assign, + "!" => Token::Exclamation, + "<<" => Token::LeftShift, + ">>" => Token::RightShift, + _ => { + self.error = Some(LexerError::UnexpectedCharacter(c)); + Token::InvalidCharacter(c) + } + }; + self.tokens.push(token); + } else if c == '*' && matches!(chars.clone().nth(1), Some('*')) { + chars.next(); + chars.next(); + self.tokens.push(Token::DoubleAsterisk); + } else if c == '/' && matches!(chars.clone().nth(1), Some('/')) { + chars.next(); + chars.next(); + self.tokens.push(Token::DoubleSlash); + } else { + let token = match c { + '.' => Token::Dot, + '+' => Token::Plus, + '-' => Token::Minus, + '*' => Token::Asterisk, + '/' => Token::Slash, + '(' => Token::LParen, + ')' => Token::RParen, + '[' => Token::LBracket, + ']' => Token::RBracket, + '{' => Token::LBrace, + '}' => Token::RBrace, + ',' => Token::Comma, + ':' => Token::Colon, + '@' => Token::AtSign, + '&' => Token::BitwiseAnd, + '|' => Token::BitwiseOr, + '^' => Token::BitwiseXor, + '~' => Token::BitwiseNot, + '%' => Token::Modulo, + '\n' => Token::Newline, + _ => { + self.error = Some(LexerError::UnexpectedCharacter(c)); + Token::InvalidCharacter(c) + } + }; + + // Detect when we are inside multi-line data structures, which should not be + // treated the same as blocks. 
+ if matches!(token, Token::LParen | Token::LBrace | Token::LBracket) { + self.multiline_context += 1; + } else if matches!(token, Token::RParen | Token::RBrace | Token::RBracket) { + self.multiline_context -= 1; + } + + self.tokens.push(token); + chars.next(); + } + } + + if self.error.is_none() { + Ok(()) + } else { + Err(self.error.clone().unwrap()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn function_definition() { + let input = r#" +def add(x, y): + return x + y +"#; + let lexer = Lexer::new(input); + + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Def, + Token::Identifier("add".to_string()), + Token::LParen, + Token::Identifier("x".to_string()), + Token::Comma, + Token::Identifier("y".to_string()), + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Return, + Token::Identifier("x".to_string()), + Token::Plus, + Token::Identifier("y".to_string()), + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn invalid_character() { + let input = "2 + $"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::Integer(2), Token::Plus, Token::InvalidCharacter('$'),] + ); + } + + #[test] + fn comparison_operators() { + let input = "a > b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::GreaterThan, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a < b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::LessThan, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a == b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::Equal, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a != b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + 
Token::Identifier("a".to_string()), + Token::NotEqual, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a >= b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::GreaterThanOrEqual, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a <= b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::LessThanOrEqual, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + } + + #[test] + fn boolean_expressions() { + let input = "a and b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::And, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a or b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::Or, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a in b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::In, + Token::Identifier("b".to_string()), + Token::Eof, + ] + ); + + let input = "a is None"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::Is, + Token::None, + Token::Eof, + ] + ); + + let input = "not b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::Not, Token::Identifier("b".to_string()), Token::Eof,] + ); + + let input = "not (b or c)"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Not, + Token::LParen, + Token::Identifier("b".to_string()), + Token::Or, + Token::Identifier("c".to_string()), + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn boolean_literals() { + let input = "x = True"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + 
Token::Identifier("x".to_string()), + Token::Assign, + Token::BooleanLiteral(true), + Token::Eof, + ] + ); + + let input = "x = False"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::BooleanLiteral(false), + Token::Eof, + ] + ); + + let input = "x = None"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::None, + Token::Eof, + ] + ); + + let input = "return None"; + let lexer = Lexer::new(input); + assert_eq!(lexer.tokens, vec![Token::Return, Token::None, Token::Eof,]); + } + + #[test] + fn if_else() { + let input = r#" +if x > 0: + print("Greater") +elif x > -10: + print("Middle") +else: + print("Less") +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::If, + Token::Identifier("x".to_string()), + Token::GreaterThan, + Token::Integer(0), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("print".to_string()), + Token::LParen, + Token::StringLiteral("Greater".to_string()), + Token::RParen, + Token::Newline, + Token::Dedent, + Token::Elif, + Token::Identifier("x".to_string()), + Token::GreaterThan, + Token::Minus, + Token::Integer(10), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("print".to_string()), + Token::LParen, + Token::StringLiteral("Middle".to_string()), + Token::RParen, + Token::Newline, + Token::Dedent, + Token::Else, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("print".to_string()), + Token::LParen, + Token::StringLiteral("Less".to_string()), + Token::RParen, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn while_loop() { + let input = r#" +while True: + print("busy loop") +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::While, + Token::BooleanLiteral(true), + Token::Colon, + Token::Newline, + 
Token::Indent, + Token::Identifier("print".to_string()), + Token::LParen, + Token::StringLiteral("busy loop".to_string()), + Token::RParen, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn class_definition() { + let input = r#" +class Foo: + def __init__(self): + self.x = 0 + + def bar(self): + return self.x +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Class, + Token::Identifier("Foo".to_string()), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Def, + Token::Identifier("__init__".to_string()), + Token::LParen, + Token::Identifier("self".to_string()), + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("self".to_string()), + Token::Dot, + Token::Identifier("x".to_string()), + Token::Assign, + Token::Integer(0), + Token::Newline, + Token::Newline, + Token::Dedent, + Token::Def, + Token::Identifier("bar".to_string()), + Token::LParen, + Token::Identifier("self".to_string()), + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Return, + Token::Identifier("self".to_string()), + Token::Dot, + Token::Identifier("x".to_string()), + Token::Dedent, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn class_instantiation() { + let input = "foo = Foo()\n"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("foo".to_string()), + Token::Assign, + Token::Identifier("Foo".to_string()), + Token::LParen, + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn method_invocation() { + let input = "foo = Foo()\nfoo.bar()"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("foo".to_string()), + Token::Assign, + Token::Identifier("Foo".to_string()), + Token::LParen, + Token::RParen, + Token::Newline, + Token::Identifier("foo".to_string()), + Token::Dot, + Token::Identifier("bar".to_string()), + Token::LParen, + Token::RParen, + Token::Eof, + ] + ); + } + 
+ #[test] + fn regular_import() { + let input = "import other"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Import, + Token::Identifier("other".to_string()), + Token::Eof, + ] + ); + } + + #[test] + fn selective_import() { + let input = "from other import something"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::From, + Token::Identifier("other".to_string()), + Token::Import, + Token::Identifier("something".to_string()), + Token::Eof, + ] + ); + + let input = "from other import something as something_else"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::From, + Token::Identifier("other".to_string()), + Token::Import, + Token::Identifier("something".to_string()), + Token::As, + Token::Identifier("something_else".to_string()), + Token::Eof, + ] + ); + + let input = "from other import *"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::From, + Token::Identifier("other".to_string()), + Token::Import, + Token::Asterisk, + Token::Eof, + ] + ); + + let input = "from other import something, something_else"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::From, + Token::Identifier("other".to_string()), + Token::Import, + Token::Identifier("something".to_string()), + Token::Comma, + Token::Identifier("something_else".to_string()), + Token::Eof, + ] + ); + } + + #[test] + fn comment() { + let input = r#" +foo = Foo(3) # new instance +# x = foo.baz() +foo.bar() +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("foo".to_string()), + Token::Assign, + Token::Identifier("Foo".to_string()), + Token::LParen, + Token::Integer(3), + Token::RParen, + Token::Newline, + Token::Newline, + Token::Identifier("foo".to_string()), + Token::Dot, + Token::Identifier("bar".to_string()), + Token::LParen, + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn 
floating_point() { + let input = "x = 3.14"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::FloatingPoint(3.14), + Token::Eof, + ] + ); + + let input = "x = 2.5e-3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::FloatingPoint(2.5e-3), + Token::Eof, + ] + ); + + let input = "x = 2.5E-3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::FloatingPoint(2.5e-3), + Token::Eof, + ] + ); + + let input = "x = 2E-3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::FloatingPoint(2e-3), + Token::Eof, + ] + ); + + let input = "x = 2E3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("x".to_string()), + Token::Assign, + Token::FloatingPoint(2e3), + Token::Eof, + ] + ); + } + + #[test] + fn negative_numbers() { + let input = "-3.14"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::Minus, Token::FloatingPoint(3.14), Token::Eof,] + ); + + let input = "-3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::Minus, Token::Integer(3), Token::Eof,] + ); + + let input = "2 - 3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Integer(2), + Token::Minus, + Token::Integer(3), + Token::Eof, + ] + ); + + let input = "-2e-3"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::Minus, Token::FloatingPoint(2e-3), Token::Eof,] + ); + + let input = "3-i"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Integer(3), + Token::Minus, + Token::Identifier("i".into()), + Token::Eof, + ] + ); + + let input = "2 + -3"; + let lexer = Lexer::new(input); + assert_eq!( + 
lexer.tokens, + vec![ + Token::Integer(2), + Token::Plus, + Token::Minus, + Token::Integer(3), + Token::Eof, + ] + ); + + let input = "-(3)"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Minus, + Token::LParen, + Token::Integer(3), + Token::RParen, + Token::Eof, + ] + ); + + let input = "-(2 + 3)"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Minus, + Token::LParen, + Token::Integer(2), + Token::Plus, + Token::Integer(3), + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn lists() { + let input = "[1,2,3]"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::LBracket, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBracket, + Token::Eof, + ] + ); + + let input = "[1, 2, 3]"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::LBracket, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBracket, + Token::Eof, + ] + ); + + let input = "a = [1, 2, 3]"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::Assign, + Token::LBracket, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBracket, + Token::Eof, + ] + ); + + let input = "list([1, 2, 3])"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("list".to_string()), + Token::LParen, + Token::LBracket, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBracket, + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn sets() { + let input = "{1,2,3}"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::LBrace, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBrace, + Token::Eof, + ] 
+ ); + + let input = "{1, 2, 3}"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::LBrace, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBrace, + Token::Eof, + ] + ); + + let input = "a = {1, 2, 3}"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::Assign, + Token::LBrace, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBrace, + Token::Eof, + ] + ); + + let input = "set({1, 2, 3})"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("set".to_string()), + Token::LParen, + Token::LBrace, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::Comma, + Token::Integer(3), + Token::RBrace, + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn index_access() { + let input = "a[0]"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".to_string()), + Token::LBracket, + Token::Integer(0), + Token::RBracket, + Token::Eof, + ] + ); + + let input = "[0,1][1]"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::LBracket, + Token::Integer(0), + Token::Comma, + Token::Integer(1), + Token::RBracket, + Token::LBracket, + Token::Integer(1), + Token::RBracket, + Token::Eof, + ] + ); + } + + #[test] + fn for_in_loop() { + let input = r#" +for i in a: + print(a) +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::For, + Token::Identifier("i".to_string()), + Token::In, + Token::Identifier("a".to_string()), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("print".to_string()), + Token::LParen, + Token::Identifier("a".to_string()), + Token::RParen, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn list_comprehension() { + let input = r#" +b = [ i * 2 for i in a ] 
+"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("b".to_string()), + Token::Assign, + Token::LBracket, + Token::Identifier("i".to_string()), + Token::Asterisk, + Token::Integer(2), + Token::For, + Token::Identifier("i".to_string()), + Token::In, + Token::Identifier("a".to_string()), + Token::RBracket, + Token::Eof, + ] + ); + } + + #[test] + fn tuples() { + let input = r#" +(1,2) +print((1,2)) +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::LParen, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::RParen, + Token::Newline, + Token::Identifier("print".to_string()), + Token::LParen, + Token::LParen, + Token::Integer(1), + Token::Comma, + Token::Integer(2), + Token::RParen, + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn generators() { + let input = r#" +def countdown(n): + while n > 0: + yield n + n = n - 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Def, + Token::Identifier("countdown".to_string()), + Token::LParen, + Token::Identifier("n".to_string()), + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::While, + Token::Identifier("n".to_string()), + Token::GreaterThan, + Token::Integer(0), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Yield, + Token::Identifier("n".to_string()), + Token::Newline, + Token::Identifier("n".to_string()), + Token::Assign, + Token::Identifier("n".to_string()), + Token::Minus, + Token::Integer(1), + Token::Dedent, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn inheritance() { + let input = r#" +class Foo(Parent): + def __init__(self): + self.x = 0 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Class, + Token::Identifier("Foo".to_string()), + Token::LParen, + Token::Identifier("Parent".to_string()), + Token::RParen, + 
Token::Colon, + Token::Newline, + Token::Indent, + Token::Def, + Token::Identifier("__init__".to_string()), + Token::LParen, + Token::Identifier("self".to_string()), + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("self".to_string()), + Token::Dot, + Token::Identifier("x".to_string()), + Token::Assign, + Token::Integer(0), + Token::Dedent, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn dictionaries() { + let input = r#" +a = { "b": 4, 'c': 5 } +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".to_string()), + Token::Assign, + Token::LBrace, + Token::StringLiteral("b".to_string()), + Token::Colon, + Token::Integer(4), + Token::Comma, + Token::StringLiteral("c".to_string()), + Token::Colon, + Token::Integer(5), + Token::RBrace, + Token::Eof, + ] + ); + } + + #[test] + fn async_await() { + let input = r#" +async def main(): + task_1 = asyncio.create_task(task1()) + await task_1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Async, + Token::Def, + Token::Identifier("main".to_string()), + Token::LParen, + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("task_1".to_string()), + Token::Assign, + Token::Identifier("asyncio".to_string()), + Token::Dot, + Token::Identifier("create_task".to_string()), + Token::LParen, + Token::Identifier("task1".to_string()), + Token::LParen, + Token::RParen, + Token::RParen, + Token::Newline, + Token::Await, + Token::Identifier("task_1".to_string()), + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn multiline_string() { + let input = r#" +"""comment 5-lines +5-types +""" +a = 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::StringLiteral("comment 5-lines\n5-types\n".into()), + Token::Newline, + Token::Identifier("a".to_string()), + Token::Assign, + 
Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +'''comment 5-lines +5-types +''' +a = 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::StringLiteral("comment 5-lines\n5-types\n".into()), + Token::Newline, + Token::Identifier("a".to_string()), + Token::Assign, + Token::Integer(1), + Token::Eof, + ] + ); + } + + #[test] + fn assert() { + let input = r#" +assert True +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Assert, + Token::BooleanLiteral(true), + Token::Eof, + ] + ); + } + + #[test] + fn try_except_finally() { + let input = r#" +try: + 4 / 0 +except: + a = 2 +finally: + a = 3 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Try, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Integer(4), + Token::Slash, + Token::Integer(0), + Token::Newline, + Token::Dedent, + Token::Except, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("a".into()), + Token::Assign, + Token::Integer(2), + Token::Newline, + Token::Dedent, + Token::Finally, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("a".into()), + Token::Assign, + Token::Integer(3), + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn hex_literal() { + let input = r#" +a = 0x0010 +b +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::Assign, + Token::HexLiteral("0x0010".into()), + Token::Newline, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + } + + #[test] + fn octal_literal() { + let input = r#" +a = 0o0010 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::Assign, + Token::OctalLiteral("0o0010".into()), + Token::Eof, + ] + ); + } + + #[test] + fn binary_literal() { + let input = r#" +a = 0b0010 +"#; + 
let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::Assign, + Token::BinaryLiteral("0b0010".into()), + Token::Eof, + ] + ); + } + + #[test] + fn kwargs() { + let input = r#" +def add(*args, **kwargs): + pass +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Def, + Token::Identifier("add".to_string()), + Token::LParen, + Token::Asterisk, + Token::Identifier("args".to_string()), + Token::Comma, + Token::DoubleAsterisk, + Token::Identifier("kwargs".to_string()), + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Pass, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn decorator() { + let input = r#" +@test_decorator +def get_val(): + return 2 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::AtSign, + Token::Identifier("test_decorator".to_string()), + Token::Newline, + Token::Def, + Token::Identifier("get_val".to_string()), + Token::LParen, + Token::RParen, + Token::Colon, + Token::Newline, + Token::Indent, + Token::Return, + Token::Integer(2), + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn raise() { + let input = r#" +raise Exception +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Raise, + Token::Identifier("Exception".into()), + Token::Eof, + ] + ); + } + + #[test] + fn context_manager() { + let input = r#" +with open('test.txt') as f: + f.read() +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::With, + Token::Identifier("open".into()), + Token::LParen, + Token::StringLiteral("test.txt".into()), + Token::RParen, + Token::As, + Token::Identifier("f".into()), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Identifier("f".into()), + Token::Dot, + Token::Identifier("read".into()), + Token::LParen, + Token::RParen, + 
Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn ellipsis() { + let input = r#" +type(...) +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("type".into()), + Token::LParen, + Token::Ellipsis, + Token::RParen, + Token::Eof, + ] + ); + + let input = r#" +type(Ellipsis) +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("type".into()), + Token::LParen, + Token::Ellipsis, + Token::RParen, + Token::Eof, + ] + ); + } + + #[test] + fn delete() { + let input = r#" +del a +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Del, + Token::Identifier("a".into()), + Token::Eof, + ] + ); + } + + #[test] + fn byte_string() { + let input = r#" +b'hello' +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::ByteStringLiteral("hello".into()), + Token::Eof, + ] + ); + } + + #[test] + fn compound_assignment() { + let input = r#" +a += 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::PlusEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a -= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::MinusEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a *= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::AsteriskEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a /= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::SlashEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a &= 1 +"#; + let lexer = Lexer::new(input); + 
assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::BitwiseAndEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a ^= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::BitwiseXorEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a |= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::BitwiseOrEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a //= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::DoubleSlashEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a <<= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::LeftShiftEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a %= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::ModEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a @= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::MatMulEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a **= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::ExpoEquals, + Token::Integer(1), + Token::Eof, + ] + ); + + let input = r#" +a >>= 1 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Identifier("a".into()), + Token::RightShiftEquals, + Token::Integer(1), + Token::Eof, + ] + ); + } + + #[test] + fn f_strings() { + let input 
= r#" +f"Hello {name}" +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::FStringStart, + Token::StringLiteral("Hello ".into()), + Token::LBrace, + Token::Identifier("name".into()), + Token::RBrace, + Token::FStringEnd, + Token::Eof, + ] + ); + + let input = r#" +f"Hello" +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::FStringStart, + Token::StringLiteral("Hello".into()), + Token::FStringEnd, + Token::Eof, + ] + ); + + let input = r#" +f"Hello {name} goodbye {other}." +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::FStringStart, + Token::StringLiteral("Hello ".into()), + Token::LBrace, + Token::Identifier("name".into()), + Token::RBrace, + Token::StringLiteral(" goodbye ".into()), + Token::LBrace, + Token::Identifier("other".into()), + Token::RBrace, + Token::StringLiteral(".".into()), + Token::FStringEnd, + Token::Eof, + ] + ); + + let input = r#" +f"{first}{last}" +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::FStringStart, + Token::LBrace, + Token::Identifier("first".into()), + Token::RBrace, + Token::LBrace, + Token::Identifier("last".into()), + Token::RBrace, + Token::FStringEnd, + Token::Eof, + ] + ); + + let input = r#" +f"{first}{last!r}" +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::FStringStart, + Token::LBrace, + Token::Identifier("first".into()), + Token::RBrace, + Token::LBrace, + Token::Identifier("last".into()), + Token::Exclamation, + Token::Identifier("r".into()), + Token::RBrace, + Token::FStringEnd, + Token::Eof, + ] + ); + } + + #[test] + fn raw_strings() { + let input = r#"r"hello""#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::RawStringLiteral("hello".into()), Token::Eof,] + ); + + let input = r#" +r"""OS routines for NT or Posix depending 
on what system we're on. + +This exports: +""" +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::RawStringLiteral( + "OS routines for NT or Posix depending on what system we're on.\n\nThis exports:\n" + .into() + ), + Token::Eof, + ] + ); + } + + #[test] + fn binary_operators() { + let input = "a // b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::DoubleSlash, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + + let input = "a & b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::BitwiseAnd, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + + let input = "a | b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::BitwiseOr, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + + let input = "a ^ b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::BitwiseXor, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + + let input = "a % b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::Modulo, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + + let input = "~a"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::BitwiseNot, Token::Identifier("a".into()), Token::Eof,] + ); + + let input = "a << b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::LeftShift, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + + let input = "a >> b"; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Identifier("a".into()), + Token::RightShift, + Token::Identifier("b".into()), + Token::Eof, + ] + ); + } + + #[test] + fn control_flow() { + let input = r#" +for i in 
a: + continue +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::For, + Token::Identifier("i".to_string()), + Token::In, + Token::Identifier("a".to_string()), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Continue, + Token::Dedent, + Token::Eof, + ] + ); + + let input = r#" +for i in a: + break +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::For, + Token::Identifier("i".to_string()), + Token::In, + Token::Identifier("a".to_string()), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Break, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn lambda() { + let input = r#" +lambda: 4 +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Lambda, + Token::Colon, + Token::Integer(4), + Token::Eof, + ] + ); + } + + #[test] + fn type_hints() { + let input = r#" +def add(a: str, b: str) -> int: + pass +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Def, + Token::Identifier("add".into()), + Token::LParen, + Token::Identifier("a".into()), + Token::Colon, + Token::Identifier("str".into()), + Token::Comma, + Token::Identifier("b".into()), + Token::Colon, + Token::Identifier("str".into()), + Token::RParen, + Token::ReturnTypeArrow, + Token::Identifier("int".into()), + Token::Colon, + Token::Newline, + Token::Indent, + Token::Pass, + Token::Dedent, + Token::Eof, + ] + ); + } + + #[test] + fn scope_modifiers() { + let input = r#" +nonlocal var +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Nonlocal, + Token::Identifier("var".into()), + Token::Eof, + ] + ); + + let input = r#" +global var +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![ + Token::Newline, + Token::Global, + Token::Identifier("var".into()), + Token::Eof, + ] + ); + } + + #[test] + fn 
not_implemented() { + let input = r#" +NotImplemented +"#; + let lexer = Lexer::new(input); + assert_eq!( + lexer.tokens, + vec![Token::Newline, Token::NotImplemented, Token::Eof,] + ); + } +} diff --git a/src/lexer/types.rs b/src/lexer/types.rs new file mode 100644 index 0000000..52ef9e3 --- /dev/null +++ b/src/lexer/types.rs @@ -0,0 +1,150 @@ +pub struct MultilineString { + pub raw: bool, + pub literal: String, + pub end_char: char, +} + +impl MultilineString { + pub fn new(raw: bool, end_char: char) -> Self { + Self { + raw, + end_char, + literal: String::new(), + } + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Token { + Def, + Del, + Lambda, + If, + Else, + Elif, + While, + Return, + For, + In, + Yield, + Pass, + Class, + Try, + Except, + Finally, + Raise, + From, + As, + Import, + With, + Assert, + Dot, + And, + Or, + Not, + Is, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + BitwiseNot, + LeftShift, + RightShift, + Modulo, + ReturnTypeArrow, + Indent, + Dedent, + Identifier(String), + StringLiteral(String), + RawStringLiteral(String), + ByteStringLiteral(String), + None, + Ellipsis, + NotImplemented, + BooleanLiteral(bool), + // This is unsigned because the minus unary operator is not handled by + // this lexer, but rather left for the parser which has better context. 
+ Integer(u64), + FloatingPoint(f64), + HexLiteral(String), + OctalLiteral(String), + BinaryLiteral(String), + DoubleAsterisk, + Plus, + Minus, + Asterisk, + Slash, + DoubleSlash, + PlusEquals, + MinusEquals, + AsteriskEquals, + SlashEquals, + BitwiseAndEquals, + BitwiseXorEquals, + BitwiseOrEquals, + DoubleSlashEquals, + ModEquals, + MatMulEquals, + ExpoEquals, + LeftShiftEquals, + RightShiftEquals, + GreaterThan, + LessThan, + Equal, + NotEqual, + GreaterThanOrEqual, + LessThanOrEqual, + Assign, + Comma, + Colon, + Exclamation, + AtSign, + LParen, + RParen, + LBracket, + RBracket, + LBrace, + RBrace, + Break, + Continue, + Async, + Await, + FStringStart, + FStringEnd, + Nonlocal, + Global, + Newline, + Eof, + InvalidCharacter(char), +} + +impl Token { + /// These types, when called with type(..), are considered type aliases. These were introduced + /// in Python 3.9 from PEP 613: https://peps.python.org/pep-0613/ + pub fn is_type(&self) -> bool { + match self { + Token::Identifier(i) => matches!(i.as_str(), "list" | "dict" | "int" | "str"), + Token::Ellipsis => true, + _ => false, + } + } + + /// Checks if this token is one of the `+=`, `-=`, etc. variants. 
+ pub fn is_compound_assign(&self) -> bool { + matches!( + self, + Token::PlusEquals + | Token::MinusEquals + | Token::AsteriskEquals + | Token::SlashEquals + | Token::BitwiseAndEquals + | Token::BitwiseOrEquals + | Token::BitwiseXorEquals + | Token::DoubleSlashEquals + | Token::LeftShiftEquals + | Token::RightShiftEquals + | Token::ModEquals + | Token::MatMulEquals + | Token::ExpoEquals + ) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..22acdbb --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,19 @@ +mod bytecode_vm; +mod core; +pub mod crosscheck; +mod domain; +pub mod init; +mod lexer; +#[cfg(feature = "llvm_backend")] +mod llvm_backend; +mod parser; +mod treewalk; +mod types; + +#[derive(PartialEq)] +pub enum Engine { + TreeWalk, + BytecodeVm, + #[cfg(feature = "llvm_backend")] + LlvmBackend, +} diff --git a/src/llvm_backend/mod.rs b/src/llvm_backend/mod.rs new file mode 100644 index 0000000..79efa8f --- /dev/null +++ b/src/llvm_backend/mod.rs @@ -0,0 +1,77 @@ +use inkwell::{ + context::Context, + targets::{InitializationConfig, Target}, + IntPredicate, +}; + +use crate::core::{log, log_impure, LogLevel}; + +pub fn compile_ast_to_llvm() { + log(LogLevel::Warn, || { + "llvm-backend is HIGHLY EXPERIMENTAL. 
Use with caution.".to_string() + }); + + let context = Context::create(); + Target::initialize_native(&InitializationConfig::default()).unwrap(); + + let module = context.create_module("ast_to_llvm"); + let execution_engine = module + .create_jit_execution_engine(inkwell::OptimizationLevel::None) + .unwrap(); + + let builder = context.create_builder(); + + let i32_type = context.i32_type(); + let void_type = context.void_type(); + let fn_type = void_type.fn_type(&[], false); + let main_function = module.add_function("main", fn_type, None); + let entry_block = context.append_basic_block(main_function, "entry"); + let loop_cond_block = context.append_basic_block(main_function, "loop_cond"); + let loop_body_block = context.append_basic_block(main_function, "loop_body"); + let after_loop_block = context.append_basic_block(main_function, "after_loop"); + + builder.position_at_end(entry_block); + let i_initial = i32_type.const_int(0, false); + let _ = builder.build_unconditional_branch(loop_cond_block); + + builder.position_at_end(loop_cond_block); + let phi_i = builder.build_phi(i32_type, "i").unwrap(); + phi_i.add_incoming(&[(&i_initial, entry_block)]); + + let n_val = i32_type.const_int(10000, false); + let cond = builder + .build_int_compare( + IntPredicate::SLT, + phi_i.as_basic_value().into_int_value(), + n_val, + "loopcond", + ) + .unwrap(); + let _ = builder.build_conditional_branch(cond, loop_body_block, after_loop_block); + + builder.position_at_end(loop_body_block); + let one = i32_type.const_int(1, false); + let new_i_val = builder + .build_int_add(phi_i.as_basic_value().into_int_value(), one, "newi") + .unwrap(); + phi_i.add_incoming(&[(&new_i_val, loop_body_block)]); + let _ = builder.build_unconditional_branch(loop_cond_block); + + builder.position_at_end(after_loop_block); + let _ = builder.build_return(None); + + module.verify().unwrap(); + + log_impure(LogLevel::Trace, || module.print_to_stderr()); + + unsafe { + let main_fn = execution_engine + 
.get_function::("main") + .unwrap(); + main_fn.call(); + } + + // We should eventually support print statements such as these in the IR though I don't know + // yet what that looks like. + println!("Done"); +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..aedd9b5 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,32 @@ +use std::{env, process}; + +use memphis::init::{Memphis, Repl}; +use memphis::Engine; + +/// I could see the default becoming [`Engine::BytecodeVm`] in the future once it supports more. +const DEFAULT_ENGINE: Engine = Engine::TreeWalk; + +fn main() { + let args: Vec = env::args().collect(); + + let engine = if let Some(mode) = args.get(2) { + match mode.to_lowercase().as_str() { + "vm" | "bytecode_vm" => Engine::BytecodeVm, + #[cfg(feature = "llvm_backend")] + "llvm" | "llvm_backend" | "native" => Engine::LlvmBackend, + "tw" | "treewalk" => Engine::TreeWalk, + _ => panic!("Unsupported engine: {}", mode), + } + } else { + DEFAULT_ENGINE + }; + + match args.len() { + 1 => Repl::new().run(), + 2 | 3 => Memphis::start(&args[1], engine), + _ => { + eprintln!("Usage: memphis []"); + process::exit(1); + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..3eff5ac --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,5670 @@ +use std::collections::HashMap; + +pub mod static_analysis; +pub mod types; + +use crate::core::{log, Container, LogLevel}; +use crate::lexer::types::Token; +use crate::parser::types::{ + Alias, BinOp, Block, CompoundOperator, ConditionalBlock, ExceptClause, ExceptionInstance, + ExceptionLiteral, Expr, ExprFormat, FStringPart, ForClause, FormatOption, HandledException, + ImportPath, ImportedItem, LogicalOp, LoopIndex, ParsedArgDefinition, ParsedArgDefinitions, + ParsedArgument, ParsedArguments, ParsedSliceParams, Statement, TypeNode, UnaryOp, +}; +use crate::treewalk::State; +use crate::types::errors::ParserError; + +pub struct Parser { + state: Container, + tokens: Vec, + 
current_token: Token, + position: usize, + line_number: usize, +} + +impl Parser { + pub fn new(tokens: Vec, state: Container) -> Self { + let current_token = tokens.first().cloned().unwrap_or(Token::Eof); + Parser { + state, + tokens, + current_token, + position: 0, + line_number: 1, + } + } + + pub fn is_finished(&self) -> bool { + self.current_token == Token::Eof + } + + fn peek(&self, ahead: usize) -> Token { + self.tokens + .get(self.position + ahead) + .cloned() + .unwrap_or(Token::Eof) + } + + /// Get a reference to a slice of the remaining tokens. + fn remaining_tokens(&self) -> &[Token] { + &self.tokens[self.position..] + } + + /// Does the slice of remaining tokens contain the sought after token? + fn has(&self, target: &Token) -> bool { + self.remaining_tokens().contains(target) + } + + /// How far away is the given token, if it exists in the remaining tokens? This is useful on + /// slice operations. + fn num_away(&self, target: &Token) -> Result { + self.remaining_tokens() + .iter() + .position(|token| token == target) + .ok_or(ParserError::ExpectedToken(target.clone(), Token::Eof)) + } + + /// Check whether the next `tokens.len()` tokens match those provided, without consuming any + /// tokens. This is useful for multi-token operations or where extra context is needed. + fn peek_ahead_contains(&self, tokens: Vec) -> bool { + for (index, token) in tokens.into_iter().enumerate() { + if token != self.peek(index) { + return false; + } + } + + true + } + + /// If we are inside a string literal, we must check for newline characters rather than + /// tokens. These are produced by `Lexer::emit_newline`. 
+ fn advance_line_number_if_needed(&mut self) { + if self.current_token == Token::Newline { + self.line_number += 1; + } else if let Token::StringLiteral(string) = &self.current_token { + self.line_number += string.matches('\n').count(); + } else if let Token::RawStringLiteral(string) = &self.current_token { + self.line_number += string.matches('\n').count(); + } + } + + fn consume(&mut self, expected: Token) -> Result<(), ParserError> { + log(LogLevel::Trace, || { + format!("Token: {:?}", self.current_token) + }); + + if self.current_token == expected { + self.advance_line_number_if_needed(); + + self.position += 1; + self.current_token = self + .tokens + .get(self.position) + .cloned() + .unwrap_or(Token::Eof); + Ok(()) + } else { + Err(ParserError::ExpectedToken( + expected, + self.current_token.clone(), + )) + } + } + + fn consume_optional(&mut self, expected: Token) { + if self.current_token == expected { + let _ = self.consume(expected); + } + } + + fn consume_optional_many(&mut self, expected: Token) { + while self.current_token == expected { + let _ = self.consume(expected.clone()); + } + } + + /// Parse an expression in a context where tuples may be expected. A good option if you're not + /// sure. By tuples here, we mean those that are not indicated by parentheses (those are + /// handled by detecting a LParen in `parse_factor`). + /// + /// ```python + /// 4, 5 + /// a = 4, 5 + /// ``` + /// + /// All other expression parsing is immediately delegated to `parse_simple_expr`. + fn parse_expr(&mut self) -> Result { + let left = self.parse_simple_expr()?; + + if self.current_token == Token::Comma { + let mut items = vec![left]; + while self.current_token == Token::Comma { + self.consume(Token::Comma)?; + items.push(self.parse_simple_expr()?); + } + + Ok(Expr::Tuple(items)) + } else { + Ok(left) + } + } + + /// Parse an expression where open tuples are not expected. If you need to support this in a + /// given context (i.e. a = 4, 5), try `parse_expr`. 
+ /// + /// This may not need to be public; it's only being used in interpreter tests (outside of this + /// struct). + pub fn parse_simple_expr(&mut self) -> Result { + if self.current_token == Token::Await { + self.parse_await_expr() + } else { + self.parse_ternary_expr() + } + } + + fn parse_await_expr(&mut self) -> Result { + self.consume(Token::Await)?; + let right = self.parse_ternary_expr()?; + Ok(Expr::Await { + right: Box::new(right), + }) + } + + /// Implements operator precedence in reverse call stack order, meaning the operators + /// evaluated last will be detected first during this recursive descent. + /// + /// The levels as implemented, from tightest-binding (deepest call) to loosest, are: + /// - Literals, Identifiers - `parse_factor` + /// - Exponentiation (**) - `parse_exponentiation` + /// - Member Access, Index Access - `parse_access_operations` + /// - Multiplication, Division, Modulo, and Comparison Operators - `parse_term` + /// - Logical operators (AND/OR) - `parse_logical_term` + /// - Addition, Subtraction - `parse_add_sub` + /// - Bitwise Shifts (<<, >>) - `parse_bitwise_shift` + /// - Bitwise AND (&), OR (|), XOR (^) - `parse_binary_expr` + /// - Ternary Expression (inline-if) - `parse_ternary_expr` + fn parse_ternary_expr(&mut self) -> Result { + let if_value = self.parse_binary_expr()?; + + // If we saw an `Expr::NoOp`, the binary expression ended in a newline and we can skip the + // ternary check. Will this always be true? 
+ if self.current_token == Token::If && if_value != Expr::NoOp { + self.consume(Token::If)?; + let condition = self.parse_binary_expr()?; + self.consume(Token::Else)?; + let else_value = self.parse_binary_expr()?; + + return Ok(Expr::TernaryOp { + condition: Box::new(condition), + if_value: Box::new(if_value), + else_value: Box::new(else_value), + }); + } + + Ok(if_value) + } + + fn parse_binary_expr(&mut self) -> Result { + let mut left = self.parse_bitwise_shift()?; + + while self.current_token == Token::BitwiseAnd + || self.current_token == Token::BitwiseOr + || self.current_token == Token::BitwiseXor + { + let op = match self.current_token { + Token::BitwiseAnd => BinOp::BitwiseAnd, + Token::BitwiseOr => BinOp::BitwiseOr, + Token::BitwiseXor => BinOp::BitwiseXor, + _ => unreachable!(), + }; + self.consume(self.current_token.clone())?; + let right = self.parse_bitwise_shift()?; + left = Expr::BinaryOperation { + left: Box::new(left), + op, + right: Box::new(right), + }; + } + + Ok(left) + } + + fn parse_add_sub(&mut self) -> Result { + let mut left = self.parse_logical_term()?; + + while self.current_token == Token::Plus || self.current_token == Token::Minus { + let op = match self.current_token { + Token::Plus => BinOp::Add, + Token::Minus => BinOp::Sub, + _ => unreachable!(), + }; + self.consume(self.current_token.clone())?; + let right = self.parse_logical_term()?; + left = Expr::BinaryOperation { + left: Box::new(left), + op, + right: Box::new(right), + }; + } + + Ok(left) + } + + fn parse_bitwise_shift(&mut self) -> Result { + let mut left = self.parse_add_sub()?; + + while self.current_token == Token::LeftShift || self.current_token == Token::RightShift { + let op = match self.current_token { + Token::LeftShift => BinOp::LeftShift, + Token::RightShift => BinOp::RightShift, + _ => unreachable!(), + }; + self.consume(self.current_token.clone())?; + let right = self.parse_add_sub()?; + left = Expr::BinaryOperation { + left: Box::new(left), + op, + right: 
Box::new(right), + }; + } + + Ok(left) + } + + fn parse_member_access(&mut self, left: Expr) -> Result { + self.consume(Token::Dot)?; + let field = self.parse_identifier()?; + + if self.current_token == Token::LParen { + let args = self.parse_function_call_args()?; + + Ok(Expr::MethodCall { + object: Box::new(left), + name: field.clone(), + args, + }) + } else { + Ok(Expr::MemberAccess { + object: Box::new(left), + field: field.clone(), + }) + } + } + + fn parse_index_access(&mut self, left: Expr) -> Result { + self.consume(Token::LBracket)?; + // [::2] + let params = if self.peek_ahead_contains(vec![Token::Colon, Token::Colon]) { + self.consume(Token::Colon)?; + self.consume(Token::Colon)?; + let step = Some(Box::new(self.parse_simple_expr()?)); + (true, None, None, step) + // [:2] + } else if self.peek_ahead_contains(vec![Token::Colon]) { + self.consume(Token::Colon)?; + let stop = Some(Box::new(self.parse_simple_expr()?)); + (true, None, stop, None) + // [2:] + // if there is a Colon immediately before the next RBracket + } else if self.has(&Token::Colon) + && self.num_away(&Token::Colon)? + 1 == self.num_away(&Token::RBracket)? + { + let start = Some(Box::new(self.parse_simple_expr()?)); + self.consume(Token::Colon)?; + (true, start, None, None) + // [1:1:1] or [2:5] + // if there is a Colon before the next RBracket + } else if self.has(&Token::Colon) + && self.num_away(&Token::Colon)? < self.num_away(&Token::RBracket)? 
+ { + let start = Some(Box::new(self.parse_simple_expr()?)); + self.consume(Token::Colon)?; + let stop = Some(Box::new(self.parse_simple_expr()?)); + let step = if self.current_token == Token::Colon { + self.consume(Token::Colon)?; + Some(Box::new(self.parse_simple_expr()?)) + } else { + None + }; + (true, start, stop, step) + // [1] + } else { + let index = Some(Box::new(self.parse_simple_expr()?)); + (false, index, None, None) + }; + self.consume(Token::RBracket)?; + + if !params.0 { + Ok(Expr::IndexAccess { + object: Box::new(left), + index: params.1.unwrap(), + }) + } else { + Ok(Expr::SliceOperation { + object: Box::new(left), + params: ParsedSliceParams { + start: params.1, + stop: params.2, + step: params.3, + }, + }) + } + } + + /// This is recursive to the right to create a right-associativity binary operator. + fn parse_exponentiation(&mut self) -> Result { + let mut left = self.parse_factor()?; + + while self.current_token == Token::DoubleAsterisk { + self.consume(Token::DoubleAsterisk)?; + let right = self.parse_exponentiation()?; + left = Expr::BinaryOperation { + left: Box::new(left), + op: BinOp::Expo, + right: Box::new(right), + }; + } + + Ok(left) + } + + fn parse_access_operations(&mut self) -> Result { + let mut left = self.parse_exponentiation()?; + + while self.current_token == Token::Dot || self.current_token == Token::LBracket { + left = match self.current_token { + Token::Dot => self.parse_member_access(left)?, + Token::LBracket => self.parse_index_access(left)?, + _ => unreachable!(), + }; + } + + if self.current_token == Token::LParen { + let args = self.parse_function_call_args()?; + left = Expr::FunctionCall { + name: "".into(), + args, + callee: Some(Box::new(left)), + } + } + + Ok(left) + } + + fn parse_logical_term(&mut self) -> Result { + let mut left = self.parse_term()?; + + while self.current_token == Token::And || self.current_token == Token::Or { + let op = match self.current_token { + Token::And => LogicalOp::And, + Token::Or 
=> LogicalOp::Or, + _ => unreachable!(), + }; + self.consume(self.current_token.clone())?; + let right = self.parse_term()?; + left = Expr::LogicalOperation { + left: Box::new(left), + op, + right: Box::new(right), + }; + + self.consume_optional_many(Token::Newline); + } + + Ok(left) + } + + fn parse_term(&mut self) -> Result { + let mut left = self.parse_access_operations()?; + + while self.current_token == Token::Asterisk + || self.current_token == Token::Slash + || self.current_token == Token::DoubleSlash + || self.current_token == Token::Modulo + || self.current_token == Token::AtSign + { + let op = match self.current_token { + Token::Asterisk => BinOp::Mul, + Token::Slash => BinOp::Div, + Token::DoubleSlash => BinOp::IntegerDiv, + Token::Modulo => BinOp::Mod, + Token::AtSign => BinOp::MatMul, + _ => unreachable!(), + }; + self.consume(self.current_token.clone())?; + let right = self.parse_access_operations()?; + left = Expr::BinaryOperation { + left: Box::new(left), + op, + right: Box::new(right), + }; + + self.consume_optional_many(Token::Newline); + } + + while self.current_token == Token::LessThan + || self.current_token == Token::LessThanOrEqual + || self.current_token == Token::GreaterThan + || self.current_token == Token::GreaterThanOrEqual + || self.current_token == Token::Equal + || self.current_token == Token::NotEqual + || self.current_token == Token::In + || self.peek_ahead_contains(vec![Token::Not, Token::In]) + || self.current_token == Token::Is + || self.peek_ahead_contains(vec![Token::Is, Token::Not]) + { + // Handle two tokens to produce one `BinOp::NotIn` operation. If this gets too messy, + // we could look to move multi-word tokens into the lexer. 
+ let op = if self.peek_ahead_contains(vec![Token::Not, Token::In]) { + self.consume(Token::Not)?; + self.consume(Token::In)?; + BinOp::NotIn + } else if self.peek_ahead_contains(vec![Token::Is, Token::Not]) { + self.consume(Token::Is)?; + self.consume(Token::Not)?; + BinOp::IsNot + } else { + let op = match self.current_token { + Token::LessThan => BinOp::LessThan, + Token::LessThanOrEqual => BinOp::LessThanOrEqual, + Token::GreaterThan => BinOp::GreaterThan, + Token::GreaterThanOrEqual => BinOp::GreaterThanOrEqual, + Token::Equal => BinOp::Equals, + Token::NotEqual => BinOp::NotEquals, + Token::In => BinOp::In, + Token::Is => BinOp::Is, + _ => unreachable!(), + }; + self.consume(self.current_token.clone())?; + op + }; + + let right = self.parse_term()?; + left = Expr::BinaryOperation { + left: Box::new(left), + op, + right: Box::new(right), + }; + + self.consume_optional_many(Token::Newline); + } + + Ok(left) + } + + fn parse_minus(&mut self) -> Result { + self.consume(Token::Minus)?; + match self.current_token.clone() { + Token::Integer(i) => { + self.consume(Token::Integer(i))?; + Ok(Expr::Integer(-(i as i64))) + } + Token::FloatingPoint(i) => { + self.consume(Token::FloatingPoint(i))?; + Ok(Expr::FloatingPoint(-i)) + } + _ => { + let right = self.parse_term()?; + Ok(Expr::UnaryOperation { + op: UnaryOp::Minus, + right: Box::new(right), + }) + } + } + } + + /// The unary plus operator is a no-op for integers and floats, but exists to provide custom + /// behaviors using `Dunder::Pos`. 
+ fn parse_plus(&mut self) -> Result { + self.consume(Token::Plus)?; + match self.current_token.clone() { + Token::Integer(i) => { + self.consume(Token::Integer(i))?; + Ok(Expr::Integer(i as i64)) + } + Token::FloatingPoint(i) => { + self.consume(Token::FloatingPoint(i))?; + Ok(Expr::FloatingPoint(i)) + } + _ => { + let right = self.parse_term()?; + Ok(Expr::UnaryOperation { + op: UnaryOp::Plus, + right: Box::new(right), + }) + } + } + } + + fn parse_factor(&mut self) -> Result { + match self.current_token.clone() { + Token::Minus => self.parse_minus(), + Token::Plus => self.parse_plus(), + Token::Asterisk => { + self.consume(Token::Asterisk)?; + let right = self.parse_simple_expr()?; + Ok(Expr::UnaryOperation { + op: UnaryOp::Unpack, + right: Box::new(right), + }) + } + Token::Not => { + self.consume(Token::Not)?; + let right = self.parse_term()?; + Ok(Expr::UnaryOperation { + op: UnaryOp::Not, + right: Box::new(right), + }) + } + Token::BitwiseNot => { + self.consume(Token::BitwiseNot)?; + let right = self.parse_term()?; + Ok(Expr::UnaryOperation { + op: UnaryOp::BitwiseNot, + right: Box::new(right), + }) + } + Token::None => { + self.consume(Token::None)?; + Ok(Expr::None) + } + Token::NotImplemented => { + self.consume(Token::NotImplemented)?; + Ok(Expr::NotImplemented) + } + Token::Ellipsis => { + self.consume(Token::Ellipsis)?; + Ok(Expr::Ellipsis) + } + Token::Integer(i) => { + self.consume(Token::Integer(i))?; + Ok(Expr::Integer(i as i64)) + } + Token::FloatingPoint(i) => { + self.consume(Token::FloatingPoint(i))?; + Ok(Expr::FloatingPoint(i)) + } + Token::BooleanLiteral(b) => { + self.consume(Token::BooleanLiteral(b))?; + Ok(Expr::Boolean(b)) + } + Token::Identifier(_) => { + if self.peek(1) == Token::LParen { + let name = self.parse_identifier()?; + let args = self.parse_function_call_args()?; + + let first_call = if self.state.is_class(&name) { + Expr::ClassInstantiation { name, args } + } else { + Expr::FunctionCall { + name, + args, + callee: None, + } 
+ }; + + Ok(first_call) + } else if self.current_token.is_type() { + let type_node = self.parse_type_node()?; + + match type_node { + TypeNode::Basic(type_) => Ok(Expr::Variable(type_)), + _ => Ok(Expr::TypeNode(type_node)), + } + } else { + Ok(Expr::Variable(self.parse_identifier()?)) + } + } + Token::LParen => self.parse_tuple(), + Token::LBracket => self.parse_list(), + Token::LBrace => self.parse_set(), + Token::Lambda => self.parse_lambda(), + Token::StringLiteral(literal) => { + self.consume(Token::StringLiteral(literal.clone()))?; + Ok(Expr::StringLiteral(literal)) + } + Token::RawStringLiteral(literal) => { + // TODO store the raw-ness here so that we do not escape characters + self.consume(Token::RawStringLiteral(literal.clone()))?; + Ok(Expr::StringLiteral(literal)) + } + Token::ByteStringLiteral(literal) => { + self.consume(Token::ByteStringLiteral(literal.clone()))?; + Ok(Expr::ByteStringLiteral(literal.as_bytes().to_vec())) + } + Token::BinaryLiteral(literal) => self.parse_binary_literal(literal), + Token::OctalLiteral(literal) => self.parse_octal_literal(literal), + Token::HexLiteral(literal) => self.parse_hex_literal(literal), + Token::Newline => { + self.consume(Token::Newline)?; + Ok(Expr::NoOp) + } + Token::FStringStart => self.parse_f_string(), + _ => Err(ParserError::UnexpectedToken(self.current_token.clone())), + } + } + + fn parse_indented_block(&mut self) -> Result { + self.consume_optional_many(Token::Newline); + self.consume(Token::Indent)?; + + let mut statements = Vec::new(); + while self.current_token != Token::Dedent { + if self.current_token == Token::Newline { + self.consume(Token::Newline)?; + } else { + statements.push(self.parse_statement()?); + } + } + self.consume(Token::Dedent)?; + self.consume_optional_many(Token::Newline); + + Ok(Block::new(statements)) + } + + fn parse_import_path(&mut self) -> Result { + match self.current_token { + Token::Dot => { + self.consume(Token::Dot)?; + let mut levels = 0; + while self.current_token 
== Token::Dot { + self.consume(Token::Dot)?; + levels += 1; + } + + let path = if matches!(self.current_token, Token::Identifier(_)) { + let mut path = vec![self.parse_identifier()?]; + while self.current_token == Token::Dot { + self.consume(Token::Dot)?; + path.push(self.parse_identifier()?); + } + path + } else { + vec![] + }; + + Ok(ImportPath::Relative(levels, path)) + } + _ => { + let mut path = vec![self.parse_identifier()?]; + while self.current_token == Token::Dot { + self.consume(Token::Dot)?; + path.push(self.parse_identifier()?); + } + + Ok(ImportPath::Absolute(path)) + } + } + } + + fn parse_alias(&mut self) -> Result, ParserError> { + if self.current_token == Token::As { + self.consume(Token::As)?; + let alias = self.parse_identifier()?; + Ok(Some(alias)) + } else { + Ok(None) + } + } + + fn parse_regular_import(&mut self) -> Result { + self.consume(Token::Import)?; + let import_path = self.parse_import_path()?; + let alias = self.parse_alias()?; + + Ok(Statement::RegularImport { import_path, alias }) + } + + fn parse_selective_import(&mut self) -> Result { + self.consume(Token::From)?; + let import_path = self.parse_import_path()?; + + self.consume(Token::Import)?; + let stmt = match self.current_token { + Token::Asterisk => { + self.consume(Token::Asterisk)?; + Statement::SelectiveImport { + import_path, + items: vec![], + wildcard: true, + } + } + _ => { + self.consume_optional(Token::LParen); + + let mut items = Vec::new(); + loop { + self.consume_optional_many(Token::Newline); + let symbol = self.parse_identifier()?; + let alias = self.parse_alias()?; + + let item = alias.map_or(ImportedItem::Direct(symbol.clone()), |a| { + ImportedItem::Alias(Alias { + symbol: symbol.clone(), + alias_symbol: Some(a), + }) + }); + items.push(item); + + match self.current_token { + Token::Comma => { + self.consume(Token::Comma)?; + continue; + } + Token::Newline | Token::Eof | Token::RParen => { + if self.current_token == Token::RParen { + 
self.consume(Token::RParen)?; + } + break; + } + _ => { + return Err(ParserError::ExpectedToken( + Token::Comma, + self.current_token.clone(), + )); + } + } + } + + Statement::SelectiveImport { + import_path, + items, + wildcard: false, + } + } + }; + + Ok(stmt) + } + + fn parse_binary_literal(&mut self, literal: String) -> Result { + self.consume(Token::BinaryLiteral(literal.clone()))?; + + let result = i64::from_str_radix(&literal[2..], 2).map_err(|_| ParserError::SyntaxError)?; + Ok(Expr::Integer(result)) + } + + fn parse_octal_literal(&mut self, literal: String) -> Result { + self.consume(Token::OctalLiteral(literal.clone()))?; + + let result = i64::from_str_radix(&literal[2..], 8).map_err(|_| ParserError::SyntaxError)?; + Ok(Expr::Integer(result)) + } + + fn parse_hex_literal(&mut self, literal: String) -> Result { + self.consume(Token::HexLiteral(literal.clone()))?; + + let result = + i64::from_str_radix(&literal[2..], 16).map_err(|_| ParserError::SyntaxError)?; + Ok(Expr::Integer(result)) + } + + fn parse_type_node(&mut self) -> Result { + let mut nodes = vec![]; + + loop { + let node = match self.current_token { + Token::Identifier(ref identifier) => match identifier.as_str() { + "int" => { + self.consume(Token::Identifier("int".into()))?; + TypeNode::Basic("int".into()) + } + "str" => { + self.consume(Token::Identifier("str".into()))?; + TypeNode::Basic("str".into()) + } + "dict" => { + self.consume(Token::Identifier("dict".into()))?; + TypeNode::Basic("dict".into()) + } + "list" => { + self.consume(Token::Identifier("list".into()))?; + + if self.current_token == Token::LBracket { + self.consume(Token::LBracket)?; + let parameters = self.parse_type_node()?; + self.consume(Token::RBracket)?; + + TypeNode::Generic { + base_type: "list".into(), + parameters: vec![parameters], + } + } else { + TypeNode::Basic("list".into()) + } + } + _ => unimplemented!(), + }, + Token::Ellipsis => { + self.consume(Token::Ellipsis)?; + // should this be modeled in a better 
way? + // this is from _collections_abc.py: EllipsisType = type(...) + TypeNode::Basic("...".into()) + } + _ => unimplemented!(), + }; + + nodes.push(node); + + if self.current_token != Token::BitwiseOr { + break; + } + self.consume(Token::BitwiseOr)?; + } + + if nodes.len() == 1 { + Ok(nodes[0].clone()) + } else { + Ok(TypeNode::Union(nodes)) + } + } + + fn parse_exception_literal(&mut self) -> Result { + let symbol = self.parse_identifier()?; + let literal = match symbol.as_str() { + "ZeroDivisionError" => Some(ExceptionLiteral::ZeroDivisionError), + "Exception" => Some(ExceptionLiteral::Exception), + "IOError" => Some(ExceptionLiteral::IOError), + "ImportError" => Some(ExceptionLiteral::ImportError), + "StopIteration" => Some(ExceptionLiteral::StopIteration), + "TypeError" => Some(ExceptionLiteral::TypeError), + // TODO we don't handle ExceptionLiteral::Custom in the interpreter yet + _ => Some(ExceptionLiteral::Custom(symbol.clone())), + }; + + literal.ok_or(ParserError::ExpectedException(symbol)) + } + + fn parse_context_manager(&mut self) -> Result { + self.consume(Token::With)?; + let expr = self.parse_simple_expr()?; + + let variable = if self.current_token == Token::As { + self.consume(Token::As)?; + Some(self.parse_identifier()?) + } else { + None + }; + self.consume(Token::Colon)?; + let block = self.parse_indented_block()?; + + Ok(Statement::ContextManager { + expr, + variable, + block, + }) + } + + fn parse_raise(&mut self) -> Result { + self.consume(Token::Raise)?; + + let instance = if matches!(self.current_token, Token::Identifier(_)) { + let literal = self.parse_exception_literal()?; + + let args = if self.current_token == Token::LParen { + self.parse_function_call_args()? 
+ } else { + ParsedArguments::empty() + }; + + // TODO support exception chaining here and in the interpreter + if self.current_token == Token::From { + self.consume(Token::From)?; + let _from = self.parse_simple_expr()?; + } + Some(ExceptionInstance { literal, args }) + } else { + None + }; + + Ok(Statement::Raise(instance)) + } + + fn parse_try_except(&mut self) -> Result { + self.consume(Token::Try)?; + self.consume(Token::Colon)?; + let try_block = self.parse_indented_block()?; + + let mut except_clauses: Vec = vec![]; + while self.current_token == Token::Except { + self.consume(Token::Except)?; + if self.current_token == Token::Colon { + self.consume(Token::Colon)?; + let except_block = self.parse_indented_block()?; + except_clauses.push(ExceptClause { + exception_types: vec![], + block: except_block, + }); + } else if self.current_token == Token::LParen { + self.consume(Token::LParen)?; + let mut literals = vec![]; + while self.current_token != Token::RParen { + let literal = self.parse_exception_literal()?; + literals.push(HandledException { + literal, + alias: None, + }); + self.consume_optional(Token::Comma); + } + + self.consume(Token::RParen)?; + self.consume(Token::Colon)?; + let except_block = self.parse_indented_block()?; + except_clauses.push(ExceptClause { + exception_types: literals, + block: except_block, + }); + } else { + let literal = self.parse_exception_literal()?; + let exception_type = if self.current_token == Token::As { + self.consume(Token::As)?; + let alias = Some(self.parse_identifier()?); + HandledException { literal, alias } + } else { + HandledException { + literal, + alias: None, + } + }; + + self.consume(Token::Colon)?; + let except_block = self.parse_indented_block()?; + except_clauses.push(ExceptClause { + exception_types: vec![exception_type], + block: except_block, + }); + } + } + + let else_block = if self.current_token == Token::Else { + self.consume(Token::Else)?; + self.consume(Token::Colon)?; + 
Some(self.parse_indented_block()?) + } else { + None + }; + + let finally_block = if self.current_token == Token::Finally { + self.consume(Token::Finally)?; + self.consume(Token::Colon)?; + Some(self.parse_indented_block()?) + } else { + None + }; + + if except_clauses.is_empty() && finally_block.is_none() { + return Err(ParserError::SyntaxError); + } + + Ok(Statement::TryExcept { + try_block, + except_clauses, + else_block, + finally_block, + }) + } + + fn parse_if_else(&mut self) -> Result { + self.consume(Token::If)?; + let condition = self.parse_simple_expr()?; + self.consume(Token::Colon)?; + let if_part = ConditionalBlock { + condition, + block: self.parse_indented_block()?, + }; + + let mut elif_parts: Vec = vec![]; + while self.current_token == Token::Elif { + self.consume(Token::Elif)?; + let condition = self.parse_simple_expr()?; + self.consume(Token::Colon)?; + let elif_parts_part = ConditionalBlock { + condition, + block: self.parse_indented_block()?, + }; + + // We must use push because these will be evaluated in order + elif_parts.push(elif_parts_part); + } + + let else_part = if self.current_token == Token::Else { + self.consume(Token::Else)?; + self.consume(Token::Colon)?; + Some(self.parse_indented_block()?) + } else { + None + }; + + Ok(Statement::IfElse { + if_part, + elif_parts, + else_part, + }) + } + + fn parse_for_in_loop(&mut self) -> Result { + self.consume(Token::For)?; + + let index_a = self.parse_identifier()?; + let index = if self.current_token == Token::Comma { + self.consume(Token::Comma)?; + let index_b = self.parse_identifier()?; + LoopIndex::Tuple(vec![index_a, index_b]) + } else { + LoopIndex::Variable(index_a) + }; + + self.consume(Token::In)?; + let range = self.parse_simple_expr()?; + self.consume(Token::Colon)?; + let body = self.parse_indented_block()?; + + let else_block = if self.current_token == Token::Else { + self.consume(Token::Else)?; + self.consume(Token::Colon)?; + Some(self.parse_indented_block()?) 
+ } else { + None + }; + + Ok(Statement::ForInLoop { + index, + iterable: range, + body, + else_block, + }) + } + + /// Parse a parent class looking for one of the following syntaxes: + /// ```python + /// class Foo(Bar): pass + /// class Foo(module.Baz): pass + /// ``` + /// + /// We use `parse_simple_expr` here because we do not want to catch any Expr::Tuple, which + /// would be returned for multiple inheritance if we used `parse_expr`. + fn parse_parent_class(&mut self) -> Result { + let parent = self.parse_simple_expr()?; + + if !matches!(parent, Expr::Variable(_) | Expr::MemberAccess { .. }) { + Err(ParserError::SyntaxError) + } else { + Ok(parent) + } + } + + fn parse_class_definition(&mut self) -> Result { + self.consume(Token::Class)?; + let name = self.parse_identifier()?; + + let mut parents = vec![]; + let mut metaclass = None; + + if self.current_token == Token::LParen { + self.consume(Token::LParen)?; + while self.current_token != Token::RParen { + if self + .peek_ahead_contains(vec![Token::Identifier("metaclass".into()), Token::Assign]) + { + // Support for metaclasses, i.e. the `__new__` method which constructs a class + // (instead of an object like the normal `__new__` method). + // + // Context: PEP 3115 (https://peps.python.org/pep-3115/) + // ``` + // class ABC(metaclass=ABCMeta): + // pass + // ``` + self.consume(Token::Identifier("metaclass".into()))?; + self.consume(Token::Assign)?; + metaclass = Some(self.parse_identifier()?); + break; + } + + parents.push(self.parse_parent_class()?); + + self.consume_optional(Token::Comma); + } + self.consume(Token::RParen)?; + } + + self.consume(Token::Colon)?; + let body = if self.current_token == Token::Newline { + self.parse_indented_block()? + } else { + // Support single-line empty definitions. 
+ // + // Example: + // class Foo: pass + Block::new(vec![self.parse_statement()?]) + }; + + Ok(Statement::ClassDef { + name, + parents, + metaclass, + body, + }) + } + + fn parse_function_definition(&mut self) -> Result { + let mut decorators: Vec = vec![]; + + while self.current_token == Token::AtSign { + self.consume(Token::AtSign)?; + decorators.push(self.parse_simple_expr()?); + self.consume_optional_many(Token::Newline); + } + + let is_async = if self.current_token == Token::Async { + self.consume(Token::Async)?; + true + } else { + false + }; + + self.consume(Token::Def)?; + let name = self.parse_identifier()?; + let args = self.parse_function_def_args()?; + + // Support type hints in the return type + if self.current_token == Token::ReturnTypeArrow { + self.consume(Token::ReturnTypeArrow)?; + let _type = self.parse_simple_expr()?; + } + + self.consume(Token::Colon)?; + + let body = if self.current_token == Token::Indent || self.current_token == Token::Newline { + self.parse_indented_block()? + } else { + // Support single-line functions + // Examples: + // def _f() : pass + // def four(): return 4 + Block::new(vec![self.parse_statement()?]) + }; + + Ok(Statement::FunctionDef { + name, + args, + body, + decorators, + is_async, + }) + } + + fn parse_comma_separated_expr(&mut self) -> Result, ParserError> { + let mut exprs = vec![]; + loop { + let expr = self.parse_simple_expr()?; + exprs.push(expr); + + if self.current_token != Token::Comma { + break; + } + self.consume(Token::Comma)?; + } + + Ok(exprs) + } + + fn parse_delete(&mut self) -> Result { + self.consume(Token::Del)?; + let exprs = self.parse_comma_separated_expr()?; + Ok(Statement::Delete(exprs)) + } + + fn parse_return(&mut self) -> Result { + self.consume(Token::Return)?; + let exprs = self.parse_comma_separated_expr()?; + Ok(Statement::Return(exprs)) + } + + /// Return the full AST. This will consume all the tokens. 
+ pub fn parse(&mut self) -> Result, ParserError> { + let mut stmts = vec![]; + while !self.is_finished() { + let stmt = self.parse_statement()?; + stmts.push(stmt); + } + + Ok(stmts) + } + + pub fn parse_statement(&mut self) -> Result { + self.state.set_line(self.line_number); + match self.current_token.clone() { + Token::Del => self.parse_delete(), + Token::Def => self.parse_function_definition(), + Token::AtSign => self.parse_function_definition(), + Token::Async => self.parse_function_definition(), + Token::Assert => { + self.consume(Token::Assert)?; + let expr = self.parse_simple_expr()?; + Ok(Statement::Assert(expr)) + } + Token::Class => self.parse_class_definition(), + Token::Return => self.parse_return(), + Token::Pass => { + self.consume(Token::Pass)?; + Ok(Statement::Pass) + } + Token::Break => { + self.consume(Token::Break)?; + Ok(Statement::Break) + } + Token::Continue => { + self.consume(Token::Continue)?; + Ok(Statement::Continue) + } + Token::Yield => { + self.consume(Token::Yield)?; + + if self.current_token == Token::From { + self.consume(Token::From)?; + let expr = self.parse_simple_expr()?; + Ok(Statement::YieldFrom(expr)) + } else { + let expr = self.parse_simple_expr()?; + Ok(Statement::Yield(expr)) + } + } + Token::Nonlocal => { + self.consume(Token::Nonlocal)?; + Ok(Statement::Nonlocal(self.parse_identifier()?)) + } + Token::Global => { + self.consume(Token::Global)?; + Ok(Statement::Global(self.parse_identifier()?)) + } + Token::If => self.parse_if_else(), + Token::While => { + self.consume(Token::While)?; + let condition = self.parse_simple_expr()?; + self.consume(Token::Colon)?; + let body = self.parse_indented_block()?; + Ok(Statement::WhileLoop { condition, body }) + } + Token::For => self.parse_for_in_loop(), + Token::Import => self.parse_regular_import(), + Token::From => self.parse_selective_import(), + Token::Try => self.parse_try_except(), + Token::Raise => self.parse_raise(), + Token::With => self.parse_context_manager(), + _ => 
self.parse_statement_without_starting_keyword(),
        }
    }

    /// Parse a statement that starts with an expression rather than a keyword.
    ///
    /// After the leading expression, the next token selects the statement kind:
    ///
    /// - `=` with a tuple on the left produces `Statement::UnpackingAssignment`
    /// - `=` otherwise produces `Statement::Assignment`, or
    ///   `Statement::MultipleAssignment` for a chain such as `a = b = 1`
    /// - a compound-assignment token (`+=`, `-=`, ...) produces
    ///   `Statement::CompoundAssignment`
    /// - anything else produces `Statement::Expression`
    fn parse_statement_without_starting_keyword(&mut self) -> Result {
        let left = self.parse_expr()?;

        if self.current_token == Token::Assign {
            self.consume(Token::Assign)?;
            match left {
                Expr::Tuple(vars) => Ok(Statement::UnpackingAssignment {
                    left: vars,
                    right: self.parse_expr()?,
                }),
                first_target => {
                    // Every expression followed by another `=` is a target; the final
                    // expression in the chain is the value being assigned.
                    let mut targets = vec![first_target];
                    let mut right = self.parse_expr()?;
                    while self.current_token == Token::Assign {
                        self.consume(Token::Assign)?;
                        targets.push(right);
                        right = self.parse_expr()?;
                    }

                    if targets.len() > 1 {
                        Ok(Statement::MultipleAssignment {
                            left: targets,
                            right,
                        })
                    } else {
                        // Exactly one target: move it out instead of cloning it.
                        Ok(Statement::Assignment {
                            left: targets.remove(0),
                            right,
                        })
                    }
                }
            }
        } else if self.current_token.is_compound_assign() {
            let operator = match self.current_token {
                Token::PlusEquals => CompoundOperator::Add,
                Token::MinusEquals => CompoundOperator::Subtract,
                Token::AsteriskEquals => CompoundOperator::Multiply,
                Token::SlashEquals => CompoundOperator::Divide,
                Token::BitwiseAndEquals => CompoundOperator::BitwiseAnd,
                Token::BitwiseOrEquals => CompoundOperator::BitwiseOr,
                Token::BitwiseXorEquals => CompoundOperator::BitwiseXor,
                Token::DoubleSlashEquals => CompoundOperator::IntegerDiv,
                Token::LeftShiftEquals => CompoundOperator::LeftShift,
                Token::RightShiftEquals => CompoundOperator::RightShift,
                Token::ModEquals => CompoundOperator::Mod,
                Token::MatMulEquals => CompoundOperator::MatMul,
                Token::ExpoEquals => CompoundOperator::Expo,
                _ => unreachable!("is_compound_assign() accepted a token with no operator"),
            };
            self.consume(self.current_token.clone())?;

            let value = self.parse_simple_expr()?;
            Ok(Statement::CompoundAssignment {
                operator,
                target: Box::new(left),
                value: Box::new(value),
            })
        } else {
            Ok(Statement::Expression(left))
        }
    }

    /// Parse a parameterless lambda: `lambda: <expr>` or `lambda: (yield)`.
    ///
    /// The colon is consumed directly after the `lambda` token, so the resulting
    /// `Expr::Lambda` always carries an empty argument list. A body that begins with
    /// `(` is only accepted in the exact form `(yield)`; it is represented as
    /// `Expr::NoOp`, and `is_generator` is set whenever the body is `Expr::NoOp`.
    fn parse_lambda(&mut self) -> Result {
        self.consume(Token::Lambda)?;
        self.consume(Token::Colon)?;

        let expr = if self.current_token == Token::LParen {
            self.consume(Token::LParen)?;
            self.consume(Token::Yield)?;
            self.consume(Token::RParen)?;
            Expr::NoOp
        } else {
            self.parse_simple_expr()?
        };

        // Decide this before `expr` is moved into the box, so no clone is needed.
        let is_generator = matches!(expr, Expr::NoOp);

        Ok(Expr::Lambda {
            args: Box::new(ParsedArgDefinitions {
                args: vec![],
                args_var: None,
                kwargs_var: None,
            }),
            expr: Box::new(expr),
            is_generator,
        })
    }

    /// Parse a list literal (`[a, b]`, with an optional trailing comma) or a list
    /// comprehension (`[body for ... in ...]`).
    ///
    /// Newline tokens around elements are skipped so literals may span lines.
    ///
    /// # Errors
    ///
    /// Returns `ParserError::SyntaxError` when a `for` clause follows more than one
    /// element (e.g. `[a, b for x in y]`); the earlier elements used to be dropped
    /// silently. Errors from nested parsing calls are propagated.
    fn parse_list(&mut self) -> Result {
        let mut items = Vec::new();

        self.consume(Token::LBracket)?;
        // Allows an empty literal whose brackets sit on different lines.
        self.consume_optional_many(Token::Newline);

        while self.current_token != Token::RBracket {
            let expr = self.parse_simple_expr()?;
            self.consume_optional_many(Token::Newline);

            if self.current_token == Token::For {
                // A comprehension has exactly one body expression.
                if !items.is_empty() {
                    return Err(ParserError::SyntaxError);
                }

                let mut clauses = vec![];
                while self.current_token == Token::For {
                    clauses.push(self.parse_comprehension_clause()?);
                }

                self.consume(Token::RBracket)?;

                return Ok(Expr::ListComprehension {
                    body: Box::new(expr),
                    clauses,
                });
            }

            items.push(expr);

            if self.current_token == Token::Comma {
                self.consume(Token::Comma)?;
            }

            // Covers both a trailing comma and a closing bracket on its own line; the
            // loop condition then sees the `]`.
            self.consume_optional_many(Token::Newline);
        }

        self.consume(Token::RBracket)?;
        Ok(Expr::List(items))
    }

    /// Parse an f-string into `Expr::FString`.
    ///
    /// Between `Token::FStringStart` and `Token::FStringEnd` the only accepted tokens
    /// are string literals and `{expr}` replacement fields. A field may carry a
    /// conversion, `{expr!r}`, `{expr!s}` or `{expr!a}`; without one the format is
    /// `FormatOption::Str`.
    ///
    /// # Errors
    ///
    /// Returns `ParserError::UnexpectedToken` for any other token inside the f-string
    /// and for an unknown conversion. For an unknown conversion the reported token is
    /// the offending identifier itself.
    fn parse_f_string(&mut self) -> Result {
        self.consume(Token::FStringStart)?;

        let mut parts = vec![];
        while self.current_token != Token::FStringEnd {
            match self.current_token.clone() {
                Token::StringLiteral(s) => {
                    self.consume(Token::StringLiteral(s.clone()))?;
                    parts.push(FStringPart::String(s));
                }
                Token::LBrace => {
                    self.consume(Token::LBrace)?;
                    let expr = self.parse_simple_expr()?;

                    let format = if self.current_token == Token::Exclamation {
                        self.consume(Token::Exclamation)?;
                        match self.current_token.clone() {
                            Token::Identifier(conversion) => {
                                // Validate before consuming so that the error below still
                                // points at the conversion identifier.
                                let format = match conversion.as_str() {
                                    "r" => FormatOption::Repr,
                                    "s" => FormatOption::Str,
                                    "a" => FormatOption::Ascii,
                                    _ => {
                                        return Err(ParserError::UnexpectedToken(
                                            self.current_token.clone(),
                                        ))
                                    }
                                };
                                self.consume(Token::Identifier(conversion))?;
                                format
                            }
                            other => return Err(ParserError::UnexpectedToken(other)),
                        }
                    } else {
                        FormatOption::Str
                    };

                    self.consume(Token::RBrace)?;
                    parts.push(FStringPart::Expr(ExprFormat {
                        expr: Box::new(expr),
                        format,
                    }));
                }
                _ => return Err(ParserError::UnexpectedToken(self.current_token.clone())),
            }
        }

        self.consume(Token::FStringEnd)?;
        Ok(Expr::FString(parts))
    }

    /// Parse everything that starts with `{`: set and dict literals, set and dict
    /// comprehensions, and the empty literal `{}` (which is a dict).
    ///
    /// Entries are accumulated in a single map. Set elements are stored as keys with an
    /// `Expr::NoOp` value, and the keys alone are returned for `Expr::Set`. Trailing
    /// commas and newlines between entries are accepted for both sets and dicts.
    ///
    /// # Errors
    ///
    /// Returns `ParserError::SyntaxError` when a comprehension's `for` follows earlier
    /// entries, and `ParserError::UnexpectedToken` when an entry is followed by
    /// anything other than `,`, `:`, `for` or `}`.
    fn parse_set(&mut self) -> Result {
        let mut items = HashMap::new();

        self.consume(Token::LBrace)?;
        self.consume_optional_many(Token::Newline);

        while self.current_token != Token::RBrace {
            let item = self.parse_simple_expr()?;
            self.consume_optional_many(Token::Newline);

            match self.current_token {
                Token::Comma => {
                    self.consume(Token::Comma)?;
                    items.insert(item, Expr::NoOp);

                    // Handle trailing comma
                    self.consume_optional_many(Token::Newline);
                    if self.current_token == Token::RBrace {
                        self.consume(Token::RBrace)?;
                        return Ok(Expr::Set(items.into_keys().collect()));
                    }
                }
                Token::RBrace => {
                    self.consume(Token::RBrace)?;
                    items.insert(item, Expr::NoOp);
                    return Ok(Expr::Set(items.into_keys().collect()));
                }
                Token::For => {
                    // A comprehension has exactly one body expression.
                    if !items.is_empty() {
                        return Err(ParserError::SyntaxError);
                    }

                    let mut clauses = vec![];
                    while self.current_token == Token::For {
                        clauses.push(self.parse_comprehension_clause()?);
                    }
                    self.consume(Token::RBrace)?;
                    return Ok(Expr::SetComprehension {
                        body: Box::new(item),
                        clauses,
                    });
                }
                Token::Colon => {
                    self.consume(Token::Colon)?;
                    let second = self.parse_simple_expr()?;
                    self.consume_optional_many(Token::Newline);

                    match self.current_token {
                        Token::Comma => {
                            self.consume(Token::Comma)?;
                            items.insert(item, second);

                            // Handle trailing comma. Without this, `{a: 1,}` left the
                            // loop with a non-empty map and tripped the empty-literal
                            // assertion below.
                            self.consume_optional_many(Token::Newline);
                            if self.current_token == Token::RBrace {
                                self.consume(Token::RBrace)?;
                                return Ok(Expr::Dict(items));
                            }
                        }
                        Token::RBrace => {
                            self.consume(Token::RBrace)?;
                            items.insert(item, second);
                            return Ok(Expr::Dict(items));
                        }
                        Token::For => {
                            // A comprehension has exactly one key/value body.
                            if !items.is_empty() {
                                return Err(ParserError::SyntaxError);
                            }

                            // Only the form `for <key>, <value> in <iterable>` is parsed.
                            self.consume(Token::For)?;
                            let key = self.parse_identifier()?;
                            self.consume(Token::Comma)?;
                            let value = self.parse_identifier()?;
                            self.consume(Token::In)?;
                            let range = self.parse_simple_expr()?;
                            self.consume(Token::RBrace)?;
                            return Ok(Expr::DictComprehension {
                                key,
                                value,
                                range: Box::new(range),
                                key_body: Box::new(item),
                                value_body: Box::new(second),
                            });
                        }
                        _ => return Err(ParserError::UnexpectedToken(self.current_token.clone())),
                    }
                }
                _ => return Err(ParserError::UnexpectedToken(self.current_token.clone())),
            }
        }

        // Every branch that stores an entry returns as soon as it sees the closing
        // brace, so the loop can only finish here for an empty literal.
        debug_assert!(items.is_empty());
        self.consume(Token::RBrace)?;
        Ok(Expr::Dict(HashMap::new()))
    }

    /// Parse the parenthesised parameter list of a function definition.
    ///
    /// Handles plain parameters with optional type hints and defaults
    /// (`x`, `x: int`, `x=1`, `x: int = 1`), the positional-only marker `/`, a bare `*`
    /// or `*args`, and `**kwargs`. Type hints are parsed and then discarded.
    fn parse_function_def_args(&mut self) -> Result {
        self.consume(Token::LParen)?;

        let mut args = Vec::new();
        let mut args_var = None;
        let mut kwargs_var = None;
        while self.current_token != Token::RParen {
            self.consume_optional_many(Token::Newline);

            // This is to support positional-only parameters.
            // Context: PEP 570 (https://peps.python.org/pep-0570/)
            // TODO test positional-only parameters now that we support args/kwargs
            if self.current_token == Token::Slash {
                self.consume(Token::Slash)?;

                // We will only see a comma if the slash isn't the last "parameter".
                // We test this in the "slash_args" interpreter test. This is also found in
                // types.py in the standard lib.
                if self.current_token == Token::Comma {
                    self.consume(Token::Comma)?;
                } else {
                    break;
                }
            }

            if self.current_token == Token::Asterisk {
                self.consume(Token::Asterisk)?;

                // We will see an asterisk without a trailing identifier for keyword-only
                // parameters. TODO we do not yet enforce this.
                // Context: PEP 3102 (https://peps.python.org/pep-3102/)
                if matches!(self.current_token, Token::Identifier(_)) {
                    args_var = Some(self.parse_identifier()?);
                }

                // If *args is not at the end of the args (only kwargs can come after), we must
                // allow for a comma. This is similar to how we optionally consume a comma as the
                // last step of each loop iteration.
                self.consume_optional(Token::Comma);
                continue;
            }

            if self.current_token == Token::DoubleAsterisk {
                self.consume(Token::DoubleAsterisk)?;
                kwargs_var = Some(self.parse_identifier()?);

                // `**kwargs` must be last, but Python still permits a trailing comma after
                // it, which is common in multi-line signatures.
                self.consume_optional(Token::Comma);
                self.consume_optional_many(Token::Newline);
                break;
            }

            let arg = self.parse_identifier()?;

            // Support for type hints. Will there be reason to store these alongside the params
            // themselves? Perhaps for future toolings like memphis-lsp.
            //
            // The hint comes before the default in Python (`x: int = 5`), so it has to be
            // parsed before we look for `=`.
            if self.current_token == Token::Colon {
                self.consume(Token::Colon)?;
                let _type = self.parse_simple_expr()?;
            }

            let default = if self.current_token == Token::Assign {
                self.consume(Token::Assign)?;
                Some(self.parse_simple_expr()?)
            } else {
                None
            };

            args.push(ParsedArgDefinition { arg, default });

            self.consume_optional(Token::Comma);
            self.consume_optional_many(Token::Newline);
        }
        self.consume(Token::RParen)?;

        Ok(ParsedArgDefinitions {
            args,
            args_var,
            kwargs_var,
        })
    }

    /// Parse the parenthesised argument list of a call.
    ///
    /// Collects positional arguments, `name=value` keyword arguments, one `*expr`
    /// and one `**expr`. The starred expression must be a variable or member access;
    /// the double-starred expression may additionally be a dict literal, whose entries
    /// are merged into the keyword arguments.
    ///
    /// # Errors
    ///
    /// Returns `ParserError::SyntaxError` for a starred expression of any other kind
    /// and for a dict-literal key for which `as_string()` yields nothing.
    fn parse_function_call_args(&mut self) -> Result {
        self.consume(Token::LParen)?;

        let mut args = Vec::new();
        let mut kwargs = HashMap::new();
        let mut args_var = None;
        let mut kwargs_var = None;
        while self.current_token != Token::RParen {
            self.consume_optional_many(Token::Newline);

            if self.current_token == Token::Asterisk {
                self.consume(Token::Asterisk)?;
                let args_expr = self.parse_simple_expr()?;
                match args_expr {
                    Expr::Variable(_) | Expr::MemberAccess { .. } => {
                        args_var = Some(Box::new(args_expr))
                    }
                    _ => return Err(ParserError::SyntaxError),
                };

                // If *args is not at the end of the args (only kwargs can come after), we must
                // allow for a comma. This is similar to how we optionally consume a comma as the
                // last step of each loop iteration.
                self.consume_optional(Token::Comma);
                continue;
            }

            // This is to support the formats
            // - foo(**{'a': 2, 'b': 1})
            // - foo(**args)
            //
            // Python technically allows you to do both of these, but we do not support that yet.
            // If you do that, I think kwargs_var would need to become a Vec since you could have
            // more than one of a single type.
            if self.current_token == Token::DoubleAsterisk {
                self.consume(Token::DoubleAsterisk)?;
                let kwargs_expr = self.parse_simple_expr()?;
                match kwargs_expr {
                    Expr::Dict(dict) => {
                        for (key, value) in dict {
                            let key_name = key.as_string().ok_or(ParserError::SyntaxError)?;
                            kwargs.insert(key_name.clone(), value);
                        }
                    }
                    Expr::Variable(_) | Expr::MemberAccess { .. } => {
                        kwargs_var = Some(Box::new(kwargs_expr))
                    }
                    _ => return Err(ParserError::SyntaxError),
                };

                // Python permits a trailing comma after the final `**` argument.
                self.consume_optional(Token::Comma);
                self.consume_optional_many(Token::Newline);
                break;
            }

            match self.parse_function_call_arg()? {
                // This is to support the format foo(a=2, b=1)
                ParsedArgument::Keyword { arg, expr } => {
                    kwargs.insert(arg, expr);
                }
                ParsedArgument::Positional(expr) => {
                    args.push(expr);
                }
            }

            self.consume_optional(Token::Comma);
            self.consume_optional_many(Token::Newline);
        }

        self.consume(Token::RParen)?;

        Ok(ParsedArguments {
            args,
            kwargs,
            args_var,
            kwargs_var,
        })
    }

    /// An argument in a function call can be either variable `a` or contain an equals such as
    /// `a = 4`. We originally (and ignorantly) called `parse_statement` but that contains too many
    /// other cases to be safely used inside function call parsing.
    ///
    /// An expression followed by `for` is parsed as a generator comprehension and passed
    /// as a positional argument. For the keyword form, the left-hand side must be a plain
    /// variable; otherwise `ParserError::SyntaxError` is returned.
    fn parse_function_call_arg(&mut self) -> Result {
        let expr = self.parse_simple_expr()?;
        if self.current_token == Token::Assign {
            self.consume(Token::Assign)?;

            let arg = expr.as_variable().ok_or(ParserError::SyntaxError)?.into();

            Ok(ParsedArgument::Keyword {
                arg,
                expr: self.parse_simple_expr()?,
            })
        } else if self.current_token == Token::For {
            Ok(ParsedArgument::Positional(
                self.parse_generator_comprehension(&expr)?,
            ))
        } else {
            Ok(ParsedArgument::Positional(expr))
        }
    }

    /// Parse the `for` clauses of a generator comprehension whose body expression has
    /// already been parsed by the caller. The current token is expected to be `for`;
    /// the closing delimiter is left for the caller to consume.
    fn parse_generator_comprehension(&mut self, body: &Expr) -> Result {
        let mut clauses = vec![];
        while self.current_token == Token::For {
            clauses.push(self.parse_comprehension_clause()?);
        }
        Ok(Expr::GeneratorComprehension {
            body: Box::new(body.clone()),
            clauses,
        })
    }

    /// Parse a single comprehension clause:
    /// `for <name>[, <name>...] in <iterable> [if <condition>]`.
    fn parse_comprehension_clause(&mut self) -> Result {
        self.consume(Token::For)?;
        let mut indices = vec![self.parse_identifier()?];
        while self.current_token == Token::Comma {
            self.consume(Token::Comma)?;
            indices.push(self.parse_identifier()?);
        }
        self.consume(Token::In)?;

        // We do not use `parse_expr` here because it can think that an expression of the
        // form `a if True` is the start of a ternary operation and expect an `else` token
        // next. By calling `parse_binary_expr`, we enter the parse tree below where
        // ternary operations are handled.
        let iterable = self.parse_binary_expr()?;

        self.consume_optional_many(Token::Newline);

        let condition = if self.current_token == Token::If {
            self.consume(Token::If)?;
            Some(Box::new(self.parse_simple_expr()?))
        } else {
            None
        };

        Ok(ForClause {
            indices,
            iterable: Box::new(iterable),
            condition,
        })
    }

    /// Single elements without a comma will be returned as is, everything else will be wrapped in
    /// `Expr::Tuple`.
    ///
    /// For example:
    ///
    ///     (4) => Expr::Integer(4)
    ///     (4,) => Expr::Tuple(vec!\[Expr::Integer(4)\])
    ///
    /// A single expression followed by `for` is parsed as a generator comprehension.
    ///
    /// # Errors
    ///
    /// Returns `ParserError::SyntaxError` when a `for` clause follows more than one
    /// element (e.g. `(a, b for x in y)`); this used to abort via an assertion.
    fn parse_tuple(&mut self) -> Result {
        self.consume(Token::LParen)?;

        let mut args = Vec::new();
        let mut is_single_element = true;
        while self.current_token != Token::RParen {
            self.consume_optional_many(Token::Newline);
            let expr = self.parse_simple_expr()?;
            self.consume_optional_many(Token::Newline);

            if self.current_token == Token::For {
                // If you saw a For token, we must be in a generator comprehension, which
                // has exactly one body expression.
                if !args.is_empty() {
                    return Err(ParserError::SyntaxError);
                }
                let gen_comp = self.parse_generator_comprehension(&expr)?;

                self.consume(Token::RParen)?;
                return Ok(gen_comp);
            }

            args.push(expr);

            if self.current_token == Token::Comma {
                self.consume(Token::Comma)?;
                is_single_element = false;

                // Lets a trailing comma be followed by `)` on the next line.
                self.consume_optional_many(Token::Newline);
            }
        }

        self.consume(Token::RParen)?;

        if args.len() == 1 && is_single_element {
            Ok(args.into_iter().next().unwrap())
        } else {
            Ok(Expr::Tuple(args))
        }
    }

    /// Parse a `Token::Identifier` without any semantic analysis.
fn parse_identifier(&mut self) -> Result {
        // Returns the identifier's name and advances past it; any other current token
        // is reported back as `ParserError::UnexpectedToken`.
        if let Token::Identifier(name) = self.current_token.clone() {
            self.consume(Token::Identifier(name.clone()))?;
            Ok(name)
        } else {
            Err(ParserError::UnexpectedToken(self.current_token.clone()))
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::init::Builder;
    use crate::treewalk::types::{Class, ExprResult};
    use crate::treewalk::LoadedModule;
    use crate::treewalk::Scope;
    use std::collections::HashSet;

    /// Build a parser over `text`, loaded as a virtual module.
    fn init(text: &str) -> Parser {
        let built = Builder::new()
            .module(LoadedModule::new_virtual(text))
            .build();
        built.0
    }

    /// Build a parser over `text` that is also given the supplied `state`.
    fn init_state(text: &str, state: Container) -> Parser {
        let builder = Builder::new().state(state.clone());
        let built = builder.module(LoadedModule::new_virtual(text)).build();
        built.0
    }

    /// Helper for parsing multiline statements: skips one optional leading newline
    /// token, then parses a single statement.
    fn parse(parser: &mut Parser) -> Result {
        parser.consume_optional(Token::Newline);
        parser.parse_statement()
    }

    #[test]
    fn expression() {
        // Each case pairs a source snippet with the AST `parse_simple_expr` must produce.
        let cases = vec![
            (
                "2 + 3 * (4 - 1)",
                Expr::BinaryOperation {
                    left: Box::new(Expr::Integer(2)),
                    op: BinOp::Add,
                    right: Box::new(Expr::BinaryOperation {
                        left: Box::new(Expr::Integer(3)),
                        op: BinOp::Mul,
                        right: Box::new(Expr::BinaryOperation {
                            left: Box::new(Expr::Integer(4)),
                            op: BinOp::Sub,
                            right: Box::new(Expr::Integer(1)),
                        }),
                    }),
                },
            ),
            (
                "2 // 3",
                Expr::BinaryOperation {
                    left: Box::new(Expr::Integer(2)),
                    op: BinOp::IntegerDiv,
                    right: Box::new(Expr::Integer(3)),
                },
            ),
        ];

        for (input, expected_ast) in cases {
            let mut parser = init(input);
            match parser.parse_simple_expr() {
                Ok(ast) => assert_eq!(ast, expected_ast),
                Err(e) => panic!("Parser error: {:?}", e),
            }
        }
    }

    #[test]
    fn string_literal() {
        let input = "\"Hello\"";
        let mut parser = init(input);

        let
expected_ast = Expr::StringLiteral("Hello".into()); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "\"\".join([])"; + let mut parser = init(input); + + let expected_ast = Expr::MethodCall { + object: Box::new(Expr::StringLiteral("".into())), + name: "join".into(), + args: ParsedArguments { + args: vec![Expr::List(vec![])], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn variable_assignment() { + let input = " +a = 2 +"; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::Integer(2), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +b = a + 3 +"; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("b".to_string()), + right: Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".to_string())), + op: BinOp::Add, + right: Box::new(Expr::Integer(3)), + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +a, b = (1, 2) +"; + let mut parser = init(input); + + let expected_ast = Statement::UnpackingAssignment { + left: vec![ + Expr::Variable("a".to_string()), + Expr::Variable("b".to_string()), + ], + right: Expr::Tuple(vec![Expr::Integer(1), Expr::Integer(2)]), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn function_call() { + let input = "print(\"Hello, World!\")"; + let mut parser = init(input); + + let expected_ast = Expr::FunctionCall { + name: "print".to_string(), 
+ args: ParsedArguments { + args: vec![Expr::StringLiteral("Hello, World!".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +a(*self.args, **self.kwargs) +"; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "a".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::new(), + args_var: Some(Box::new(Expr::MemberAccess { + object: Box::new(Expr::Variable("self".into())), + field: "args".into(), + })), + kwargs_var: Some(Box::new(Expr::MemberAccess { + object: Box::new(Expr::Variable("self".into())), + field: "kwargs".into(), + })), + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn function_definition() { + let input = " +def add(x, y): + return x + y +"; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "add".to_string(), + args: ParsedArgDefinitions { + args: vec![ + ParsedArgDefinition { + arg: "x".into(), + default: None, + }, + ParsedArgDefinition { + arg: "y".into(), + default: None, + }, + ], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![Statement::Return(vec![Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::Add, + right: Box::new(Expr::Variable("y".to_string())), + }])], + }, + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +def _f(): pass +"; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "_f".to_string(), + args: ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: 
None, + }, + body: Block { + statements: vec![Statement::Pass], + }, + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +lambda: 4 +"; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::Lambda { + args: Box::new(ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }), + expr: Box::new(Expr::Integer(4)), + is_generator: false, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +lambda: (yield) +"; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::Lambda { + args: Box::new(ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }), + expr: Box::new(Expr::NoOp), + is_generator: true, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +(lambda: (yield))() +"; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: Some(Box::new(Expr::Lambda { + args: Box::new(ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }), + expr: Box::new(Expr::NoOp), + is_generator: true, + })), + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +def __init__( + self, *, indent=None, +): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "__init__".into(), + args: ParsedArgDefinitions { + args: vec![ + ParsedArgDefinition { + arg: "self".into(), + default: None, + }, + ParsedArgDefinition { + arg: 
"indent".into(), + default: Some(Expr::None), + }, + ], + args_var: None, + kwargs_var: None, + }, + body: Block::new(vec![Statement::Pass]), + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +return a, b +"; + let mut parser = init(input); + + let expected_ast = + Statement::Return(vec![Expr::Variable("a".into()), Expr::Variable("b".into())]); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn boolean_expressions() { + let input = "x and y\n"; + let mut parser = init(input); + + let expected_ast = Expr::LogicalOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: LogicalOp::And, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x or y\n"; + let mut parser = init(input); + + let expected_ast = Expr::LogicalOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: LogicalOp::Or, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x or not y\n"; + let mut parser = init(input); + + let expected_ast = Expr::LogicalOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: LogicalOp::Or, + right: Box::new(Expr::UnaryOperation { + op: UnaryOp::Not, + right: Box::new(Expr::Variable("y".to_string())), + }), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "not (x or y)\n"; + let mut parser = init(input); + + let expected_ast = Expr::UnaryOperation { + op: UnaryOp::Not, + right: 
Box::new(Expr::LogicalOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: LogicalOp::Or, + right: Box::new(Expr::Variable("y".to_string())), + }), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn comparison_operators() { + let input = "x == y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::Equals, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x != y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::NotEquals, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x < y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::LessThan, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x > y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x >= y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: 
BinOp::GreaterThanOrEqual, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x <= y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::LessThanOrEqual, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x in y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::In, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x not in y"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::NotIn, + right: Box::new(Expr::Variable("y".to_string())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x is None"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::Is, + right: Box::new(Expr::None), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x is not None"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::IsNot, + right: Box::new(Expr::None), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, 
expected_ast), + } + } + + #[test] + fn boolean_operators() { + let input = "x = True\n"; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("x".to_string()), + right: Expr::Boolean(true), + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "True or False\n"; + let mut parser = init(input); + + let expected_ast = Expr::LogicalOperation { + left: Box::new(Expr::Boolean(true)), + op: LogicalOp::Or, + right: Box::new(Expr::Boolean(false)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "x = None\n"; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("x".to_string()), + right: Expr::None, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "return None\n"; + let mut parser = init(input); + + let expected_ast = Statement::Return(vec![Expr::None]); + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn if_else() { + let input = r#" +if x > 0: + print("Greater") +elif x > -10: + print("Medium") +else: + print("Less") +"#; + let mut parser = init(input); + + let expected_ast = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(0)), + }, + block: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Greater".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }, + elif_parts: 
vec![ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(-10)), + }, + block: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Medium".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }], + else_part: Some(Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Less".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +if x > 0: + print("Greater") +elif x > -10: + print("Medium") +elif x > -20: + print("Less") +"#; + let mut parser = init(input); + + let expected_ast = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(0)), + }, + block: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Greater".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }, + elif_parts: vec![ + ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(-10)), + }, + block: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Medium".to_string())], + kwargs: HashMap::new(), + 
args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }, + ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(-20)), + }, + block: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Less".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }, + ], + else_part: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +if x > 0: + print("Greater") +"#; + let mut parser = init(input); + + let expected_ast = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("x".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(0)), + }, + block: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("Greater".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }, + elif_parts: vec![], + else_part: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +if (a == 1 + and b + and c): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::LogicalOperation { + left: Box::new(Expr::LogicalOperation { + left: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::Equals, + right: Box::new(Expr::Integer(1)), + }), + op: LogicalOp::And, + right: Box::new(Expr::Variable("b".into())), + }), + op: LogicalOp::And, + right: 
Box::new(Expr::Variable("c".into())), + }, + block: Block::new(vec![Statement::Pass]), + }, + elif_parts: vec![], + else_part: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn while_loop() { + let input = " +while True: + print(\"busy loop\") +"; + let mut parser = init(input); + + let expected_ast = Statement::WhileLoop { + condition: Expr::Boolean(true), + body: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".to_string(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("busy loop".to_string())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn class_definition() { + let input = r#" +class Foo: + def __init__(self): + self.x = 0 + + def bar(self): + print(self.x) +"#; + let mut parser = init(input); + + let expected_ast = Statement::ClassDef { + name: "Foo".to_string(), + parents: vec![], + metaclass: None, + body: Block { + statements: vec![ + Statement::FunctionDef { + name: "__init__".to_string(), + args: ParsedArgDefinitions { + args: vec![ParsedArgDefinition { + arg: "self".to_string(), + default: None, + }], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![Statement::Assignment { + left: Expr::MemberAccess { + object: Box::new(Expr::Variable("self".to_string())), + field: "x".to_string(), + }, + right: Expr::Integer(0), + }], + }, + decorators: vec![], + is_async: false, + }, + Statement::FunctionDef { + name: "bar".to_string(), + args: ParsedArgDefinitions { + args: vec![ParsedArgDefinition { + arg: "self".to_string(), + default: None, + }], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: 
"print".to_string(), + args: ParsedArguments { + args: vec![Expr::MemberAccess { + object: Box::new(Expr::Variable("self".to_string())), + field: "x".to_string(), + }], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + decorators: vec![], + is_async: false, + }, + ], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +class Foo(Bar, Baz): pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ClassDef { + name: "Foo".to_string(), + parents: vec![Expr::Variable("Bar".into()), Expr::Variable("Baz".into())], + metaclass: None, + body: Block::new(vec![Statement::Pass]), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +class Foo(module.Bar): pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ClassDef { + name: "Foo".to_string(), + parents: vec![Expr::MemberAccess { + object: Box::new(Expr::Variable("module".into())), + field: "Bar".into(), + }], + metaclass: None, + body: Block::new(vec![Statement::Pass]), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn class_instantiation() { + let input = r#" +foo = Foo() +"#; + + let mut symbol_table = Scope::default(); + let foo = Class::new_base("Foo".to_string(), vec![], None, Scope::default()); + symbol_table.insert("Foo", ExprResult::Class(foo)); + + let state = Container::new(State::new()); + state.push_local(Container::new(symbol_table)); + + let mut parser = init_state(input, state); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("foo".to_string()), + right: Expr::ClassInstantiation { + name: "Foo".to_string(), + args: ParsedArguments::empty(), + }, + }; + + match parse(&mut parser) { + Err(e) => 
panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn method_invocation() { + let mut symbol_table = Scope::default(); + let foo = Class::new_base("Foo".to_string(), vec![], None, Scope::default()); + symbol_table.insert("Foo", ExprResult::Class(foo)); + + let state = Container::new(State::new()); + state.push_local(Container::new(symbol_table)); + + let input = r#" +foo = Foo() +"#; + let mut parser = init_state(input, state.clone()); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("foo".to_string()), + right: Expr::ClassInstantiation { + name: "Foo".to_string(), + args: ParsedArguments::empty(), + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +foo.bar() +"#; + let mut parser = init_state(input, state); + + let expected_ast = Statement::Expression(Expr::MethodCall { + object: Box::new(Expr::Variable("foo".to_string())), + name: "bar".to_string(), + args: ParsedArguments::empty(), + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn regular_import() { + let input = "import other"; + let mut parser = init(input); + + let expected_ast = Statement::RegularImport { + import_path: ImportPath::Absolute(vec!["other".into()]), + alias: None, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +import other as b +pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::RegularImport { + import_path: ImportPath::Absolute(vec!["other".into()]), + alias: Some("b".into()), + }; + + // Before we handling Token::As processing, this test would fail, but only once it began + // parsing the next statement. We needed to parse two statements here to produce the + // failing test. 
+ match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, Statement::Pass), + } + + let input = "mypackage.myothermodule.add('1', '1')"; + let mut parser = init(input); + + let expected_ast = Expr::MethodCall { + object: Box::new(Expr::MemberAccess { + object: Box::new(Expr::Variable("mypackage".into())), + field: "myothermodule".into(), + }), + name: "add".into(), + args: ParsedArguments { + args: vec![ + Expr::StringLiteral("1".into()), + Expr::StringLiteral("1".into()), + ], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +cls._abc_registry.add(subclass) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::MethodCall { + object: Box::new(Expr::MemberAccess { + object: Box::new(Expr::Variable("cls".into())), + field: "_abc_registry".into(), + }), + name: "add".into(), + args: ParsedArguments { + args: vec![Expr::Variable("subclass".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn selective_import() { + let input = "from other import something"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Absolute(vec!["other".into()]), + items: vec![ImportedItem::Direct("something".to_string())], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from other import something, something_else"; + let mut parser = init(input); + + let expected_ast = 
Statement::SelectiveImport { + import_path: ImportPath::Absolute(vec!["other".into()]), + items: vec![ + ImportedItem::Direct("something".to_string()), + ImportedItem::Direct("something_else".to_string()), + ], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from other import *"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Absolute(vec!["other".into()]), + items: vec![], + wildcard: true, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from other import something, something_else as imported_name"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Absolute(vec!["other".into()]), + items: vec![ + ImportedItem::Direct("something".to_string()), + ImportedItem::Alias(Alias { + symbol: "something_else".to_string(), + alias_symbol: Some("imported_name".to_string()), + }), + ], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from other.module import something, something_else as imported_name"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Absolute(vec!["other".into(), "module".into()]), + items: vec![ + ImportedItem::Direct("something".to_string()), + ImportedItem::Alias(Alias { + symbol: "something_else".to_string(), + alias_symbol: Some("imported_name".to_string()), + }), + ], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from . 
import something"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Relative(0, vec![]), + items: vec![ImportedItem::Direct("something".to_string())], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from .other.module import something, something_else as imported_name"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Relative(0, vec!["other".into(), "module".into()]), + items: vec![ + ImportedItem::Direct("something".to_string()), + ImportedItem::Alias(Alias { + symbol: "something_else".to_string(), + alias_symbol: Some("imported_name".to_string()), + }), + ], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "from ..other.module import something, something_else as imported_name"; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Relative(1, vec!["other".into(), "module".into()]), + items: vec![ + ImportedItem::Direct("something".to_string()), + ImportedItem::Alias(Alias { + symbol: "something_else".to_string(), + alias_symbol: Some("imported_name".to_string()), + }), + ], + wildcard: false, + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +from ..other.module import (something, + something_else as imported_name) +"#; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Relative(1, vec!["other".into(), "module".into()]), + items: vec![ + ImportedItem::Direct("something".to_string()), + ImportedItem::Alias(Alias { + symbol: "something_else".to_string(), + alias_symbol: 
Some("imported_name".to_string()), + }), + ], + wildcard: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +from ..other.module import (something as imported_name, + something_else) +"#; + let mut parser = init(input); + + let expected_ast = Statement::SelectiveImport { + import_path: ImportPath::Relative(1, vec!["other".into(), "module".into()]), + items: vec![ + ImportedItem::Alias(Alias { + symbol: "something".to_string(), + alias_symbol: Some("imported_name".to_string()), + }), + ImportedItem::Direct("something_else".to_string()), + ], + wildcard: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn floating_point() { + let input = r#" +a = 3.14 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::FloatingPoint(3.14), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +b = a + 2.5e-3 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("b".to_string()), + right: Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".to_string())), + op: BinOp::Add, + right: Box::new(Expr::FloatingPoint(2.5e-3)), + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn negative_numbers() { + let input = "-3.14"; + let mut parser = init(input); + let expected_ast = Expr::FloatingPoint(-3.14); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "-3"; + let mut parser = init(input); + let expected_ast = Expr::Integer(-3); + + match 
parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "2 - 3"; + let mut parser = init(input); + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Sub, + right: Box::new(Expr::Integer(3)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "-2e-3"; + let mut parser = init(input); + let expected_ast = Expr::FloatingPoint(-2e-3); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "2 + -3"; + let mut parser = init(input); + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Add, + right: Box::new(Expr::Integer(-3)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "-(3)"; + let mut parser = init(input); + let expected_ast = Expr::UnaryOperation { + op: UnaryOp::Minus, + right: Box::new(Expr::Integer(3)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "+(3)"; + let mut parser = init(input); + let expected_ast = Expr::UnaryOperation { + op: UnaryOp::Plus, + right: Box::new(Expr::Integer(3)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "-(2 + 3)"; + let mut parser = init(input); + let expected_ast = Expr::UnaryOperation { + op: UnaryOp::Minus, + right: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Add, + right: Box::new(Expr::Integer(3)), + }), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, 
expected_ast), + } + } + + #[test] + fn lists() { + let input = "[1,2,3]"; + let mut parser = init(input); + let expected_ast = Expr::List(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "[1, 2, 3]"; + let mut parser = init(input); + let expected_ast = Expr::List(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a = [1, + 2, + 3 +]"#; + let mut parser = init(input); + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::List(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +a = [1, 2, 3] +"; + let mut parser = init(input); + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::List(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "list([1, 2, 3])"; + let mut parser = init(input); + let expected_ast = Expr::ClassInstantiation { + name: "list".to_string(), + args: ParsedArguments { + args: vec![Expr::List(vec![ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ])], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn sets() { + let input = "{1,2,3}"; + let mut parser = init(input); + let expected_ast = Expr::Set(HashSet::from([ + Expr::Integer(1), + Expr::Integer(2), + 
Expr::Integer(3), + ])); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "{1, 2, 3}"; + let mut parser = init(input); + let expected_ast = Expr::Set(HashSet::from([ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ])); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a = {1, 2, 3}"; + let mut parser = init(input); + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::Set(HashSet::from([ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ])), + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "set({1, 2, 3})"; + let mut parser = init(input); + let expected_ast = Expr::ClassInstantiation { + name: "set".to_string(), + args: ParsedArguments { + args: vec![Expr::Set(HashSet::from([ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ]))], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +{ + 1, + 2, + 3 +}"#; + let mut parser = init(input); + let expected_ast = Statement::Expression(Expr::Set(HashSet::from([ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ]))); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +{ + 1, + 2, + 3, +}"#; + let mut parser = init(input); + let expected_ast = Statement::Expression(Expr::Set(HashSet::from([ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ]))); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, 
expected_ast), + } + } + + #[test] + fn tuples() { + let input = "(1,2,3)"; + let mut parser = init(input); + let expected_ast = Expr::Tuple(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "(1, 2, 3)"; + let mut parser = init(input); + let expected_ast = Expr::Tuple(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "1, 2, 3"; + let mut parser = init(input); + let expected_ast = Expr::Tuple(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]); + + match parser.parse_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a = (1, 2, 3)"; + let mut parser = init(input); + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::Tuple(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]), + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a = 1, 2, 3"; + let mut parser = init(input); + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::Tuple(vec![Expr::Integer(1), Expr::Integer(2), Expr::Integer(3)]), + }; + + match parser.parse_statement() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "tuple((1, 2, 3))"; + let mut parser = init(input); + let expected_ast = Expr::ClassInstantiation { + name: "tuple".to_string(), + args: ParsedArguments { + args: vec![Expr::Tuple(vec![ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ])], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }; + + match 
parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +tuple((1, + 2, + 3)) +"#; + let mut parser = init(input); + let expected_ast = Statement::Expression(Expr::ClassInstantiation { + name: "tuple".to_string(), + args: ParsedArguments { + args: vec![Expr::Tuple(vec![ + Expr::Integer(1), + Expr::Integer(2), + Expr::Integer(3), + ])], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn index_access() { + let input = "a[0]"; + let mut parser = init(input); + let expected_ast = Expr::IndexAccess { + object: Box::new(Expr::Variable("a".to_string())), + index: Box::new(Expr::Integer(0)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "[0,1][1]"; + let mut parser = init(input); + let expected_ast = Expr::IndexAccess { + object: Box::new(Expr::List(vec![Expr::Integer(0), Expr::Integer(1)])), + index: Box::new(Expr::Integer(1)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a[1] = 0"; + let mut parser = init(input); + let expected_ast = Statement::Assignment { + left: Expr::IndexAccess { + object: Box::new(Expr::Variable("a".to_string())), + index: Box::new(Expr::Integer(1)), + }, + right: Expr::Integer(0), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn for_in_loop() { + let input = r#" +for i in a: + print(i) +"#; + let mut parser = init(input); + let expected_ast = Statement::ForInLoop { + index: LoopIndex::Variable("i".into()), + iterable: Expr::Variable("a".into()), + body: Block { + statements: 
vec![Statement::Expression(Expr::FunctionCall { + name: "print".into(), + args: ParsedArguments { + args: vec![Expr::Variable("i".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + else_block: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +for k, v in a.items(): + print(v) +"#; + let mut parser = init(input); + let expected_ast = Statement::ForInLoop { + index: LoopIndex::Tuple(vec!["k".into(), "v".into()]), + iterable: Expr::MethodCall { + object: Box::new(Expr::Variable("a".into())), + name: "items".into(), + args: ParsedArguments::empty(), + }, + body: Block { + statements: vec![Statement::Expression(Expr::FunctionCall { + name: "print".into(), + args: ParsedArguments { + args: vec![Expr::Variable("v".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })], + }, + else_block: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn list_comprehension() { + let input = "[ i * 2 for i in a ]"; + let mut parser = init(input); + let expected_ast = Expr::ListComprehension { + body: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("i".to_string())), + op: BinOp::Mul, + right: Box::new(Expr::Integer(2)), + }), + clauses: vec![ForClause { + indices: vec!["i".to_string()], + iterable: Box::new(Expr::Variable("a".to_string())), + condition: None, + }], + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "[i*2 for i in a if True]"; + let mut parser = init(input); + let expected_ast = Expr::ListComprehension { + body: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("i".to_string())), + op: BinOp::Mul, + right: Box::new(Expr::Integer(2)), 
+ }), + clauses: vec![ForClause { + indices: vec!["i".to_string()], + iterable: Box::new(Expr::Variable("a".to_string())), + condition: Some(Box::new(Expr::Boolean(true))), + }], + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn generators() { + let input = r#" +def countdown(n): + while n > 0: + yield n + n = n - 1 +"#; + let mut parser = init(input); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => { + let expected_body = Block { + statements: vec![Statement::WhileLoop { + condition: Expr::BinaryOperation { + left: Box::new(Expr::Variable("n".to_string())), + op: BinOp::GreaterThan, + right: Box::new(Expr::Integer(0)), + }, + body: Block { + statements: vec![ + Statement::Yield(Expr::Variable("n".to_string())), + Statement::Assignment { + left: Expr::Variable("n".to_string()), + right: Expr::BinaryOperation { + left: Box::new(Expr::Variable("n".to_string())), + op: BinOp::Sub, + right: Box::new(Expr::Integer(1)), + }, + }, + ], + }, + }], + }; + + assert!(matches!(ast, Statement::FunctionDef { + body, + .. + } if body == expected_body)) + } + } + + let input = r#" +yield from a +"#; + let mut parser = init(input); + + let expected_ast = Statement::YieldFrom(Expr::Variable("a".into())); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn inheritance() { + let input = r#" +class Foo(Parent): + def __init__(self): + self.x = 0 +"#; + let mut parser = init(input); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => { + let expected_parent = vec![Expr::Variable("Parent".into())]; + assert!(matches!(ast, Statement::ClassDef { + parents, + .. 
+ } if parents == expected_parent)) + } + } + + let input = r#" +class Foo(metaclass=Parent): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ClassDef { + name: "Foo".to_string(), + parents: vec![], + metaclass: Some("Parent".to_string()), + body: Block { + statements: vec![Statement::Pass], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +class Foo(Bar, metaclass=Parent): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ClassDef { + name: "Foo".to_string(), + parents: vec![Expr::Variable("Bar".into())], + metaclass: Some("Parent".to_string()), + body: Block { + statements: vec![Statement::Pass], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +class InterfaceMeta(type): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ClassDef { + name: "InterfaceMeta".to_string(), + parents: vec![Expr::Variable("type".into())], + metaclass: None, + body: Block { + statements: vec![Statement::Pass], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn dictionaries() { + let input = r#" +a = { "b": 4, 'c': 5 } +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".to_string()), + right: Expr::Dict(HashMap::from([ + (Expr::StringLiteral("b".to_string()), Expr::Integer(4)), + (Expr::StringLiteral("c".to_string()), Expr::Integer(5)), + ])), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn async_await() { + let input = r#" +async def main(): + task_1 = asyncio.create_task(task1()) + return await task_1 +"#; + let mut parser = 
init(input); + + let expected_ast = Statement::FunctionDef { + name: "main".to_string(), + args: ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![ + Statement::Assignment { + left: Expr::Variable("task_1".to_string()), + right: Expr::MethodCall { + object: Box::new(Expr::Variable("asyncio".to_string())), + name: "create_task".to_string(), + args: ParsedArguments { + args: vec![Expr::FunctionCall { + name: "task1".to_string(), + args: ParsedArguments::empty(), + callee: None, + }], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + }, + }, + Statement::Return(vec![Expr::Await { + right: Box::new(Expr::Variable("task_1".to_string())), + }]), + ], + }, + decorators: vec![], + is_async: true, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn assert() { + let input = r#" +assert True +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assert(Expr::Boolean(true)); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn try_except_finally() { + let input = r#" +try: + 4 / 0 +except: + a = 2 +finally: + a = 3 +"#; + let mut parser = init(input); + + let expected_ast = Statement::TryExcept { + try_block: Block { + statements: vec![Statement::Expression(Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::Div, + right: Box::new(Expr::Integer(0)), + })], + }, + except_clauses: vec![ExceptClause { + exception_types: vec![], + block: Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(2), + }], + }, + }], + else_block: None, + finally_block: Some(Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(3), + }], + }), + }; + + match parse(&mut parser) { + 
Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +try: + 4 / 0 +except ZeroDivisionError as e: + a = 2 +finally: + a = 3 +"#; + let mut parser = init(input); + + let expected_ast = Statement::TryExcept { + try_block: Block { + statements: vec![Statement::Expression(Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::Div, + right: Box::new(Expr::Integer(0)), + })], + }, + except_clauses: vec![ExceptClause { + exception_types: vec![HandledException { + literal: ExceptionLiteral::ZeroDivisionError, + alias: Some("e".into()), + }], + block: Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(2), + }], + }, + }], + else_block: None, + finally_block: Some(Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(3), + }], + }), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +try: + 4 / 0 +except ZeroDivisionError as e: + a = 2 +else: + a = 4 +finally: + a = 3 +"#; + let mut parser = init(input); + + let expected_ast = Statement::TryExcept { + try_block: Block { + statements: vec![Statement::Expression(Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::Div, + right: Box::new(Expr::Integer(0)), + })], + }, + except_clauses: vec![ExceptClause { + exception_types: vec![HandledException { + literal: ExceptionLiteral::ZeroDivisionError, + alias: Some("e".into()), + }], + block: Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(2), + }], + }, + }], + else_block: Some(Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(4), + }], + }), + finally_block: Some(Block { + statements: vec![Statement::Assignment { + left: Expr::Variable("a".into()), + right: 
Expr::Integer(3), + }], + }), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn binary_literal() { + let input = r#" +a = 0b0010 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(2), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn octal_literal() { + let input = r#" +a = 0o0010 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(8), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn hex_literal() { + let input = r#" +a = 0x0010 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(16), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn args_and_kwargs() { + let input = r#" +def test_args(*args): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "test_args".into(), + args: ParsedArgDefinitions { + args: vec![], + args_var: Some("args".into()), + kwargs_var: None, + }, + body: Block { + statements: vec![Statement::Pass], + }, + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +def test_args(*args, **kwargs): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "test_args".into(), + args: ParsedArgDefinitions { + args: vec![], + args_var: Some("args".into()), + kwargs_var: 
Some("kwargs".into()), + }, + body: Block { + statements: vec![Statement::Pass], + }, + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +def test_kwargs(**kwargs): + print(kwargs['a']) +"#; + let mut parser = init(input); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => { + let expected_args = ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: Some("kwargs".into()), + }; + assert!(matches!(ast, Statement::FunctionDef { + args, + .. + } if expected_args == args)) + } + } + + let input = r#" +def test_default(file=None): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "test_default".into(), + args: ParsedArgDefinitions { + args: vec![ParsedArgDefinition { + arg: "file".into(), + default: Some(Expr::None), + }], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![Statement::Pass], + }, + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +test_kwargs(a=1, b=2) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "test_kwargs".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::from([ + ("b".into(), Expr::Integer(2)), + ("a".into(), Expr::Integer(1)), + ]), + args_var: None, + kwargs_var: None, + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +test_kwargs(**{'a':1, 'b':2}) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "test_kwargs".into(), + args: ParsedArguments { + args: vec![], + kwargs: 
HashMap::from([ + ("b".into(), Expr::Integer(2)), + ("a".into(), Expr::Integer(1)), + ]), + args_var: None, + kwargs_var: None, + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +test_kwargs(**kwargs) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "test_kwargs".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: Some(Box::new(Expr::Variable("kwargs".into()))), + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +test_kwargs(*args) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "test_kwargs".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::new(), + args_var: Some(Box::new(Expr::Variable("args".into()))), + kwargs_var: None, + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +test_kwargs(*args, **kwargs) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "test_kwargs".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::new(), + args_var: Some(Box::new(Expr::Variable("args".into()))), + kwargs_var: Some(Box::new(Expr::Variable("kwargs".into()))), + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +deprecated("collections.abc.ByteString", +) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "deprecated".into(), + args: ParsedArguments { + args: 
vec![Expr::StringLiteral("collections.abc.ByteString".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn decorator() { + let input = r#" +@test_decorator +def get_val(): + return 2 +"#; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "get_val".into(), + args: ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![Statement::Return(vec![Expr::Integer(2)])], + }, + decorators: vec![Expr::Variable("test_decorator".into())], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +test_decorator(get_val_undecorated)() +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "".into(), + args: ParsedArguments { + args: vec![], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: Some(Box::new(Expr::FunctionCall { + name: "test_decorator".into(), + args: ParsedArguments { + args: vec![Expr::Variable("get_val_undecorated".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + })), + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn raise() { + let input = r#" +raise Exception +"#; + let mut parser = init(input); + + let expected_ast = Statement::Raise(Some(ExceptionInstance { + literal: ExceptionLiteral::Exception, + args: ParsedArguments::empty(), + })); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +raise Exception("message") +"#; + let mut parser = 
init(input); + + let expected_ast = Statement::Raise(Some(ExceptionInstance { + literal: ExceptionLiteral::Exception, + args: ParsedArguments { + args: vec![Expr::StringLiteral("message".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + })); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +raise +"#; + let mut parser = init(input); + + let expected_ast = Statement::Raise(None); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +raise Exception("message") from None +"#; + let mut parser = init(input); + + let expected_ast = Statement::Raise(Some(ExceptionInstance { + literal: ExceptionLiteral::Exception, + args: ParsedArguments { + args: vec![Expr::StringLiteral("message".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + })); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn context_manager() { + let input = r#" +with open('test.txt') as f: + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ContextManager { + expr: Expr::FunctionCall { + name: "open".into(), + args: ParsedArguments { + args: vec![Expr::StringLiteral("test.txt".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }, + variable: Some("f".into()), + block: Block { + statements: vec![Statement::Pass], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +with open('test.txt'): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::ContextManager { + expr: Expr::FunctionCall { + name: "open".into(), + args: ParsedArguments { + args: 
vec![Expr::StringLiteral("test.txt".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }, + variable: None, + block: Block { + statements: vec![Statement::Pass], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn type_alias() { + let input = r#" +a = list[int] +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::TypeNode(TypeNode::Generic { + base_type: "list".into(), + parameters: vec![TypeNode::Basic("int".into())], + }), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +u = int | str +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("u".into()), + right: Expr::TypeNode(TypeNode::Union(vec![ + TypeNode::Basic("int".into()), + TypeNode::Basic("str".into()), + ])), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn delete() { + let input = r#" +del a +"#; + let mut parser = init(input); + + let expected_ast = Statement::Delete(vec![Expr::Variable("a".into())]); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +del a, b, c +"#; + let mut parser = init(input); + + let expected_ast = Statement::Delete(vec![ + Expr::Variable("a".into()), + Expr::Variable("b".into()), + Expr::Variable("c".into()), + ]); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn byte_string() { + let input = r#" +a = b'hello' +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: 
Expr::Variable("a".into()), + right: Expr::ByteStringLiteral("hello".into()), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn compound_operator() { + let input = r#" +a += 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::Add, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a -= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::Subtract, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a *= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::Multiply, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a /= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::Divide, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a &= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::BitwiseAnd, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match 
parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a |= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::BitwiseOr, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a ^= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::BitwiseXor, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a //= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::IntegerDiv, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a <<= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::LeftShift, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a >>= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::RightShift, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + 
let input = r#" +a %= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::Mod, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a @= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::MatMul, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a **= 1 +"#; + let mut parser = init(input); + + let expected_ast = Statement::CompoundAssignment { + operator: CompoundOperator::Expo, + target: Box::new(Expr::Variable("a".into())), + value: Box::new(Expr::Integer(1)), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "~a"; + let mut parser = init(input); + let expected_ast = Expr::UnaryOperation { + op: UnaryOp::BitwiseNot, + right: Box::new(Expr::Variable("a".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a % b"; + let mut parser = init(input); + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::Mod, + right: Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a @ b"; + let mut parser = init(input); + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::MatMul, + right: Box::new(Expr::Variable("b".into())), + }; + + match 
parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn f_strings() { + let input = r#"f"Hello {name}.""#; + let mut parser = init(input); + + let expected_ast = Expr::FString(vec![ + FStringPart::String("Hello ".into()), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("name".into())), + format: FormatOption::Str, + }), + FStringPart::String(".".into()), + ]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#"f"{first}{last}""#; + let mut parser = init(input); + + let expected_ast = Expr::FString(vec![ + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("first".into())), + format: FormatOption::Str, + }), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("last".into())), + format: FormatOption::Str, + }), + ]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#"f"Hello""#; + let mut parser = init(input); + + let expected_ast = Expr::FString(vec![FStringPart::String("Hello".into())]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#"f"Hello {name} goodbye {other}""#; + let mut parser = init(input); + + let expected_ast = Expr::FString(vec![ + FStringPart::String("Hello ".into()), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("name".into())), + format: FormatOption::Str, + }), + FStringPart::String(" goodbye ".into()), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("other".into())), + format: FormatOption::Str, + }), + ]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#"f"Age: {num + 1}""#; + let mut 
parser = init(input); + + let expected_ast = Expr::FString(vec![ + FStringPart::String("Age: ".into()), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("num".into())), + op: BinOp::Add, + right: Box::new(Expr::Integer(1)), + }), + format: FormatOption::Str, + }), + ]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#"f"Hello {name!r} goodbye {other}""#; + let mut parser = init(input); + + let expected_ast = Expr::FString(vec![ + FStringPart::String("Hello ".into()), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("name".into())), + format: FormatOption::Repr, + }), + FStringPart::String(" goodbye ".into()), + FStringPart::Expr(ExprFormat { + expr: Box::new(Expr::Variable("other".into())), + format: FormatOption::Str, + }), + ]); + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn binary_operators() { + let input = "a & b"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::BitwiseAnd, + right: Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a | b"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::BitwiseOr, + right: Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a ^ b"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::BitwiseXor, + right: 
Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a << b"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::LeftShift, + right: Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a >> b"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::RightShift, + right: Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a ** b"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::Variable("a".into())), + op: BinOp::Expo, + right: Box::new(Expr::Variable("b".into())), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "2 * 3 << 2 + 4 & 205"; + let mut parser = init(input); + + let expected_ast = Expr::BinaryOperation { + left: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Mul, + right: Box::new(Expr::Integer(3)), + }), + op: BinOp::LeftShift, + right: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Integer(2)), + op: BinOp::Add, + right: Box::new(Expr::Integer(4)), + }), + }), + op: BinOp::BitwiseAnd, + right: Box::new(Expr::Integer(205)), + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn control_flow() { + let input = r#" +for i in a: + break +"#; + 
let mut parser = init(input); + let expected_ast = Statement::ForInLoop { + index: LoopIndex::Variable("i".into()), + iterable: Expr::Variable("a".into()), + body: Block { + statements: vec![Statement::Break], + }, + else_block: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +for i in a: + continue +"#; + let mut parser = init(input); + let expected_ast = Statement::ForInLoop { + index: LoopIndex::Variable("i".into()), + iterable: Expr::Variable("a".into()), + body: Block { + statements: vec![Statement::Continue], + }, + else_block: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +for i in a: + break +else: + pass +"#; + let mut parser = init(input); + let expected_ast = Statement::ForInLoop { + index: LoopIndex::Variable("i".into()), + iterable: Expr::Variable("a".into()), + body: Block { + statements: vec![Statement::Break], + }, + else_block: Some(Block { + statements: vec![Statement::Pass], + }), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn type_hints() { + let input = " +def add(x: str, y: str) -> str: + return x + y +"; + let mut parser = init(input); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => { + let expected_args = vec![ + ParsedArgDefinition { + arg: "x".into(), + default: None, + }, + ParsedArgDefinition { + arg: "y".into(), + default: None, + }, + ]; + + assert!(matches!( + ast, + Statement::FunctionDef { + args: ParsedArgDefinitions { + args, + args_var: None, + kwargs_var: None, + }, + .. 
+ } if args == expected_args + )) + } + } + } + + #[test] + fn slices() { + let input = "a[1:1:1]"; + let mut parser = init(input); + let expected_ast = Expr::SliceOperation { + object: Box::new(Expr::Variable("a".into())), + params: ParsedSliceParams { + start: Some(Box::new(Expr::Integer(1))), + stop: Some(Box::new(Expr::Integer(1))), + step: Some(Box::new(Expr::Integer(1))), + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a[2:5]"; + let mut parser = init(input); + let expected_ast = Expr::SliceOperation { + object: Box::new(Expr::Variable("a".into())), + params: ParsedSliceParams { + start: Some(Box::new(Expr::Integer(2))), + stop: Some(Box::new(Expr::Integer(5))), + step: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a[:5]"; + let mut parser = init(input); + let expected_ast = Expr::SliceOperation { + object: Box::new(Expr::Variable("a".into())), + params: ParsedSliceParams { + start: None, + stop: Some(Box::new(Expr::Integer(5))), + step: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a[3:]"; + let mut parser = init(input); + let expected_ast = Expr::SliceOperation { + object: Box::new(Expr::Variable("a".into())), + params: ParsedSliceParams { + start: Some(Box::new(Expr::Integer(3))), + stop: None, + step: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "a[::2]"; + let mut parser = init(input); + let expected_ast = Expr::SliceOperation { + object: Box::new(Expr::Variable("a".into())), + params: ParsedSliceParams { + start: None, + stop: None, + step: Some(Box::new(Expr::Integer(2))), + }, + }; + + match 
parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = "new_bases[i+shift:shift+1]"; + let mut parser = init(input); + let expected_ast = Expr::SliceOperation { + object: Box::new(Expr::Variable("new_bases".into())), + params: ParsedSliceParams { + start: Some(Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("i".into())), + op: BinOp::Add, + right: Box::new(Expr::Variable("shift".into())), + })), + stop: Some(Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("shift".into())), + op: BinOp::Add, + right: Box::new(Expr::Integer(1)), + })), + step: None, + }, + }; + + match parser.parse_simple_expr() { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn closures() { + let input = " +def outer(): + a = 1 + b = 2 + def inner(): + b = 3 + print(a) +"; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "outer".into(), + args: ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![ + Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::Integer(1), + }, + Statement::Assignment { + left: Expr::Variable("b".into()), + right: Expr::Integer(2), + }, + Statement::FunctionDef { + name: "inner".into(), + args: ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: None, + }, + body: Block { + statements: vec![ + Statement::Assignment { + left: Expr::Variable("b".into()), + right: Expr::Integer(3), + }, + Statement::Expression(Expr::FunctionCall { + name: "print".into(), + args: ParsedArguments { + args: vec![Expr::Variable("a".into())], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }), + ], + }, + decorators: vec![], + is_async: false, + }, + ], + }, + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) 
=> panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn scope_modifiers() { + let input = " +nonlocal var +"; + let mut parser = init(input); + + let expected_ast = Statement::Nonlocal("var".into()); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = " +global var +"; + let mut parser = init(input); + + let expected_ast = Statement::Global("var".into()); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn ternary_operation() { + let input = r#" +a = 4 if True else 5 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::TernaryOp { + condition: Box::new(Expr::Boolean(true)), + if_value: Box::new(Expr::Integer(4)), + else_value: Box::new(Expr::Integer(5)), + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +a = 4 + x if b == 6 else 5 << 2 +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::TernaryOp { + condition: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("b".into())), + op: BinOp::Equals, + right: Box::new(Expr::Integer(6)), + }), + if_value: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Integer(4)), + op: BinOp::Add, + right: Box::new(Expr::Variable("x".into())), + }), + else_value: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Integer(5)), + op: BinOp::LeftShift, + right: Box::new(Expr::Integer(2)), + }), + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn more_tokens() { + let input = r#" +Ellipsis +"#; + let mut parser = 
init(input); + + let expected_ast = Statement::Expression(Expr::Ellipsis); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn generator_comprehension() { + let input = r#" +a = (i * 2 for i in b) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Assignment { + left: Expr::Variable("a".into()), + right: Expr::GeneratorComprehension { + body: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("i".to_string())), + op: BinOp::Mul, + right: Box::new(Expr::Integer(2)), + }), + clauses: vec![ForClause { + indices: vec!["i".into()], + iterable: Box::new(Expr::Variable("b".into())), + condition: None, + }], + }, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +foo(i * 2 for i in b) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::FunctionCall { + name: "foo".to_string(), + args: ParsedArguments { + args: vec![Expr::GeneratorComprehension { + body: Box::new(Expr::BinaryOperation { + left: Box::new(Expr::Variable("i".to_string())), + op: BinOp::Mul, + right: Box::new(Expr::Integer(2)), + }), + clauses: vec![ForClause { + indices: vec!["i".into()], + iterable: Box::new(Expr::Variable("b".into())), + condition: None, + }], + }], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + }, + callee: None, + }); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn default_args() { + let input = r#" +def foo(data=None): + pass +"#; + let mut parser = init(input); + + let expected_ast = Statement::FunctionDef { + name: "foo".into(), + args: ParsedArgDefinitions { + args: vec![ParsedArgDefinition { + arg: "data".into(), + default: Some(Expr::None), + }], + args_var: None, + kwargs_var: None, + }, + body: 
Block::new(vec![Statement::Pass]), + decorators: vec![], + is_async: false, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn unpacking() { + let input = r#" +(*l,) +"#; + let mut parser = init(input); + + let expected_ast = Statement::Expression(Expr::Tuple(vec![Expr::UnaryOperation { + op: UnaryOp::Unpack, + right: Box::new(Expr::Variable("l".into())), + }])); + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + + let input = r#" +if True: + a, b = b, a +"#; + let mut parser = init(input); + + let expected_ast = Statement::IfElse { + if_part: ConditionalBlock { + condition: Expr::Boolean(true), + block: Block::new(vec![Statement::UnpackingAssignment { + left: vec![Expr::Variable("a".into()), Expr::Variable("b".into())], + right: Expr::Tuple(vec![ + Expr::Variable("b".into()), + Expr::Variable("a".into()), + ]), + }]), + }, + elif_parts: vec![], + else_part: None, + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } + + #[test] + fn multiple_assignment() { + let input = r#" +a = b = True +"#; + let mut parser = init(input); + + let expected_ast = Statement::MultipleAssignment { + left: vec![Expr::Variable("a".into()), Expr::Variable("b".into())], + right: Expr::Boolean(true), + }; + + match parse(&mut parser) { + Err(e) => panic!("Parser error: {:?}", e), + Ok(ast) => assert_eq!(ast, expected_ast), + } + } +} diff --git a/src/parser/static_analysis.rs b/src/parser/static_analysis.rs new file mode 100644 index 0000000..54fdb7b --- /dev/null +++ b/src/parser/static_analysis.rs @@ -0,0 +1,137 @@ +use std::collections::HashSet; + +use crate::{ + core::{log, LogLevel}, + parser::types::{Block, Expr, Statement, Variable}, +}; + +pub trait Visitor { + fn visit_block(&mut self, program: &Block); + fn visit_statement(&mut 
self, statement: &Statement); +} + +pub struct YieldDetector { + pub found_yield: bool, +} + +impl Visitor for YieldDetector { + fn visit_block(&mut self, _program: &Block) {} + + fn visit_statement(&mut self, statement: &Statement) { + if matches!(statement, Statement::Yield(_)) { + self.found_yield = true; + } + } +} + +#[derive(Debug)] +pub struct FunctionAnalysisVisitor { + local_vars: HashSet, + accessed_vars: Vec, +} + +impl Default for FunctionAnalysisVisitor { + fn default() -> Self { + Self::new() + } +} + +impl FunctionAnalysisVisitor { + pub fn new() -> Self { + FunctionAnalysisVisitor { + local_vars: HashSet::new(), + accessed_vars: vec![], + } + } + + /// We return a Vec<_> here because the order is defined as the order in which the variables + /// are accessed. + pub fn get_free_vars(&self) -> Vec { + self.accessed_vars + .iter() + .filter(|item| !self.local_vars.contains(*item)) + .cloned() + .collect() + } + + fn check_for_local_vars(&mut self, statement: &Statement) { + match statement { + Statement::UnpackingAssignment { left, .. } => { + for var in left.iter() { + if let Some(name) = var.as_variable() { + self.local_vars.insert(name.into()); + } + } + } + Statement::Assignment { left, .. } => { + if let Some(name) = left.as_variable() { + self.local_vars.insert(name.into()); + } + } + Statement::CompoundAssignment { target, .. } => { + if let Some(name) = target.as_variable() { + self.local_vars.insert(name.into()); + } + } + _ => {} + } + } + + // When we still did this in the parser, we had this check and comment about not considering + // modules as accessed. We probably want to add that back here. + // + // /// The check with `is_module` is a little bit weird but ultimately makes sense. + // /// + // /// A module does not need to be saved in a closure. + // /// ``` + // /// asyncio.create_task(task1()) + // /// ``` + // /// + // /// Objects or variables do. 
+ // /// ``` + // /// print(b) + // /// print(c.attr) + // /// ``` + // fn save_accessed_var(&mut self, name: &str) { + // if !self.state.is_module(name) { + // self.function_context_stack.with_top_mut(|context| { + // context.insert_accessed_var(name.to_string()); + // }); + // } + // } + // There are more cases where variables might be accessed that we should add here, such as + // variable reads, attribute accesses on objects. + fn check_for_accessed_vars(&mut self, statement: &Statement) { + match statement { + Statement::Expression(expr) => match expr { + Expr::FunctionCall { args, .. } => { + for arg in args.args.iter() { + if let Some(name) = arg.as_variable() { + self.accessed_vars.push(name.into()); + } + } + } + Expr::NoOp => {} + _ => {} + }, + Statement::Nonlocal(var) => { + self.accessed_vars.push(var.into()); + } + Statement::Global(var) => { + self.accessed_vars.push(var.into()); + } + _ => {} + } + } +} + +impl Visitor for FunctionAnalysisVisitor { + fn visit_block(&mut self, _program: &Block) {} + + fn visit_statement(&mut self, statement: &Statement) { + log(LogLevel::Trace, || format!("Visiting {:?}", statement)); + + self.check_for_local_vars(statement); + self.check_for_accessed_vars(statement); + } +} diff --git a/src/parser/types.rs b/src/parser/types.rs new file mode 100644 index 0000000..62a52bb --- /dev/null +++ b/src/parser/types.rs @@ -0,0 +1,655 @@ +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; + +use crate::parser::static_analysis::{FunctionAnalysisVisitor, Visitor}; +use crate::types::errors::InterpreterError; + +/// There are a handful of places where we reference a variable and it must be a variable name +/// only, not an expression. There is nothing to resolve or evaluate on these. Using [`String`] +/// here works, but we create a [`Variable`] to be more expressive and add type-safety.
+pub type Variable = String; + +#[derive(Debug, PartialEq, Clone)] +pub enum TypeNode { + Generic { + base_type: String, + parameters: Vec, + }, + Union(Vec), + Basic(String), +} + +/// The three conversion modes supported in Python f-strings. These are specified by !s (the +/// default), !r, and !a, respectively. +/// Reference: https://docs.python.org/3/reference/lexical_analysis.html#f-strings +#[derive(Debug, PartialEq, Clone)] +pub enum FormatOption { + Str, + Repr, + Ascii, +} + +/// A container for an `Expr` inside braces in an f-string and an optional conversion identifier +/// `FormatOption`. It's not optional in this struct because the parser defaults to +/// `FormatOption::Str`. +#[derive(Debug, PartialEq, Clone)] +pub struct ExprFormat { + pub expr: Box, + pub format: FormatOption, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum FStringPart { + String(String), + Expr(ExprFormat), +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ImportPath { + Absolute(Vec), + /// Any import path which _starts_ with a dot. The `usize` represents the number of levels up + /// to look for the path. 
This means that this example is in the same directory + /// + /// ```python + /// from .package.module import symbol + /// ``` + /// + /// While this would look one directory up + /// + /// ```python + /// from ..package.module import symbol + /// ``` + Relative(usize, Vec), +} + +impl ImportPath { + pub fn as_str(&self) -> String { + match self { + ImportPath::Absolute(path) => path.join("."), + ImportPath::Relative(levels, path) => ".".repeat(*levels) + &path.join("."), + } + } + + pub fn segments(&self) -> Vec { + match self { + ImportPath::Absolute(path) => path.clone(), + ImportPath::Relative(_, path) => path.clone(), + } + } +} + +#[derive(Debug, Hash, Eq, PartialEq, Clone)] +pub enum BinOp { + Add, + Sub, + Mul, + Div, + IntegerDiv, + In, + NotIn, + Is, + IsNot, + GreaterThan, + LessThan, + GreaterThanOrEqual, + LessThanOrEqual, + Equals, + NotEquals, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + LeftShift, + RightShift, + Mod, + MatMul, + Expo, +} + +#[derive(Debug, Hash, Eq, PartialEq, Clone)] +pub enum UnaryOp { + Not, + Minus, + Plus, + BitwiseNot, + Unpack, +} + +#[derive(Debug, Hash, Eq, PartialEq, Clone)] +pub enum LogicalOp { + And, + Or, +} + +/// A `Closure` records which variables a function accesses from an outer scope, i.e. the free +/// variables captured as part of a closure. +#[derive(Clone, PartialEq, Debug)] +pub struct Closure { + free_vars: Vec, +} + +impl Closure { + pub fn new(free_vars: Vec) -> Self { + Self { free_vars } + } + + pub fn get_free_vars(&self) -> Vec { + self.free_vars.clone() + } +} + +impl From for Closure { + fn from(visitor: FunctionAnalysisVisitor) -> Self { + Self::new(visitor.get_free_vars()) + } +} + +/// An individual function argument and its optional default. +#[derive(Clone, PartialEq, Debug)] +pub struct ParsedArgDefinition { + pub arg: Variable, + pub default: Option, +} + +#[derive(Clone, PartialEq, Debug, Default)] +pub struct ParsedArgDefinitions { + /// The variables for all the positional arguments.
+ /// ```python + /// def foo(a, b): + /// ... + /// ``` + pub args: Vec, + + /// An optional variable to hold arguments passed in for variable arity. + /// ```python + /// def foo(*args): + /// ... + /// ``` + pub args_var: Option, + + /// An optional variable to hold arguments passed in by keyword. + /// ```python + /// def foo(**kwargs): + /// ... + /// ``` + pub kwargs_var: Option, +} + +pub enum ParsedArgument { + Keyword { arg: Variable, expr: Expr }, + Positional(Expr), +} + +/// All the information provided when a function is called (besides the name of the function). +#[derive(Clone, PartialEq, Debug)] +pub struct ParsedArguments { + /// Any args passed in positionally. + /// ```python + /// foo(1, 2) + /// ``` + pub args: Vec, + + /// Any keyword arguments passed in as literals. For example, + /// ```python + /// foo(a=1, b=2) + /// foo(**{'a': 1, 'b': 2}) + /// ``` + pub kwargs: HashMap, + + /// Any variable-arity arguments passed in through a variable. For example, + /// ```python + /// args = [1, 2] + /// foo(*args) + /// ``` + /// The `Expr` here references a variable which will be read during the interpreter stage. + pub args_var: Option>, + + /// Any keyword arguments passed in through a variable. For example, + /// ```python + /// kwargs = {'a': 1, 'b': 2} + /// foo(**kwargs) + /// ``` + /// The `Expr` here references a variable which will be read during the interpreter stage.
+ pub kwargs_var: Option>, +} + +impl ParsedArguments { + pub fn empty() -> Self { + Self { + args: vec![], + kwargs: HashMap::new(), + args_var: None, + kwargs_var: None, + } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub struct ParsedSliceParams { + pub start: Option>, + pub stop: Option>, + pub step: Option>, +} + +#[derive(Clone, PartialEq, Debug)] +pub struct ForClause { + pub indices: Vec, + pub iterable: Box, + pub condition: Option>, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Expr { + NoOp, + None, + NotImplemented, + Ellipsis, + Integer(i64), + FloatingPoint(f64), + Boolean(bool), + Variable(String), + StringLiteral(String), + ByteStringLiteral(Vec), + List(Vec), + Set(HashSet), + Dict(HashMap), + Tuple(Vec), + FString(Vec), + BinaryOperation { + left: Box, + op: BinOp, + right: Box, + }, + UnaryOperation { + op: UnaryOp, + right: Box, + }, + LogicalOperation { + left: Box, + op: LogicalOp, + right: Box, + }, + TernaryOp { + condition: Box, + if_value: Box, + else_value: Box, + }, + MemberAccess { + object: Box, + field: String, + }, + IndexAccess { + object: Box, + index: Box, + }, + SliceOperation { + object: Box, + params: ParsedSliceParams, + }, + Await { + right: Box, + }, + FunctionCall { + name: String, + args: ParsedArguments, + callee: Option>, + }, + ClassInstantiation { + name: String, + args: ParsedArguments, + }, + MethodCall { + object: Box, + name: String, + args: ParsedArguments, + }, + GeneratorComprehension { + body: Box, + clauses: Vec, + }, + ListComprehension { + body: Box, + clauses: Vec, + }, + SetComprehension { + body: Box, + clauses: Vec, + }, + DictComprehension { + key: String, + value: String, + range: Box, + key_body: Box, + value_body: Box, + }, + Lambda { + args: Box, + expr: Box, + is_generator: bool, + }, + TypeNode(TypeNode), +} + +impl Expr { + pub fn as_variable(&self) -> Option<&String> { + match self { + Expr::Variable(name) => Some(name), + _ => None, + } + } + + pub fn as_string(&self) -> Option<&String> 
{ + match self { + Expr::StringLiteral(name) => Some(name), + _ => None, + } + } +} + +// For some reason, we have to create this here for the Eq trait to be +// satisfied for f64. +impl Eq for Expr {} + +// Is the empty function body going to cause weirdness on HashSet? +impl Hash for Expr { + fn hash(&self, _state: &mut H) + where + H: Hasher, + { + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ExceptionLiteral { + Exception, + ZeroDivisionError, + IOError, + ImportError, + StopIteration, + TypeError, + Custom(String), +} + +impl TryFrom for ExceptionLiteral { + type Error = InterpreterError; + + fn try_from(value: InterpreterError) -> Result { + if let InterpreterError::DivisionByZero(..) = value { + Ok(ExceptionLiteral::ZeroDivisionError) + } else if let InterpreterError::ModuleNotFound(..) = value { + Ok(ExceptionLiteral::ImportError) + } else if let InterpreterError::TypeError(..) = value { + Ok(ExceptionLiteral::TypeError) + } else { + Err(InterpreterError::RuntimeError) + } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub struct ExceptionInstance { + pub literal: ExceptionLiteral, + pub args: ParsedArguments, +} + +#[derive(Clone, PartialEq, Debug)] +pub struct ExceptClause { + pub exception_types: Vec, + pub block: Block, +} + +#[derive(Debug, PartialEq, Clone)] +pub struct HandledException { + pub literal: ExceptionLiteral, + pub alias: Option, +} + +#[derive(Clone, PartialEq, Debug)] +pub struct ConditionalBlock { + pub condition: Expr, + pub block: Block, +} + +#[derive(Debug, PartialEq, Clone)] +pub struct Block { + pub statements: Vec, +} + +impl Block { + pub fn new(statements: Vec) -> Self { + Self { statements } + } + + pub fn accept(&self, visitor: &mut V) { + visitor.visit_block(self); + for statement in &self.statements { + statement.accept(visitor); + } + } +} + +/// This represents one of the comma-separated values being imported. This is only used in +/// selective imports right now. 
+/// +/// ```python +/// from module_a import one, two as three, four +/// ``` +#[derive(Debug, PartialEq, Clone)] +pub enum ImportedItem { + Direct(String), + Alias(Alias), +} + +impl ImportedItem { + pub fn as_imported_symbol(&self) -> String { + match self { + ImportedItem::Direct(name) => name.clone(), + ImportedItem::Alias(alias) => alias.remap(), + } + } + + pub fn as_original_symbol(&self) -> String { + match self { + ImportedItem::Direct(name) => name.clone(), + ImportedItem::Alias(alias) => alias.original(), + } + } +} + +#[derive(Debug, PartialEq, Clone)] +pub struct Alias { + pub symbol: String, + pub alias_symbol: Option, +} + +impl Alias { + fn original(&self) -> String { + self.symbol.to_owned() + } + + fn remap(&self) -> String { + if let Some(s) = &self.alias_symbol { + return s.to_string(); + } + + self.symbol.to_owned() + } +} + +/// Indicates whether a single variable or a `Tuple` of variables should be unpacked on each +/// iteration of a `for` loop. +#[derive(Debug, PartialEq, Clone)] +pub enum LoopIndex { + /// Used when the iterable yields a single value. + /// ```python + /// for i in a: + /// ... + /// ``` + Variable(String), + + /// Used when the iterable yields a tuple of values. + /// ```python + /// for k, v in a.items(): + /// ... + /// ``` + Tuple(Vec), +} + +/// Perform the listed operation before assigning the result.
+#[derive(Debug, PartialEq, Clone)] +pub enum CompoundOperator { + Add, + Subtract, + Multiply, + Divide, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + IntegerDiv, + LeftShift, + RightShift, + Mod, + MatMul, + Expo, +} + +impl CompoundOperator { + pub fn to_bin_op(&self) -> BinOp { + match self { + CompoundOperator::Add => BinOp::Add, + CompoundOperator::Subtract => BinOp::Sub, + CompoundOperator::Multiply => BinOp::Mul, + CompoundOperator::Divide => BinOp::Div, + CompoundOperator::BitwiseAnd => BinOp::BitwiseAnd, + CompoundOperator::BitwiseOr => BinOp::BitwiseOr, + CompoundOperator::BitwiseXor => BinOp::BitwiseXor, + CompoundOperator::IntegerDiv => BinOp::IntegerDiv, + CompoundOperator::LeftShift => BinOp::LeftShift, + CompoundOperator::RightShift => BinOp::RightShift, + CompoundOperator::Mod => BinOp::Mod, + CompoundOperator::MatMul => BinOp::MatMul, + CompoundOperator::Expo => BinOp::Expo, + } + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum Statement { + Expression(Expr), + Pass, + Break, + Continue, + Assert(Expr), + Delete(Vec), + Assignment { + left: Expr, + right: Expr, + }, + /// This is different from `UnpackingAssignment` in that every value on the LHS is assigned the + /// value of the RHS. + /// + /// Will these two ever happen at the same time? Perhaps.
+ MultipleAssignment { + left: Vec, + right: Expr, + }, + UnpackingAssignment { + left: Vec, + right: Expr, + }, + CompoundAssignment { + operator: CompoundOperator, + target: Box, + value: Box, + }, + FunctionDef { + name: String, + args: ParsedArgDefinitions, + body: Block, + decorators: Vec, + is_async: bool, + }, + ClassDef { + name: String, + parents: Vec, + metaclass: Option, + body: Block, + }, + Return(Vec), + Yield(Expr), + YieldFrom(Expr), + Nonlocal(Variable), + Global(Variable), + IfElse { + if_part: ConditionalBlock, + elif_parts: Vec, + else_part: Option, + }, + WhileLoop { + condition: Expr, + body: Block, + }, + ForInLoop { + index: LoopIndex, + iterable: Expr, + body: Block, + else_block: Option, + }, + RegularImport { + import_path: ImportPath, + alias: Option, + }, + SelectiveImport { + import_path: ImportPath, + items: Vec, + wildcard: bool, + }, + TryExcept { + try_block: Block, + except_clauses: Vec, + else_block: Option, + finally_block: Option, + }, + Raise(Option), + ContextManager { + expr: Expr, + variable: Option, + block: Block, + }, +} + +impl Statement { + /// Visit this statement, then walk the AST to any nested blocks. + pub fn accept(&self, visitor: &mut V) { + visitor.visit_statement(self); + + match self { + Statement::FunctionDef { body, .. } => { + body.accept(visitor); + } + Statement::WhileLoop { body, .. } => { + body.accept(visitor); + } + Statement::ForInLoop { body, .. } => { + body.accept(visitor); + } + Statement::ContextManager { block, .. 
} => { + block.accept(visitor); + } + Statement::IfElse { + if_part, + elif_parts, + else_part, + } => { + if_part.block.accept(visitor); + for part in elif_parts { + part.block.accept(visitor); + } + + if let Some(else_part) = else_part { + else_part.accept(visitor); + } + } + _ => {} + } + } +} diff --git a/src/treewalk/call_stack.rs b/src/treewalk/call_stack.rs new file mode 100644 index 0000000..a3a6ac8 --- /dev/null +++ b/src/treewalk/call_stack.rs @@ -0,0 +1,122 @@ +use std::fmt::{Display, Error, Formatter}; +use std::path::PathBuf; + +use crate::treewalk::types::function::Function; + +use super::LoadedModule; + +#[derive(Debug, PartialEq, Clone)] +pub struct StackFrame { + pub function_name: Option, + pub file_path: Option, + pub line_number: usize, +} + +impl StackFrame { + pub fn new_root(file_path: PathBuf) -> Self { + Self { + function_name: None, + file_path: Some(file_path), + line_number: 1, + } + } + + pub fn new_module(module: LoadedModule) -> Self { + Self { + function_name: Some(module.name()), + file_path: Some(module.path()), + line_number: 1, + } + } + + pub fn new_function(function: Function) -> Self { + Self { + function_name: Some(function.name), + file_path: Some(function.module.borrow().path()), + line_number: function.line_number, + } + } + + fn set_line(&mut self, line: usize) { + self.line_number = line; + } +} + +#[derive(Debug, PartialEq, Clone)] +pub struct CallStack { + pub frames: Vec, +} + +// Example from Python: +// +// File [1] "/Users/tyler/Documents/repos/memphis/examples/test.py", [3] line 37, in [3] +// [3] other.something() +// File [2] "/Users/tyler/Documents/repos/memphis/examples/other.py", [4] line 4, in [4] something +// [4] third() +// File [2] "/Users/tyler/Documents/repos/memphis/examples/other.py", [5] line 7, in [5] third +// [5] fourth() +// +// Events: +// [1] root module loaded +// [2] other.py imported +// [3] other.something() called +// [4] third() called +// [5] fourth() called unsuccessfully, error thrown 
+ +impl Default for CallStack { + fn default() -> Self { + Self::new() + } +} + +impl CallStack { + pub fn new() -> Self { + Self { frames: vec![] } + } + + pub fn push_context(&mut self, stack_frame: StackFrame) { + self.frames.push(stack_frame); + } + + pub fn pop_context(&mut self) -> Option { + self.frames.pop() + } + + pub fn set_line(&mut self, line: usize) { + self.frames.last_mut().unwrap().set_line(line); + } + + /// This is useful for stack traces, so that you know what line number to begin counting from + /// when executing a block. + pub fn line_number(&self) -> usize { + self.frames.last().unwrap().line_number + } + + /// This is useful for relative imports, so that you know where a path is relative from. + pub fn current_path(&self) -> Option { + self.frames.last().unwrap().file_path.clone() + } +} + +impl Display for CallStack { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + writeln!(f, "Traceback (most recent call last):")?; + for frame in &self.frames { + let file_path = frame + .file_path + .as_ref() + .and_then(|f| f.to_str()) + .unwrap_or(""); + let context = frame + .function_name + .clone() + .unwrap_or("".to_string()); + writeln!( + f, + " File \"{}\", line {}, in {}", + file_path, frame.line_number, context + )?; + } + Ok(()) + } +} diff --git a/src/treewalk/evaluators.rs b/src/treewalk/evaluators.rs new file mode 100644 index 0000000..0b086ce --- /dev/null +++ b/src/treewalk/evaluators.rs @@ -0,0 +1,188 @@ +use crate::{ + core::{Container, Storable}, + parser::types::{BinOp, LogicalOp, UnaryOp}, + treewalk::types::{ExprResult, List}, + types::errors::InterpreterError, +}; + +use super::CallStack; + +pub(crate) fn evaluate_logical_op( + left: bool, + op: &LogicalOp, + right: bool, +) -> Result { + match op { + LogicalOp::And => Ok(ExprResult::Boolean(left && right)), + LogicalOp::Or => Ok(ExprResult::Boolean(left || right)), + } +} + +pub(crate) fn evaluate_integer_operation( + left: i64, + op: &BinOp, + right: i64, + 
call_stack: CallStack, +) -> Result { + match op { + BinOp::Add => Ok(ExprResult::Integer(Container::new(left + right))), + BinOp::Sub => Ok(ExprResult::Integer(Container::new(left - right))), + BinOp::Mul => Ok(ExprResult::Integer(Container::new(left * right))), + BinOp::Div => { + if right == 0 { + Err(InterpreterError::DivisionByZero( + "division by zero".into(), + call_stack, + )) + } else { + Ok(ExprResult::Integer(Container::new(left / right))) + } + } + BinOp::IntegerDiv => { + if right == 0 { + Err(InterpreterError::DivisionByZero( + "integer division or modulo by zero".into(), + call_stack, + )) + } else { + Ok(ExprResult::Integer(Container::new(left / right))) + } + } + BinOp::Mod => { + if right == 0 { + Err(InterpreterError::DivisionByZero( + "integer division or modulo by zero".into(), + call_stack, + )) + } else { + Ok(ExprResult::Integer(Container::new(left % right))) + } + } + BinOp::GreaterThan => Ok(ExprResult::Boolean(left > right)), + BinOp::LessThan => Ok(ExprResult::Boolean(left < right)), + BinOp::GreaterThanOrEqual => Ok(ExprResult::Boolean(left >= right)), + BinOp::LessThanOrEqual => Ok(ExprResult::Boolean(left <= right)), + BinOp::Equals => Ok(ExprResult::Boolean(left == right)), + BinOp::NotEquals => Ok(ExprResult::Boolean(left != right)), + BinOp::BitwiseAnd => Ok(ExprResult::Integer(Container::new(left & right))), + BinOp::BitwiseOr => Ok(ExprResult::Integer(Container::new(left | right))), + BinOp::BitwiseXor => Ok(ExprResult::Integer(Container::new(left ^ right))), + BinOp::LeftShift => { + if right > 100 { + // TODO support long ranges. 
This is found in _collections_abc.py + // longrange_iterator = type(iter(range(1 << 1000))) + Ok(ExprResult::Integer(Container::new(left << 10))) + } else { + Ok(ExprResult::Integer(Container::new(left << right))) + } + } + BinOp::RightShift => Ok(ExprResult::Integer(Container::new(left >> right))), + BinOp::In => Err(InterpreterError::ExpectedIterable(call_stack)), + BinOp::NotIn => Err(InterpreterError::ExpectedIterable(call_stack)), + BinOp::Expo => { + let right: u32 = right + .try_into() + .map_err(|_| InterpreterError::RuntimeError)?; + Ok(ExprResult::Integer(Container::new(left.pow(right)))) + } + _ => unreachable!(), + } +} + +pub(crate) fn evaluate_floating_point_operation( + left: f64, + op: &BinOp, + right: f64, + call_stack: CallStack, +) -> Result { + match op { + BinOp::Add => Ok(ExprResult::FloatingPoint(left + right)), + BinOp::Sub => Ok(ExprResult::FloatingPoint(left - right)), + BinOp::Mul => Ok(ExprResult::FloatingPoint(left * right)), + BinOp::Div => { + if right == 0.0 { + Err(InterpreterError::DivisionByZero( + "float division by zero".into(), + call_stack, + )) + } else { + Ok(ExprResult::FloatingPoint(left / right)) + } + } + BinOp::GreaterThan => Ok(ExprResult::Boolean(left > right)), + BinOp::LessThan => Ok(ExprResult::Boolean(left < right)), + BinOp::GreaterThanOrEqual => Ok(ExprResult::Boolean(left >= right)), + BinOp::LessThanOrEqual => Ok(ExprResult::Boolean(left <= right)), + BinOp::Equals => Ok(ExprResult::Boolean(left == right)), + BinOp::NotEquals => Ok(ExprResult::Boolean(left != right)), + _ => unimplemented!(), + } +} + +pub(crate) fn evaluate_object_comparison( + left: ExprResult, + op: &BinOp, + right: ExprResult, +) -> Result { + match op { + BinOp::Equals => Ok(ExprResult::Boolean(left == right)), + BinOp::NotEquals => Ok(ExprResult::Boolean(left != right)), + BinOp::Is => Ok(ExprResult::Boolean(left.is(&right))), + BinOp::IsNot => Ok(ExprResult::Boolean(!left.is(&right))), + _ => unimplemented!(), + } +} + +pub(crate) fn 
evaluate_unary_operation( + op: &UnaryOp, + right: ExprResult, + call_stack: CallStack, +) -> Result { + match op { + UnaryOp::Minus => Ok(right.negated()), + // this acts as a no-op. can be overridden with __pos__ for custom classes + UnaryOp::Plus => Ok(right), + UnaryOp::Not => Ok(right.inverted()), + UnaryOp::BitwiseNot => { + let i = right.as_integer().ok_or(InterpreterError::TypeError( + Some(format!( + "bad operand type for unary ~: '{}'", + right.get_type() + )), + call_stack, + ))?; + + let o = !*i.borrow(); + + Ok(ExprResult::Integer(o.store())) + } + UnaryOp::Unpack => { + let list = right + .as_list() + // Attempted to unpack a non-iterable + .ok_or(InterpreterError::TypeError( + Some(format!( + "Value after * must be an iterable, not {}", + right.get_type() + )), + call_stack, + ))?; + Ok(ExprResult::List(list)) + } + } +} + +pub(crate) fn evaluate_list_operation( + left: Container, + op: &BinOp, + right: Container, +) -> Result { + let l = left.borrow().clone(); + let r = right.borrow().clone(); + match op { + BinOp::Add => Ok(ExprResult::List(Container::new(l + r))), + BinOp::Equals => Ok(ExprResult::Boolean(l == r)), + BinOp::NotEquals => Ok(ExprResult::Boolean(l != r)), + _ => unimplemented!(), + } +} diff --git a/src/treewalk/execution_context.rs b/src/treewalk/execution_context.rs new file mode 100644 index 0000000..fa6c5c5 --- /dev/null +++ b/src/treewalk/execution_context.rs @@ -0,0 +1,71 @@ +use crate::{ + core::{Container, Stack}, + treewalk::types::{Class, ExprResult, Function}, +}; + +/// This struct stores data for operations related to function calls and class/instance contexts. +pub struct ExecutionContextManager { + /// A stack to hold the current [`Class`] being defined (i.e. its lexical scope). We need this + /// so we can associate a function with its class. + lexical_class_stack: Stack>, + + /// A stack to hold the current [`Function`] being evaluated. A method will push something onto + /// this stack and the receiver stack below. 
+ current_function_stack: Stack>, + + /// A stack to hold the current [`ExprResult`] being evaluated on. We need this whenever + /// `super()` is called. + /// + /// We do not need a container here because the [`Object`] and [`Class`] variants of + /// [`ExprResult`] already are wrapped in a [`Container`]. + current_receiver_stack: Stack, +} + +impl ExecutionContextManager { + pub fn new() -> Self { + Self { + lexical_class_stack: Stack::default(), + current_function_stack: Stack::default(), + current_receiver_stack: Stack::default(), + } + } + + pub fn push_class(&mut self, class: Container) { + self.lexical_class_stack.push(class); + } + + pub fn pop_class(&mut self) -> Option> { + self.lexical_class_stack.pop() + } + + pub fn push_function(&mut self, function: Container) { + self.current_function_stack.push(function); + } + + pub fn pop_function(&mut self) -> Option> { + self.current_function_stack.pop() + } + + pub fn push_receiver(&mut self, receiver: ExprResult) { + self.current_receiver_stack.push(receiver); + } + + pub fn pop_receiver(&mut self) -> Option { + self.current_receiver_stack.pop() + } + + /// Return the currently executing function. + pub fn read_current_function(&self) -> Option> { + self.current_function_stack.top() + } + + /// Return the currently executing receiver. + pub fn read_current_receiver(&self) -> Option { + self.current_receiver_stack.top() + } + + /// Return the current class according to lexical scoping rules.
+ pub fn read_class(&self) -> Option> { + self.lexical_class_stack.top() + } +} diff --git a/src/treewalk/executor.rs b/src/treewalk/executor.rs new file mode 100644 index 0000000..d4e17ed --- /dev/null +++ b/src/treewalk/executor.rs @@ -0,0 +1,223 @@ +use crate::{ + core::Container, + treewalk::types::{ + builtins::utils, function::BindingType, pausable::Pausable, traits::Callable, + utils::ResolvedArguments, Coroutine, ExprResult, + }, + types::errors::InterpreterError, +}; + +use super::Interpreter; + +/// An event loop which runs `Coroutine` objects using the `CoroutineExecutor` utility. +pub struct Executor { + pub current_coroutine: Container>>, + running: Container>>, + spawned: Container>>, + to_wait: Container, Container)>>, + + /// In theory this does not need to be in a `Container` because it is not shared. However, we + /// currently call `executor.call(..)` with a reference to the `Executor`, which does not allow + /// itself to be borrowed as mutable. We may want to unwind this in the future. + sleep_indicator: Container>, +} + +impl Default for Executor { + fn default() -> Self { + Self::new() + } +} + +impl Executor { + /// Create an `Executor`. + pub fn new() -> Self { + Self { + current_coroutine: Container::new(None), + running: Container::new(vec![]), + spawned: Container::new(vec![]), + to_wait: Container::new(vec![]), + sleep_indicator: Container::new(None), + } + } + + fn set_current_coroutine(&self, coroutine: Container) { + *self.current_coroutine.borrow_mut() = Some(coroutine); + } + + fn clear_current_coroutine(&self) { + *self.current_coroutine.borrow_mut() = None; + } + + /// Do the next piece of work on a given `Coroutine`. After its work is done, check to + /// see if it was put to sleep and handle it accordingly. 
+ fn call( + &self, + interpreter: &Interpreter, + coroutine: Container, + ) -> Result { + self.set_current_coroutine(coroutine.clone()); + + coroutine.run_until_pause(interpreter)?; + + if let Some(duration) = *self.sleep_indicator.borrow() { + coroutine.borrow_mut().sleep(duration); + } + *self.sleep_indicator.borrow_mut() = None; + + self.clear_current_coroutine(); + Ok(ExprResult::None) + } + + /// The main interface to the `Executor` event loop. An `ExprResult` will be returned once the + /// coroutine has resolved. + pub fn run( + &self, + interpreter: &Interpreter, + coroutine: Container, + ) -> Result { + let executor = Container::new(self); + executor + .borrow() + .running + .borrow_mut() + .push(coroutine.clone()); + + loop { + // Run every coroutine on this event loop that has work to do. + for c in executor.borrow().running.borrow_mut().iter() { + if c.borrow().has_work() { + let _ = executor.borrow().call(interpreter, c.clone())?; + } + } + + for pair in executor.borrow().to_wait.borrow_mut().iter() { + pair.0.borrow_mut().wait_on(pair.1.clone()); + if !pair.1.has_started() { + executor.borrow().spawn(pair.1.clone())?; + } + } + executor.borrow().to_wait.borrow_mut().clear(); + + // Call any coroutines spawned during this iteration and add them to the queue for the + // next iteration. + for c in executor.borrow().spawned.borrow_mut().iter() { + let _ = executor.borrow().call(interpreter, c.clone())?; + executor.borrow().running.borrow_mut().push(c.clone()); + } + executor.borrow().spawned.borrow_mut().clear(); + + // The event loop exits when its original coroutine has completed all its work. Other + // spawned coroutines may or may not be finished by this time. + if let Some(result) = coroutine.borrow().is_finished() { + return Ok(result); + } + } + } + + /// Launch a new `Coroutine`. This will be consumed at the end of the current iteration of the event loop. 
+ pub fn spawn(&self, coroutine: Container) -> Result { + coroutine.context().start(); + self.spawned.borrow_mut().push(coroutine.clone()); + Ok(ExprResult::Coroutine(coroutine)) + } + + pub fn sleep(&self, duration: f64) -> Result { + *self.sleep_indicator.borrow_mut() = Some(duration); + Err(InterpreterError::EncounteredSleep) + } + + pub fn set_wait_on(&self, first: Container, second: Container) { + self.to_wait + .borrow_mut() + .push((first.clone(), second.clone())); + } +} + +pub struct AsyncioRunBuiltin; +pub struct AsyncioSleepBuiltin; +pub struct AsyncioCreateTaskBuiltin; + +impl Callable for AsyncioRunBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let coroutine = + args.get_arg(0) + .as_coroutine() + .ok_or(InterpreterError::ExpectedCoroutine( + interpreter.state.call_stack(), + ))?; + + let executor = interpreter.state.get_executor(); + let result = executor.borrow().run(interpreter, coroutine); + drop(executor); + result + } + + fn name(&self) -> String { + "run".into() + } + + fn binding_type(&self) -> BindingType { + // module functions must be static because modules are not bound to their methods + BindingType::Static + } +} + +impl Callable for AsyncioSleepBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let duration = args + .get_arg(0) + .as_fp() + .ok_or(InterpreterError::ExpectedFloatingPoint( + interpreter.state.call_stack(), + ))?; + + interpreter.state.get_executor().borrow().sleep(duration) + } + + fn name(&self) -> String { + "sleep".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for AsyncioCreateTaskBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, 
interpreter.state.call_stack())?; + + let coroutine = + args.get_arg(0) + .as_coroutine() + .ok_or(InterpreterError::ExpectedCoroutine( + interpreter.state.call_stack(), + ))?; + + interpreter.state.get_executor().borrow().spawn(coroutine) + } + + fn name(&self) -> String { + "create_task".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/interpreter.rs b/src/treewalk/interpreter.rs new file mode 100644 index 0000000..d30a31d --- /dev/null +++ b/src/treewalk/interpreter.rs @@ -0,0 +1,8530 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::Write; + +use super::{evaluators, Scope, StackFrame, State}; +use crate::core::{log, Container, InterpreterEntrypoint, LogLevel}; +use crate::init::Builder; +use crate::parser::types::{ + BinOp, Block, CompoundOperator, ConditionalBlock, ExceptClause, ExceptionInstance, + ExceptionLiteral, Expr, FStringPart, ForClause, ImportPath, ImportedItem, LogicalOp, LoopIndex, + ParsedArgDefinitions, ParsedArguments, ParsedSliceParams, Statement, TypeNode, UnaryOp, + Variable, +}; +use crate::parser::Parser; +use crate::treewalk::types::{ + class::InstantiationType, + function::{BindingType, FunctionType}, + iterators::GeneratorIterator, + traits::{Callable, MemberAccessor, ModuleInterface}, + utils::{Dunder, ResolvedArguments}, + Bytes, Class, Coroutine, Dict, ExprResult, Function, Generator, List, Module, Set, Slice, Str, + Super, Tuple, +}; +use crate::types::errors::{InterpreterError, MemphisError}; + +#[derive(Clone)] +pub struct Interpreter { + pub state: Container, +} + +impl Interpreter { + pub fn new(state: Container) -> Self { + Interpreter { state } + } + + pub fn call( + &self, + callable: Container>, + arguments: &ResolvedArguments, + ) -> Result { + match self._call(callable, arguments) { + Err(InterpreterError::EncounteredReturn(result)) => Ok(result), + Err(e) => Err(e), + Ok(result) => Ok(result), + } + } + + fn _call( + &self, + callable: Container>, + 
arguments: &ResolvedArguments, + ) -> Result { + let binding_type = callable.borrow().binding_type(); + let receiver = callable.borrow().receiver(); + + // Throw an error if an instance method is called without a receiver or with + // a class receiver. + if binding_type == BindingType::Instance + && (receiver.is_none() || receiver.clone().is_some_and(|r| r.is_class())) + { + return Err(InterpreterError::TypeError( + Some(format!( + "{} missing 1 required positional argument: 'self'", + callable.borrow().name() + )), + self.state.call_stack(), + )); + } + + let mut bound_args = arguments.clone(); + if let Some(receiver) = receiver { + bound_args.bind(receiver); + } + + callable.borrow().call(self, bound_args) + } + + pub fn evaluate_super(&self) -> Result { + let function = self + .state + .current_function() + .ok_or(InterpreterError::Exception(self.state.call_stack()))?; + + let binding_type = function.borrow().clone().binding_type(); + + match binding_type { + // If we are evaluating a static function, `super()` should just return the class the + // function belongs to. This should only occur for `Dunder::New`. To my knowledge, + // that is the only statically-bound function that permits calls to `super()`. + BindingType::Static => { + assert_eq!(function.borrow().name, Dunder::New.value()); + + let class = function + .borrow() + .clone() + .class_context + .ok_or(InterpreterError::Exception(self.state.call_stack()))?; + + Ok(ExprResult::Super(Container::new(Super::new( + ExprResult::Class(class), + )))) + } + // Otherwise, return the receiver for the given method. 
+ BindingType::Instance | BindingType::Class => { + let receiver = self + .state + .current_receiver() + .ok_or(InterpreterError::Exception(self.state.call_stack()))?; + Ok(ExprResult::Super(Container::new(Super::new(receiver)))) + } + } + } + + fn evaluate_variable(&self, name: &str) -> Result { + self.state + .read(name) + .ok_or(InterpreterError::VariableNotFound( + name.to_owned(), + self.state.call_stack(), + )) + } + + fn evaluate_unary_operation( + &self, + op: &UnaryOp, + right: &Expr, + ) -> Result { + let right = self.evaluate_expr(right)?; + evaluators::evaluate_unary_operation(op, right, self.state.call_stack()) + } + + fn evaluate_ternary_operation( + &self, + condition: &Expr, + if_value: &Expr, + else_value: &Expr, + ) -> Result { + if self + .evaluate_expr(condition)? + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? + { + self.evaluate_expr(if_value) + } else { + self.evaluate_expr(else_value) + } + } + + fn evaluate_logical_operation( + &self, + left: &Expr, + op: &LogicalOp, + right: &Expr, + ) -> Result { + let left = self.evaluate_expr(left)?; + let right = self.evaluate_expr(right)?; + left.as_boolean() + .and_then(|left| { + right + .as_boolean() + .map(|right| evaluators::evaluate_logical_op(left, op, right)) + }) + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? 
+ } + + fn evaluate_binary_operation( + &self, + left: &Expr, + op: &BinOp, + right: &Expr, + ) -> Result { + let left = self.evaluate_expr(left)?; + let right = self.evaluate_expr(right)?; + + if matches!(op, BinOp::In) { + if let Some(mut iterable) = right.try_into_iter() { + return Ok(ExprResult::Boolean(iterable.contains(left))); + } + return Err(InterpreterError::ExpectedIterable(self.state.call_stack())); + } + + if matches!(op, BinOp::NotIn) { + if let Some(mut iterable) = right.try_into_iter() { + return Ok(ExprResult::Boolean(!iterable.contains(left))); + } + return Err(InterpreterError::ExpectedIterable(self.state.call_stack())); + } + + if left.is_integer() && right.is_integer() { + left.as_integer_val() + .and_then(|left| { + right.as_integer_val().map(|right| { + evaluators::evaluate_integer_operation( + left, + op, + right, + self.state.call_stack(), + ) + }) + }) + .ok_or(InterpreterError::ExpectedInteger(self.state.call_stack()))? + } else if left.is_fp() && right.is_fp() { + left.as_fp() + .and_then(|left| { + right.as_fp().map(|right| { + evaluators::evaluate_floating_point_operation( + left, + op, + right, + self.state.call_stack(), + ) + }) + }) + .ok_or(InterpreterError::ExpectedFloatingPoint( + self.state.call_stack(), + ))? + } else if left.as_list().is_some() && right.as_list().is_some() { + left.as_list() + .and_then(|left| { + right + .as_list() + .map(|right| evaluators::evaluate_list_operation(left, op, right)) + }) + .ok_or(InterpreterError::ExpectedFloatingPoint( + self.state.call_stack(), + ))? 
+ } else if left.as_object().is_some() + && right.as_object().is_some() + && matches!(op, BinOp::Equals | BinOp::NotEquals) + { + match op { + BinOp::Equals => self.evaluate_method( + left, + Dunder::Eq.value(), + &ResolvedArguments::default().add_arg(right), + ), + BinOp::NotEquals => { + if left + .as_object() + .unwrap() + .get(self, Dunder::Ne.value()) + .is_some() + { + self.evaluate_method( + left, + Dunder::Ne.value(), + &ResolvedArguments::default().add_arg(right), + ) + } else { + let result = self.evaluate_method( + left, + Dunder::Eq.value(), + &ResolvedArguments::default().add_arg(right), + )?; + Ok(result.inverted()) + } + } + _ => unreachable!(), + } + } else { + evaluators::evaluate_object_comparison(left, op, right) + } + } + + /// Let's walk through all of the cases that are supported here. + /// + /// expression | initiated by | found on | callable | returns | supported + /// -------------- --------------------------------------------------------------------------------------------- + /// foo.attr | instance | instance | no | 4 | yes + /// foo.func | instance | class | yes | | yes + /// Foo.attr | class | | N/A | | yes + /// Foo.func | class | class | yes | | yes + /// foo.class_attr | instance | class | no | 6 | yes + /// Foo.class_attr | class | class | no | 6 | yes + /// [].__doc__ | instance | class | no | {doc_string} | later + /// [].append | instance | class | yes | | yes + /// list.__doc__ | class | class | no | {doc_string} | later + /// list.append | class | class | yes | | yes + /// list.__dict__ | class | metaclass | no | {mappingproxy} | yes + /// list.mro | class | metaclass | yes | | later + /// type.__dict__ | metaclass | metaclass | no | {mappingproxy} | yes + /// type.mro | metaclass | metaclass | yes | | later + fn evaluate_member_access( + &self, + object: &Expr, + field: &str, + ) -> Result { + let result = self.evaluate_expr(object)?; + self.evaluate_member_access_inner(&result, field) + } + + fn evaluate_member_access_inner( 
+ &self, + result: &ExprResult, + field: &str, + ) -> Result { + log(LogLevel::Debug, || { + format!("Member access {}.{}", result, field) + }); + Ok(result + .as_member_accessor(self) + .get(self, field) + .ok_or(InterpreterError::VariableNotFound( + field.to_string(), + self.state.call_stack(), + ))? + .bind_if_needed(self, result.clone())) + } + + fn evaluate_slice_operation( + &self, + object: &Expr, + params: &ParsedSliceParams, + ) -> Result { + let object_result = self.evaluate_expr(object)?; + let slice = Slice::resolve(self, params)?; + + object_result + .as_index_read() + .ok_or(InterpreterError::TypeError( + Some(format!( + "'{}' object is not subscriptable", + object_result.get_type() + )), + self.state.call_stack(), + ))? + .get(&ExprResult::Slice(slice.clone())) + .ok_or(InterpreterError::KeyError( + slice.to_string(), + self.state.call_stack(), + )) + } + + fn evaluate_index_access( + &self, + object: &Expr, + index: &Expr, + ) -> Result { + let index_result = self.evaluate_expr(index)?; + let object_result = self.evaluate_expr(object)?; + + object_result + .as_index_read() + .ok_or(InterpreterError::TypeError( + Some(format!( + "'{}' object is not subscriptable", + object_result.get_type() + )), + self.state.call_stack(), + ))? + .get(&index_result) + .ok_or(InterpreterError::KeyError( + index_result.to_string(), + self.state.call_stack(), + )) + } + + fn evaluate_list(&self, items: &[Expr]) -> Result { + items + .iter() + .map(|arg| self.evaluate_expr(arg)) + .collect::, _>>() + .map(|l| ExprResult::List(Container::new(List::new(l)))) + } + + fn evaluate_tuple(&self, items: &[Expr]) -> Result { + let mut results = vec![]; + for item in items { + let evaluated = self.evaluate_expr(item)?; + match item { + Expr::UnaryOperation { op, .. 
} => { + if op == &UnaryOp::Unpack { + if let Some(list) = evaluated.as_list() { + for elem in list { + results.push(elem); + } + } else { + // We use a list in `evaluate_unary_operation`, so something is wrong + // if we hit this case. + unreachable!() + } + } else { + results.push(evaluated); + } + } + _ => { + results.push(evaluated); + } + } + } + + Ok(ExprResult::Tuple(Container::new(Tuple::new(results)))) + } + + fn evaluate_set(&self, items: &HashSet) -> Result { + items + .iter() + .map(|arg| self.evaluate_expr(arg)) + .collect::, _>>() + .map(Set::new) + .map(Container::new) + .map(ExprResult::Set) + } + + fn evaluate_dict(&self, items: &HashMap) -> Result { + items + .iter() + .map(|(key, value)| Ok((self.evaluate_expr(key)?, self.evaluate_expr(value)?))) + .collect::, _>>() + .map(|d| ExprResult::Dict(Container::new(Dict::new(d)))) + } + + fn evaluate_await(&self, expr: &Expr) -> Result { + let coroutine_to_await = self + .evaluate_expr(expr)? + .as_coroutine() + .ok_or(InterpreterError::ExpectedCoroutine(self.state.call_stack()))?; + + if let Some(result) = coroutine_to_await.clone().borrow().is_finished() { + Ok(result) + } else if let Some(current_coroutine) = self + .state + .get_executor() + .borrow() + .current_coroutine + .borrow() + .clone() + { + self.state + .get_executor() + .borrow() + .set_wait_on(current_coroutine, coroutine_to_await); + Err(InterpreterError::EncounteredAwait) + } else { + Err(InterpreterError::ExpectedCoroutine(self.state.call_stack())) + } + } + + fn evaluate_delete(&self, exprs: &Vec) -> Result<(), InterpreterError> { + for expr in exprs { + match expr { + Expr::Variable(name) => { + self.state.delete(name); + } + Expr::IndexAccess { object, index } => { + let index_result = self.evaluate_expr(index)?; + let object_result = self.evaluate_expr(object)?; + object_result + .as_index_write() + .ok_or(InterpreterError::TypeError( + Some(format!( + "'{}' object does not support item deletion", + object_result.get_type() + )), 
+ self.state.call_stack(), + ))? + .delete(&index_result); + } + Expr::MemberAccess { object, field } => { + self.evaluate_expr(object)? + .as_member_accessor(self) + .delete(field) + .ok_or(InterpreterError::VariableNotFound( + field.clone(), + self.state.call_stack(), + ))?; + } + _ => return Err(InterpreterError::ExpectedVariable(self.state.call_stack())), + } + } + + Ok(()) + } + + fn evaluate_return(&self, exprs: &[Expr]) -> Result { + assert!(!exprs.is_empty()); + + let results = exprs + .iter() + .map(|arg| self.evaluate_expr(arg)) + .collect::, _>>()?; + + let return_val = if results.len() > 1 { + ExprResult::Tuple(Container::new(Tuple::new(results))) + } else { + results[0].clone() + }; + + Err(InterpreterError::EncounteredReturn(return_val)) + } + + fn evaluate_assert(&self, expr: &Expr) -> Result<(), InterpreterError> { + if self + .evaluate_expr(expr)? + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? + { + Ok(()) + } else { + Err(InterpreterError::AssertionError(self.state.call_stack())) + } + } + + fn evaluate_f_string(&self, parts: &[FStringPart]) -> Result { + let mut result = String::new(); + for part in parts { + match part { + FStringPart::String(s) => { + result.push_str(s); + } + FStringPart::Expr(e) => { + let r = self.evaluate_expr(&e.expr)?; + write!(result, "{}", r).unwrap(); + } + } + } + + Ok(ExprResult::String(Str::new(result))) + } + + fn evaluate_block(&self, block: &Block) -> Result { + let mut result = ExprResult::Void; + for statement in &block.statements { + result = self.evaluate_statement(statement)?; + } + Ok(result) + } + + fn evaluate_function_call( + &self, + name: &str, + arguments: &ParsedArguments, + callee: &Option>, + ) -> Result { + let arguments = ResolvedArguments::from(self, arguments)?; + + let function = + if let Some(callee) = callee { + self.evaluate_expr(callee)?.as_callable().ok_or( + InterpreterError::FunctionNotFound("".into(), self.state.call_stack()), + )? 
+ } else { + self.state + .read(name) + .and_then(|val| val.as_callable()) + .ok_or(InterpreterError::FunctionNotFound( + name.to_string(), + self.state.call_stack(), + ))? + }; + + let function_type = function.borrow().function_type(); + match function_type { + FunctionType::Generator => { + // TODO we may want to support builtin generators in the future. For now, we only + // support user-defined so we are safe to downcast to `Container`. + let function = function + .borrow() + .as_any() + .downcast_ref::>() + .cloned() + .ok_or(InterpreterError::ExpectedFunction(self.state.call_stack()))?; + let scope = Scope::new(self, &function, &arguments)?; + let generator_function = Generator::new(scope, function); + let generator_iterator = GeneratorIterator::new(generator_function, self.clone()); + Ok(ExprResult::Generator(Container::new(generator_iterator))) + } + FunctionType::Async => { + let function = function + .borrow() + .as_any() + .downcast_ref::>() + .cloned() + .ok_or(InterpreterError::ExpectedFunction(self.state.call_stack()))?; + let scope = Scope::new(self, &function, &arguments)?; + let coroutine = Coroutine::new(scope, function); + Ok(ExprResult::Coroutine(Container::new(coroutine))) + } + FunctionType::Regular => self.call(function, &arguments), + } + } + + pub fn invoke_function( + &self, + function: Container, + scope: Container, + ) -> Result { + let cross_module = !self + .state + .current_module() + .same_identity(&function.borrow().module); + if cross_module { + self.state.push_module(function.borrow().module.clone()); + } + self.state + .push_captured_env(function.borrow().captured_env.clone()); + self.state.push_local(scope); + self.state + .push_context(StackFrame::new_function(function.borrow().clone())); + self.state.push_function(function.clone()); + + // We do not propagate errors here because we still must restore the scopes and things + // before returning. 
+ let result = self.evaluate_block(&function.borrow().body); + + // If an error is thrown, we should return that immediately without restoring any state. + if matches!(result, Ok(_) | Err(InterpreterError::EncounteredReturn(_))) { + self.state.pop_context(); + self.state.pop_function(); + self.state.pop_local(); + self.state.pop_captured_env(); + if cross_module { + self.state.pop_module(); + } + } + + result + } + + pub fn evaluate_method( + &self, + result: ExprResult, + name: &str, + arguments: &ResolvedArguments, + ) -> Result { + log(LogLevel::Debug, || { + format!("Calling method {}.{}", result, name) + }); + log(LogLevel::Trace, || { + format!("... from module: {}", self.state.current_module()) + }); + log(LogLevel::Trace, || { + format!( + "... from path: {}", + self.state.current_module().borrow().path().display() + ) + }); + if let Some(class) = self.state.current_class() { + log(LogLevel::Trace, || format!("... from class: {}", class)); + } + + let function = self + .evaluate_member_access_inner(&result, name)? + .as_callable() + .ok_or(InterpreterError::MethodNotFound( + name.to_string(), + self.state.call_stack(), + ))?; + + self.call(function, arguments) + } + + /// The `Dunder::New` method is always unbound, regardless of whether it called for an + /// object or a class. There is nothing for it to be bound to since the object/class has not + /// yet been created. The class (for an object) or metaclass (for a class) will be + /// explicitly provided as the first parameter. + pub fn evaluate_new_method( + &self, + result: &ExprResult, + arguments: &ResolvedArguments, + instantiation_type: InstantiationType, + ) -> Result { + let function = result + .get_class(self) + .get_new_method(self, &instantiation_type) + .ok_or(InterpreterError::MethodNotFound( + Dunder::New.into(), + self.state.call_stack(), + ))? 
+ .as_callable() + .ok_or(InterpreterError::MethodNotFound( + Dunder::New.into(), + self.state.call_stack(), + ))?; + + self.call(function, arguments) + } + + fn evaluate_method_call( + &self, + obj: &Expr, + name: &str, + arguments: &ParsedArguments, + ) -> Result { + let arguments = ResolvedArguments::from(self, arguments)?; + let result = self.evaluate_expr(obj)?; + + if name == Dunder::New.value() { + let instantiation_type = match result { + ExprResult::Class(_) => InstantiationType::Class, + ExprResult::Object(_) => InstantiationType::Object, + ExprResult::Super(_) => InstantiationType::Super, + _ => unreachable!(), + }; + return self.evaluate_new_method(&result, &arguments, instantiation_type); + } + + self.evaluate_method(result, name, &arguments) + } + + fn evaluate_class_instantiation( + &self, + name: &str, + arguments: &ParsedArguments, + ) -> Result { + log(LogLevel::Debug, || format!("Instantiating: {}", name)); + log(LogLevel::Trace, || { + format!("... from module: {}", self.state.current_module()) + }); + log(LogLevel::Trace, || { + format!( + "... from path: {}", + self.state.current_module().borrow().path().display() + ) + }); + if let Some(class) = self.state.current_class() { + log(LogLevel::Trace, || format!("... 
from class: {}", class)); + } + + let result = self + .state + .read(name) + .ok_or(InterpreterError::ClassNotFound( + name.to_string(), + self.state.call_stack(), + ))?; + let class = result.as_callable().ok_or(InterpreterError::ClassNotFound( + name.to_string(), + self.state.call_stack(), + ))?; + + let arguments = ResolvedArguments::from(self, arguments)?; + + self.call(class, &arguments) + } + + fn evaluate_compound_assignment( + &self, + operator: &CompoundOperator, + target: &Expr, + value: &Expr, + ) -> Result<(), InterpreterError> { + let op = operator.to_bin_op(); + let result = self.evaluate_binary_operation(target, &op, value)?; + self.evaluate_assignment_inner(target, result) + } + + /// Assignment functionality shared by traditional assignment such as `a = 1` and compound + /// assignment such as `a += 1`. + fn evaluate_assignment_inner( + &self, + name: &Expr, + value: ExprResult, + ) -> Result<(), InterpreterError> { + match name { + Expr::Variable(name) => { + self.state.write(name, value.clone()); + } + Expr::MemberAccess { object, field } => { + self.evaluate_expr(object)? + .as_member_accessor(self) + .insert(field, value); + } + Expr::IndexAccess { object, index } => { + let index_result = self.evaluate_expr(index)?; + let object_result = self.evaluate_expr(object)?; + object_result + .as_index_write() + .ok_or(InterpreterError::TypeError( + Some(format!( + "'{}' object does not support item assignment", + object_result.get_type() + )), + self.state.call_stack(), + ))? 
+ .insert(&index_result, value); + } + _ => return Err(InterpreterError::ExpectedVariable(self.state.call_stack())), + } + + Ok(()) + } + + fn evaluate_assignment(&self, name: &Expr, expr: &Expr) -> Result<(), InterpreterError> { + let result = self.evaluate_expr(expr)?; + self.evaluate_assignment_inner(name, result) + } + + fn evaluate_multiple_assignment( + &self, + left: &[Expr], + expr: &Expr, + ) -> Result<(), InterpreterError> { + let value = self.evaluate_expr(expr)?; + for name in left { + self.evaluate_assignment_inner(name, value.clone())?; + } + + Ok(()) + } + + /// Python can unpack any iterables, not any index reads. + fn evaluate_unpacking_assignment( + &self, + left: &[Expr], + expr: &Expr, + ) -> Result<(), InterpreterError> { + let results = self.evaluate_expr(expr)?.into_iter(); + let right_len = results.clone().count(); + let left_len = left.len(); + + if left_len < right_len { + return Err(InterpreterError::ValueError( + "too many values to unpack (expected ".to_string() + &left_len.to_string() + ")", + self.state.call_stack(), + )); + } + + if left.len() > right_len { + return Err(InterpreterError::ValueError( + "not enough values to unpack (expected ".to_string() + + &left_len.to_string() + + ", got " + + &right_len.to_string() + + ")", + self.state.call_stack(), + )); + } + + for (key, value) in left.iter().zip(results) { + self.evaluate_assignment_inner(key, value)?; + } + + Ok(()) + } + + fn evaluate_lambda( + &self, + arguments: &ParsedArgDefinitions, + expr: &Expr, + is_generator: &bool, + ) -> Result { + let block = match is_generator { + false => Block::new(vec![Statement::Expression(expr.clone())]), + true => Block::new(vec![Statement::Yield(Expr::NoOp)]), + }; + + let function = Container::new(Function::new_lambda( + self.state.clone(), + arguments.clone(), + block, + )); + + Ok(ExprResult::Function(function)) + } + + fn evaluate_function_def( + &self, + name: &str, + arguments: &ParsedArgDefinitions, + body: &Block, + decorators: 
&[Expr], + is_async: &bool, + ) -> Result<(), InterpreterError> { + let function = Container::new(Function::new( + self.state.clone(), + name.to_string(), + arguments.clone(), + body.clone(), + decorators.to_vec(), + *is_async, + )); + + // Decorators are applied to a function when it is defined and then the decorated version + // is written into the symbol table. + let result = function.apply_decorators(self)?; + + // We should note that what we write here it not always a `Function` or even a `Callable`. + // In the case of the `@property` decorator, what is written to the symbol table is a + // `MemberDescriptor`. + self.state.write(name, result); + Ok(()) + } + + fn evaluate_class_definition( + &self, + name: &str, + parents: &[Expr], + metaclass: &Option, + body: &Block, + ) -> Result<(), InterpreterError> { + log(LogLevel::Debug, || format!("Defining class: {}", name)); + let parent_classes = parents + .iter() + .map(|p| self.evaluate_expr(p)) + .collect::, _>>()? + .iter() + .map(|f| { + f.as_class() + .ok_or(InterpreterError::ExpectedClass(self.state.call_stack())) + }) + .collect::, _>>()?; + + let metaclass = metaclass + .clone() + .and_then(|p| self.state.read(p.as_str())) + .map(|d| { + d.as_class() + .ok_or(InterpreterError::ExpectedClass(self.state.call_stack())) + }) + .transpose()?; + + // We will update the scope on this class before we write it to the symbol table, but we + // must instantiate the class here so we can get a reference that can be associated with + // each function defined inside it. + let class = Class::new(self, name, parent_classes, metaclass)?; + + // We must use the class scope here in case it received any initialization from its + // metaclass `Dunder::New` method. 
+ self.state + .push_local(Container::new(class.borrow().scope.clone())); + self.state.push_class(class.clone()); + self.evaluate_block(body)?; + self.state.pop_class(); + class.borrow_mut().scope = self + .state + .pop_local() + .ok_or(InterpreterError::RuntimeError)? + .borrow() + .clone(); + + self.state.write(name, ExprResult::Class(class)); + + Ok(()) + } + + /// At most one of the Blocks will be evaluated, once we know which one we can return the + /// result early. + fn evaluate_if_else( + &self, + if_part: &ConditionalBlock, + elif_parts: &[ConditionalBlock], + else_part: &Option, + ) -> Result<(), InterpreterError> { + let if_condition_result = self.evaluate_expr(&if_part.condition)?; + if if_condition_result + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? + { + self.evaluate_block(&if_part.block)?; + return Ok(()); + } + + for elif_part in elif_parts { + let elif_condition_result = self.evaluate_expr(&elif_part.condition)?; + if elif_condition_result + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? + { + self.evaluate_block(&elif_part.block)?; + return Ok(()); + } + } + + if let Some(else_part) = else_part { + self.evaluate_block(else_part)?; + return Ok(()); + } + + Ok(()) + } + + fn evaluate_while_loop(&self, condition: &Expr, body: &Block) -> Result<(), InterpreterError> { + while self + .evaluate_expr(condition)? + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? 
+ { + match self.evaluate_block(body) { + Err(InterpreterError::EncounteredBreak) => { + break; + } + Err(InterpreterError::EncounteredContinue) => {} + Err(e) => return Err(e), + _ => {} + } + } + + Ok(()) + } + + fn evaluate_generator_comprehension( + &self, + body: &Expr, + clauses: &[ForClause], + ) -> Result { + let generator = Generator::new_from_comprehension(self.state.clone(), body, clauses); + let iterator = GeneratorIterator::new(generator, self.clone()); + Ok(ExprResult::Generator(Container::new(iterator))) + } + + fn evaluate_list_comprehension( + &self, + body: &Expr, + clauses: &[ForClause], + ) -> Result { + if let Some((first_clause, remaining_clauses)) = clauses.split_first() { + // Recursive case: Process the first ForClause + let mut output = vec![]; + for i in self.evaluate_expr(&first_clause.iterable)? { + if first_clause.indices.len() == 1 { + self.state.write(&first_clause.indices[0], i.clone()); + } else { + for (key, value) in first_clause.indices.iter().zip(i) { + self.state.write(key, value); + } + } + + if let Some(condition) = first_clause.condition.clone() { + if !self + .evaluate_expr(&condition)? + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean(self.state.call_stack()))? + { + continue; + } + } + + // Recursively handle the rest of the clauses. If `remaining_clauses` is empty, + // we'll hit the base case on the next call. + match self.evaluate_list_comprehension(body, remaining_clauses)? { + ExprResult::List(list) => output.extend(list), + single => output.push(single), + } + } + + Ok(ExprResult::List(Container::new(List::new(output)))) + } else { + // Base case: Evaluate the expression. We drop into this case when `clauses` is empty. + self.evaluate_expr(body) + } + } + + fn evaluate_set_comprehension( + &self, + body: &Expr, + clauses: &[ForClause], + ) -> Result { + self.evaluate_list_comprehension(body, clauses)? 
+ .as_set() + .ok_or(InterpreterError::ExpectedSet(self.state.call_stack())) + .map(ExprResult::Set) + } + + fn evaluate_dict_comprehension( + &self, + key: &str, + value: &str, + range: &Expr, + key_body: &Expr, + value_body: &Expr, + ) -> Result { + let expr = self.evaluate_expr(range)?; + + let mut output = HashMap::new(); + for i in expr { + let tuple = i + .as_tuple() + .ok_or(InterpreterError::ExpectedTuple(self.state.call_stack()))?; + self.state.write(key, tuple.first()); + self.state.write(value, tuple.second()); + let key_result = self.evaluate_expr(key_body)?; + let value_result = self.evaluate_expr(value_body)?; + output.insert(key_result, value_result); + } + Ok(ExprResult::Dict(Container::new(Dict::new(output)))) + } + + fn evaluate_for_in_loop( + &self, + index: &LoopIndex, + range: &Expr, + body: &Block, + else_block: &Option, + ) -> Result<(), InterpreterError> { + let range_expr = self.evaluate_expr(range)?; + let mut encountered_break = false; + + for val_for_iteration in range_expr { + self.state.write_loop_index(index, val_for_iteration); + + match self.evaluate_block(body) { + Err(InterpreterError::EncounteredBreak) => { + encountered_break = true; + break; + } + Err(InterpreterError::EncounteredContinue) => {} + Err(e) => return Err(e), + _ => {} + } + } + + if !encountered_break { + if let Some(else_block) = else_block { + self.evaluate_block(else_block)?; + } + } + + Ok(()) + } + + fn import_module(&self, import_path: ImportPath) -> Result { + if let Some(module) = self.state.read(&import_path.as_str()) { + return Ok(module); + } + + // Fetch the call stack separately so we don't produce a mutable borrow error in the + // next statement. 
+ let call_stack = self.state.call_stack(); + let loaded_module = self + .state + .load_module(&import_path, call_stack.current_path()) + .ok_or(InterpreterError::ModuleNotFound( + import_path.as_str().to_string(), + call_stack, + ))?; + + let (mut parser, mut interpreter) = Builder::new() + .state(self.state.clone()) + .module(loaded_module.clone()) + .build(); + + self.state + .push_module(Container::new(Module::new(loaded_module, Scope::default()))); + match interpreter.run(&mut parser) { + Ok(_) => {} + Err(MemphisError::Interpreter(e)) => return Err(e), + Err(MemphisError::Parser(e)) => { + println!("{}", e); + return Err(InterpreterError::SyntaxError(self.state.call_stack())); + } + _ => unreachable!(), + }; + + self.state.pop_context(); + let module = self + .state + .pop_module() + .ok_or(InterpreterError::RuntimeError)?; + Ok(ExprResult::Module(module)) + } + + fn evaluate_regular_import( + &self, + import_path: &ImportPath, + alias: &Option, + ) -> Result<(), InterpreterError> { + // A mutable ExprResult::Module that will be updated on each loop iteration + let mut inner_module = self.import_module(import_path.clone())?; + + // This is a case where it's simpler if we have an alias: just make the module available + // at the alias. + if let Some(alias) = alias { + self.state.write(alias, inner_module); + } else { + // Otherwise, we must create a module chain. 
For example: + // + // import mypackage.myothermodule + // + // must be used as + // + // mypackage.myothermodule.add('1', '1') + + // Iterate over the segments in reverse, skipping the last one + let segments = import_path.segments(); + for segment in segments.iter().rev().take(segments.len() - 1) { + let mut new_outer_module = Module::empty(); + new_outer_module.scope.insert(segment, inner_module); + inner_module = ExprResult::Module(Container::new(new_outer_module)); + } + + self.state + .write(import_path.segments().first().unwrap(), inner_module); + } + + Ok(()) + } + + fn evaluate_selective_import( + &self, + import_path: &ImportPath, + arguments: &[ImportedItem], + wildcard: &bool, + ) -> Result<(), InterpreterError> { + let module = self.import_module(import_path.clone())?.as_module().ok_or( + InterpreterError::ModuleNotFound( + import_path.as_str().to_string(), + self.state.call_stack(), + ), + )?; + + let mapped_imports = arguments + .iter() + .map(|arg| { + let original = arg.as_original_symbol(); + let imported = arg.as_imported_symbol(); + + (original, imported) + }) + .collect::>(); + + for module_symbol in module.dir() { + let aliased_symbol = match wildcard { + true => module_symbol.clone(), + false => { + if !mapped_imports.contains_key(&module_symbol) { + continue; + } + mapped_imports[&module_symbol].clone() + } + }; + + if let Some(value) = ModuleInterface::get(&*module, self, module_symbol.as_str()) { + self.state.write(&aliased_symbol, value.clone()); + } else { + return Err(InterpreterError::FunctionNotFound( + aliased_symbol.to_string(), + self.state.call_stack(), + )); + } + } + + Ok(()) + } + + fn evaluate_context_manager( + &self, + expr: &Expr, + variable: &Option, + block: &Block, + ) -> Result<(), InterpreterError> { + let expr_result = self.evaluate_expr(expr)?; + + let object = expr_result + .as_object() + .ok_or(InterpreterError::ExpectedObject(self.state.call_stack()))?; + + if object.get(self, Dunder::Enter.value()).is_none() + || 
object.get(self, Dunder::Exit.value()).is_none() + { + return Err(InterpreterError::MissingContextManagerProtocol( + self.state.call_stack(), + )); + } + + let result = self.evaluate_method( + expr_result.clone(), + Dunder::Enter.value(), + &ResolvedArguments::default(), + )?; + + if let Some(variable) = variable { + self.state.write(variable, result); + } + let block_result = self.evaluate_block(block); + + self.evaluate_method( + expr_result.clone(), + Dunder::Exit.value(), + &ResolvedArguments::default() + .add_arg(ExprResult::Void) + .add_arg(ExprResult::Void) + .add_arg(ExprResult::Void), + )?; + + // Return the exception if one is called. + block_result?; + + Ok(()) + } + + fn evaluate_raise(&self, instance: &Option) -> Result<(), InterpreterError> { + // TODO we should throw a 'RuntimeError: No active exception to reraise' + if instance.is_none() { + return Err(InterpreterError::EncounteredRaise); + } + + let instance = instance.as_ref().unwrap(); + let args = ResolvedArguments::from(self, &instance.args)?; + let error = match instance.literal { + ExceptionLiteral::TypeError => { + let message = if args.len() == 1 { + Some( + args.get_arg(0) + .as_string() + .ok_or(InterpreterError::ExpectedString(self.state.call_stack()))?, + ) + } else { + None + }; + + InterpreterError::TypeError(message, self.state.call_stack()) + } + _ => unimplemented!(), + }; + + Err(error) + } + + fn evaluate_try_except( + &self, + try_block: &Block, + except_clauses: &[ExceptClause], + else_block: &Option, + finally_block: &Option, + ) -> Result<(), InterpreterError> { + if let Err(error) = self.evaluate_block(try_block) { + // Only the first matching clause should be evaluated. They will still be in order + // here from the parsed code. 
+ if let Some(except_clause) = except_clauses + .iter() + .find(|clause| error.matches_except_clause(&clause.exception_types)) + { + except_clause + .exception_types + .iter() + .filter_map(|et| et.alias.as_ref()) + .for_each(|alias| { + self.state + .write(alias, ExprResult::Exception(Box::new(error.clone()))); + }); + + match self.evaluate_block(&except_clause.block) { + Err(InterpreterError::EncounteredRaise) => return Err(error), + Err(second_error) => return Err(second_error), + Ok(_) => {} + } + } else { + // Uncaught errors should be raised + return Err(error); + } + } else if let Some(else_block) = else_block { + // Else block is only evaluated if an error was not thrown + self.evaluate_block(else_block)?; + } + + // Finally block is evaluated always if it exists + if let Some(finally_block) = finally_block { + self.evaluate_block(finally_block)?; + } + + Ok(()) + } + + /// TODO This should be moved to the semantic analysis + fn validate_nonlocal_context(&self, name: &str) -> Result<(), InterpreterError> { + // We could not find the variable `name` in an enclosing context. + if let Some(env) = self.state.read_captured_env() { + if env.borrow().read(name).is_none() { + return Err(InterpreterError::SyntaxError(self.state.call_stack())); + } + } + + // `nonlocal` cannot be used at the module-level (outside of a function, + // i.e. captured environment). 
if self.state.read_captured_env().is_none() {
            return Err(InterpreterError::SyntaxError(self.state.call_stack()));
        }

        Ok(())
    }

    /// Handle a `nonlocal` statement for `name`.
    ///
    /// The name is first checked with `validate_nonlocal_context`; any error from that check
    /// is returned unchanged. On success the name is marked as nonlocal in the interpreter
    /// state.
    fn evaluate_nonlocal(&self, name: &Variable) -> Result<(), InterpreterError> {
        self.validate_nonlocal_context(name)?;
        self.state.mark_nonlocal(name);

        Ok(())
    }

    /// Handle a `global` statement by marking `name` as global in the interpreter state.
    ///
    /// No validation is performed here, so this always returns `Ok(())`.
    fn evaluate_global(&self, name: &Variable) -> Result<(), InterpreterError> {
        self.state.mark_global(name);

        Ok(())
    }

    /// Wrap a parsed type node in `ExprResult::TypeNode`, converting it with `into()`.
    ///
    /// Always returns `Ok`.
    ///
    /// NOTE(review): the type arguments of the `Result` return type are missing in this
    /// listing -- confirm against the repository.
    fn evaluate_type_node(&self, type_node: &TypeNode) -> Result {
        Ok(ExprResult::TypeNode(type_node.into()))
    }

    pub fn evaluate_expr(&self, expr: &Expr) -> Result {
        match expr {
            Expr::NoOp => Ok(ExprResult::Void),
            Expr::None => Ok(ExprResult::None),
            Expr::Ellipsis => Ok(ExprResult::Ellipsis),
            Expr::NotImplemented => Ok(ExprResult::NotImplemented),
            Expr::Integer(value) => Ok(ExprResult::Integer(Container::new(*value))),
            Expr::FloatingPoint(value) => Ok(ExprResult::FloatingPoint(*value)),
            Expr::Boolean(value) => Ok(ExprResult::Boolean(*value)),
            Expr::StringLiteral(value) => Ok(ExprResult::String(Str::new(value.clone()))),
            Expr::ByteStringLiteral(value) => {
                Ok(ExprResult::Bytes(Container::new(Bytes::new(value.clone()))))
            }
            Expr::Variable(name) => self.evaluate_variable(name),
            Expr::List(items) => self.evaluate_list(items),
            Expr::Set(items) => self.evaluate_set(items),
            Expr::Dict(dict) => self.evaluate_dict(dict),
            Expr::Tuple(items) => self.evaluate_tuple(items),
            Expr::GeneratorComprehension { body, clauses } => {
                self.evaluate_generator_comprehension(body, clauses)
            }
            Expr::ListComprehension { body, clauses } => {
                self.evaluate_list_comprehension(body, clauses)
            }
            Expr::SetComprehension { body, clauses } => {
                self.evaluate_set_comprehension(body, clauses)
            }
            Expr::DictComprehension {
                key,
                value,
                range,
                key_body,
                value_body,
            } => self.evaluate_dict_comprehension(key, value, range, key_body, value_body),
            Expr::UnaryOperation { op, right } =>
self.evaluate_unary_operation(op, right),
            Expr::BinaryOperation { left, op, right } => {
                self.evaluate_binary_operation(left, op, right)
            }
            Expr::Await { right } => self.evaluate_await(right),
            Expr::FunctionCall { name, args, callee } => {
                self.evaluate_function_call(name, args, callee)
            }
            Expr::ClassInstantiation { name, args } => {
                self.evaluate_class_instantiation(name, args)
            }
            Expr::LogicalOperation { left, op, right } => {
                self.evaluate_logical_operation(left, op, right)
            }
            Expr::TernaryOp {
                condition,
                if_value,
                else_value,
            } => self.evaluate_ternary_operation(condition, if_value, else_value),
            Expr::MethodCall { object, name, args } => {
                self.evaluate_method_call(object, name, args)
            }
            Expr::MemberAccess { object, field } => self.evaluate_member_access(object, field),
            Expr::IndexAccess { object, index } => self.evaluate_index_access(object, index),
            Expr::SliceOperation { object, params } => {
                self.evaluate_slice_operation(object, params)
            }
            Expr::FString(parts) => self.evaluate_f_string(parts),
            Expr::Lambda {
                args,
                expr,
                is_generator,
            } => self.evaluate_lambda(args, expr, is_generator),
            Expr::TypeNode(type_node) => self.evaluate_type_node(type_node),
        }
    }

    /// Evaluate a single statement by dispatching to the matching `evaluate_*` method.
    ///
    /// Expression and `return` statements hand back the value produced by their evaluator.
    /// Every other statement yields `ExprResult::Void` on success. Errors, including the
    /// `EncounteredBreak` / `EncounteredContinue` values produced for `break` / `continue`,
    /// are returned unchanged.
    ///
    /// Panics (`unreachable!()`) on `yield` / `yield from`, which are expected to be handled
    /// inside `GeneratorExecutor` and never reach this function.
    ///
    /// NOTE(review): the type arguments of the `Result` return type are missing in this
    /// listing -- confirm against the repository.
    pub fn evaluate_statement(&self, stmt: &Statement) -> Result {
        let outcome = match stmt {
            // These are the only types of statements that will return a value.
            Statement::Expression(expr) => return self.evaluate_expr(expr),
            Statement::Return(expr) => return self.evaluate_return(expr),
            // This is unreachable because it should be handled inside `GeneratorExecutor`.
            Statement::Yield(_) | Statement::YieldFrom(_) => unreachable!(),
            Statement::Pass => Ok(()),
            Statement::Break => Err(InterpreterError::EncounteredBreak),
            Statement::Continue => Err(InterpreterError::EncounteredContinue),
            Statement::Assert(expr) => self.evaluate_assert(expr),
            Statement::Delete(expr) => self.evaluate_delete(expr),
            Statement::Nonlocal(expr) => self.evaluate_nonlocal(expr),
            Statement::Global(expr) => self.evaluate_global(expr),
            Statement::Assignment { left, right } => self.evaluate_assignment(left, right),
            Statement::MultipleAssignment { left, right } => {
                self.evaluate_multiple_assignment(left, right)
            }
            Statement::UnpackingAssignment { left, right } => {
                self.evaluate_unpacking_assignment(left, right)
            }
            Statement::CompoundAssignment {
                operator,
                target,
                value,
            } => self.evaluate_compound_assignment(operator, target, value),
            Statement::FunctionDef {
                name,
                args,
                body,
                decorators,
                is_async,
            } => self.evaluate_function_def(name, args, body, decorators, is_async),
            Statement::IfElse {
                if_part,
                elif_parts,
                else_part,
            } => self.evaluate_if_else(if_part, elif_parts, else_part),
            Statement::WhileLoop { condition, body } => self.evaluate_while_loop(condition, body),
            Statement::ForInLoop {
                index,
                iterable: range,
                body,
                else_block,
            } => self.evaluate_for_in_loop(index, range, body, else_block),
            Statement::ClassDef {
                name,
                parents,
                metaclass,
                body,
            } => self.evaluate_class_definition(name, parents, metaclass, body),
            Statement::RegularImport { import_path, alias } => {
                self.evaluate_regular_import(import_path, alias)
            }
            Statement::SelectiveImport {
                import_path,
                items,
                wildcard,
            } => self.evaluate_selective_import(import_path, items, wildcard),
            Statement::TryExcept {
                try_block,
                except_clauses,
                else_block,
                finally_block,
            } => self.evaluate_try_except(try_block, except_clauses, else_block, finally_block),
            Statement::Raise(exception) => self.evaluate_raise(exception),
            Statement::ContextManager {
                expr,
                variable,
                block,
            } => self.evaluate_context_manager(expr, variable, block),
        };

        // Return an error if one is thrown, otherwise all statements will return void.
        outcome.map(|()| ExprResult::Void)
    }
}

impl InterpreterEntrypoint for Interpreter {
    type Return = ExprResult;

    /// Parse and evaluate statements one at a time until the parser reports it is finished.
    ///
    /// Returns the value of the last evaluated statement, or `ExprResult::Void` when no
    /// statement was evaluated. Stops at the first failure: parser errors are wrapped in
    /// `MemphisError::Parser` and evaluation errors in `MemphisError::Interpreter`.
    ///
    /// NOTE(review): the type arguments of the `Result` return type are missing in this
    /// listing -- confirm against the repository.
    fn run(&mut self, parser: &mut Parser) -> Result {
        let mut last_value = ExprResult::Void;
        while !parser.is_finished() {
            let statement = parser.parse_statement().map_err(MemphisError::Parser)?;
            last_value = self
                .evaluate_statement(&statement)
                .map_err(MemphisError::Interpreter)?;
        }

        Ok(last_value)
    }
}

#[cfg(test)]
mod tests {
    use std::any::Any;

    use super::*;
    use crate::core::Storable;
    use crate::treewalk::types::{ByteArray, DictItems, DictKeys, DictValues, FrozenSet, Type};
    use crate::types::errors::ParserError;

    fn downcast(input: T) -> Interpreter {
        let any_ref: &dyn Any = &input as &dyn Any;
        any_ref.downcast_ref::().unwrap().clone()
    }

    fn init_path(path: &str) -> (Parser, Interpreter) {
        let (parser, interpreter) = Builder::new().path(path).build();

        (parser, downcast(interpreter))
    }

    fn init(text: &str) -> (Parser, Interpreter) {
        let (parser, interpreter) = Builder::new().text(text).build();

        (parser, downcast(interpreter))
    }

    #[test]
    fn undefined_variable() {
        let input = "x + 1";
        let (mut parser, mut interpreter) = init(input);

        match interpreter.run(&mut parser) {
            Err(e) => {
                assert_eq!(
                    e,
                    MemphisError::Interpreter(InterpreterError::VariableNotFound(
                        "x".to_string(),
                        interpreter.state.call_stack(),
                    ))
                );
            }
            Ok(_) => panic!("Expected an error!"),
        }
    }

    #[test]
    fn division_by_zero() {
        let input = "1 / 0";
        let (mut parser, mut interpreter) = init(input);

        match interpreter.run(&mut parser) {
            Err(e) => {
                assert_eq!(
                    e,
                    MemphisError::Interpreter(InterpreterError::DivisionByZero(
                        "division by zero".into(),
interpreter.state.call_stack(), + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn expression() { + let input = "2 + 3 * (4 - 1)"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Integer(11.store())); + } + + #[test] + fn integer_division() { + let input = "2 // 3"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Integer(0.store())); + + let input = "5 // 3"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Integer(1.store())); + + let input = "5 // 0"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast); + + assert_eq!( + result, + Err(InterpreterError::DivisionByZero( + "integer division or modulo by zero".into(), + interpreter.state.call_stack() + )) + ); + } + + #[test] + fn integer_assignment() { + let input = r#" +a = 2 + 3 * 4 +b = a + 5 +c = None +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(14.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(19.store())) + ); + assert_eq!(interpreter.state.read("c"), Some(ExprResult::None)); + } + } + } + + #[test] + fn strings() { + let input = r#" +a = "foo" +b = type(str.join) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + 
interpreter.state.read("a"), + Some(ExprResult::String(Str::new("foo".to_string()))) + ); + assert_eq!( + interpreter.state.read("b").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::BuiltinMethod) + ); + } + } + } + + #[test] + fn boolean_operators() { + let input = "True and False"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "True or False"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "not (True or False)"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "True and not False"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "not False"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "not True"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + } + + // Confirm that the interpreter can evaluate boolean expressions. 
+ #[test] + fn comparison_operators() { + let input = "2 == 1"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "2 == 2"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "2 != 1"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "2 != 2"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "2 > 1"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "2 < 1"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "1 <= 1"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "1 >= 1"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "4 in range(5)"; + let (mut parser, interpreter) = init(input); + + let ast = 
parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "4 in range(3)"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "4 not in range(5)"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "4 not in range(3)"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + + let input = "4 is None"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(false)); + + let input = "4 is not None"; + let (mut parser, interpreter) = init(input); + + let ast = parser.parse_simple_expr().unwrap(); + let result = interpreter.evaluate_expr(&ast).unwrap(); + + assert_eq!(result, ExprResult::Boolean(true)); + } + + #[test] + fn print_builtin() { + // this test has no assertions because output capture only works in the integration tests + // and not the unit tests at the moment. 
+ let input = r#" +print(3) +a = type(print) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::BuiltinFunction) + ); + } + } + } + + #[test] + fn string_literal() { + let input = r#" +print("Hello, World!") + +a = iter("") +b = type(iter("")) + +for i in iter("abcde"): + print(i) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::StringIterator(_)) + )); + assert_eq!( + interpreter.state.read("b").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::StringIterator) + ); + } + } + } + + #[test] + fn function_definition() { + let input = r#" +def foo(a, b): + return a + b + +foo(2, 3) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let expected_name = "foo".to_string(); + assert!(matches!( + interpreter.state.read("foo").unwrap().as_function().unwrap().borrow().clone(), + Function { + name, + .. 
+ } if name == expected_name)); + } + } + + let input = r#" +def add(x, y): + return x + y + +a = add(2, 3) + +b = lambda: 4 +c = b() + +d = lambda: (yield) +e = d() + +f = type((lambda: (yield))()) + +async def g(): pass +h = g() +i = type(h) +h.close() + +async def j(): yield +k = g() +l = type(h) + +def _f(): pass +m = _f.__code__ +n = type(_f.__code__) +o = type(type(_f).__code__) +p = type(type(_f).__globals__) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + let b = interpreter + .state + .read("b") + .unwrap() + .as_function() + .unwrap() + .borrow() + .clone(); + assert!(matches!(b, + Function { + body: Block { ref statements, .. }, + .. + } if statements.len() == 1 && + matches!(statements[0], Statement::Expression(Expr::Integer(4))) + )); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(4.store())) + ); + let d = interpreter + .state + .read("d") + .unwrap() + .as_function() + .unwrap() + .borrow() + .clone(); + assert!(matches!(d, + Function { + body: Block { ref statements, .. }, + .. + } if statements.len() == 1 && + matches!(statements[0], Statement::Yield(Expr::NoOp)) + )); + assert!(matches!( + interpreter.state.read("e").unwrap(), + ExprResult::Generator(_) + )); + assert_eq!( + interpreter.state.read("f").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Generator) + ); + assert!(matches!( + interpreter.state.read("h").unwrap(), + ExprResult::Coroutine(_) + )); + // I commented this out when we removed Clone from Class. + //assert!(matches!( + // interpreter.state.read("i").unwrap().as_class().unwrap().borrow(), + // Class { name, .. 
} if name == "coroutine" + //)); + // TODO add support for async generators, which will change the next two assertions + assert!(matches!( + interpreter.state.read("k").unwrap(), + ExprResult::Coroutine(_) + )); + //assert!(matches!( + // interpreter.state.read("l").unwrap().as_class().unwrap().borrow().clone(), + // Class { name, .. } if name == "coroutine" + //)); + assert!(matches!( + interpreter.state.read("m").unwrap(), + ExprResult::Code(_) + )); + assert_eq!( + interpreter.state.read("n").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Code) + ); + assert_eq!( + interpreter.state.read("o").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::GetSetDescriptor) + ); + assert_eq!( + interpreter.state.read("p").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::MemberDescriptor) + ); + } + } + + // Test early return + let input = r#" +def foo(): + return 4 + return 5 + +a = foo() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + } + } + } + + #[test] + fn if_else() { + let input = r#" +z = "Empty" +y = 5 +if y > 0: + z = "Greater than 0" +elif y > -10: + z = "Greater than -10" +elif y > -20: + z = "Greater than -20" +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::String(Str::new("Greater than 0".to_string()))) + ); + } + } + + let input = r#" +z = "Empty" +y = -5 +if y > 0: + z = "Greater than 0" +elif y > -10: + z = "Greater than -10" +elif y > -20: + z = "Greater than -20" +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => 
{ + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::String(Str::new("Greater than -10".to_string()))) + ); + } + } + + let input = r#" +z = "Empty" +y = -15 +if y > 0: + z = "Greater than 0" +elif y > -10: + z = "Greater than -10" +elif y > -20: + z = "Greater than -20" +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::String(Str::new("Greater than -20".to_string()))) + ); + } + } + + let input = r#" +z = "Empty" +y = -25 +if y > 0: + z = "Greater than 0" +elif y > -10: + z = "Greater than -10" +elif y > -20: + z = "Greater than -20" +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::String(Str::new("Empty".to_string()))) + ); + } + } + + let input = r#" +z = "Empty" +y = -25 +if y > 0: + z = "Greater than 0" +elif y > -10: + z = "Greater than -10" +elif y > -20: + z = "Greater than -20" +else: + z = "Else" +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::String(Str::new("Else".to_string()))) + ); + } + } + + let input = r#" +z = 0 +if 4 in range(5): + z = 1 +else: + z = 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(1.store())) + ); + } + } + } + + #[test] + fn while_loop() { + let input = r#" +z = 0 +while z < 10: + z = z + 1 + print("done") +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => 
panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(10.store())) + ); + } + } + } + + #[test] + fn class_definition() { + let input = r#" +class Foo: + def __init__(self): + self.x = 0 + + def bar(self): + print(self.x) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(interpreter.state.is_class("Foo")); + } + } + } + + #[test] + fn class_instantiation() { + let input = r#" +class Foo: + def __init__(self, y): + self.x = 0 + self.y = y + +foo = Foo(3) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(interpreter.state.is_class("Foo")); + assert!(!interpreter.state.is_class("foo")); + + let foo = match interpreter.state.read("foo") { + Some(ExprResult::Object(o)) => o, + _ => panic!("Expected an object."), + }; + assert_eq!( + foo.get(&interpreter, "y"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + foo.get(&interpreter, "x"), + Some(ExprResult::Integer(0.store())) + ); + } + } + + let input = r#" +class Foo: + def __init__(self, y): + self.x = 0 + self.y = y + a = 4 + + def bar(self): + print(self.x) + +foo = Foo(3) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(interpreter.state.is_class("Foo")); + assert!(!interpreter.state.is_class("foo")); + + // This should be an object with foo.y == 3 and foo.x == 0 even + // when the last line of the constructor did not touch self. 
+ let foo = match interpreter.state.read("foo") { + Some(ExprResult::Object(o)) => o, + _ => panic!("Expected an object."), + }; + assert_eq!( + foo.get(&interpreter, "y"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + foo.get(&interpreter, "x"), + Some(ExprResult::Integer(0.store())) + ); + } + } + } + + #[test] + fn method_invocation() { + let input = r#" +class Foo: + def __init__(self, y): + self.x = 0 + self.y = y + + def bar(self): + print(self.bash()) + return self.y + + def bash(self): + return self.x + +foo = Foo(3) +x = foo.bar() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + // Try the same test but with no constructor + let input = r#" +class Foo: + def bar(self): + self.x = 0 + self.y = 3 + print(self.x) + +foo = Foo() +foo.bar() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(interpreter.state.is_class("Foo")); + assert!(!interpreter.state.is_class("foo")); + + let foo = match interpreter.state.read("foo") { + Some(ExprResult::Object(o)) => o, + _ => panic!("Expected an object."), + }; + + // These should be set even when it's not a constructor + assert_eq!( + foo.get(&interpreter, "y"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + foo.get(&interpreter, "x"), + Some(ExprResult::Integer(0.store())) + ); + } + } + } + + #[test] + fn regular_import() { + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/regular_import.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("y"), + 
Some(ExprResult::Integer(6.store())) + ); + // This previously returned [`Type::Method`], which was an issue with binding + // classes (as callables) to their module. + assert_eq!(interpreter.state.read("z").unwrap().get_type(), Type::Type); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/regular_import_b.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("y"), + Some(ExprResult::Integer(7.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/relative/main_b.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/relative/main_c.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(2.store())) + ); + } + } + } + + #[test] + fn selective_import() { + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/selective_import_a.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/selective_import_b.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("y"), + Some(ExprResult::Integer(6.store())) + ); + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(6.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/selective_import_c.py"); + + match interpreter.run(&mut parser) { 
+ Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::FunctionNotFound( + "something_third".to_string(), + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/selective_import_d.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(8.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/selective_import_e.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(8.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/selective_import_f.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("y"), + Some(ExprResult::Integer(6.store())) + ); + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(6.store())) + ); + } + } + + let (mut parser, mut interpreter) = init_path("src/fixtures/imports/relative/main_a.py"); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("x"), + Some(ExprResult::Integer(2.store())) + ); + } + } + } + + #[test] + fn floating_point() { + let input = r#" +a = 3.14 +b = a + 2.5e-3 +c = 4 + 2.1 +d = 1.9 + 4 +e = d == 5.9 +f = d != 5.9 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::FloatingPoint(3.14)) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::FloatingPoint(3.1425)) + ); + assert_eq!( + 
interpreter.state.read("c"), + Some(ExprResult::FloatingPoint(6.1)) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::FloatingPoint(5.9)) + ); + assert_eq!(interpreter.state.read("e"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Boolean(false)) + ); + } + } + + let input = r#" +def add(x, y): + return x + y + +z = add(2.1, 3) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::FloatingPoint(5.1)) + ); + } + } + } + + #[test] + fn negative_numbers() { + let input = r#" +a = -3.14 +b = -3 +c = 2 - 3 +d = -2e-3 +e = 2 + -3 +f = 2+-3 +g = -(3) +h = -(2+3) +i = +3 +j = +(-3) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::FloatingPoint(-3.14)) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer((-3).store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer((-1).store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::FloatingPoint(-2e-3)) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer((-1).store())) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Integer((-1).store())) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::Integer((-3).store())) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::Integer((-5).store())) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + interpreter.state.read("j"), + Some(ExprResult::Integer((-3).store())) + ); + } + } + } + + #[test] + fn call_stack() { + let (mut parser, mut interpreter) = 
init_path("src/fixtures/call_stack/call_stack.py"); + + match interpreter.run(&mut parser) { + Err(e) => { + let call_stack = interpreter.state.call_stack(); + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::FunctionNotFound( + "unknown".to_string(), + call_stack.clone(), + )) + ); + + assert_eq!(call_stack.frames.len(), 3); + assert!(call_stack.frames[0] + .clone() + .file_path + .unwrap() + .to_str() + .unwrap() + .ends_with("src/fixtures/call_stack/call_stack.py")); + assert!(call_stack.frames[1] + .clone() + .file_path + .unwrap() + .to_str() + .unwrap() + .ends_with("src/fixtures/call_stack/other.py")); + assert!(call_stack.frames[2] + .clone() + .file_path + .unwrap() + .to_str() + .unwrap() + .ends_with("src/fixtures/call_stack/other.py")); + + assert_eq!(call_stack.frames[0].clone().function_name, None); + assert_eq!( + call_stack.frames[1].clone().function_name, + Some("middle_call".to_string()) + ); + assert_eq!( + call_stack.frames[2].clone().function_name, + Some("last_call".to_string()) + ); + assert_eq!(call_stack.frames[0].clone().line_number, 2); + assert_eq!(call_stack.frames[1].clone().line_number, 2); + assert_eq!(call_stack.frames[2].clone().line_number, 5); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +""" +multiline string +we want to test that this is included in the line number in the call stack + + +more space here +""" +a = 4 +b = 10 +c = foo() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + let call_stack = interpreter.state.call_stack(); + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::FunctionNotFound( + "foo".to_string(), + call_stack.clone(), + )) + ); + + assert_eq!(call_stack.frames.len(), 1); + assert_eq!( + call_stack.frames[0].clone().file_path, + Some("".into()) + ); + assert_eq!( + call_stack.frames[0].clone().function_name, + Some("".into()) + ); + assert_eq!(call_stack.frames[0].clone().line_number, 11); + } 
+ Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn lists() { + let input = r#" +a = [1,2,3] +print(a) +b = [1,2.1] +c = list([1,2]) +d = list({1,2}) +e = list((1,2)) +f = list(range(2)) +g = [ + 1, + 2, +] +h = c + g +i = [] +j = iter([]) +k = type(iter([])) + +l = len([1]) +m = len([1,2,3,4,5]) +n = len([]) + +o = [].append +p = type([].append) +q = [1,2] +q.append(3) + +r = [3,4] +s = r.append +s(5) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::FloatingPoint(2.1) + ])))) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(0.store()), + ExprResult::Integer(1.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::List(Container::new(List::new(vec![ + 
ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::List(Container::new(List::new(vec![])))) + ); + assert!(matches!( + interpreter.state.read("j"), + Some(ExprResult::ListIterator(_)) + )); + assert_eq!( + interpreter.state.read("k").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::ListIterator) + ); + assert_eq!( + interpreter.state.read("l"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("m"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("n"), + Some(ExprResult::Integer(0.store())) + ); + assert!(matches!( + interpreter.state.read("o"), + Some(ExprResult::Method(_)) + )); + assert_eq!( + interpreter.state.read("p").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Method) + ); + assert_eq!( + interpreter.state.read("q"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()), + ])))) + ); + assert!(matches!( + interpreter.state.read("s"), + Some(ExprResult::Method(_)) + )); + assert_eq!( + interpreter.state.read("r"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(4.store()), + ExprResult::Integer(5.store()), + ])))) + ); + } + } + + let input = "list([1,2,3], [1,2])"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 1, + 3, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn sets() { + let input = r#" +a = {1,2,3} +print(a) +b = {1,2.1} +c = set({1,2}) +d = {1,2,2,1} +e = set([1,2]) +f = set((1,2)) +g = set(range(2)) +h = 
set() +i = iter(set()) +j = type(iter(set())) + +new_set = set() +new_set.add("five") +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::FloatingPoint(2.1), + ExprResult::Integer(1.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(0.store()), + ExprResult::Integer(1.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::Set(Container::new(Set::default()))) + ); + assert!(matches!( + interpreter.state.read("i"), + Some(ExprResult::SetIterator(_)) + )); + assert_eq!( + interpreter.state.read("j").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::SetIterator) + ); + assert_eq!( + interpreter.state.read("new_set"), + 
Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::String(Str::new("five".into())) + ]))))) + ); + } + } + + let input = "set({1,2,3}, {1,2})"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 1, + 2, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn tuples() { + let input = r#" +a = (1,2,3) +print(a) +b = (1,2.1) +c = tuple([1,2]) +d = tuple({1,2}) +e = tuple((1,2)) +f = tuple(range(2)) +g = iter(()) +h = type(iter(())) +i = (4,) +j = 9, 10 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::FloatingPoint(2.1) + ])))) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(0.store()), + ExprResult::Integer(1.store()) + ])))) + ); + assert!(matches!( + 
interpreter.state.read("g"), + Some(ExprResult::TupleIterator(_)) + )); + assert_eq!( + interpreter.state.read("h").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::TupleIterator) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(4.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("j"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(9.store()), + ExprResult::Integer(10.store()), + ])))) + ); + } + } + + let input = "tuple([1,2,3], [1,2])"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 1, + 2, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn index_access() { + let input = r#" +a = [1,2,3] +b = a[0] +c = [1,2,3][1] +a[0] = 10 + +d = (1,2,3) +e = d[0] +f = (1,2,3)[1] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(10.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +d = (1,2,3) +d[0] = 10 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + 
MemphisError::Interpreter(InterpreterError::TypeError( + Some("'tuple' object does not support item assignment".into()), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +d = (1,2,3) +del d[0] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("'tuple' object does not support item deletion".into()), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +4[1] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("'int' object is not subscriptable".into()), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn for_in_loop() { + let input = r#" +a = [2,4,6,8] +b = 0 +c = True +for i in a: + b = b + i + c = False + print(b) +print(b) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(20.store())) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::Integer(8.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Boolean(false)) + ); + } + } + + let input = r#" +a = {2,4,6,8} +b = 0 +c = True +for i in a: + b = b + i + c = False + print(b) +print(b) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(20.store())) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::Integer(8.store())) + ); + assert_eq!( + 
interpreter.state.read("c"), + Some(ExprResult::Boolean(false)) + ); + } + } + + let input = r#" +a = (2,4,6,8) +b = 0 +c = True +for i in a: + b = b + i + c = False + print(b) +print(b) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(20.store())) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::Integer(8.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Boolean(false)) + ); + } + } + + let input = r#" +b = 0 +for i in range(5): + b = b + i +print(b) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(10.store())) + ); + } + } + + let input = r#" +a = {"a": 1,"b": 2} +b = 0 +for k, v in a.items(): + b = b + v +print(b) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(3.store())) + ); + } + } + } + + #[test] + fn ranges() { + let input = r#" +a = iter(range(0)) +b = type(iter(range(0))) +c = type(range(0)) +d = 0 +for i in range(3): + d += i + +e = 0 +r = range(3) +for i in r: + e += i +for i in r: + e += i +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::RangeIterator(_)) + )); + assert_eq!( + interpreter.state.read("b").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::RangeIterator) + ); + assert_eq!( + interpreter.state.read("c").unwrap().as_class().unwrap(), + 
interpreter.state.get_type_class(Type::Range) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer(6.store())) + ); + } + } + } + + #[test] + fn type_builtin() { + let input = r#" +class Foo: + def __init__(self): + self.x = 0 + +a = Foo() +b = type(a) +c = b() + +d = [1,2,3] +e = type(d) +f = e([1,2,3]) +g = e({4,5}) + +h = {1,2} +i = type(h) +j = i({6,7}) + +k = (1,2) +l = type(k) +m = l((8,9)) + +n = {"a": 1,"b": 2} +o = type(n) +p = o({"c": 3,"d": 4}) + +q = type(None) +r = type(Ellipsis) +s = type(NotImplemented) + +t = type(slice) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter + .state + .read("b") + .unwrap() + .as_class() + .unwrap() + .borrow() + .name, + "Foo".to_string() + ); + assert_eq!( + interpreter + .state + .read("c") + .unwrap() + .as_object() + .unwrap() + .borrow() + .class + .borrow() + .name, + "Foo" + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(4.store()), + ExprResult::Integer(5.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("j"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(6.store()), + ExprResult::Integer(7.store()), + ]))))) + ); + assert_eq!( + interpreter.state.read("m"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(8.store()), + ExprResult::Integer(9.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("p"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::from( + [ + ( + 
ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(3.store()) + ), + ( + ExprResult::String(Str::new("d".to_string())), + ExprResult::Integer(4.store()) + ), + ] + ))))) + ); + assert_eq!( + interpreter.state.read("q").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::None) + ); + assert_eq!( + interpreter.state.read("r").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Ellipsis) + ); + assert_eq!( + interpreter.state.read("s").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::NotImplemented) + ); + assert_eq!( + interpreter.state.read("t").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Type) + ); + } + } + } + + #[test] + fn list_comprehension() { + let input = r#" +a = [1,2,3] +b = [ i * 2 for i in a ] +c = [ i * 2 for i in a if False ] +d = [ j * 2 for j in a if j > 2 ] +e = [x * y for x in range(1,3) for y in range(1,3)] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(4.store()), + ExprResult::Integer(6.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::List(Container::new(List::default()))) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(6.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(4.store()), + ])))) + ); + } + } + } + + #[test] + fn set_comprehension() { + let input = r#" +a = [1,2,3] +b = { i * 2 for i in a } +"#; + let (mut parser, mut interpreter) = init(input); + + match 
interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::Integer(2.store()), + ExprResult::Integer(4.store()), + ExprResult::Integer(6.store()), + ]))))) + ); + } + } + } + + #[test] + fn object_comparison() { + let input = r#" +a = [8,9,10] +b = a == [8,9,10] +c = a == [8,9] +d = a == [8,10,9] +e = [8,9,10] == a +f = a != [8,9] +g = a != [8,10,9] +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.state.read("b"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("e"), Some(ExprResult::Boolean(true))); + assert_eq!(interpreter.state.read("f"), Some(ExprResult::Boolean(true))); + assert_eq!(interpreter.state.read("g"), Some(ExprResult::Boolean(true))); + } + } + } + + #[test] + fn generator_basics() { + let input = r#" +def countdown(n): + yield n + yield n - 1 + yield n - 2 + +a = countdown(5) +b = next(a) +c = next(a) +d = next(a) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(4.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +def countdown(n): + yield n + +a = countdown(5) +b = next(a) +c = next(a) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + 
MemphisError::Interpreter(InterpreterError::StopIteration( + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn generator_as_iterator() { + let input = r#" +def countdown(n): + yield n + yield n - 1 + yield n - 2 + +z = 0 +for i in countdown(5): + z = z + i +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(12.store())) + ); + } + } + + let input = r#" +def countdown(n): + yield n + yield n - 1 + yield n - 2 + +z = [ i for i in countdown(5) ] +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(5.store()), + ExprResult::Integer(4.store()), + ExprResult::Integer(3.store()) + ])))) + ); + } + } + } + + #[test] + fn generator_with_nested_yield() { + let input = r#" +def countdown(n): + while n > 0: + yield n + n = n - 1 + +z = 0 +for i in countdown(5): + z = z + i +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(15.store())) + ); + } + } + + let input = r#" +def countdown(n): + for i in range(n): + yield i + +z = 0 +for i in countdown(5): + z = z + i +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("z"), + Some(ExprResult::Integer(10.store())) + ); + } + } + + let input = r#" +def countdown(): + for i in [1,2]: + yield i * 2 + +a = list(countdown()) +"#; + + let (mut 
parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(4.store()), + ])))) + ); + } + } + + let input = r#" +def countdown(n): + if n > 5: + while n < 10: + yield n + n += 1 + elif n > 3: + while n < 8: + yield n + n += 2 + else: + while n > 0: + yield n + n -= 1 + +a = [ i for i in countdown(4) ] +b = [ i for i in countdown(3) ] +c = [ i for i in countdown(7) ] +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(4.store()), + ExprResult::Integer(6.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(1.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(7.store()), + ExprResult::Integer(8.store()), + ExprResult::Integer(9.store()) + ])))) + ); + } + } + } + + #[test] + fn basic_inheritance() { + let input = r#" +class Parent: + def baz(self): + return 4 + +class Foo(Parent): + def __init__(self): + self.x = 12 + +f = Foo() +a = f.baz() +b = f.x +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(interpreter.state.is_class("Foo")); + assert!(interpreter.state.is_class("Parent")); + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + assert_eq!( + interpreter.state.read("b"), + 
Some(ExprResult::Integer(12.store())) + ); + } + } + + let input = r#" +class Parent: + def baz(self): + self.y = 11 + return 4 + +class Foo(Parent): + def __init__(self): + self.x = 0 + + def bar(self): + return self.y + +f = Foo() +a = f.baz() +b = f.bar() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(11.store())) + ); + } + } + + let input = r#" +class abstractclassmethod(classmethod): + pass +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + // This used to throw an error based on classmethod not yet being a class. This is + // found in abc.py in the Python standard lib. + assert!(matches!( + interpreter.state.read("abstractclassmethod"), + Some(ExprResult::Class(_)) + )); + assert_eq!( + interpreter + .state + .read("abstractclassmethod") + .unwrap() + .as_class() + .unwrap() + .super_mro() + .first() + .unwrap() + .borrow() + .name, + "classmethod".to_string() + ); + } + } + } + + #[test] + fn more_inheritance() { + // Test that a parent constructor is not called when a child constructor is defined. 
+ let input = r#" +class Parent: + def __init__(self): + self.x = 3 + + def one(self): + return 1 + + def bar(self): + return 2 + +class ChildTwo(Parent): + def __init__(self): + pass + + def three(self): + return self.x + +d = ChildTwo().three() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::VariableNotFound( + "x".to_string(), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + // Test that multiple levels of a hierarchy can be traversed. + let input = r#" +class Parent: + def __init__(self): + self.x = 3 + + def one(self): + return 1 + + def bar(self): + return 2 + +class ChildTwo(Parent): + pass + +class ChildThree(ChildTwo): + def three(self): + return self.x + +child_three = ChildThree() +d = child_three.three() +e = child_three.one() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + // Test that an attribute defined in a parent's constructor is stored in the child. + let input = r#" +class Parent: + def __init__(self): + self.x = 3 + +class Child(Parent): + def three(self): + return self.x + +child = Child() +c = child.three() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + // Check that calls to `super()` return a `Type::Super`. 
+ let input = r#" +class Parent: + pass + +class Child(Parent): + def one(self): + return super() + + @classmethod + def two(cls): + return super() + +child = Child() +a = child.one() +b = Child.two() +c = child.two() +d = type(a) +e = type(b) +f = type(c) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::Super(_)) + )); + assert!(matches!( + interpreter.state.read("b"), + Some(ExprResult::Super(_)) + )); + assert!(matches!( + interpreter.state.read("c"), + Some(ExprResult::Super(_)) + )); + assert_eq!( + interpreter.state.read("d").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Super) + ); + assert_eq!( + interpreter.state.read("e").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Super) + ); + assert_eq!( + interpreter.state.read("f").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Super) + ); + } + } + + // Check that calls to `super()` work in an instance method. + let input = r#" +class Parent: + def one(self): + return 1 + +class Child(Parent): + def one(self): + return super().one() + +child = Child() +a = child.one() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + // Check that calls to `super()` work in an instance method, including references to `self`.
+ let input = r#" +class Parent: + def __init__(self): + self.val = 5 + + def one(self): + return self.val + +class Child(Parent): + def __init__(self): + super().__init__() + + def one(self): + return super().one() + + def two(self): + return self.val + +child = Child() +a = child.one() +b = child.two() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + // Check that calls to `super()` work in a class method. + let input = r#" +class Parent: + @classmethod + def two(cls): + return 2 + +class Child(Parent): + @classmethod + def two(cls): + return super().two() + +child = Child() +b = child.two() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + // Check that calls to `super()` work in a class method, including references to `cls`.
+ let input = r#" +class Parent: + val = 12 + + @classmethod + def two(cls): + return cls.val + +class Child(Parent): + @classmethod + def two(cls): + return super().two() + +child = Child() +b = child.two() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(12.store())) + ); + } + } + } + + #[test] + fn multiple_inheritance() { + let input = r#" +class Bar: pass +class Baz: pass +class Foo(Bar, Baz): pass + +a = Foo.__mro__ +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let mro = interpreter.state.read("a").map(|a| { + a.as_tuple() + .unwrap() + .into_iter() + .map(|i| i.as_class().unwrap().borrow().name.clone()) + .collect::>() + }); + assert_eq!( + mro, + Some(vec![ + "Foo".into(), + "Bar".into(), + "Baz".into(), + "object".into() + ]) + ); + } + } + } + + #[test] + fn dictionaries() { + let input = r#" +a = { "b": 4, 'c': 5 } +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::from( + [ + ( + ExprResult::String(Str::new("b".to_string())), + ExprResult::Integer(4.store()) + ), + ( + ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(5.store()) + ), + ] + ))))) + ); + } + } + + let input = r#" +a = { "b": 4, 'c': 5 } +b = a.items() +c = { key: value * 2 for key, value in a.items() } +d = dict({ "b": 4, 'c': 5 }) +e = dict([('b', 4), ('c', 5)]) +f = a["b"] +g = {} +h = {}.items() + +i = {}.keys() +j = {"b": 4, 'c': 5}.keys() +k = iter({}.keys()) +l = type(iter({}.keys())) + +m = {}.values() +n = {"b": 4, 'c': 5}.values() +o = iter({}.values()) 
+p = type(iter({}.values())) + +q = iter({}.items()) +r = type(iter({}.items())) + +s = type({}.keys()) +t = type({}.values()) +u = type({}.items()) +v = [ val for val in a ] +w = { key for key, value in a.items() } +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::from( + [ + ( + ExprResult::String(Str::new("b".to_string())), + ExprResult::Integer(4.store()) + ), + ( + ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(5.store()) + ), + ] + ))))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::DictItems(DictItems::new(vec![ + ( + ExprResult::String(Str::new("b".to_string())), + ExprResult::Integer(4.store()) + ), + ( + ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(5.store()) + ), + ]))) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::from( + [ + ( + ExprResult::String(Str::new("b".to_string())), + ExprResult::Integer(8.store()) + ), + ( + ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(10.store()) + ), + ] + ))))) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::from( + [ + ( + ExprResult::String(Str::new("b".to_string())), + ExprResult::Integer(4.store()) + ), + ( + ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(5.store()) + ), + ] + ))))) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::from( + [ + ( + ExprResult::String(Str::new("b".to_string())), + ExprResult::Integer(4.store()) + ), + ( + ExprResult::String(Str::new("c".to_string())), + ExprResult::Integer(5.store()) + ), + ] + ))))) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Integer(4.store())) + 
); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::Dict(Container::new(Dict::new(HashMap::new())))) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::DictItems(DictItems::new(vec![]))) + ); + assert!(matches!( + interpreter.state.read("q"), + Some(ExprResult::DictItemsIterator(_)) + )); + assert_eq!( + interpreter.state.read("r").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::DictItemIterator) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::DictKeys(DictKeys::new(vec![]))) + ); + assert_eq!( + interpreter.state.read("j"), + Some(ExprResult::DictKeys(DictKeys::new(vec![ + ExprResult::String(Str::new("b".into())), + ExprResult::String(Str::new("c".into())), + ]))) + ); + assert!(matches!( + interpreter.state.read("k"), + Some(ExprResult::DictKeysIterator(_)) + )); + assert_eq!( + interpreter.state.read("l").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::DictKeyIterator) + ); + assert_eq!( + interpreter.state.read("m"), + Some(ExprResult::DictValues(DictValues::new(vec![]))) + ); + assert_eq!( + interpreter.state.read("n"), + Some(ExprResult::DictValues(DictValues::new(vec![ + ExprResult::Integer(4.store()), + ExprResult::Integer(5.store()), + ]))) + ); + assert!(matches!( + interpreter.state.read("o"), + Some(ExprResult::DictValuesIterator(_)) + )); + assert_eq!( + interpreter.state.read("p").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::DictValueIterator) + ); + assert_eq!( + interpreter.state.read("s").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::DictKeys) + ); + assert_eq!( + interpreter.state.read("t").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::DictValues) + ); + assert_eq!( + interpreter.state.read("u").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::DictItems) + ); + assert_eq!( + interpreter.state.read("v"), + 
Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::String(Str::new("b".into())), + ExprResult::String(Str::new("c".into())), + ])))) + ); + assert_eq!( + interpreter.state.read("w"), + Some(ExprResult::Set(Container::new(Set::new(HashSet::from([ + ExprResult::String(Str::new("b".into())), + ExprResult::String(Str::new("c".into())), + ]))))) + ); + } + } + } + + #[test] + fn assert() { + let input = r#" +assert True +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => {} + } + + let input = r#" +assert False +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::AssertionError( + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn try_except_finally() { + let input = r#" +try: + a = 4 / 0 +except: + a = 2 +finally: + a = 3 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +try: + a = 4 / 0 +except: + a = 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +try: + a = 4 / 1 +except: + a = 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + } + } + + let input = r#" +try: + a = 4 / 1 +except: + a = 2 +finally: + a = 3 +"#; + let 
(mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +try: + a = 4 / 1 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert!(matches!(e, MemphisError::Parser(ParserError::SyntaxError))), + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +try: + a = 4 / 0 +except ZeroDivisionError: + a = 2 +except Exception: + a = 3 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +try: + b = b + 1 + a = 4 / 0 +except ZeroDivisionError: + a = 2 +except Exception: + a = 3 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +try: + b = b + 1 + a = 4 / 0 +except ZeroDivisionError: + a = 2 +except: + a = 3 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +try: + b = b + 1 + a = 4 / 0 +except ZeroDivisionError: + a = 2 +except Exception as e: + a = 3 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + 
interpreter.state.read("e"), + Some(ExprResult::Exception(Box::new( + InterpreterError::VariableNotFound( + "b".into(), + interpreter.state.call_stack() + ) + ))) + ); + } + } + + let input = r#" +try: + b = b + 1 + a = 4 / 0 +except (ZeroDivisionError, Exception): + a = 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +try: + b = b + 1 + a = 4 / 0 +except (Exception, ZeroDivisionError): + a = 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +try: + a = 8 / 1 +except ZeroDivisionError: + a = 2 +except Exception as e: + a = 3 +else: + a = 4 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + } + } + + let input = r#" +try: + a = 8 / 1 +except ZeroDivisionError: + a = 2 +except Exception as e: + a = 3 +else: + a = 4 +finally: + a = 5 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + // Uncaught exception + let input = r#" +try: + a = 8 / 0 +except ValueError: + a = 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::DivisionByZero( + "division by zero".into(), + 
interpreter.state.call_stack(), + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +try: + a = 8 / 0 +except ZeroDivisionError: + raise +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::DivisionByZero( + "division by zero".into(), + interpreter.state.call_stack(), + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn args_and_kwargs() { + let input = r#" +def test_kwargs(**kwargs): + print(kwargs['a']) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + let expected_name = "test_kwargs".to_string(); + let expected_args = ParsedArgDefinitions { + args: vec![], + args_var: None, + kwargs_var: Some("kwargs".into()), + }; + assert!(matches!( + interpreter.state.read("test_kwargs").unwrap().as_function().unwrap().borrow().clone(), + Function { + name, + args, + .. 
+ } if name == expected_name && args == expected_args + )); + } + } + + let input = r#" +def test_kwargs(**kwargs): + return kwargs['a'] + +a = test_kwargs(a=5, b=2) +# A second test to ensure the value is not being set using b=2 +b = test_kwargs(a=5, b=2) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let input = r#" +def test_kwargs(**kwargs): + return kwargs['a'] + +a = test_kwargs(**{'a': 5, 'b': 2}) +c = {'a': 4, 'b': 3} +b = test_kwargs(**c) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(4.store())) + ); + } + } + + let input = r#" +def test_args(*args): + return args[1] + +c = [0, 1] +b = test_args(*c) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +def test_args(*args): + return args[1] + +c = [2, 3] +b = test_args(0, 1, *c) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +def test_args(one, two, *args): + return args[1] + +c = [2, 3] +b = test_args(0, 1, *c) +"#; + let (mut parser, mut interpreter) = init(input); + + match 
interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +def test_args(one, two): + return one + +b = test_args(0) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 2, + 1, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +def test_args(one, two): + return one + +b = test_args(1, 2, 3) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 2, + 3, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +def test_args(one, two, *args): + return args + +b = test_args(1, 2) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Tuple(Container::new(Tuple::default()))) + ); + } + } + + let input = r#" +class Foo: + def __init__(self, **kwargs): + self.a = kwargs['a'] + +foo = Foo(a=5) +a = foo.a +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + } + } + } + + #[test] + fn closures() { + let input = r#" +def _cell_factory(): + a = 1 + def f(): + nonlocal a + return f.__closure__ + +a = type(_cell_factory()[0]) +b = _cell_factory()[0].cell_contents +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + 
Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Cell) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +def _cell_factory(): + a = 1 + b = 2 + def f(): + nonlocal a + return f.__closure__ + +a = type(_cell_factory()[0]) +b = _cell_factory()[0].cell_contents +c = len(_cell_factory()) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Cell) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +def _cell_factory(): + a = 1 + b = 2 + def f(): + print(a) + print(b) + return f.__closure__ + +a = _cell_factory()[0].cell_contents +b = _cell_factory()[1].cell_contents +c = len(_cell_factory()) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +def _cell_factory(): + a = 1 + b = 2 + def f(): + b = 3 + print(a) + print(b) + return f.__closure__ + +c = len(_cell_factory()) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("c"), + 
Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +def _cell_factory(): + a = 1 + b = 2 + def f(): + b = 3 + return f.__closure__ + +c = _cell_factory() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.state.read("c"), Some(ExprResult::None)); + } + } + } + + #[test] + fn decorators() { + let input = r#" +def test_decorator(func): + def wrapper(): + return func() * 2 + return wrapper + +def get_val_undecorated(): + return 2 + +@test_decorator +def get_val_decorated(): + return 2 + +@test_decorator +@test_decorator +def twice_decorated(): + return 2 + +a = test_decorator(get_val_undecorated)() +b = get_val_decorated() +c = twice_decorated() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(4.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(8.store())) + ); + } + } + + let input = r#" +def multiply(factor): + def decorator(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs) * factor + return wrapper + return decorator + +@multiply(3) +def get_val(): + return 2 + +@multiply(4) +def get_larger_val(): + return 2 + +a = get_val() +b = get_larger_val() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(6.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(8.store())) + ); + } + } + } + + #[test] + fn raise() { + let input = r#" +raise TypeError +"#; + let (mut parser, mut interpreter) = 
init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + None, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +raise TypeError('type is no good') +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("type is no good".into()), + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[cfg(feature = "c_stdlib")] + #[test] + fn c_stdlib() { + let input = r#" +import sys +a = sys.maxsize + +import time +b = time.time() +c = time.ctime() +d = time.strftime("%a, %d %b %Y %H:%M:%S +0000") + +from _weakref import ref + +e = type(sys.implementation) +f = type(sys) + +g = type(itertools) +h = type(_thread) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::Integer(_)) + )); + assert!(matches!( + interpreter.state.read("b"), + Some(ExprResult::FloatingPoint(_)) + )); + assert!(matches!( + interpreter.state.read("c"), + Some(ExprResult::String(_)) + )); + assert!(matches!( + interpreter.state.read("d"), + Some(ExprResult::String(_)) + )); + match interpreter.state.read("a") { + Some(ExprResult::Integer(a)) => { + assert!(a.borrow().clone() > 0); + } + _ => panic!("Unexpected type!"), + } + match interpreter.state.read("b") { + Some(ExprResult::FloatingPoint(b)) => { + assert!(b > 1701281981.0); + } + _ => panic!("Unexpected type!"), + } + match interpreter.state.read("c") { + Some(ExprResult::String(c)) => { + assert!(c.0.len() > 10); + } + _ => panic!("Unexpected type!"), + } + match interpreter.state.read("d") { + Some(ExprResult::String(d)) => { + assert!(d.0.len() > 
10); + } + _ => panic!("Unexpected type!"), + } + assert!(matches!( + interpreter.state.read("ref"), + Some(ExprResult::CPythonObject(_)) + )); + assert!(matches!( + interpreter.state.read("e"), + Some(ExprResult::CPythonClass(_)) + )); + assert_eq!( + interpreter.state.read("f").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Module) + ); + assert_eq!( + interpreter.state.read("g").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Module) + ); + assert_eq!( + interpreter.state.read("h").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Module) + ); + } + } + } + + #[test] + fn context_manager() { + let input = r#" +class MyContextManager: + def __init__(self): + self.a = 0 + + def __enter__(self): + self.a += 1 + return self + + def call(self): + self.a += 1 + + def __exit__(self, exc_type, exc_value, traceback): + self.a += 1 + self + +with MyContextManager() as cm: + cm.call() + +a = cm.a +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +class MyContextManager: + def __init__(self): + self.a = 0 + + def call(self): + self.a += 1 + + def __exit__(self, exc_type, exc_value, traceback): + self.a += 1 + self + +with MyContextManager() as cm: + cm.call() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::MissingContextManagerProtocol( + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an exception!"), + } + + let input = r#" +class MyContextManager: + def __init__(self): + self.a = 0 + + def __enter__(self): + self.a += 1 + return self + + def call(self): + self.a += 1 + +with MyContextManager() as cm: + cm.call() +"#; + let (mut parser, mut 
interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::MissingContextManagerProtocol( + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an exception!"), + } + } + + #[test] + fn delete() { + let input = r#" +a = 4 +del a +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.state.read("a"), None); + } + } + + let input = r#" +a = {'b': 1, 'c': 2} +del a['b'] +c = a['b'] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::KeyError( + "b".into(), + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error"), + } + + let input = r#" +a = [0,1,2] +del a[1] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(0.store()), + ExprResult::Integer(2.store()) + ])))) + ); + } + } + + let input = r#" +class Foo: + def __init__(self): + self.x = 1 + +f = Foo() +del f.x +a = f.x +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::VariableNotFound( + "x".into(), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error"), + } + + let input = r#" +class Foo: + def bar(self): + return 1 + +f = Foo() +del f.bar +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::VariableNotFound( + "bar".into(), + 
interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error"), + } + + let input = r#" +a = 4 +b = 5 +c = 6 +del a, c +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.state.read("a"), None); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!(interpreter.state.read("c"), None); + } + } + } + + #[test] + fn byte_string() { + let input = r#" +a = b'hello' +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Bytes(Container::new(Bytes::new( + "hello".into() + )))) + ); + } + } + + let input = r#" +a = iter(b'hello') +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::BytesIterator(_)) + )) + } + } + + let input = r#" +a = type(iter(b'hello')) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::BytesIterator) + ) + } + } + } + + #[test] + fn byte_array() { + let input = r#" +a = bytearray() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::ByteArray(Container::new(ByteArray::new( + "".into() + )))) + ); + } + } + + let input = r#" +a = bytearray(b'hello') +"#; + let (mut parser, mut interpreter) = init(input); + + match 
interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::ByteArray(Container::new(ByteArray::new( + "hello".into() + )))) + ); + } + } + + let input = r#" +a = bytearray('hello') +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("string argument without an encoding".into()), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +a = iter(bytearray()) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::ByteArrayIterator(_)) + )) + } + } + + let input = r#" +a = type(iter(bytearray())) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::ByteArrayIterator) + ) + } + } + } + + #[test] + fn bytes_builtin() { + let input = r#" +a = bytes() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Bytes(Container::new(Bytes::new("".into())))) + ); + } + } + + let input = r#" +a = bytes(b'hello') +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Bytes(Container::new(Bytes::new( + "hello".into() + )))) + ); + } + } + + let input = r#" +a = 
bytes('hello') +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("string argument without an encoding".into()), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +a = iter(bytes()) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::BytesIterator(_)) + )) + } + } + + let input = r#" +a = type(iter(bytes())) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::BytesIterator) + ) + } + } + } + + #[test] + fn compound_operator() { + let input = r#" +a = 5 +a += 1 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(6.store())) + ); + } + } + + let input = r#" +a = 5 +a -= 1 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + } + } + + let input = r#" +a = 5 +a *= 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(10.store())) + ); + } + } + + let input = r#" +a = 5 +a /= 2 +"#; + let (mut parser, mut interpreter) = 
init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +a = 0b0101 +a &= 0b0100 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(4.store())) + ); + } + } + + let input = r#" +a = 0b0101 +a |= 0b1000 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(13.store())) + ); + } + } + + let input = r#" +a = 0b0101 +a ^= 0b0100 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +a = 5 +a //= 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + ); + } + } + + let input = r#" +a = 0b0101 +a <<= 1 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(10.store())) + ); + } + } + + let input = r#" +a = 0b0101 +a >>= 1 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(2.store())) + 
); + } + } + + let input = r#" +a = 11 +a %= 2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(1.store())) + ); + } + } + + let input = r#" +a = 2 +a **= 3 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(8.store())) + ); + } + } + } + + #[test] + fn iter_builtin() { + let input = r#" +a = iter([1,2,3]) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::ListIterator(_)) + )); + } + } + + let input = r#" +b = 0 +for i in iter([1,2,3]): + b += i +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(6.store())) + ); + } + } + } + + #[test] + fn f_strings() { + let input = r#" +name = "John" +a = f"Hello {name}" +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::String(Str::new("Hello John".into()))) + ); + } + } + } + + #[test] + fn reversed() { + let input = r#" +a = reversed([]) +b = iter(reversed([])) +c = type(reversed([])) +d = type(iter(reversed([]))) + +e = [ i for i in reversed([1,2,3]) ] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + 
interpreter.state.read("a"), + Some(ExprResult::ReversedIterator(_)) + )); + assert!(matches!( + interpreter.state.read("b"), + Some(ExprResult::ReversedIterator(_)) + )); + assert_eq!( + interpreter.state.read("c").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::ReversedIterator) + ); + assert_eq!( + interpreter.state.read("d").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::ReversedIterator) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(1.store()), + ])))) + ); + } + } + } + + #[test] + fn binary_operators() { + let input = "a = 0x1010 & 0x0011"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(0x0010.store())) + ); + } + } + + let input = "a = 0o1010 | 0o0011"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(0o1011.store())) + ); + } + } + + let input = "a = 0b1010 ^ 0b0011"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(0b1001.store())) + ); + } + } + + let input = "a = 23 % 5"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = "a = 0b0010 << 1"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut 
parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(0b0100.store())) + ); + } + } + + let input = "a = 2 * 3 << 2 + 4 & 205"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(128.store())) + ); + } + } + + let input = "a = ~0b1010"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer((-11).store())) + ); + } + } + + let input = "a = ~5.5"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("bad operand type for unary ~: 'float'".to_string()), + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an error!"), + } + + // This tests the right-associativity of exponentiation. 
+ // right-associativity gives 2 ** (3 ** 2) == 512 + // NOT + // left-associativity which gives (2 ** 3) ** 2 == 64 + let input = "a = 2 ** 3 ** 2"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(512.store())) + ); + } + } + } + + #[test] + fn control_flow() { + let input = r#" +a = 0 +for i in [1,2,3,4,5,6]: + if i == 4: + break + a += i +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(6.store())) + ); + } + } + + let input = r#" +a = 0 +for i in [1,2,3,4,5,6]: + if i == 4: + continue + a += i +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(17.store())) + ); + } + } + + let input = r#" +a = 0 +i = 0 +b = [1,2,3,4,5,6] +while i < 6: + i += 1 + if b[i-1] == 4: + break + a += b[i-1] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(6.store())) + ); + } + } + + let input = r#" +a = 0 +i = 0 +b = [1,2,3,4,5,6] +while i < 6: + i += 1 + if b[i-1] == 4: + continue + a += b[i-1] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(17.store())) + ); + } + } + + let input = r#" +a = 0 +for i in [1,2,3,4,5,6]: + if i == 4: + break + a += i +else: + a = 1024 +"#; + let (mut parser, mut 
interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(6.store())) + ); + } + } + + let input = r#" +a = 0 +for i in [1,2,3,4,5,6]: + a += i +else: + a = 1024 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(1024.store())) + ); + } + } + } + + #[test] + fn zip() { + let input = r#" +a = zip() +b = iter(zip()) +c = type(zip()) + +d = [ i for i in zip([1,2,3], [4,5,6]) ] +e = [ i for i in iter(zip([1,2,3], [4,5,6])) ] + +f = [ i for i in zip(range(5), range(4)) ] +g = [ i for i in zip(range(5), range(4), range(3)) ] + +h = [ i for i in zip([1,2,3], [4,5,6], strict=False) ] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::Zip(_)) + )); + assert!(matches!( + interpreter.state.read("b"), + Some(ExprResult::Zip(_)) + )); + assert_eq!( + interpreter.state.read("c").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Zip) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(4.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(5.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(6.store()), + ]))), + ])))) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::List(Container::new(List::new(vec![ + 
ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(4.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(5.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(6.store()), + ]))), + ])))) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(0.store()), + ExprResult::Integer(0.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(1.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(2.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(3.store()), + ]))), + ])))) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(0.store()), + ExprResult::Integer(0.store()), + ExprResult::Integer(0.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(1.store()), + ExprResult::Integer(1.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(2.store()), + ExprResult::Integer(2.store()), + ]))), + ])))) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(4.store()), + ]))), + ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(5.store()), + ]))), + 
ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(6.store()), + ]))), + ])))) + ); + } + } + + let input = r#" +f = [ i for i in zip(range(5), range(4), strict=True) ] +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!(e, MemphisError::Interpreter(InterpreterError::RuntimeError)); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn type_class() { + let input = r#" +a = type +b = type.__dict__ +c = type(type.__dict__) +d = type(dict.__dict__['fromkeys']) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Type) + ); + assert!(matches!( + interpreter.state.read("b"), + Some(ExprResult::MappingProxy(_)) + )); + assert_eq!( + interpreter.state.read("c").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::MappingProxy) + ); + assert_eq!( + interpreter.state.read("d").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::BuiltinMethod) + ); + } + } + } + + #[test] + fn type_alias() { + let input = r#" +a = type(list[int]) +b = type(a) +c = type(int | str) +d = list[int] +e = int | str +f = int + +# This used to fail inside classmethod::__new__ +class MyClass: + __class_getitem__ = classmethod(a) +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a").unwrap(), + ExprResult::Class(_) + )); + assert_eq!( + interpreter.state.read("b").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Type) + ); + assert!(matches!( + interpreter.state.read("c").unwrap(), + ExprResult::Class(_) + )); + assert!(matches!( + 
interpreter.state.read("d").unwrap(), + ExprResult::TypeNode(_) + )); + assert!(matches!( + interpreter.state.read("e").unwrap(), + ExprResult::TypeNode(_) + )); + assert!(matches!( + interpreter.state.read("f").unwrap(), + ExprResult::Class(_) + )); + } + } + } + + #[test] + fn class_variable() { + let input = r#" +class Foo: + a = 6 + +b = Foo.a +Foo.a = 5 +c = Foo.a +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(6.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let input = r#" +class Foo: + a = 6 + + def __init__(self): + self.a = 5 + +b = Foo.a +c = Foo().a +d = Foo.a +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(6.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(6.store())) + ); + } + } + } + + #[test] + fn class_method() { + let input = r#" +class Foo: + def make(cls): + return 5 + + make = classmethod(make) + +b = Foo.make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let input = r#" +class Foo: + @classmethod + def make(cls): + return 5 + +b = Foo.make() +c = Foo().make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + 
Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let input = r#" +class Foo: + val = 10 + + def __init__(self): + self.val = 9 + + @classmethod + def make(cls): + return cls.val + +b = Foo.make() +c = Foo.val +d = Foo().val +e = Foo().make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(10.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(10.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(9.store())) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer(10.store())) + ); + } + } + + let input = r#" +class Foo: + def __init__(self): + self.val = 9 + + @classmethod + def make(cls): + return cls.val + +b = Foo.make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::VariableNotFound( + "val".into(), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +class Foo: + def __init__(self): + self.val = 9 + + @classmethod + def make(cls): + return cls.val + +b = Foo().make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::VariableNotFound( + "val".into(), + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn static_method() { + let input = r#" +class Foo: + @staticmethod + def make(): + return 5 + +b = Foo.make() +c = Foo().make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => 
panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + // Before we explicitly supported static methods, this case used to work. Let's test it to + // ensure we keep getting an error now. + let input = r#" +class Foo: + def make(): + return 5 + +c = Foo().make() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 0, + 1, + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn new_method() { + let input = r#" +class SingletonA: + _instance = None + _initialized = False + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self, data): + if not self._initialized: + self.data = data + self._initialized = True + +singleton1 = SingletonA("First") +singleton2 = SingletonA("Second") + +a = singleton1.data +b = singleton2.data +c = singleton1 is singleton2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::String(Str::new("First".into()))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::String(Str::new("First".into()))) + ); + assert_eq!(interpreter.state.read("c"), Some(ExprResult::Boolean(true))); + } + } + + let input = r#" +class SingletonB: + _instance = None + + def __new__(cls, *args, **kwargs): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self, data): + self.data = data + +singleton1 = SingletonB("First") +singleton2 = 
SingletonB("Second") + +a = singleton1.data +b = singleton2.data +c = singleton1 is singleton2 +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::String(Str::new("Second".into()))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::String(Str::new("Second".into()))) + ); + assert_eq!(interpreter.state.read("c"), Some(ExprResult::Boolean(true))); + } + } + + let input = r#" +class Foo: + def __new__(cls): + pass + +a = Foo() +"#; + let (mut parser, mut interpreter) = init(input); + + // TODO This test should assign None to a, but right now our object creation process + // expects an error. + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::ExpectedObject( + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn metaclasses() { + let input = r#" +class InterfaceMeta(type): + def __new__(mcls, name, bases, namespace, **kwargs): + return super().__new__(mcls, name, bases, namespace) + + def run(cls): + return 5 + +class BaseInterface(metaclass=InterfaceMeta): + @classmethod + def run_base(cls): + return 5 + +class ConcreteImplementation(BaseInterface): + pass + +a = ConcreteImplementation.run() +b = ConcreteImplementation.run_base() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let input = r#" +class InterfaceMeta(type): + def __new__(mcls, name, bases, namespace, **kwargs): + return super().__new__(mcls, name, bases, namespace) + + def run(cls): + return 5 + 
+class BaseInterface(metaclass=InterfaceMeta): + pass + +class ConcreteImplementation(BaseInterface): + pass + +# This should use the metaclass implementation. +a = ConcreteImplementation.run() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + } + } + + let input = r#" +class ABCMeta(type): + def __new__(mcls, name, bases, namespace, **kwargs): + cls = super().__new__(mcls, name, bases, namespace) + cls.val = 33 + return cls + + def register(cls): + return cls.val + +class Coroutine(metaclass=ABCMeta): + def sub_func(self): + pass + +a = Coroutine.register() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(33.store())) + ); + } + } + } + + #[test] + fn scope_modifiers() { + let input = r#" +global_var_one = 10 +global_var_two = 10 + +def global_shadow(): + global_var_one = 9 +global_shadow() + +def global_modified(): + global global_var_two + global_var_two = 9 +global_modified() + +a = global_var_one +b = global_var_two +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(10.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(9.store())) + ); + } + } + + let input = r#" +def nonlocal_shadow(): + var = 5 + def f(): + var = 4 + f() + return var + +def nonlocal_modified(): + var = 5 + def f(): + nonlocal var + var = 4 + f() + return var + +a = nonlocal_shadow() +b = nonlocal_modified() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { 
+ Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(4.store())) + ); + } + } + + let input = r#" +def outer(): + x = 10 + + def middle(): + def inner(): + # Refers to x in the outer scope, not middle, as it isn't defined in middle + nonlocal x + x = 20 + + inner() + + middle() + return x + +a = outer() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(20.store())) + ); + } + } + + let input = r#" +def foo(): + def inner(): + nonlocal a + inner() +foo() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + // this should become a parser error but it will require adding scope + // context to the parser + MemphisError::Interpreter(InterpreterError::SyntaxError( + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +def foo(): + nonlocal a +foo() +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + // this should become a parser error but it will require adding scope + // context to the parser + MemphisError::Interpreter(InterpreterError::SyntaxError( + interpreter.state.call_stack() + )) + ); + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +nonlocal a +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + // this should become a parser error but it will require adding scope + // context to the parser + MemphisError::Interpreter(InterpreterError::SyntaxError( + interpreter.state.call_stack() + )) + ); + } + Ok(_) 
=> panic!("Expected an error!"), + } + } + + #[test] + fn object_builtin() { + let input = r#" +a = object +b = object() +c = object().__str__ +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Object) + ); + assert!(matches!( + interpreter.state.read("b"), + Some(ExprResult::Object(_)) + )); + assert!(matches!( + interpreter.state.read("c"), + Some(ExprResult::Method(_)) + )); + } + } + } + + #[test] + fn int_builtin() { + let input = r#" +a = int +b = int() +c = int(5) +d = int('6') +"#; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Int) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(0.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(5.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(6.store())) + ); + } + } + } + + #[test] + fn bound_function() { + let input = r#" +class Child: + def one(self): + return 1 + + @classmethod + def two(cls): + return 2 + + @staticmethod + def three(): + return 3 + +child = Child() +a = child.one +b = Child.one +c = child.two +d = Child.two +e = child.three +f = Child.three + +g = type(a) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert!(matches!( + interpreter.state.read("a"), + Some(ExprResult::Method(_)) + )); + //assert!(matches!( + // interpreter.state.read("b"), + // Some(ExprResult::Function(_)) + //)); + assert!(matches!( + interpreter.state.read("c"), + 
Some(ExprResult::Method(_)) + )); + assert!(matches!( + interpreter.state.read("d"), + Some(ExprResult::Method(_)) + )); + assert!(matches!( + interpreter.state.read("e"), + Some(ExprResult::Function(_)) + )); + assert!(matches!( + interpreter.state.read("f"), + Some(ExprResult::Function(_)) + )); + assert_eq!( + interpreter.state.read("g").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Method) + ); + } + } + + let input = r#" +class Child: + def one(self): + return 1 + + @classmethod + def two(cls): + return 2 + + @staticmethod + def three(): + return 3 + +child = Child() +a = child.one() +#b = Child.one # This one causes an error, test this separately +c = child.two() +d = Child.two() +e = child.three() +f = Child.three() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer(3.store())) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +class Child: + def one(self): + return 1 + +child = Child() +b = Child.one() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("one missing 1 required positional argument: 'self'".to_string()), + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +class Child: + class_var = 22 + + def __init__(self): + self.instance_var = 11 + + def one(self): + return self.instance_var + + @classmethod + def two(cls): + 
return cls.class_var + +child = Child() +a = child.one() +#b = Child.one # This one causes an error, test this separately +c = child.two() +d = Child.two() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(11.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(22.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(22.store())) + ); + } + } + } + + #[test] + fn unpacking() { + let input = r#" +def foo(): + return 2, 3 + +a = foo() +b, c = foo() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(2.store()), + ExprResult::Integer(3.store()) + ])))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +b, c = [1, 2, 3] +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::ValueError( + "too many values to unpack (expected 2)".into(), + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +a, b, c = [2, 3] +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::ValueError( + "not enough values to unpack (expected 3, got 2)".into(), + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +b, c = (1, 
2) +d, e = [1, 2] +f, g = {1, 2} +h, i, j = range(1, 4) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("e"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::Integer(1.store())) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::Integer(2.store())) + ); + assert_eq!( + interpreter.state.read("j"), + Some(ExprResult::Integer(3.store())) + ); + } + } + + let input = r#" +l = [1,2] +a = (*l,) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()) + ])))) + ); + } + } + + let input = r#" +a = (*5) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("Value after * must be an iterable, not int".into()), + interpreter.state.call_stack() + )) + ), + Ok(_) => panic!("Expected an error!"), + } + + // TODO not sure where to detect this, probably in semantic analysis + // let input = r#" + // l = [1,2] + // a = (*l) + // "#; + // + // let (mut parser, mut interpreter) = init(input); + // + // match 
/// Conditional expressions (`x if cond else y`): one assignment takes the "true" branch and
/// one takes the "false" branch.
#[test]
fn ternary_operation() {
    let input = r#"
a = 5 if True else 6
b = 7 if False else 8
"#;

    let (mut parser, mut interpreter) = init(input);

    // A failing run aborts the test with the interpreter's own error.
    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    // `True` selects the left operand ...
    assert_eq!(
        interpreter.state.read("a"),
        Some(ExprResult::Integer(5.store()))
    );
    // ... and `False` selects the operand after `else`.
    assert_eq!(
        interpreter.state.read("b"),
        Some(ExprResult::Integer(8.store()))
    );
}
Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(10.store()), + ExprResult::Integer(8.store()), + ExprResult::Integer(6.store()), + ExprResult::Integer(4.store()), + ExprResult::Integer(2.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(3.store()), + ExprResult::Integer(4.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("g"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(10.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("i"), + Some(ExprResult::List(Container::new(List::default()))) + ); + assert!(matches!( + interpreter.state.read("j"), + Some(ExprResult::Slice(_)) + )); + assert!(matches!( + interpreter.state.read("k"), + Some(ExprResult::Slice(_)) + )); + assert!(matches!( + interpreter.state.read("l"), + Some(ExprResult::Slice(_)) + )); + assert_eq!( + interpreter.state.read("m").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::Slice) + ); + assert_eq!( + interpreter.state.read("n"), + Some(ExprResult::String(Str::new("h".into()))) + ); + assert_eq!( + interpreter.state.read("o"), + Some(ExprResult::String(Str::new("h".into()))) + ); + assert_eq!( + interpreter.state.read("p"), + Some(ExprResult::String(Str::new("he".into()))) + ); + //assert_eq!( + // interpreter.state.read("q"), + // Some(ExprResult::String(Str::new("he".into()))) + //); + } + } + } + + #[test] + fn property_decorator() { + let input = r#" +class Foo: + def __init__(self): + self.val = 3 + + @property + def run(self): + return 2 * self.val + +a = Foo().run +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + 
/// A function whose parameter list contains the positional-only marker `/` is accepted and
/// bound as a function object.
#[test]
fn slash_args() {
    let input = r#"
def foo(cls, /):
    pass
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    assert!(matches!(
        interpreter.state.read("foo"),
        Some(ExprResult::Function(_))
    ));
}

/// `globals()` returns a mapping through which a module-level variable can be read back.
#[test]
fn globals_builtin() {
    let input = r#"
a = 4
b = globals()
c = b['a']
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    assert_eq!(
        interpreter.state.read("c"),
        Some(ExprResult::Integer(4.store()))
    );
}

/// Generator expressions: stepping one with `next`, and draining a two-clause one with `list`.
#[test]
fn generator_comprehension() {
    let input = r#"
a = (i * 2 for i in [1,2])
b = next(a)
c = next(a)

d = (x * y for x in [2,4] for y in [3,5])
e = type(d)
f = list(d)
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    // Two successive `next` calls yield the doubled elements in order.
    assert_eq!(
        interpreter.state.read("b"),
        Some(ExprResult::Integer(2.store()))
    );
    assert_eq!(
        interpreter.state.read("c"),
        Some(ExprResult::Integer(4.store()))
    );

    // The expression itself evaluates to a generator whose type is the generator class.
    assert!(matches!(
        interpreter.state.read("d"),
        Some(ExprResult::Generator(_))
    ));
    assert_eq!(
        interpreter.state.read("e").unwrap().as_class().unwrap(),
        interpreter.state.get_type_class(Type::Generator)
    );

    // The second `for` clause varies fastest: 2*3, 2*5, 4*3, 4*5.
    assert_eq!(
        interpreter.state.read("f"),
        Some(ExprResult::List(Container::new(List::new(vec![
            ExprResult::Integer(6.store()),
            ExprResult::Integer(10.store()),
            ExprResult::Integer(12.store()),
            ExprResult::Integer(20.store()),
        ]))))
    );
}
init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::FrozenSet(Container::new(FrozenSet::new( + HashSet::from([ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ]) + )))) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::FrozenSet(Container::new(FrozenSet::default()))) + ); + assert_eq!( + interpreter.state.read("c").unwrap().as_class().unwrap(), + interpreter.state.get_type_class(Type::FrozenSet) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::List(Container::new(List::new(vec![ + ExprResult::Integer(1.store()), + ExprResult::Integer(2.store()), + ])))) + ); + assert_eq!( + interpreter.state.read("e").unwrap().get_type(), + Type::Method + ); + } + } + + let input = "frozenset([1,2,3], [1,2])"; + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 1, + 3, + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn default_args() { + let input = r#" +def foo(data=None): + return data if data is not None else 99 + +a = foo(88) +b = foo() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Integer(88.store())) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Integer(99.store())) + ); + } + } + + let input = r#" +def foo(data_one, data_two=None): + pass + +b = foo() +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::WrongNumberOfArguments( + 2, + 0, + 
/// `getattr` with an existing attribute, with a default for a missing attribute, and without
/// a default for a missing attribute (which must fail with an `AttributeError`).
#[test]
fn getattr_builtin() {
    let input = r#"
class Foo:
    def __init__(self):
        self.val = 44

f = Foo()
a = getattr(f, 'val')
b = getattr(f, 'val_two', 33)
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    // Present attribute: its value is returned.
    assert_eq!(
        interpreter.state.read("a"),
        Some(ExprResult::Integer(44.store()))
    );
    // Missing attribute with a third argument: the default is returned.
    assert_eq!(
        interpreter.state.read("b"),
        Some(ExprResult::Integer(33.store()))
    );

    let input = r#"
class Foo:
    pass

f = Foo()
b = getattr(f, 'val_two')
"#;

    let (mut parser, mut interpreter) = init(input);

    // Missing attribute and no default: the run itself must fail.
    let error = match interpreter.run(&mut parser) {
        Ok(_) => panic!("Expected an error!"),
        Err(e) => e,
    };

    assert_eq!(
        error,
        MemphisError::Interpreter(InterpreterError::AttributeError(
            "Foo".into(),
            "val_two".into(),
            interpreter.state.call_stack()
        ))
    )
}
assert_eq!(interpreter.state.read("e"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("g"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("i"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("j"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("k"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("l"), + Some(ExprResult::Boolean(false)) + ); + } + } + + let input = r#" +isinstance([], (int, 5)) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some( + "isinstance() arg 2 must be a type, a tuple of types, or a union" + .into() + ), + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn issubclass_builtin() { + let input = r#" +class Foo: pass + +class Bar(Foo): pass + +class Baz: pass + +a = issubclass(int, Foo) +b = issubclass(type, Foo) +c = issubclass(Foo, Foo) +d = issubclass(Bar, Foo) +e = issubclass(Foo, type) +f = issubclass(Baz, Foo) +g = issubclass(Foo, object) +h = issubclass(Foo, Bar) +i = issubclass(object, object) +j = issubclass(type, type) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("c"), Some(ExprResult::Boolean(true))); + assert_eq!(interpreter.state.read("d"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("e"), + 
Some(ExprResult::Boolean(false)) + ); + assert_eq!( + interpreter.state.read("f"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("g"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("h"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("i"), Some(ExprResult::Boolean(true))); + assert_eq!(interpreter.state.read("j"), Some(ExprResult::Boolean(true))); + } + } + + let input = r#" +issubclass([], type) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some("issubclass() arg 1 must be a class".into()), + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + + let input = r#" +issubclass(object, []) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => { + assert_eq!( + e, + MemphisError::Interpreter(InterpreterError::TypeError( + Some( + "issubclass() arg 2 must be a type, a tuple of types, or a union" + .into() + ), + interpreter.state.call_stack() + )) + ) + } + Ok(_) => panic!("Expected an error!"), + } + } + + #[test] + fn bool_builtin() { + let input = r#" +a = bool() +b = bool(True) +c = bool(False) +d = bool([]) +e = bool([1]) +f = bool('') +g = bool('hello') +h = bool(0) +i = bool(5) +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!( + interpreter.state.read("a"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("b"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("c"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!( + interpreter.state.read("d"), + Some(ExprResult::Boolean(false)) + ); + assert_eq!(interpreter.state.read("e"), Some(ExprResult::Boolean(true))); + 
/// The name `memoryview` resolves to a class object.
#[test]
fn memoryview_builtin() {
    let input = r#"
a = memoryview
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    assert!(matches!(
        interpreter.state.read("a"),
        Some(ExprResult::Class(_))
    ));
}

/// Chains two generators and sums what they produce (3 + 2 + 1 + 2 + 1 = 9).
///
/// Note that the `yield from` version of `countdown_from` is commented out in the Python
/// source below; what actually runs is the explicit `for ... yield` equivalent.
#[test]
fn yield_from() {
    let input = r#"
def countdown(n):
    while n > 0:
        yield n
        n -= 1

# def countdown_from(x, y):
#     yield from countdown(x)
#     yield from countdown(y)

def countdown_from(x, y):
    for number in countdown(x):
        yield number
    for number in countdown(y):
        yield number

sum = 0
for number in countdown_from(3, 2):
    print(number)
    sum += number
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    assert_eq!(
        interpreter.state.read("sum"),
        Some(ExprResult::Integer(9.store()))
    );
}

/// A caught exception exposes `__traceback__`, and that traceback exposes `tb_frame`; each
/// has the corresponding built-in type.
#[test]
fn traceback_and_frame() {
    let input = r#"
try:
    raise TypeError
except TypeError as exc:
    a = type(exc.__traceback__)
    b = type(exc.__traceback__.tb_frame)
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    assert_eq!(
        interpreter.state.read("a").unwrap().as_class().unwrap(),
        interpreter.state.get_type_class(Type::Traceback)
    );
    assert_eq!(
        interpreter.state.read("b").unwrap().as_class().unwrap(),
        interpreter.state.get_type_class(Type::Frame)
    );
}
/// Chained assignment (`a = b = value`) binds every target to the value.
#[test]
fn multiple_assignment() {
    let input = r#"
a = b = True
"#;

    let (mut parser, mut interpreter) = init(input);

    if let Err(e) = interpreter.run(&mut parser) {
        panic!("Interpreter error: {:?}", e);
    }

    assert_eq!(interpreter.state.read("a"), Some(ExprResult::Boolean(true)));
    assert_eq!(interpreter.state.read("b"), Some(ExprResult::Boolean(true)));
}
x + + def __eq__(self, other): + self.x == other.x + +f = Foo(4) +g = Foo(4) +a = f == g +b = f != g +"#; + + let (mut parser, mut interpreter) = init(input); + + match interpreter.run(&mut parser) { + Err(e) => panic!("Interpreter error: {:?}", e), + Ok(_) => { + assert_eq!(interpreter.state.read("a"), Some(ExprResult::Boolean(true))); + assert_eq!( + interpreter.state.read("b"), + Some(ExprResult::Boolean(false)) + ); + } + } + } +} diff --git a/src/treewalk/mod.rs b/src/treewalk/mod.rs new file mode 100644 index 0000000..fe4b31a --- /dev/null +++ b/src/treewalk/mod.rs @@ -0,0 +1,22 @@ +mod call_stack; +mod evaluators; +mod execution_context; +mod executor; +#[allow(clippy::module_inception)] +mod interpreter; +mod module_loader; +mod scope; +mod scope_manager; +mod state; +mod type_registry; +pub mod types; + +pub use call_stack::{CallStack, StackFrame}; +pub use execution_context::ExecutionContextManager; +pub use executor::Executor; +pub use interpreter::Interpreter; +pub use module_loader::{LoadedModule, ModuleLoader}; +pub use scope::Scope; +pub use scope_manager::ScopeManager; +pub use state::State; +pub use type_registry::TypeRegistry; diff --git a/src/treewalk/module_loader.rs b/src/treewalk/module_loader.rs new file mode 100644 index 0000000..c879d5b --- /dev/null +++ b/src/treewalk/module_loader.rs @@ -0,0 +1,239 @@ +use std::collections::HashMap; +use std::env; +use std::fs; +use std::io::{self, ErrorKind}; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::str; + +use crate::core::{log, LogLevel}; +use crate::parser::types::ImportPath; + +fn lookup_python_site_packages(command: &str) -> Vec { + let output = Command::new("python3") + .args(["-c", command]) + .output() + .expect("Failed to retrieve Python site-packages path"); + + if !output.status.success() { + panic!("Failed to retrieve Python site-packages path"); + } + + let output_str = str::from_utf8(&output.stdout) + .map_err(|e| io::Error::new(ErrorKind::InvalidData, e)) + 
/// The source of one module as seen by the loader: an optional name, an optional on-disk
/// path, and optional source text.
///
/// All three are optional because a module is not always backed by a file; see
/// [`LoadedModule::empty`] and [`LoadedModule::new_virtual`].
#[derive(Debug, PartialEq, Clone)]
pub struct LoadedModule {
    name: Option<String>,
    path: Option<PathBuf>,
    text: Option<String>,
}

impl LoadedModule {
    /// A module read from the file system, with all three fields populated.
    pub fn new(name: &str, path: PathBuf, text: String) -> Self {
        Self {
            name: Some(name.to_string()),
            path: Some(path),
            text: Some(text),
        }
    }

    /// An empty module occurs when there is no Python code for a module. This can occur for a few
    /// reasons:
    /// 1) Rust-backed module
    /// 2) a module created as a layer in an import such as `import mypackage.mymodule`.
    pub fn empty() -> Self {
        Self {
            name: None,
            path: None,
            text: None,
        }
    }

    /// This exists for the unit tests where code is provided directly as a string without reading
    /// from the file system.
    pub fn new_virtual(text: &str) -> Self {
        Self {
            name: None,
            path: None,
            text: Some(text.to_string()),
        }
    }

    /// The path reported by [`LoadedModule::path`] when the module has none: the empty path.
    pub fn empty_path() -> PathBuf {
        PathBuf::new()
    }

    /// The module's path, or [`LoadedModule::empty_path`] when it has none.
    pub fn path(&self) -> PathBuf {
        // `unwrap_or_else` builds the fallback only when it is actually needed.
        self.path.clone().unwrap_or_else(Self::empty_path)
    }

    /// The name reported by [`LoadedModule::name`] when the module has none: the empty string.
    pub fn empty_name() -> String {
        String::new()
    }

    /// The module's name, or [`LoadedModule::empty_name`] when it has none.
    pub fn name(&self) -> String {
        self.name.clone().unwrap_or_else(Self::empty_name)
    }

    /// The module's source text, or `None` when there is no Python code behind it.
    pub fn text(&self) -> Option<String> {
        self.text.clone()
    }
}
+ current_path: Option, + ) -> Option { + let base_path = match current_path { + // The value in `current_path` contains the filename, so we must add 1 to the level to + // get back to the directory. We could change this in the future, but this seemed + // cleaner for the caller to provide. + Some(p) => up_n_levels(&p, &(level + 1)), + None => up_n_levels(&self.run_dir, level), + }; + + expand_path(base_path.as_ref()?, path_segments) + .into_iter() + .find_map(|filename| self.load_module_code(&name.as_str(), filename)) + } + + pub fn load_module( + &mut self, + import_path: &ImportPath, + current_path: Option, + ) -> Option { + if let Some(code) = self.modules.get(&import_path.as_str()) { + return Some(code.clone()); + } + + let module = match import_path { + ImportPath::Absolute(path_segments) => { + self.load_absolute_path(import_path, path_segments) + } + ImportPath::Relative(level, path_segments) => { + self.load_relative_path(import_path, level, path_segments, current_path) + } + }; + + if let Some(input) = module { + self.modules.insert(import_path.as_str(), input.clone()); + Some(input) + } else { + None + } + } +} + +/// For a given path and segments, this returns both the `../base.py` and `../base/__init__.py` +/// versions. 
/// Build the two file-system candidates for a dotted module path rooted at `path`.
///
/// For segments `["a", "b"]` the result is, in this order:
/// 1. `<path>/a/b.py` (the module is a plain file), and
/// 2. `<path>/a/b/__init__.py` (the module is a package).
///
/// With no segments the candidates are `<path>` itself and `<path>/__init__.py`.
///
/// Takes a slice, so callers holding a `&Vec<String>` can pass it unchanged.
fn expand_path(path: &Path, path_segments: &[String]) -> Vec<PathBuf> {
    // Package form: every segment is a directory.
    let mut init_path = path.to_path_buf();
    init_path.extend(path_segments);
    init_path.push("__init__.py");

    // File form: every segment but the last is a directory; the last one names the `.py` file.
    let mut normal_path = path.to_path_buf();
    if let Some((leaf, parents)) = path_segments.split_last() {
        normal_path.extend(parents);
        normal_path.push(format!("{}.py", leaf));
    }

    vec![normal_path, init_path]
}

/// Walk `n` parents up from `original`.
///
/// `n == 0` returns `original` unchanged. Returns `None` when the path has fewer than `n`
/// ancestors. `n` stays a reference because existing callers pass one.
fn up_n_levels(original: &Path, n: &usize) -> Option<PathBuf> {
    // `ancestors()` yields the path itself first, then each successive `parent()`.
    original.ancestors().nth(*n).map(Path::to_path_buf)
}
+ if function_args.args.len() < arguments.bound_len() && function_args.args_var.is_none() { + return Err(InterpreterError::WrongNumberOfArguments( + function_args.args.len(), + arguments.bound_len(), + interpreter.state.call_stack(), + )); + } + + let bound_args = arguments.bound_args(); + + for (index, arg_definition) in function_args.args.iter().enumerate() { + // Check if the argument is provided, otherwise use default + let value = if index < bound_args.len() { + bound_args[index].clone() + } else { + match &arg_definition.default { + Some(default_value) => interpreter.evaluate_expr(default_value)?, + None => { + // Function expects more positional args than it was invoked with. + return Err(InterpreterError::WrongNumberOfArguments( + function_args.args.len(), + arguments.bound_len(), + interpreter.state.call_stack(), + )); + } + } + }; + + scope.insert(&arg_definition.arg, value); + } + + if let Some(ref args_var) = function_args.args_var { + let extra = arguments.len() - function_args.args.len(); + let left_over = bound_args.iter().rev().take(extra).rev().cloned().collect(); + let args_value = ExprResult::Tuple(Container::new(Tuple::new(left_over))); + scope.insert(args_var.as_str(), args_value); + } + + if let Some(ref kwargs_var) = function_args.kwargs_var { + let kwargs_value = ExprResult::Dict(Container::new(Dict::new(arguments.get_kwargs()))); + scope.insert(kwargs_var.as_str(), kwargs_value); + } + + Ok(Container::new(scope.to_owned())) + } + + fn from_hash(symbol_table: HashMap) -> Self { + Self { + symbol_table, + global_vars: HashSet::new(), + nonlocal_vars: HashSet::new(), + } + } + + pub fn get(&self, name: &str) -> Option { + self.symbol_table.get(name).cloned() + } + + /// Return a list of all the symbols available in this `Scope`. 
+ pub fn symbols(&self) -> Vec { + self.symbol_table.keys().cloned().collect() + } + + pub fn delete(&mut self, name: &str) -> Option { + self.symbol_table.remove(name) + } + + /// Insert an `ExprResult` to this `Scope`. The `Scope` is returned to allow calls to be + /// chained. + pub fn insert(&mut self, name: &str, value: ExprResult) -> &mut Self { + self.symbol_table.insert(name.to_string(), value); + self + } + + /// Given a variable `var`, indicate that `var` should refer to the variable in the + /// global/module scope (which does not live in this struct) for the duration of _this_ + /// local scope. + pub fn mark_global(&mut self, name: &str) { + self.global_vars.insert(name.to_string()); + } + + /// Given a variable `var`, indicate that `var` should refer to the variable in the + /// enclosing scope (which does not live in this struct) for the duration of _this_ + /// local scope. + pub fn mark_nonlocal(&mut self, name: &str) { + self.nonlocal_vars.insert(name.to_string()); + } + + pub fn has_global(&self, name: &str) -> bool { + self.global_vars.contains(name) + } + + pub fn has_nonlocal(&self, name: &str) -> bool { + self.nonlocal_vars.contains(name) + } + + pub fn as_dict(&self) -> Container { + let mut items = HashMap::new(); + for (key, value) in self.symbol_table.iter() { + items.insert(ExprResult::String(Str::new(key.clone())), value.clone()); + } + + Container::new(Dict::new(items)) + } + + pub fn from_dict(dict: DictItems) -> Self { + let mut symbol_table = HashMap::new(); + for item in dict.into_iter() { + let tuple = item.as_tuple().unwrap(); + let key = tuple.first().as_string().unwrap(); + let value = tuple.second(); + symbol_table.insert(key, value); + } + + Self::from_hash(symbol_table) + } +} diff --git a/src/treewalk/scope_manager.rs b/src/treewalk/scope_manager.rs new file mode 100644 index 0000000..3c22fec --- /dev/null +++ b/src/treewalk/scope_manager.rs @@ -0,0 +1,248 @@ +use crate::core::{Container, Stack}; +use 
crate::domain::Context; +use crate::treewalk::executor::{AsyncioCreateTaskBuiltin, AsyncioRunBuiltin, AsyncioSleepBuiltin}; +use crate::treewalk::types::{ + builtins::{ + GetattrBuiltin, GlobalsBuiltin, IsinstanceBuiltin, IssubclassBuiltin, IterBuiltin, + LenBuiltin, NextBuiltin, PrintBuiltin, + }, + traits::Callable, + utils::EnvironmentFrame, + ExprResult, Module, +}; +#[cfg(feature = "c_stdlib")] +use crate::types::cpython::utils as cpython_utils; + +use super::{LoadedModule, Scope, TypeRegistry}; + +fn get_asyncio_builtins() -> Vec> { + vec![ + Box::new(AsyncioRunBuiltin), + Box::new(AsyncioSleepBuiltin), + Box::new(AsyncioCreateTaskBuiltin), + ] +} + +fn get_builtins() -> Vec> { + vec![ + Box::new(GetattrBuiltin), + Box::new(GlobalsBuiltin), + Box::new(IsinstanceBuiltin), + Box::new(IssubclassBuiltin), + Box::new(IterBuiltin), + Box::new(LenBuiltin), + Box::new(NextBuiltin), + Box::new(PrintBuiltin), + ] +} + +fn init_builtin_scope() -> Scope { + let mut scope = Scope::default(); + for builtin in get_builtins() { + scope.insert( + &builtin.name(), + ExprResult::BuiltinFunction(Container::new(builtin)), + ); + } + + let mut asyncio_scope = Scope::default(); + for builtin in get_asyncio_builtins() { + asyncio_scope.insert( + &builtin.name(), + ExprResult::BuiltinFunction(Container::new(builtin)), + ); + } + + scope.insert( + "asyncio", + ExprResult::Module(Container::new(Module::new( + LoadedModule::empty(), + asyncio_scope, + ))), + ); + + #[cfg(feature = "c_stdlib")] + cpython_utils::init_scope(&mut scope); + + scope +} + +/// This struct implements Python's scoping rules by storing data to power the +/// `read`/`write`/`delete` interface available to the interpreter. +/// +/// The rule of thumb for Python scoping is LEGB: local, enclosing, global (aka module), builtin. +pub struct ScopeManager { + /// A stack of `Scope` objects for each local (think: function) scope. + local_scope_stack: Stack>, + + /// A stack of captured environments to support closures. 
+ captured_env_stack: Stack>, + + /// A stack of modules to support symbol resolution local to specific modules. + module_stack: Stack>, + + /// The read-only scope which contains builtin methods such as `print()`, `open()`, etc. There + /// is only one of these so we do not need a stack. + builtin_scope: Scope, + + /// This stack allows us to know whether to search on the `local_scope_stack` or the + /// `module_stack` when resolving a symbol. + context_stack: Stack, +} + +impl ScopeManager { + pub fn new() -> Self { + ScopeManager { + local_scope_stack: Stack::with_initial(Container::new(Scope::default())), + captured_env_stack: Stack::default(), + module_stack: Stack::with_initial(Container::new(Module::empty())), + builtin_scope: init_builtin_scope(), + context_stack: Stack::with_initial(Context::Global), + } + } + + /// This is to insert `list()`, `set()`, etc into the builtin scope. We must do it here instead + /// of in `init_builtin_scope()` because we want to use the singleton instances owned by + /// `TypeRegistry`. 
+ pub fn register_callable_builtin_types(&mut self, registry: &TypeRegistry) { + for builtin_class in registry.get_callable_builtin_types() { + self.builtin_scope.insert( + builtin_class.borrow().builtin_type().value(), + ExprResult::Class(builtin_class.clone()), + ); + } + } + + pub fn push_captured_env(&mut self, scope: Container) { + self.captured_env_stack.push(scope); + } + + pub fn pop_captured_env(&mut self) -> Option> { + self.captured_env_stack.pop() + } + + pub fn push_local(&mut self, scope: Container) { + self.local_scope_stack.push(scope); + self.context_stack.push(Context::Local); + } + + pub fn pop_local(&mut self) -> Option> { + self.context_stack.pop(); + self.local_scope_stack.pop() + } + + pub fn push_module(&mut self, module: Container) { + self.module_stack.push(module); + self.context_stack.push(Context::Global); + } + + pub fn pop_module(&mut self) -> Option> { + self.context_stack.pop(); + self.module_stack.pop() + } + + /// Given a variable `var`, indicate that `var` should refer to the variable in the + /// global/module scope for the duration of the current local scope. + pub fn mark_global(&self, var: &str) { + self.read_local().borrow_mut().mark_global(var); + } + + /// Given a variable `var`, indicate that `var` should refer to the variable in the enclosing + /// scope for the duration of the current local scope. 
+ pub fn mark_nonlocal(&self, var: &str) { + self.read_local().borrow_mut().mark_nonlocal(var); + } + + pub fn delete(&mut self, name: &str) -> Option { + if self.read_local().borrow().get(name).is_some() { + return self.read_local().borrow_mut().delete(name); + } + + // TODO it sounds like there may be some nuances but ultimately we should be able to delete + // from a captured env + + for module in self.module_stack.iter_mut().rev() { + if module.borrow().scope.get(name).is_some() { + return module.borrow_mut().scope.delete(name); + } + } + + None + } + + pub fn read(&self, name: &str) -> Option { + if let Some(value) = self.read_local().borrow().get(name) { + return Some(value); + } + + // TODO I'm not sure we should be searching the entire captured environment here. I think + // only the closure free vars should be available, but I don't yet know of a good way to + // connect those here. + if let Some(env) = self.read_captured_env() { + if let Some(value) = env.borrow().read(name) { + return Some(value); + } + } + + for module in self.module_stack.iter().rev() { + // We really shouldn't be accessing the module scope directly here, but the `get` + // method on either `MemberAccessor` or `ModuleInterface` requires a reference to the + // Interpreter. We'll need to fix this at some point. 
+ if let Some(value) = module.borrow().scope.get(name) { + return Some(value); + } + } + + self.builtin_scope.get(name) + } + + pub fn write(&mut self, name: &str, value: ExprResult) { + let local_scope = self.read_local().borrow().clone(); + + if local_scope.has_global(name) { + self.read_module().borrow_mut().scope.insert(name, value); + } else if local_scope.has_nonlocal(name) { + if let Some(env) = self.read_captured_env() { + env.borrow_mut().write(name, value); + } + } else { + match self.read_context() { + Context::Local => { + self.read_local().borrow_mut().insert(name, value); + } + Context::Global => { + self.read_module().borrow_mut().scope.insert(name, value); + } + } + } + } + + /// This assumes we always have a local scope stack. + pub fn read_local(&self) -> Container { + self.local_scope_stack + .top() + .expect("failed to find local scope") + } + + pub fn read_captured_env(&self) -> Option>> { + self.captured_env_stack.top().map(Box::new) + } + + /// This assumes we always have a module stack. + pub fn read_module(&self) -> Container { + self.module_stack + .top() + .expect("failed to find module scope") + } + + /// This assumes we always have a context stack. + fn read_context(&self) -> Context { + self.context_stack.top().expect("failed to find context") + } + + /// Used during the parsing process to determine whether to insert a `Expr::FunctionCall` or + /// `Expr::ClassInstantiation` into the AST. 
+ pub fn is_class(&self, name: &str) -> bool { + matches!(self.read(name), Some(ExprResult::Class(_))) + } +} diff --git a/src/treewalk/state.rs b/src/treewalk/state.rs new file mode 100644 index 0000000..e20ed2d --- /dev/null +++ b/src/treewalk/state.rs @@ -0,0 +1,222 @@ +use std::path::PathBuf; + +use crate::core::Container; +use crate::parser::types::{ImportPath, LoopIndex}; +use crate::treewalk::types::{ + utils::EnvironmentFrame, Class, Dict, ExprResult, Function, Module, Type, +}; +use crate::treewalk::{ + CallStack, ExecutionContextManager, Executor, LoadedModule, ModuleLoader, Scope, ScopeManager, + StackFrame, TypeRegistry, +}; + +pub struct State { + module_loader: ModuleLoader, + scope_manager: ScopeManager, + call_stack: CallStack, + executor: Container, + type_registry: TypeRegistry, + execution_context: ExecutionContextManager, +} + +impl Default for State { + fn default() -> Self { + Self::new() + } +} + +impl State { + pub fn new() -> Self { + let type_registry = TypeRegistry::new(); + let mut scope_manager = ScopeManager::new(); + + // We still want the `TypeRegistry` to own the type classes, but we must make some of them + // available in the builtin scope before execution begins. + scope_manager.register_callable_builtin_types(&type_registry); + + State { + scope_manager, + module_loader: ModuleLoader::new(), + call_stack: CallStack::new(), + executor: Container::new(Executor::new()), + type_registry, + execution_context: ExecutionContextManager::new(), + } + } +} + +impl Container { + pub fn get_type(&self, result: &ExprResult) -> ExprResult { + match result { + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonObject(o) => o.get_type(), + ExprResult::Object(o) => ExprResult::Class(o.borrow().class.clone()), + _ => ExprResult::Class(self.get_type_class(result.get_type())), + } + } + + /// Write an `ExprResult` to the symbol table. 
+ pub fn write(&self, name: &str, value: ExprResult) { + self.borrow_mut().scope_manager.write(name, value); + } + + /// Attempt to read an `ExprResult`, adhering to Python scoping rules. + pub fn read(&self, name: &str) -> Option { + self.borrow().scope_manager.read(name) + } + + /// Attempt to delete an `ExprResult`, adhering to Python scoping rules. + pub fn delete(&self, name: &str) -> Option { + self.borrow_mut().scope_manager.delete(name) + } + + pub fn write_loop_index(&self, index: &LoopIndex, value: ExprResult) { + match index { + LoopIndex::Variable(var) => { + self.write(var, value); + } + LoopIndex::Tuple(tuple_index) => { + for (key, value) in tuple_index.iter().zip(value) { + self.write(key, value); + } + } + }; + } + + /// Return the `CallStack` at the current moment in time. This should be used at the time of an + /// exception or immediately before any other use as it is a snapshot and will not keep updating. + pub fn call_stack(&self) -> CallStack { + self.borrow().call_stack.clone() + } + + pub fn get_executor(&self) -> Container { + self.borrow_mut().executor.clone() + } + + pub fn push_captured_env(&self, frame: Container) { + self.borrow_mut().scope_manager.push_captured_env(frame); + } + + pub fn pop_captured_env(&self) -> Option> { + self.borrow_mut().scope_manager.pop_captured_env() + } + + pub fn push_local(&self, scope: Container) { + self.borrow_mut().scope_manager.push_local(scope); + } + + pub fn pop_local(&self) -> Option> { + self.borrow_mut().scope_manager.pop_local() + } + + pub fn push_context(&self, stack_frame: StackFrame) { + self.borrow_mut().call_stack.push_context(stack_frame); + } + + pub fn set_line(&self, line: usize) { + self.borrow_mut().call_stack.set_line(line); + } + + pub fn pop_context(&self) -> Option { + self.borrow_mut().call_stack.pop_context() + } + + pub fn push_module(&self, module: Container) { + self.borrow_mut().scope_manager.push_module(module); + } + + pub fn pop_module(&self) -> Option> { + 
self.borrow_mut().scope_manager.pop_module() + } + + pub fn current_module(&self) -> Container { + self.borrow().scope_manager.read_module() + } + + pub fn push_class(&self, class: Container) { + self.borrow_mut().execution_context.push_class(class); + } + + pub fn pop_class(&self) -> Option> { + self.borrow_mut().execution_context.pop_class() + } + + pub fn push_function(&self, function: Container) { + self.borrow_mut().execution_context.push_function(function); + } + + pub fn pop_function(&self) -> Option> { + self.borrow_mut().execution_context.pop_function() + } + + pub fn push_receiver(&self, receiver: ExprResult) { + self.borrow_mut().execution_context.push_receiver(receiver); + } + + pub fn pop_receiver(&self) -> Option { + self.borrow_mut().execution_context.pop_receiver() + } + + pub fn current_class(&self) -> Option> { + self.borrow().execution_context.read_class() + } + + pub fn current_function(&self) -> Option> { + self.borrow().execution_context.read_current_function() + } + + pub fn current_receiver(&self) -> Option { + self.borrow().execution_context.read_current_receiver() + } + + pub fn read_captured_env(&self) -> Option>> { + self.borrow().scope_manager.read_captured_env() + } + + pub fn read_globals(&self) -> Container { + self.borrow() + .scope_manager + .read_module() + .borrow() + .scope + .as_dict() + } + + pub fn mark_nonlocal(&self, name: &str) { + self.borrow_mut().scope_manager.mark_nonlocal(name); + } + + pub fn mark_global(&self, name: &str) { + self.borrow_mut().scope_manager.mark_global(name); + } + + /// Return a singleton `Class` for builtin types such as list, set, tuple, dict, etc. 
+ pub fn get_type_class(&self, type_: Type) -> Container { + self.borrow().type_registry.get_type_class(type_) + } + + pub fn get_environment_frame(&self) -> Container { + Container::new(EnvironmentFrame::new( + self.borrow().scope_manager.read_local(), + self.borrow().scope_manager.read_captured_env(), + )) + } + + pub fn is_class(&self, name: &str) -> bool { + self.borrow().scope_manager.is_class(name) + } + + pub fn load_root(&self, filepath: PathBuf) -> Option { + self.borrow_mut().module_loader.load_root(filepath) + } + + pub fn load_module( + &self, + import_path: &ImportPath, + current_path: Option, + ) -> Option { + self.borrow_mut() + .module_loader + .load_module(import_path, current_path) + } +} diff --git a/src/treewalk/type_registry.rs b/src/treewalk/type_registry.rs new file mode 100644 index 0000000..4367db9 --- /dev/null +++ b/src/treewalk/type_registry.rs @@ -0,0 +1,306 @@ +use std::collections::HashMap; + +use crate::core::Container; +use crate::treewalk::types::{ + iterators::{ReversedIterator, ZipIterator}, + traits::{AttributeResolver, Callable, MemberAccessor}, + Bool, ByteArray, Bytes, Class, Classmethod, Coroutine, Dict, Exception, ExprResult, FrozenSet, + Function, Int, List, Memoryview, Object, Property, Range, Set, Slice, Staticmethod, Str, Super, + Traceback, Tuple, Type, TypeClass, +}; + +/// `Type::Type` and `Type::Object` are excluded here because they are initialized separately. 
+fn builtin_methods() -> HashMap>> { + HashMap::from([ + (Type::Super, Super::get_methods().into_iter()), + (Type::Bool, Bool::get_methods().into_iter()), + (Type::Int, Int::get_methods().into_iter()), + (Type::Str, Str::get_methods().into_iter()), + (Type::List, List::get_methods().into_iter()), + (Type::Set, Set::get_methods().into_iter()), + (Type::FrozenSet, FrozenSet::get_methods().into_iter()), + (Type::Tuple, Tuple::get_methods().into_iter()), + (Type::Dict, Dict::get_methods().into_iter()), + (Type::Range, Range::get_methods().into_iter()), + (Type::Slice, Slice::get_methods().into_iter()), + (Type::Zip, ZipIterator::get_methods().into_iter()), + ( + Type::ReversedIterator, + ReversedIterator::get_methods().into_iter(), + ), + (Type::Bytes, Bytes::get_methods().into_iter()), + (Type::ByteArray, ByteArray::get_methods().into_iter()), + (Type::Memoryview, Memoryview::get_methods().into_iter()), + (Type::Coroutine, Coroutine::get_methods().into_iter()), + (Type::Classmethod, Classmethod::get_methods().into_iter()), + (Type::Staticmethod, Staticmethod::get_methods().into_iter()), + (Type::Property, Property::get_methods().into_iter()), + ]) +} + +/// `Type::Type` and `Type::Object` are excluded here because they are initialized separately. +fn dynamic_attributes() -> HashMap>> { + HashMap::from([ + ( + Type::Function, + Function::get_dynamic_attributes().into_iter(), + ), + ( + Type::Exception, + Exception::get_dynamic_attributes().into_iter(), + ), + ( + Type::Traceback, + Traceback::get_dynamic_attributes().into_iter(), + ), + ]) +} + +/// A list of all the variants of `Type` which should have a type class created. As of 2024-02-16, +/// this is all the variants. +/// +/// We leave `Type::Type` out of here beacuse it must be initialized first as it is the metaclass +/// for all the these type classes. +/// +/// We also leave `Type::Object` out of here because it must be initialized first as it is the +/// parent class for all of these type classes. 
+fn all_types() -> Vec { + vec![ + Type::Super, + Type::GetSetDescriptor, + Type::MemberDescriptor, + Type::Method, + Type::Function, + Type::BuiltinFunction, + Type::BuiltinMethod, + Type::Generator, + Type::Coroutine, + Type::None, + Type::Ellipsis, + Type::NotImplemented, + Type::Bool, + Type::Int, + Type::Str, + Type::List, + Type::Set, + Type::FrozenSet, + Type::Zip, + Type::Tuple, + Type::Range, + Type::Slice, + Type::Bytes, + Type::ByteArray, + Type::Memoryview, + Type::Dict, + Type::DictItems, + Type::DictKeys, + Type::DictValues, + Type::MappingProxy, + Type::DictItemIterator, + Type::DictKeyIterator, + Type::DictValueIterator, + Type::BytesIterator, + Type::ByteArrayIterator, + Type::RangeIterator, + Type::StringIterator, + Type::ListIterator, + Type::ReversedIterator, + Type::SetIterator, + Type::TupleIterator, + Type::Exception, + Type::Traceback, + Type::Frame, + Type::Module, + Type::Cell, + Type::Code, + Type::Classmethod, + Type::Staticmethod, + Type::Property, + ] +} + +/// These types are callable and behave like a builtin function. +fn callable_types() -> Vec { + vec![ + Type::Type, + Type::Object, + Type::Super, + Type::Bool, + Type::Int, + Type::Str, + Type::List, + Type::Dict, + Type::Set, + Type::FrozenSet, + Type::Tuple, + Type::Range, + Type::Slice, + //Type::Complex, + //Type::Float, + Type::Bytes, + Type::ByteArray, + Type::Memoryview, + Type::Zip, // this refers to the iterator itself + Type::ReversedIterator, + // ---------------------------------------------------------------------------------------- + // These three are a bit weird. They aren't types per-se, but this is how we create builtin + // classes for now so we'll continue to use this approach. However, you will notice that + // these two will not have corresponding entries in `ExprResult` like the rest. 
+ // ---------------------------------------------------------------------------------------- + Type::Classmethod, + Type::Staticmethod, + Type::Property, + // ---------------------------------------------------------------------------------------- + // Technically not a builtin, but it is callable. We may need to handle builtin class such + // as these separately. + Type::Exception, + ] +} + +/// Create the `Type::Type` class which is the metaclass to all classes, including itself. +/// +/// For the hierarchy to work, we give it a parent class of `Type::ObjectMeta`, which contains all +/// the builtin methods of `Type::Object`, and a metaclass of `Type::TypeMeta`, which contains all +/// the builtin methods of `Type::Type`. The "meta" types should never be used directly, but a +/// cycle is created if we try to make Type inherit from Object while Object's metaclass is Type. +fn type_class() -> Container { + let mut object_base = Class::new_builtin(Type::ObjectMeta, None, vec![]); + for method in Object::get_methods().into_iter() { + object_base.insert( + &method.name(), + ExprResult::BuiltinMethod(Container::new(method)), + ); + } + + let mut type_base = Class::new_builtin(Type::TypeMeta, None, vec![]); + for method in TypeClass::get_methods().into_iter() { + type_base.insert( + &method.name(), + ExprResult::BuiltinMethod(Container::new(method)), + ); + } + + for attribute in TypeClass::get_dynamic_attributes().into_iter() { + type_base + .borrow_mut() + .add_dynamic_attribute(attribute.name(), attribute); + } + + let mut type_class = Class::new_builtin(Type::Type, Some(type_base), vec![object_base]); + for method in TypeClass::get_methods().into_iter() { + type_class.insert( + &method.name(), + ExprResult::BuiltinMethod(Container::new(method)), + ); + } + + for attribute in TypeClass::get_dynamic_attributes().into_iter() { + type_class + .borrow_mut() + .add_dynamic_attribute(attribute.name(), attribute); + } + + type_class +} + +/// Create the `Type::Object` 
class which is the parent class to all classes, including +/// `Type::Expr`, except itself. +fn object_class(metaclass: Container) -> Container { + let mut object_class = Class::new_builtin(Type::Object, Some(metaclass), vec![]); + for method in Object::get_methods().into_iter() { + object_class.insert( + &method.name(), + ExprResult::BuiltinMethod(Container::new(method)), + ); + } + + object_class +} + +fn init_type_classes() -> HashMap> { + let mut type_classes = HashMap::new(); + + // Create the `Type::Type` class and use it as the metaclass for all the other type classes. + let type_class = type_class(); + type_classes.insert(Type::Type, type_class.clone()); + + // Create the `Type::Object` and use it as the parent class for `Type::Type` and all other type + // classes. + let object_class = object_class(type_class.clone()); + type_classes.insert(Type::Object, object_class.clone()); + + // TODO in theory, the parent of `Type::Type` should be `Type::Object`. The code is hanging + // with this line presumably due to a cycle. Maybe there's a way to break this since this is a + // known and expected case. + //type_class.borrow_mut().parent_class = Some(object_class.clone()); + + // Create all the other type classes using `Type::Type` and `Type::Object`. + let mut methods = builtin_methods(); + let mut attributes = dynamic_attributes(); + for type_ in all_types() { + let mut class = + Class::new_builtin(type_, Some(type_class.clone()), vec![object_class.clone()]); + let builtin_type = class.borrow().builtin_type(); + + // Add the builtin methods for this type class. + // + // Calling `.remove` here allows us to transfer ownership of the methods to the class, + // which is what we want since this is just initialization code. 
+ if let Some(mut methods_for_type) = methods.remove(&builtin_type) { + for method in methods_for_type.by_ref() { + class.insert( + &method.name(), + ExprResult::BuiltinMethod(Container::new(method)), + ); + } + } + + // Add the dynamic attributes for this type class. + if let Some(mut attributes_for_type) = attributes.remove(&builtin_type) { + for attr in attributes_for_type.by_ref() { + class.borrow_mut().add_dynamic_attribute(attr.name(), attr); + } + } + + type_classes.insert(builtin_type, class); + } + + type_classes +} + +/// This struct holds a singleton `Class` for each variant of `Type` supported in Python. The +/// `Class` will contain any builtin methods which are supported. +pub struct TypeRegistry { + type_classes: HashMap>, +} + +impl TypeRegistry { + pub fn new() -> Self { + Self { + type_classes: init_type_classes(), + } + } + + /// Safe to call `unwrap()` here because we will have a type class for all `Type`s. + /// TODO we still need to enforce this at compile-time ideally. + pub fn get_type_class(&self, type_: Type) -> Container { + self.type_classes + .get(&type_) + .unwrap_or_else(|| { + panic!( + "TypeRegistry initialization failed for <{}>!", + type_.value() + ) + }) + .clone() + } + + /// We need a way to expose the builtin types so they can be stored in the builtin scope inside + /// the `ScopeManager`. 
+ pub fn get_callable_builtin_types(&self) -> Vec> { + callable_types() + .iter() + .map(|callable_type| self.get_type_class(callable_type.clone())) + .collect() + } +} diff --git a/src/treewalk/types/bool.rs b/src/treewalk/types/bool.rs new file mode 100644 index 0000000..4f5cb76 --- /dev/null +++ b/src/treewalk/types/bool.rs @@ -0,0 +1,53 @@ +use crate::{treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +pub struct Bool; + +impl Bool { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + if args.len() == 1 { + Ok(ExprResult::Boolean(false)) + } else if args.len() == 2 { + let input = args + .get_arg(1) + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::Boolean(input)) + } else { + Err(InterpreterError::WrongNumberOfArguments( + 1, + args.len(), + interpreter.state.call_stack(), + )) + } + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/builtins.rs b/src/treewalk/types/builtins.rs new file mode 100644 index 0000000..6c66cae --- /dev/null +++ b/src/treewalk/types/builtins.rs @@ -0,0 +1,351 @@ +use crate::core::Container; +use crate::treewalk::types::{ + function::BindingType, iterators::StringIterator, traits::Callable, utils::ResolvedArguments, + ExprResult, +}; +use crate::treewalk::Interpreter; +use crate::types::errors::InterpreterError; + +pub struct GetattrBuiltin; +pub struct GlobalsBuiltin; +pub struct IsinstanceBuiltin; +pub struct IssubclassBuiltin; +pub struct IterBuiltin; +pub struct LenBuiltin; +pub struct NextBuiltin; +pub struct PrintBuiltin; + +impl Callable for 
/// Return `true` when at least one element of `vec1` is equal to some element of `vec2`.
///
/// Either slice being empty yields `false`.
fn has_overlap<T: PartialEq>(vec1: &[T], vec2: &[T]) -> bool {
    vec1.iter().any(|item| vec2.contains(item))
}
item.as_class().ok_or(InterpreterError::TypeError( + msg.clone(), + interpreter.state.call_stack(), + )) + }) + .collect(); + classes? + } + _ => { + return Err(InterpreterError::TypeError( + msg, + interpreter.state.call_stack(), + )) + } + }; + + let isinstance = if args.get_arg(0).is_class() { + has_overlap(&reference_class, &instance_class.borrow().metaclass().mro()) + } else { + has_overlap(&reference_class, &instance_class.mro()) + }; + + Ok(ExprResult::Boolean(isinstance)) + } + + fn name(&self) -> String { + "isinstance".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for IssubclassBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 2, interpreter.state.call_stack())?; + + let instance_class = args + .get_arg(0) + .as_class() + .ok_or(InterpreterError::TypeError( + Some("issubclass() arg 1 must be a class".into()), + interpreter.state.call_stack(), + ))?; + + let reference_class = args + .get_arg(1) + .as_class() + .ok_or(InterpreterError::TypeError( + Some("issubclass() arg 2 must be a type, a tuple of types, or a union".into()), + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::Boolean( + instance_class.mro().contains(&reference_class), + )) + } + + fn name(&self) -> String { + "issubclass".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for PrintBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + println!( + "{}", + args.iter_args() + .map(ToString::to_string) + .collect::>() + .join(" ") + ); + Ok(ExprResult::Void) + } + + fn name(&self) -> String { + "print".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for LenBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, 
interpreter.state.call_stack())?; + + let iterator = + args.get_arg(0) + .clone() + .try_into_iter() + .ok_or(InterpreterError::ExpectedIterable( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::Integer(Container::new(iterator.count() as i64))) + } + + fn name(&self) -> String { + "len".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for NextBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let generator = + args.get_arg(0) + .as_generator() + .ok_or(InterpreterError::ExpectedIterable( + interpreter.state.call_stack(), + ))?; + + generator + .clone() + .borrow_mut() + .next() + .ok_or(InterpreterError::StopIteration( + interpreter.state.call_stack(), + )) + } + + fn name(&self) -> String { + "next".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for IterBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + match args.get_arg(0) { + ExprResult::String(s) => Ok(ExprResult::StringIterator(StringIterator::new(s.clone()))), + ExprResult::List(list) => Ok(ExprResult::ListIterator(list.clone().into_iter())), + ExprResult::ReversedIterator(_) => Ok(args.get_arg(0).clone()), + ExprResult::Set(set) => Ok(ExprResult::SetIterator(set.clone().into_iter())), + ExprResult::Zip(_) => Ok(args.get_arg(0).clone()), + ExprResult::Tuple(tuple) => Ok(ExprResult::TupleIterator(tuple.clone().into_iter())), + ExprResult::DictItems(dict) => { + Ok(ExprResult::DictItemsIterator(dict.clone().into_iter())) + } + ExprResult::DictKeys(dict) => { + Ok(ExprResult::DictKeysIterator(dict.clone().into_iter())) + } + ExprResult::DictValues(dict) => { + Ok(ExprResult::DictValuesIterator(dict.clone().into_iter())) + } + ExprResult::Bytes(b) => 
Ok(ExprResult::BytesIterator(b.borrow().0.to_vec())), + ExprResult::ByteArray(b) => Ok(ExprResult::ByteArrayIterator(b.borrow().0.to_vec())), + ExprResult::Range(r) => Ok(ExprResult::RangeIterator(r.clone().into_iter())), + _ => Err(InterpreterError::ExpectedObject( + interpreter.state.call_stack(), + )), + } + } + + fn name(&self) -> String { + "iter".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +/// This can be used when you need something that implements `Callable` to compile, but you don't +/// plan on ever running with this. A placeholder. +pub struct NoopCallable; + +impl Callable for NoopCallable { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + unimplemented!() + } + + fn name(&self) -> String { + unimplemented!() + } + + fn binding_type(&self) -> BindingType { + unimplemented!() + } +} + +pub mod utils { + use crate::treewalk::CallStack; + + use super::*; + + pub(crate) fn validate_args( + args: &ResolvedArguments, + expected_length: usize, + call_stack: CallStack, + ) -> Result<(), InterpreterError> { + if args.len() != expected_length { + Err(InterpreterError::WrongNumberOfArguments( + expected_length, + args.len(), + call_stack, + )) + } else { + Ok(()) + } + } +} diff --git a/src/treewalk/types/bytearray.rs b/src/treewalk/types/bytearray.rs new file mode 100644 index 0000000..64f82fa --- /dev/null +++ b/src/treewalk/types/bytearray.rs @@ -0,0 +1,63 @@ +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +/// A mutable version of a byte string. 
+#[derive(Debug, Clone, PartialEq)] +pub struct ByteArray(pub Vec); + +impl ByteArray { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } + + pub fn new(bytes: Vec) -> Self { + Self(bytes) + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + match args.len() { + 1 => Ok(ExprResult::ByteArray(Container::new(ByteArray::new( + "".into(), + )))), + 2 => match args.get_arg(1) { + ExprResult::String(_) => Err(InterpreterError::TypeError( + Some("string argument without an encoding".into()), + interpreter.state.call_stack(), + )), + ExprResult::Bytes(s) => Ok(ExprResult::ByteArray(Container::new(ByteArray::new( + s.borrow().0.clone(), + )))), + _ => todo!(), + }, + // TODO support an optional encoding + 3 => todo!(), + _ => Err(InterpreterError::WrongNumberOfArguments( + args.len(), + 1, + interpreter.state.call_stack(), + )), + } + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/bytes.rs b/src/treewalk/types/bytes.rs new file mode 100644 index 0000000..d42eb46 --- /dev/null +++ b/src/treewalk/types/bytes.rs @@ -0,0 +1,59 @@ +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +/// A mutable version of a byte string. 
+#[derive(Debug, Clone, PartialEq)] +pub struct Bytes(pub Vec); + +impl Bytes { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } + + pub fn new(bytes: Vec) -> Self { + Self(bytes) + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + match args.len() { + 1 => Ok(ExprResult::Bytes(Container::new(Bytes::new("".into())))), + 2 => match args.get_arg(1) { + ExprResult::String(_) => Err(InterpreterError::TypeError( + Some("string argument without an encoding".into()), + interpreter.state.call_stack(), + )), + ExprResult::Bytes(s) => Ok(ExprResult::Bytes(s)), + _ => todo!(), + }, + // TODO support an optional encoding + 3 => todo!(), + _ => Err(InterpreterError::WrongNumberOfArguments( + args.len(), + 1, + interpreter.state.call_stack(), + )), + } + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/cell.rs b/src/treewalk/types/cell.rs new file mode 100644 index 0000000..cdf4e7f --- /dev/null +++ b/src/treewalk/types/cell.rs @@ -0,0 +1,34 @@ +use crate::treewalk::{Interpreter, Scope}; + +use super::{traits::MemberAccessor, ExprResult}; + +/// This corresponds to the Python internal `Cell` class, which is returned for values captured in +/// a closure. 
+#[derive(Clone)] +pub struct Cell { + scope: Scope, +} + +impl Cell { + pub fn new(value: ExprResult) -> Self { + let mut scope = Scope::default(); + scope.insert("cell_contents", value); + Self { + scope: scope.to_owned(), + } + } +} + +impl MemberAccessor for Cell { + fn insert(&mut self, _name: &str, _value: ExprResult) { + unimplemented!(); + } + + fn delete(&mut self, _name: &str) -> Option { + unimplemented!(); + } + + fn get(&self, _interpreter: &Interpreter, name: &str) -> Option { + self.scope.get(name) + } +} diff --git a/src/treewalk/types/class.rs b/src/treewalk/types/class.rs new file mode 100644 index 0000000..9a5f6ac --- /dev/null +++ b/src/treewalk/types/class.rs @@ -0,0 +1,362 @@ +use std::collections::HashMap; +use std::fmt::{Display, Error, Formatter}; + +use crate::{ + core::{log, Container, LogLevel}, + treewalk::{Interpreter, Scope}, + types::errors::InterpreterError, +}; + +use super::{ + function::BindingType, + traits::{AttributeResolver, Callable, MemberAccessor}, + utils::{Dunder, ResolvedArguments}, + ExprResult, Str, Tuple, Type, +}; + +/// The method resolution for [`Dunder::New`] is different than normal methods. We must know +/// whether a class or object is being created to know which [`Dunder::New`] method to invoke. +#[derive(Debug)] +pub enum InstantiationType { + Class, + Object, + Super, +} + +#[derive(Debug, PartialEq)] +pub struct Class { + pub name: String, + + /// This is semantically required to be non-empty, similar to `metaclass`. + parent_classes: Vec>, + + /// This is semantically required. See `Class::metaclass()` for an explanation of why it is + /// optional in the struct definition. + metaclass: Option>, + pub scope: Scope, + builtin_type: Option, + + dynamic_attributes: HashMap>, +} + +impl Class { + /// The primary public interface to create a class. A metaclass will be used if one is found to + /// have a `Dunder::New` method, falling back to the `Type::Type` metaclass. 
+ pub fn new( + interpreter: &Interpreter, + name: &str, + parent_classes: Vec>, + metaclass: Option>, + ) -> Result, InterpreterError> { + let type_class = interpreter.state.get_type_class(Type::Type); + let metaclass = Self::find_metaclass(metaclass, parent_classes.clone(), type_class); + + let bases = if parent_classes.is_empty() { + ExprResult::Tuple(Container::new(Tuple::default())) + } else { + let bases = parent_classes + .iter() + .cloned() + .map(ExprResult::Class) + .collect::>(); + ExprResult::Tuple(Container::new(Tuple::new(bases))) + }; + + let args = &ResolvedArguments::default() + .add_arg(ExprResult::Class(metaclass.clone())) + .add_arg(ExprResult::String(Str::new(name.into()))) + .add_arg(bases) + .add_arg(ExprResult::Dict(Scope::default().as_dict())); + interpreter + .evaluate_new_method( + &ExprResult::Class(metaclass), + args, + InstantiationType::Class, + )? + .as_class() + .ok_or(InterpreterError::ExpectedClass( + interpreter.state.call_stack(), + )) + } + + /// Create the class. This is used by `Dunder::New` for `Type::Type` under the hood. + pub fn new_base( + name: String, + parent_classes: Vec>, + metaclass: Option>, + scope: Scope, + ) -> Container { + Container::new(Self { + name, + parent_classes, + metaclass, + scope, + builtin_type: None, + dynamic_attributes: HashMap::default(), + }) + } + + pub fn new_builtin( + name: Type, + metaclass: Option>, + parent_classes: Vec>, + ) -> Container { + Container::new(Self { + name: name.value().to_string(), + parent_classes, + metaclass, + scope: Scope::default(), + builtin_type: Some(name), + dynamic_attributes: HashMap::default(), + }) + } + + /// The primary accessor for the metaclass of a class. The property is optional because of + /// the boot-strapping problem where the `Type::Type` class is the metaclass of itself. 
+ pub fn metaclass(&self) -> Container { + self.metaclass + .clone() + .unwrap_or_else(|| panic!("attempted to access beyond the metaclass hierarchy!")) + } + + /// This should only be used in a context that is known to contain only builtin types. + pub fn builtin_type(&self) -> Type { + self.builtin_type.clone().unwrap_or_else(|| { + panic!("attempted to access the builtin type for a user-defined type!") + }) + } + + pub fn is_builtin_type(&self) -> bool { + self.builtin_type.is_some() + } + + pub fn is_metaclass(&self) -> bool { + // is this correct? + self.parent_classes + .iter() + .any(|c| c.borrow().is_type(&Type::Type)) + } + + pub fn is_type(&self, type_: &Type) -> bool { + if let Some(ref builtin_type) = self.builtin_type { + builtin_type == type_ + } else { + false + } + } + + pub fn add_dynamic_attribute(&mut self, name: &str, attribute: Box) { + self.dynamic_attributes.insert(name.to_string(), attribute); + } + + fn find_metaclass_inner( + parent_classes: Vec>, + type_class: Container, + ) -> Option> { + for parent_class in parent_classes.iter() { + let metaclass = parent_class.borrow().metaclass(); + + if !metaclass.same_identity(&type_class) { + return Some(metaclass); + } else { + let parents = parent_class.borrow().parent_classes.clone(); + if let Some(metaclass) = Self::find_metaclass_inner(parents, type_class.clone()) { + return Some(metaclass); + } + } + } + + None + } + + /// - If a child class explicitly specifies a metaclass, that metaclass is used. + /// - If the child class does not specify a metaclass: + /// - The child class will inherit the metaclass of its parent class. This means if the + /// parent class had a specific metaclass (other than type), the child class will also use + /// that metaclass, unless it explicitly specifies a different one. + /// - If neither the child class nor any of its parents specify a metaclass, then the default + /// metaclass type is used. 
+ fn find_metaclass( + metaclass: Option>, + parent_classes: Vec>, + type_class: Container, + ) -> Container { + if let Some(metaclass) = metaclass { + return metaclass; + } + + // We cannot use MRO here because the class doesn't exist yet. + if let Some(metaclass) = Self::find_metaclass_inner(parent_classes, type_class.clone()) { + return metaclass; + } + + type_class + } +} + +impl Container { + fn mro_inner(&self) -> Vec> { + let mut hierarchy = vec![self.clone()]; + + for parent in &self.borrow().parent_classes { + hierarchy.push(parent.clone()); + let mut additional_parents = parent.mro_inner(); + hierarchy.append(&mut additional_parents); + } + + hierarchy + } + + /// Produce the Method Resolution Order (MRO) for this class. I could see this being a + /// generator in the future, since many consumers do not require the full chain. + pub fn mro(&self) -> Vec> { + let original = self.mro_inner(); + + // The Python spec states that for any duplicates, the farthest right item should be kept. + let mut hierarchy = vec![]; + for class in original.iter().rev() { + if hierarchy + .iter() + .any(|c: &Container| c.same_identity(class)) + { + continue; + } + + hierarchy.push(class.clone()); + } + + hierarchy.iter().cloned().rev().collect() + } + + pub fn super_mro(&self) -> Vec> { + self.mro() + .iter() + .skip(1) + .cloned() + .collect::>>() + } + + /// When performing resolution on the `Dunder::New` method, whether or not to consider a + /// metaclass depends on whether a class (yes) or object (no) is being instantiated. + /// + /// The reason for this is if metaclasses were considered when instantiating an object, the + /// lookup would always resolve to the `Dunder::New` of , which has a different + /// interface since it is responsible for creating a class. When instantiating an object, we + /// want to invoke the `Dunder::New` of instead. 
+ pub fn get_new_method( + &self, + interpreter: &Interpreter, + instantiation_type: &InstantiationType, + ) -> Option { + match instantiation_type { + InstantiationType::Class => self.get(interpreter, Dunder::New.value()), + InstantiationType::Object => self.search_mro(Dunder::New.value()), + InstantiationType::Super => self.search_super_mro(Dunder::New.value()), + } + } + + /// Use the class MRO to search for an attribute. This does not consider metaclasses but it + /// does consider the class itself. + fn search(&self, iterable: Vec>, name: &str) -> Option { + for class in iterable { + if let Some(attr) = class.borrow().scope.get(name) { + log(LogLevel::Debug, || format!("Found: {}::{}", class, name)); + return Some(attr); + } + } + + None + } + + fn search_mro(&self, name: &str) -> Option { + self.search(self.mro(), name) + } + + fn search_super_mro(&self, name: &str) -> Option { + self.search(self.super_mro(), name) + } +} + +impl MemberAccessor for Container { + /// Attribute access for a class uses this order: + /// 1. the class itself + /// 2. parent class MRO + /// 3. metaclass of the class + /// 4. metclass MRO + /// 5. dynamic attributes of the class MRO (such as `Dunder::Code`) + /// 6. dynamic attributes of the metaclass MRO (such as `Dunder::Dict`) + /// + /// I wonder if I can combine 5 and 6 in some way of if this is even correct to start with. + fn get(&self, _interpreter: &Interpreter, name: &str) -> Option { + log(LogLevel::Debug, || { + format!("Searching for: {}::{}", self, name) + }); + + // (1) and (2) + if let Some(attr) = self.search_mro(name) { + return Some(attr); + } + + // (3) and (4) + if let Some(attr) = self.borrow().metaclass().search_mro(name) { + return Some(attr); + } + + // (5) + // If an attribute is not found in the symbol table (user-defined attributes, user-defined + // methods, and builtin methods), check for any dynamic attributes that we must resolve + // beore returning. 
+ for class in self.mro() { + // We may find this on the class or its parent, but we should pass ourselves into + // the call to `resolve`. + if let Some(attr) = class.borrow().dynamic_attributes.get(name) { + return Some(attr.resolve(self.clone())); + } + } + + // (6) + for class in self.borrow().metaclass().mro() { + // We may find this on the metaclass or its parent, but we should pass ourselves into + // the call to `resolve`. + if let Some(attr) = class.borrow().dynamic_attributes.get(name) { + return Some(attr.resolve(self.clone())); + } + } + + None + } + + fn delete(&mut self, name: &str) -> Option { + self.borrow_mut().scope.delete(name) + + // TODO support delete attributes from parent classes? + } + + fn insert(&mut self, name: &str, value: ExprResult) { + self.borrow_mut().scope.insert(name, value); + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "", self.borrow().name) + } +} + +impl Callable for Container { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + ExprResult::new(interpreter, self.clone(), args) + } + + fn name(&self) -> String { + self.borrow().name.clone() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} diff --git a/src/treewalk/types/classmethod.rs b/src/treewalk/types/classmethod.rs new file mode 100644 index 0000000..45a40c8 --- /dev/null +++ b/src/treewalk/types/classmethod.rs @@ -0,0 +1,58 @@ +use crate::{treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + builtins::utils, + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +pub struct Classmethod; + +impl Classmethod { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } +} + +pub struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + // The first arg is the class itself, the 
second arg is the function + utils::validate_args(&args, 2, interpreter.state.call_stack())?; + + // This is a workaround for Generic type behavior found in _collections_abc.py. + // _weakrefset.py also uses this. + // + // ``` + // GenericAlias = type(list[int]) + // __class_getitem__ = classmethod(GenericAlias) + // ``` + if args.get_arg(1).as_class().is_some() { + return Ok(ExprResult::None); + } + + let function = args + .get_arg(1) + .as_function() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))?; + function.borrow_mut().binding_type = BindingType::Class; + Ok(ExprResult::Function(function)) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} diff --git a/src/treewalk/types/coroutine.rs b/src/treewalk/types/coroutine.rs new file mode 100644 index 0000000..398ad4f --- /dev/null +++ b/src/treewalk/types/coroutine.rs @@ -0,0 +1,182 @@ +use std::time::{Duration, Instant}; + +use crate::core::Container; +use crate::parser::types::Statement; +use crate::{ + treewalk::types::{ + builtins::utils, + function::BindingType, + pausable::{Frame, Pausable, PausableContext, PausableState, PausableStepResult}, + traits::Callable, + utils::ResolvedArguments, + ExprResult, Function, + }, + treewalk::{Interpreter, Scope}, + types::errors::InterpreterError, +}; + +pub enum Poll { + Waiting, + Ready(ExprResult), +} + +/// Stateful encapsulation of a pausable `Function` with a `Scope`. This struct needs a +/// `CoroutineExecutor` to be run. 
+pub struct Coroutine { + scope: Container, + function: Container, + context: Container, + wait_on: Option>, + wake_at: Option, + return_val: Option, +} + +impl Coroutine { + pub fn get_methods() -> Vec> { + vec![Box::new(CloseBuiltin)] + } + + pub fn new(scope: Container, function: Container) -> Self { + let frame = Frame::new(function.borrow().clone().body); + + Self { + scope, + function, + context: PausableContext::new(frame), + wait_on: None, + wake_at: None, + return_val: None, + } + } + + pub fn sleep(&mut self, duration_in_s: f64) { + let micros = duration_in_s * 1_000_000.0; + let diff = Duration::from_micros(micros as u64); + self.wake_at = Some(Instant::now() + diff); + } + + fn is_blocked(&self) -> bool { + self.wake_at.is_some_and(|t| t > Instant::now()) + || self + .wait_on + .clone() + .is_some_and(|coroutine| coroutine.borrow().is_finished().is_none()) + } + + pub fn is_finished(&self) -> Option { + self.return_val.clone() + } + + pub fn has_work(&self) -> bool { + !(self.is_blocked() || self.is_finished().is_some()) + } + + pub fn wait_on(&mut self, coroutine: Container) { + self.wait_on = Some(coroutine); + } + + pub fn set_return_val(&mut self, return_val: ExprResult) { + self.return_val = Some(return_val); + } +} + +impl Pausable for Container { + fn context(&self) -> Container { + self.borrow().context.clone() + } + + fn scope(&self) -> Container { + self.borrow().scope.clone() + } + + fn function(&self) -> Container { + self.borrow().function.clone() + } + + fn set_scope(&self, scope: Container) { + self.borrow_mut().scope = scope; + } + + fn finish( + &self, + _interpreter: &Interpreter, + result: ExprResult, + ) -> Result { + self.borrow_mut().set_return_val(result.clone()); + Ok(ExprResult::Void) + } + + fn handle_step( + &self, + interpreter: &Interpreter, + statement: Statement, + control_flow: bool, + ) -> Result { + match self.execute_statement(interpreter, statement, control_flow)? 
{ + Poll::Ready(val) => Ok(PausableStepResult::Return(val)), + Poll::Waiting => { + self.on_exit(interpreter); + Ok(PausableStepResult::Break) + } + } + } +} + +impl Container { + pub fn has_started(&self) -> bool { + self.context().current_state() != PausableState::Created + } + + /// Execute the next instruction in the `Frame` and return whether we hit an `await` or not. If + /// the next instruction is a control flow statement which leads the execution into a block, + /// the coroutine state is updated to reflect this. + fn execute_statement( + &self, + interpreter: &Interpreter, + statement: Statement, + control_flow: bool, + ) -> Result { + if !control_flow { + match interpreter.evaluate_statement(&statement) { + // We cannot return the default value here because certain statement types may + // actually have a return value (expression, return, etc). + Ok(result) => Ok(Poll::Ready(result)), + Err(InterpreterError::EncounteredSleep) => Ok(Poll::Waiting), + Err(InterpreterError::EncounteredAwait) => { + self.context().step_back(); + Ok(Poll::Waiting) + } + Err(InterpreterError::EncounteredReturn(result)) => Ok(Poll::Ready(result)), + Err(e) => Err(e), + } + } else { + // We return `Void` here because this is the return type of all statements (with a few + // exceptions that we don't have to worry about here). + Ok(Poll::Ready(ExprResult::Void)) + } + } +} + +struct CloseBuiltin; + +// I'm not sure what coroutine.close() should do. The stdlib says this is used to prevent a +// ResourceWarning, but I'm not doing anything when I invoke a coroutine right now that would lead +// to this. 
+impl Callable for CloseBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 0, interpreter.state.call_stack())?; + Ok(ExprResult::Void) + } + + fn name(&self) -> String { + "close".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/descriptor.rs b/src/treewalk/types/descriptor.rs new file mode 100644 index 0000000..16b2b20 --- /dev/null +++ b/src/treewalk/types/descriptor.rs @@ -0,0 +1,48 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{traits::Callable, utils::ResolvedArguments, Class, ExprResult}; + +/// This is sometimes known as a non-data descriptor because it only has a getter. Those with a +/// setter or deleter are known as data scriptors. +#[derive(Clone, Debug, PartialEq)] +pub struct MemberDescriptor { + class: Container, + get: Container>, +} + +impl MemberDescriptor { + pub fn new(class: Container, get: Container>) -> Self { + Self { class, get } + } + + pub fn get( + &self, + interpreter: &Interpreter, + instance: ExprResult, + ) -> Result { + // It's a bit weird to have to convert to an `ExprResult` just to call `bind_if_needed`. + // Maybe we can clean that up sometime, but for now I think its better to ensure we are + // using the same binding mechanism everywhere. 
+ let callable = ExprResult::BuiltinFunction(self.get.clone()) + .bind_if_needed(interpreter, instance) + .as_callable() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))?; + + interpreter.call(callable, &ResolvedArguments::default()) + } +} + +impl Display for MemberDescriptor { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!( + f, + "", + self.get.borrow().name(), + self.class.borrow().name + ) + } +} diff --git a/src/treewalk/types/dict.rs b/src/treewalk/types/dict.rs new file mode 100644 index 0000000..6bc1d11 --- /dev/null +++ b/src/treewalk/types/dict.rs @@ -0,0 +1,276 @@ +use std::{ + collections::HashMap, + fmt::{Display, Error, Formatter}, +}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + builtins::utils, + function::BindingType, + iterators::DictKeysIterator, + traits::{Callable, IndexRead, IndexWrite}, + utils::{Dunder, ResolvedArguments}, + DictItems, ExprResult, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct Dict { + pub items: HashMap, +} + +impl Dict { + pub fn get_methods() -> Vec> { + vec![ + Box::new(NewBuiltin), + Box::new(InitBuiltin), + Box::new(DictKeysBuiltin), + Box::new(DictValuesBuiltin), + Box::new(DictItemsBuiltin), + Box::new(FromKeysBuiltin), + ] + } + + pub fn new(items: HashMap) -> Self { + Self { items } + } + + pub fn default() -> Self { + Self { + items: HashMap::new(), + } + } + + pub fn raw(&self) -> HashMap { + self.items.clone() + } +} + +impl IndexRead for Container { + fn get(&self, index: &ExprResult) -> Option { + self.borrow().items.get(index).cloned() + } +} + +impl IndexWrite for Container { + fn insert(&mut self, index: &ExprResult, value: ExprResult) { + self.borrow_mut().items.insert(index.clone(), value); + } + + fn delete(&mut self, index: &ExprResult) -> Option { + self.borrow_mut().items.remove(index) + } +} + +impl From for Dict { + fn from(dict: DictItems) -> Self { + let mut items: HashMap = 
HashMap::new(); + + for i in dict { + match i { + ExprResult::Tuple(tuple) => { + items.insert(tuple.first(), tuple.second()); + } + _ => panic!("expected a tuple!"), + } + } + + Dict::new(items) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = self + .borrow() + .items + .iter() + .map(|x| x.0.to_string() + ": " + &x.1.to_string()) + .collect::>() + .join(", "); + write!(f, "{{{}}}", items) + } +} + +/// We can reuse `DictKeysIterator` here because an iterator over a `Dict` will just return its +/// keys by default. +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = DictKeysIterator; + + fn into_iter(self) -> Self::IntoIter { + DictKeysIterator::new(self.borrow().clone().into()) + } +} + +struct NewBuiltin; +struct InitBuiltin; +struct DictItemsBuiltin; +struct DictKeysBuiltin; +struct DictValuesBuiltin; +struct FromKeysBuiltin; + +impl Callable for DictItemsBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 0, interpreter.state.call_stack())?; + + let dict = args + .get_self() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))? + .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::DictItems(dict.clone().borrow().clone().into())) + } + + fn name(&self) -> String { + "items".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} + +impl Callable for DictKeysBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 0, interpreter.state.call_stack())?; + + let dict = args + .get_self() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))? 
+ .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::DictKeys(dict.clone().borrow().clone().into())) + } + + fn name(&self) -> String { + "keys".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} + +impl Callable for DictValuesBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 0, interpreter.state.call_stack())?; + + let dict = args + .get_self() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))? + .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::DictValues(dict.clone().borrow().clone().into())) + } + + fn name(&self) -> String { + "values".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} + +impl Callable for FromKeysBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + unimplemented!() + } + + fn name(&self) -> String { + "fromkeys".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} + +impl Callable for NewBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + Ok(ExprResult::Dict(Container::new(Dict::default()))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for InitBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let output = args + .get_self() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))? 
+ .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + + let input = args + .get_arg(0) + .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + + *output.borrow_mut() = input.borrow().clone(); + + Ok(ExprResult::Void) + } + + fn name(&self) -> String { + Dunder::Init.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/dict_items.rs b/src/treewalk/types/dict_items.rs new file mode 100644 index 0000000..f1e440e --- /dev/null +++ b/src/treewalk/types/dict_items.rs @@ -0,0 +1,86 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::core::Container; + +use super::{Dict, ExprResult, List, Tuple}; + +#[derive(Debug, PartialEq, Clone)] +pub struct DictItems { + items: Vec<(ExprResult, ExprResult)>, +} + +impl DictItems { + pub fn new(items: Vec<(ExprResult, ExprResult)>) -> Self { + Self { items } + } +} + +impl From for DictItems { + fn from(dict: Dict) -> Self { + let mut items: Vec<(ExprResult, ExprResult)> = vec![]; + for i in dict.items.keys() { + items.push((i.clone(), dict.items[i].clone())); + } + // TODO this should support non-strings + items.sort_by(|a, b| a.0.as_string().unwrap().cmp(&b.0.as_string().unwrap())); + DictItems::new(items) + } +} + +impl From> for DictItems { + fn from(list: Container) -> Self { + let mut items: Vec<(ExprResult, ExprResult)> = vec![]; + for i in list { + match i { + ExprResult::Tuple(tuple) => { + items.push((tuple.first(), tuple.second())); + } + _ => panic!("expected a tuple!"), + } + } + DictItems::new(items) + } +} + +impl Display for DictItems { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = DictItemsIterator::new(self.clone()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "[{}]", items) + } +} + +impl IntoIterator for DictItems { + type Item = ExprResult; + type IntoIter = DictItemsIterator; + + fn into_iter(self) -> 
Self::IntoIter { + DictItemsIterator::new(self) + } +} + +#[derive(Clone)] +pub struct DictItemsIterator(DictItems); + +impl DictItemsIterator { + fn new(dict_items: DictItems) -> Self { + DictItemsIterator(dict_items) + } +} + +impl Iterator for DictItemsIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.0.items.is_empty() { + None + } else { + let removed = self.0.items.remove(0); + Some(ExprResult::Tuple(Container::new(Tuple::new(vec![ + removed.0, removed.1, + ])))) + } + } +} diff --git a/src/treewalk/types/dict_keys.rs b/src/treewalk/types/dict_keys.rs new file mode 100644 index 0000000..46e52a0 --- /dev/null +++ b/src/treewalk/types/dict_keys.rs @@ -0,0 +1,67 @@ +use std::fmt::{Display, Error, Formatter}; + +use super::{Dict, ExprResult}; + +#[derive(Debug, PartialEq, Clone)] +pub struct DictKeys { + items: Vec, +} + +impl DictKeys { + pub fn new(items: Vec) -> Self { + Self { items } + } +} + +impl From for DictKeys { + fn from(dict: Dict) -> Self { + let mut items: Vec = vec![]; + for i in dict.items.keys() { + items.push(i.clone()); + } + // TODO this should support non-strings + items.sort_by_key(|a| a.as_string().unwrap()); + DictKeys::new(items) + } +} + +impl Display for DictKeys { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = DictKeysIterator::new(self.clone()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "[{}]", items) + } +} + +impl IntoIterator for DictKeys { + type Item = ExprResult; + type IntoIter = DictKeysIterator; + + fn into_iter(self) -> Self::IntoIter { + DictKeysIterator::new(self) + } +} + +#[derive(Clone)] +pub struct DictKeysIterator(DictKeys); + +impl DictKeysIterator { + pub fn new(dict_keys: DictKeys) -> Self { + DictKeysIterator(dict_keys) + } +} + +impl Iterator for DictKeysIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.0.items.is_empty() { + None + } else { + let removed = self.0.items.remove(0); + Some(removed) + 
} + } +} diff --git a/src/treewalk/types/dict_values.rs b/src/treewalk/types/dict_values.rs new file mode 100644 index 0000000..c265d92 --- /dev/null +++ b/src/treewalk/types/dict_values.rs @@ -0,0 +1,67 @@ +use std::fmt::{Display, Error, Formatter}; + +use super::{Dict, ExprResult}; + +#[derive(Debug, PartialEq, Clone)] +pub struct DictValues { + items: Vec, +} + +impl DictValues { + pub fn new(items: Vec) -> Self { + Self { items } + } +} + +impl From for DictValues { + fn from(dict: Dict) -> Self { + let mut items: Vec = vec![]; + for i in dict.items.keys() { + items.push(dict.items[i].clone()); + } + // TODO this should support non-integer + items.sort_by_key(|a| *a.as_integer().unwrap().borrow()); + DictValues::new(items) + } +} + +impl Display for DictValues { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = DictValuesIterator::new(self.clone()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "[{}]", items) + } +} + +impl IntoIterator for DictValues { + type Item = ExprResult; + type IntoIter = DictValuesIterator; + + fn into_iter(self) -> Self::IntoIter { + DictValuesIterator::new(self) + } +} + +#[derive(Clone)] +pub struct DictValuesIterator(DictValues); + +impl DictValuesIterator { + fn new(dict_values: DictValues) -> Self { + DictValuesIterator(dict_values) + } +} + +impl Iterator for DictValuesIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.0.items.is_empty() { + None + } else { + let removed = self.0.items.remove(0); + Some(removed) + } + } +} diff --git a/src/treewalk/types/exception.rs b/src/treewalk/types/exception.rs new file mode 100644 index 0000000..bef7a90 --- /dev/null +++ b/src/treewalk/types/exception.rs @@ -0,0 +1,45 @@ +use crate::core::Container; + +use super::{traits::AttributeResolver, utils::Dunder, Class, ExprResult}; + +#[derive(Debug, PartialEq, Clone)] +pub struct Exception; + +impl Exception { + pub fn get_dynamic_attributes() -> Vec> { + 
vec![Box::new(TracebackAttribute)] + } +} + +struct TracebackAttribute; + +impl AttributeResolver for TracebackAttribute { + fn resolve(&self, _class: Container) -> ExprResult { + ExprResult::Traceback(Traceback) + } + + fn name(&self) -> &'static str { + Dunder::Traceback.value() + } +} + +#[derive(Debug, PartialEq, Clone)] +pub struct Traceback; + +impl Traceback { + pub fn get_dynamic_attributes() -> Vec> { + vec![Box::new(FrameAttribute)] + } +} + +struct FrameAttribute; + +impl AttributeResolver for FrameAttribute { + fn resolve(&self, _class: Container) -> ExprResult { + ExprResult::Frame + } + + fn name(&self) -> &'static str { + "tb_frame" + } +} diff --git a/src/treewalk/types/frozenset.rs b/src/treewalk/types/frozenset.rs new file mode 100644 index 0000000..391dc1b --- /dev/null +++ b/src/treewalk/types/frozenset.rs @@ -0,0 +1,117 @@ +use std::{ + collections::HashSet, + fmt::{Display, Error, Formatter}, +}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + iterators::ListIterator, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, Set, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct FrozenSet { + pub items: HashSet, +} + +impl FrozenSet { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin), Box::new(ContainsBuiltin)] + } + + pub fn new(items: HashSet) -> Self { + Self { items } + } + + pub fn default() -> Self { + Self { + items: HashSet::new(), + } + } +} + +impl From> for Container { + fn from(set: Container) -> Container { + Container::new(FrozenSet::new(set.borrow().clone().items)) + } +} + +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = ListIterator; + + fn into_iter(self) -> Self::IntoIter { + let set: Container = self.into(); + ListIterator::new(set.into()) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let set: Container = self.clone().into(); + let 
items = ListIterator::new(set.into()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "frozenset({{{}}})", items) + } +} + +struct NewBuiltin; +struct ContainsBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + if args.len() == 1 { + Ok(ExprResult::FrozenSet(Container::new(FrozenSet::default()))) + } else if args.len() == 2 { + let input_set = args + .get_arg(1) + .as_set() + .ok_or(InterpreterError::ExpectedSet( + interpreter.state.call_stack(), + ))?; + Ok(ExprResult::FrozenSet(input_set.into())) + } else { + Err(InterpreterError::WrongNumberOfArguments( + 1, + args.len(), + interpreter.state.call_stack(), + )) + } + } + + fn name(&self) -> String { + Dunder::New.into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for ContainsBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + unimplemented!(); + } + + fn name(&self) -> String { + Dunder::Contains.into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/function.rs b/src/treewalk/types/function.rs new file mode 100644 index 0000000..af5150e --- /dev/null +++ b/src/treewalk/types/function.rs @@ -0,0 +1,270 @@ +use std::any::Any; +use std::fmt::{Display, Error, Formatter}; + +use crate::{ + core::Container, + parser::{ + static_analysis::{FunctionAnalysisVisitor, YieldDetector}, + types::{Block, Closure, Expr, ParsedArgDefinitions}, + }, + treewalk::{Interpreter, Scope, State}, + types::errors::InterpreterError, +}; + +use super::{ + builtins::NoopCallable, + traits::{AttributeResolver, Callable, MemberAccessor}, + utils::{Dunder, EnvironmentFrame, ResolvedArguments}, + Cell, Class, ExprResult, MemberDescriptor, Module, Tuple, +}; + +/// How we evaluate a [`Function`] depends on whether it is async or a generator or a +/// traditional function. 
This is independent from [`BindingType`]. +pub enum FunctionType { + Regular, + Generator, + Async, +} + +/// An indicator for whether a [`Function`] is bound to an instance, a class, or nothing. +#[derive(Debug, PartialEq, Clone)] +pub enum BindingType { + Instance, + Class, + Static, +} + +/// This is a placeholder for what is calcuated on a functions [`Dunder::Code`]. +#[derive(Clone)] +pub struct Code; + +#[derive(Debug, PartialEq, Clone)] +pub struct Function { + pub name: String, + pub args: ParsedArgDefinitions, + pub body: Block, + pub module: Container, + pub class_context: Option>, + pub line_number: usize, + pub decorators: Vec, + pub is_async: bool, + pub captured_env: Container, + pub binding_type: BindingType, + pub scope: Scope, + pub closure: Closure, +} + +impl Function { + pub fn get_dynamic_attributes() -> Vec> { + vec![Box::new(CodeAttribute), Box::new(GlobalsAttribute)] + } + + pub fn new( + state: Container, + name: String, + args: ParsedArgDefinitions, + body: Block, + decorators: Vec, + is_async: bool, + ) -> Self { + let module = state.current_module(); + let class_context = state.current_class(); + let line_number = state.call_stack().line_number(); + let captured_env = state.get_environment_frame(); + + // Ideally, we wouldn't need to save this as state, but classmethod and staticmethod + // currently work by changing this state on the function. + let binding_type = if name == Dunder::New.value() || class_context.is_none() { + BindingType::Static + } else if let Some(ref class) = class_context { + // All methods that are discovered via a metaclass should be treated as class methods. 
+ if class.borrow().is_metaclass() { + BindingType::Class + } else { + BindingType::Instance + } + } else { + BindingType::Instance + }; + + let mut visitor = FunctionAnalysisVisitor::new(); + body.accept(&mut visitor); + + Self { + name, + args, + body, + module, + class_context, + line_number, + decorators, + is_async, + captured_env, + binding_type, + scope: Scope::default(), + closure: visitor.into(), + } + } + + pub fn new_lambda(state: Container, args: ParsedArgDefinitions, body: Block) -> Self { + Self::new(state, "".into(), args, body, vec![], false) + } + + pub fn new_anonymous_generator(state: Container, body: Block) -> Self { + Self::new( + state, + "".into(), + ParsedArgDefinitions::default(), + body, + vec![], + false, + ) + } + + pub fn is_generator(&self) -> bool { + let mut detector = YieldDetector { found_yield: false }; + self.body.accept(&mut detector); + detector.found_yield + } + + pub fn binding_type(&self) -> BindingType { + self.binding_type.clone() + } + + fn get_code(&self) -> ExprResult { + ExprResult::Code(Container::new(Code)) + } + + fn get_closure(&self) -> ExprResult { + let mut items = vec![]; + for key in self.closure.get_free_vars() { + let value = self + .captured_env + .borrow() + .scope + .borrow() + .get(key.as_str()) + .unwrap(); + items.push(ExprResult::Cell(Container::new(Cell::new(value)))); + } + + match items.is_empty() { + true => ExprResult::None, + false => ExprResult::Tuple(Container::new(Tuple::new(items))), + } + } +} + +impl MemberAccessor for Function { + fn insert(&mut self, name: &str, value: ExprResult) { + self.scope.insert(name, value); + } + + fn get(&self, _interpreter: &Interpreter, name: &str) -> Option { + match Dunder::from(name) { + Dunder::Code => Some(self.get_code()), + Dunder::Closure => Some(self.get_closure()), + _ => self.scope.get(name), + } + } + + fn delete(&mut self, name: &str) -> Option { + self.scope.delete(name) + } +} + +impl Container { + pub fn apply_decorators( + &self, + 
interpreter: &Interpreter, + ) -> Result { + let mut result = ExprResult::Function(self.clone()); + if self.borrow().decorators.is_empty() { + return Ok(result); + } + + let decorators = self.borrow().decorators.clone(); + for decorator in decorators.iter() { + let decorator_result = interpreter.evaluate_expr(decorator)?; + + let arguments = ResolvedArguments::default().add_arg(result); + + let function = + decorator_result + .as_callable() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))?; + + result = interpreter.call(function, &arguments)?; + } + + Ok(result) + } +} + +impl Callable for Container { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + let scope = Scope::new(interpreter, self, &args)?; + interpreter.invoke_function(self.clone(), scope) + } + + fn name(&self) -> String { + self.borrow().name.clone() + } + + fn binding_type(&self) -> BindingType { + self.borrow().binding_type() + } + + fn function_type(&self) -> FunctionType { + if self.borrow().is_async { + FunctionType::Async + } else if self.borrow().is_generator() { + FunctionType::Generator + } else { + FunctionType::Regular + } + } + + fn as_any(&self) -> &dyn Any { + // returning a reference to self, not self directly. This is required so that there is a + // known size at compile-time. 
+ self + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "", self.borrow().name, self) + } +} + +struct CodeAttribute; + +impl AttributeResolver for CodeAttribute { + fn resolve(&self, _class: Container) -> ExprResult { + ExprResult::GetSetDescriptor + } + + fn name(&self) -> &'static str { + Dunder::Code.value() + } +} + +struct GlobalsAttribute; + +impl AttributeResolver for GlobalsAttribute { + fn resolve(&self, class: Container) -> ExprResult { + let descriptor = MemberDescriptor::new(class, Container::new(Box::new(NoopCallable))); + ExprResult::MemberDescriptor(descriptor) + } + + fn name(&self) -> &'static str { + Dunder::Globals.value() + } +} diff --git a/src/treewalk/types/generator.rs b/src/treewalk/types/generator.rs new file mode 100644 index 0000000..3c54516 --- /dev/null +++ b/src/treewalk/types/generator.rs @@ -0,0 +1,185 @@ +use crate::{ + core::Container, + parser::types::{Block, Expr, ForClause, LoopIndex, Statement}, + treewalk::{ + types::{ + pausable::{Frame, Pausable, PausableContext, PausableState, PausableStepResult}, + ExprResult, Function, + }, + Interpreter, Scope, State, + }, + types::errors::InterpreterError, +}; + +pub struct Generator { + scope: Container, + function: Container, + context: Container, +} + +impl Generator { + pub fn new(scope: Container, function: Container) -> Self { + let frame = Frame::new(function.borrow().clone().body); + + Self { + scope, + function, + context: PausableContext::new(frame), + } + } + + pub fn new_from_comprehension( + state: Container, + body: &Expr, + clauses: &[ForClause], + ) -> Self { + let generator_body = Self::build_nested_loops(body, clauses); + let function = Container::new(Function::new_anonymous_generator(state, generator_body)); + Self::new(Container::new(Scope::default()), function) + } + + // This is a utility which takes the parsed elements found in a generator comprehension and + // recursively builds a generator function 
out of them. This will then become the body of the + // function provided to a generator. + fn build_nested_loops(body: &Expr, clauses: &[ForClause]) -> Block { + if let Some((first_clause, remaining_clauses)) = clauses.split_first() { + let loop_body = if remaining_clauses.is_empty() { + // Base case: Yield the body + Block::new(vec![Statement::Yield(body.clone())]) + } else { + // Recursive case: Build nested loop for the remaining clauses + Self::build_nested_loops(body, remaining_clauses) + }; + + let index = if first_clause.indices.len() == 1 { + &first_clause.indices[0] + } else { + // This is if we need to unpack multiple variables + unimplemented!() + }; + + let for_in_loop = Statement::ForInLoop { + index: LoopIndex::Variable(index.to_string()), + iterable: *first_clause.iterable.clone(), + body: loop_body, + else_block: None, + }; + + Block::new(vec![for_in_loop]) + } else { + unreachable!() + } + } +} + +impl Pausable for Container { + fn context(&self) -> Container { + self.borrow().context.clone() + } + + fn scope(&self) -> Container { + self.borrow().scope.clone() + } + + fn function(&self) -> Container { + self.borrow().function.clone() + } + + fn set_scope(&self, scope: Container) { + self.borrow_mut().scope = scope; + } + + fn finish( + &self, + interpreter: &Interpreter, + _result: ExprResult, + ) -> Result { + Err(InterpreterError::StopIteration( + interpreter.state.call_stack(), + )) + } + + fn handle_step( + &self, + interpreter: &Interpreter, + statement: Statement, + control_flow: bool, + ) -> Result { + match self.execute_statement(interpreter, statement, control_flow)? { + Some(yielded) => { + self.on_exit(interpreter); + Ok(PausableStepResult::BreakAndReturn(yielded)) + } + None => Ok(PausableStepResult::NoOp), + } + } +} + +impl Container { + /// By this point, all control flow statements have already been handled manually. Evaluate all + /// other statements unless we encounter a yield. 
+ /// + /// Only yield statements will cause a value to be returned, everything else will return + /// `None`. + fn execute_statement( + &self, + interpreter: &Interpreter, + statement: Statement, + control_flow: bool, + ) -> Result, InterpreterError> { + if !control_flow { + match statement { + Statement::Yield(expr) => Ok(Some(interpreter.evaluate_expr(&expr)?)), + Statement::YieldFrom(_) => unimplemented!(), + _ => { + // would we ever need to support a return statement here? + let _ = interpreter.evaluate_statement(&statement)?; + Ok(None) + } + } + } else { + Ok(None) + } + } +} + +#[derive(Clone)] +pub struct GeneratorIterator { + pub generator: Container, + pub interpreter: Interpreter, +} + +impl GeneratorIterator { + pub fn new(generator: Generator, interpreter: Interpreter) -> Self { + Self { + generator: Container::new(generator), + interpreter, + } + } +} + +impl Iterator for GeneratorIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.generator.context().current_state() == PausableState::Finished { + return None; + } + + // we need a better way to surface error during a generator run + match self.generator.run_until_pause(&self.interpreter) { + Ok(result) => Some(result), + Err(InterpreterError::StopIteration(_)) => None, + _ => panic!(), + } + } +} + +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = GeneratorIterator; + + fn into_iter(self) -> Self::IntoIter { + self.borrow().clone() + } +} diff --git a/src/treewalk/types/int.rs b/src/treewalk/types/int.rs new file mode 100644 index 0000000..e3b1960 --- /dev/null +++ b/src/treewalk/types/int.rs @@ -0,0 +1,91 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +pub struct Int; + +impl Int { + pub fn get_methods() -> Vec> { + 
vec![Box::new(NewBuiltin), Box::new(InitBuiltin)] + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "{}", self.borrow()) + } +} + +struct NewBuiltin; +struct InitBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + Ok(ExprResult::Integer(Container::new(0))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for InitBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + let output = args + .get_self() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))? + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + + if args.is_empty() { + Ok(ExprResult::Void) + } else if args.len() == 1 { + let input = args + .get_arg(0) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + + *output.borrow_mut() = *input.borrow(); + Ok(ExprResult::Void) + } else { + Err(InterpreterError::WrongNumberOfArguments( + 1, + args.len(), + interpreter.state.call_stack(), + )) + } + } + + fn name(&self) -> String { + Dunder::Init.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/list.rs b/src/treewalk/types/list.rs new file mode 100644 index 0000000..0531e1e --- /dev/null +++ b/src/treewalk/types/list.rs @@ -0,0 +1,253 @@ +use std::{ + collections::VecDeque, + fmt::{Display, Error, Formatter}, + ops::Add, +}; + +use crate::{ + core::{Container, Storable}, + treewalk::Interpreter, + types::errors::InterpreterError, +}; + +use super::{ + builtins::utils::validate_args, + function::BindingType, + generator::GeneratorIterator, + traits::{Callable, IndexRead, IndexWrite}, + utils::{Dunder, 
ResolvedArguments}, + ExprResult, Range, Set, Slice, Tuple, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct List { + items: Vec, +} + +impl List { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin), Box::new(AppendBuiltin)] + } + + pub fn default() -> Self { + Self { items: vec![] } + } + + pub fn new(items: Vec) -> Self { + Self { items } + } + + pub fn append(&mut self, item: ExprResult) { + self.items.push(item) + } + + /// Use this when you need a `pop_front` method. + pub fn as_queue(&self) -> VecDeque { + self.items.clone().into() + } + + pub fn len(&self) -> usize { + self.items.len() + } + + pub fn slice(&self, slice: &Slice) -> Self { + let len = self.items.len() as i64; + let receiver = Container::new(self.clone()); + + let sliced_items = Slice::slice(slice, len, |i| { + IndexRead::get(&receiver, &ExprResult::Integer(i.store())) + }); + + List::new(sliced_items) + } +} + +impl IndexRead for Container { + fn get(&self, key: &ExprResult) -> Option { + match key { + ExprResult::Integer(i) => self.borrow().items.get(*i.borrow() as usize).cloned(), + ExprResult::Slice(s) => Some(ExprResult::List(Container::new(self.borrow().slice(s)))), + _ => None, + } + } +} + +impl IndexWrite for Container { + fn insert(&mut self, index: &ExprResult, value: ExprResult) { + let i = index.as_integer_val().unwrap(); + self.borrow_mut().items[i as usize] = value; + } + + fn delete(&mut self, index: &ExprResult) -> Option { + let i = index.as_integer_val()?; + Some(self.borrow_mut().items.remove(i as usize)) + } +} + +impl Add for List { + type Output = List; + + fn add(self, other: List) -> List { + List { + items: [self.items, other.items].concat(), + } + } +} + +impl From> for Container { + fn from(range: Container) -> Container { + let start = range.borrow().start; + let stop = range.borrow().stop; + let items = (start..stop) + .map(|x| ExprResult::Integer(Container::new(x as i64))) + .collect(); + Container::new(List::new(items)) + } +} + +impl From> for 
Container { + fn from(set: Container) -> Container { + // Calling `into_iter()` directly off the `Set` results in a stack overflow. + //let mut items: Vec = set.into_iter().collect(); + let mut items: Vec = set.borrow().items.clone().into_iter().collect(); + + items.sort_by_key(|x| { + match x { + ExprResult::Integer(i) => *i.borrow(), + // TODO how should we sort strings here? + //ExprResult::String(s) => s.0, + _ => 0, + } + }); + + Container::new(List::new(items)) + } +} + +impl From> for Container { + fn from(tuple: Container) -> Container { + Container::new(List::new(tuple.borrow().raw())) + } +} + +impl From> for Container { + fn from(g: Container) -> Container { + let items = g.borrow().clone().collect::>(); + Container::new(List::new(items)) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = ListIterator::new(self.clone()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "[{}]", items) + } +} + +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = ListIterator; + + fn into_iter(self) -> Self::IntoIter { + ListIterator::new(self) + } +} + +#[derive(Clone)] +pub struct ListIterator { + list_ref: Container, + current_index: usize, +} + +impl ListIterator { + pub fn new(list_ref: Container) -> Self { + Self { + list_ref, + current_index: 0, + } + } +} + +impl Iterator for ListIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.current_index == self.list_ref.borrow().items.len() { + None + } else { + self.current_index += 1; + self.list_ref + .borrow() + .items + .get(self.current_index - 1) + .cloned() + } + } +} + +struct NewBuiltin; +struct AppendBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + if args.len() == 2 { + let output = args + .get_arg(1) + .as_list() + .ok_or(InterpreterError::ExpectedList( + interpreter.state.call_stack(), + 
))?; + Ok(ExprResult::List(output)) + } else { + validate_args(&args, 1, interpreter.state.call_stack())?; + Ok(ExprResult::List(Container::new(List::default()))) + } + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for AppendBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + validate_args(&args, 1, interpreter.state.call_stack())?; + + let list = args + .get_self() + .ok_or(InterpreterError::ExpectedList( + interpreter.state.call_stack(), + ))? + .as_list() + .ok_or(InterpreterError::ExpectedList( + interpreter.state.call_stack(), + ))?; + + list.borrow_mut().append(args.get_arg(0).clone()); + + Ok(ExprResult::Void) + } + + fn name(&self) -> String { + "append".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/mapping_proxy.rs b/src/treewalk/types/mapping_proxy.rs new file mode 100644 index 0000000..9e84191 --- /dev/null +++ b/src/treewalk/types/mapping_proxy.rs @@ -0,0 +1,27 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::core::Container; + +use super::{traits::IndexRead, Dict, ExprResult}; + +/// A read-only view into a `Dict`. This is used by Python for things like `Dunder::Dict`. 
+#[derive(PartialEq)] +pub struct MappingProxy(Container); + +impl MappingProxy { + pub fn new(dict: Container) -> Self { + Self(dict) + } +} + +impl IndexRead for Container { + fn get(&self, index: &ExprResult) -> Option { + self.borrow().0.get(index) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "mappingproxy({})", self.borrow().0) + } +} diff --git a/src/treewalk/types/memoryview.rs b/src/treewalk/types/memoryview.rs new file mode 100644 index 0000000..388c552 --- /dev/null +++ b/src/treewalk/types/memoryview.rs @@ -0,0 +1,38 @@ +use crate::{treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +/// A mutable version of a byte string. +#[derive(Debug, Clone, PartialEq)] +pub struct Memoryview; + +impl Memoryview { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + unimplemented!() + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/method.rs b/src/treewalk/types/method.rs new file mode 100644 index 0000000..88dbbb5 --- /dev/null +++ b/src/treewalk/types/method.rs @@ -0,0 +1,59 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{function::BindingType, traits::Callable, utils::ResolvedArguments, ExprResult}; + +#[derive(Debug, Clone)] +pub struct Method { + receiver: ExprResult, + function: Container>, +} + +impl Method { + pub fn new(receiver: ExprResult, function: Container>) -> Self { + Self { receiver, function } + } + + pub fn name(&self) -> String { + self.function.borrow().name() + " of " + 
&self.receiver.to_string() + } + + pub fn receiver(&self) -> ExprResult { + self.receiver.clone() + } +} + +impl Callable for Container { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + interpreter + .state + .push_receiver(self.borrow().receiver.clone()); + let result = self.borrow().function.borrow().call(interpreter, args); + interpreter.state.pop_receiver(); + + result + } + + fn name(&self) -> String { + self.borrow().name() + } + + fn binding_type(&self) -> BindingType { + self.borrow().function.borrow().binding_type() + } + + fn receiver(&self) -> Option { + Some(self.borrow().receiver()) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "", self.name()) + } +} diff --git a/src/treewalk/types/mod.rs b/src/treewalk/types/mod.rs new file mode 100644 index 0000000..d7dd85a --- /dev/null +++ b/src/treewalk/types/mod.rs @@ -0,0 +1,89 @@ +mod bool; +pub mod builtins; +mod bytearray; +mod bytes; +mod cell; +pub mod class; +mod classmethod; +pub mod coroutine; +mod descriptor; +mod dict; +mod dict_items; +mod dict_keys; +mod dict_values; +mod exception; +mod frozenset; +pub mod function; +mod generator; +mod int; +mod list; +mod mapping_proxy; +mod memoryview; +pub mod method; +pub mod module; +pub mod object; +pub mod pausable; +mod property; +mod range; +mod result; +mod reversed; +mod set; +mod slice; +mod staticmethod; +mod str; +mod super_type; +pub mod traits; +mod tuple; +mod r#type; +#[allow(clippy::module_inception)] +pub mod types; +pub mod utils; +mod zip; + +pub use bool::Bool; +pub use bytearray::ByteArray; +pub use bytes::Bytes; +pub use cell::Cell; +pub use class::Class; +pub use classmethod::Classmethod; +pub use coroutine::Coroutine; +pub use descriptor::MemberDescriptor; +pub use dict::Dict; +pub use dict_items::DictItems; +pub use dict_keys::DictKeys; +pub use dict_values::DictValues; +pub use exception::{Exception, Traceback}; +pub use 
frozenset::FrozenSet; +pub use function::{Code, Function}; +pub use generator::Generator; +pub use int::Int; +pub use list::List; +pub use mapping_proxy::MappingProxy; +pub use memoryview::Memoryview; +pub use method::Method; +pub use module::Module; +pub use object::Object; +pub use property::Property; +pub use r#type::TypeClass; +pub use range::Range; +pub use result::ExprResult; +pub use set::Set; +pub use slice::Slice; +pub use staticmethod::Staticmethod; +pub use str::Str; +pub use super_type::Super; +pub use tuple::Tuple; +pub use types::Type; + +pub mod iterators { + pub use super::dict_items::DictItemsIterator; + pub use super::dict_keys::DictKeysIterator; + pub use super::dict_values::DictValuesIterator; + pub use super::generator::GeneratorIterator; + pub use super::list::ListIterator; + pub use super::range::RangeIterator; + pub use super::result::ExprResultIterator; + pub use super::reversed::ReversedIterator; + pub use super::str::StringIterator; + pub use super::zip::ZipIterator; +} diff --git a/src/treewalk/types/module.rs b/src/treewalk/types/module.rs new file mode 100644 index 0000000..8c758ce --- /dev/null +++ b/src/treewalk/types/module.rs @@ -0,0 +1,74 @@ +use std::fmt::{Display, Error, Formatter}; +use std::path::PathBuf; + +use crate::core::Container; +use crate::treewalk::{Interpreter, LoadedModule, Scope}; + +use super::{ + traits::{MemberAccessor, ModuleInterface}, + ExprResult, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct Module { + pub scope: Scope, + loaded_module: Option, +} + +impl Module { + pub fn new(loaded_module: LoadedModule, scope: Scope) -> Self { + Self { + loaded_module: Some(loaded_module), + scope, + } + } + + pub fn empty() -> Self { + Self { + loaded_module: None, + scope: Scope::default(), + } + } + + pub fn path(&self) -> PathBuf { + self.loaded_module + .clone() + .map_or(LoadedModule::empty_path(), |m| m.path()) + } + + pub fn name(&self) -> String { + self.loaded_module + .clone() + 
.map_or(LoadedModule::empty_name(), |m| m.name()) + } +} + +impl ModuleInterface for Module { + fn get(&self, _interpreter: &Interpreter, name: &str) -> Option { + self.scope.get(name) + } + + fn dir(&self) -> Vec { + self.scope.symbols() + } +} + +impl MemberAccessor for Module { + fn get(&self, _interpreter: &Interpreter, name: &str) -> Option { + self.scope.get(name) + } + + fn delete(&mut self, _name: &str) -> Option { + unimplemented!(); + } + + fn insert(&mut self, _name: &str, _value: ExprResult) { + unimplemented!(); + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "", self.borrow().name()) + } +} diff --git a/src/treewalk/types/object.rs b/src/treewalk/types/object.rs new file mode 100644 index 0000000..470b252 --- /dev/null +++ b/src/treewalk/types/object.rs @@ -0,0 +1,232 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::{ + core::{log, Container, LogLevel}, + treewalk::{Interpreter, Scope}, + types::errors::InterpreterError, +}; + +use super::{ + builtins::utils, + class::InstantiationType, + function::BindingType, + traits::{Callable, MemberAccessor}, + utils::{Dunder, ResolvedArguments}, + Class, ExprResult, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct Object { + pub class: Container, + scope: Scope, +} + +impl Object { + pub fn get_methods() -> Vec> { + vec![ + Box::new(InitBuiltin), + Box::new(NewBuiltin), + Box::new(EqBuiltin), + ] + } + + /// The primary public interface for creating an `Object` from a `Class`. + pub fn new( + class: Container, + interpreter: &Interpreter, + args: &ResolvedArguments, + ) -> Result, InterpreterError> { + let object = Self::create_object(class.clone(), interpreter)?; + Self::initialize_object(object.clone(), interpreter, args)?; + Ok(object) + } + + /// Create the object by calling the `Dunder::New` method according to its MRO. 
+ fn create_object( + class: Container, + interpreter: &Interpreter, + ) -> Result, InterpreterError> { + let args = ResolvedArguments::default().add_arg(ExprResult::Class(class.clone())); + + interpreter + .evaluate_new_method(&ExprResult::Class(class), &args, InstantiationType::Object)? + .as_object() + .ok_or(InterpreterError::ExpectedObject( + interpreter.state.call_stack(), + )) + } + + /// Create the object with an empty symbol table. This is also called by the `Dunder::New` for + /// `Type::Object` builtin. + fn new_object_base(class: Container) -> Result, InterpreterError> { + Ok(Container::new(Self { + class, + scope: Scope::default(), + })) + } + + /// Call the most-specific `Dunder::Init` method in the object's inheritance hierarchy + /// (i.e. the lowest in the hierarchy only). + fn initialize_object( + object: Container, + interpreter: &Interpreter, + args: &ResolvedArguments, + ) -> Result<(), InterpreterError> { + let mro = object.borrow().class.mro(); + + for class in mro { + if class.get(interpreter, Dunder::Init.value()).is_some() { + interpreter.evaluate_method( + ExprResult::Object(object.clone()), + Dunder::Init.value(), + args, + )?; + + // Python only evaluates the most-specific `Dunder::Init` method and leaves the rest up + // to the classes themselves. Therefore, once we have evaluated a constructor, we + // can quit climbing the hierarchy. 
+ break; + } + } + + Ok(()) + } +} + +impl MemberAccessor for Container { + fn insert(&mut self, name: &str, value: ExprResult) { + self.borrow_mut().scope.insert(name, value); + } + + fn delete(&mut self, name: &str) -> Option { + self.borrow_mut().scope.delete(name) + } + + fn get(&self, interpreter: &Interpreter, name: &str) -> Option { + log(LogLevel::Debug, || { + format!("Searching for: {}.{}", self, name) + }); + + if let Some(attr) = self.borrow().scope.get(name) { + log(LogLevel::Debug, || format!("Found: {}.{}", self, name)); + return Some(attr); + } + + if let Some(attr) = self.borrow().class.get(interpreter, name) { + log(LogLevel::Debug, || { + format!("Found: {}::{}", self.borrow().class, name) + }); + return match attr { + ExprResult::MemberDescriptor(descriptor) => descriptor + .get(interpreter, ExprResult::Object(self.clone())) + .ok(), + _ => Some(attr), + }; + } + + None + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!( + f, + "<{} object at {:p}>", + self.borrow().class.borrow().name, + self + ) + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + // This is builtin for 'object' but the instance is created from the `cls` passed in as the + // first argument. 
+ let class = args + .get_arg(0) + .as_class() + .ok_or(InterpreterError::ExpectedClass( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::Object(Object::new_object_base(class)?)) + } + + fn name(&self) -> String { + Dunder::New.into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} + +struct InitBuiltin; + +impl Callable for InitBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + Ok(ExprResult::Void) + } + + fn name(&self) -> String { + Dunder::Init.into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} + +/// The default behavior in Python for the `==` sign is to compare the object identity. This is +/// only used when `Dunder::Eq` is not overridden by a user-defined class. +struct EqBuiltin; + +impl Callable for EqBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let a = args + .get_self() + .ok_or(InterpreterError::ExpectedObject( + interpreter.state.call_stack(), + ))? + .as_object() + .ok_or(InterpreterError::ExpectedObject( + interpreter.state.call_stack(), + ))?; + + let b = args + .get_arg(0) + .as_object() + .ok_or(InterpreterError::ExpectedObject( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::Boolean(a.same_identity(&b))) + } + + fn name(&self) -> String { + Dunder::Eq.into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/pausable/frame.rs b/src/treewalk/types/pausable/frame.rs new file mode 100644 index 0000000..6ed1098 --- /dev/null +++ b/src/treewalk/types/pausable/frame.rs @@ -0,0 +1,46 @@ +use crate::parser::types::{Block, Statement}; + +/// An association between a [`Block`] of code and the current statement. 
+#[derive(Debug, Clone, PartialEq)]
+pub struct Frame {
+    /// Index into `block.statements` of the statement [`Frame::next_statement`] returns next.
+    program_counter: usize,
+    /// The block of statements this frame steps through.
+    block: Block,
+}
+
+impl Frame {
+    /// Create a [`Frame`] positioned at the first statement of `block`.
+    pub fn new(block: Block) -> Self {
+        Self {
+            block,
+            program_counter: 0,
+        }
+    }
+
+    /// Return `true` once the program counter has moved past the last statement, i.e. when there
+    /// is nothing left in the block to evaluate.
+    pub fn is_finished(&self) -> bool {
+        self.len() <= self.program_counter
+    }
+
+    /// Return a clone of the next [`Statement`] of the block, incrementing the program counter.
+    ///
+    /// The index is not checked here, so callers should consult [`Frame::is_finished`] first.
+    pub fn next_statement(&mut self) -> Statement {
+        let statement = self.block.statements[self.program_counter].clone();
+        self.program_counter += 1;
+        statement
+    }
+
+    /// If we encountered an await while evaluating an instruction, we need to move the program
+    /// counter back by one so that we can rerun this instruction.
+    ///
+    /// The decrement is unchecked, so this must only follow a [`Frame::next_statement`] call.
+    pub fn step_back(&mut self) {
+        self.program_counter -= 1;
+    }
+
+    /// Reset the program counter to the start of the block. This is useful to simulate loops.
+    pub fn restart(&mut self) {
+        self.program_counter = 0;
+    }
+
+    /// Return the length of the block held by this frame.
+    fn len(&self) -> usize {
+        self.block.statements.len()
+    }
+}
diff --git a/src/treewalk/types/pausable/mod.rs b/src/treewalk/types/pausable/mod.rs
new file mode 100644
index 0000000..1403084
--- /dev/null
+++ b/src/treewalk/types/pausable/mod.rs
@@ -0,0 +1,7 @@
+mod frame;
+mod pausable_context;
+mod pausable_trait;
+
+pub use frame::Frame;
+pub use pausable_context::{PausableContext, PausableState, PausableToken};
+pub use pausable_trait::{Pausable, PausableStepResult};
diff --git a/src/treewalk/types/pausable/pausable_context.rs b/src/treewalk/types/pausable/pausable_context.rs
new file mode 100644
index 0000000..85e017b
--- /dev/null
+++ b/src/treewalk/types/pausable/pausable_context.rs
@@ -0,0 +1,100 @@
+use std::collections::VecDeque;
+
+use crate::{
+    core::{Container, Stack},
+    parser::types::{Expr, LoopIndex, Statement},
+    treewalk::types::ExprResult,
+};
+
+use super::Frame;
+#[allow(unused_imports)]
+use super::Pausable;
+
+/// An enumeration of the possible states in which a [`Pausable`] can exist. This is key to
+/// implementing stack-based control flow.
+#[derive(PartialEq, Clone, Debug)]
+pub enum PausableState {
+    /// State of the initial token before the pausable has been run for the first time.
+    Created,
+    /// The frame's statements are being stepped through; when a frame in this state finishes,
+    /// the whole pausable finishes.
+    Running,
+    /// Inside the body of a `while` loop. Holds the loop condition so it can be evaluated again
+    /// once the body has run to its end.
+    InWhileLoop(Expr),
+    /// Inside the body of a `for` loop.
+    InForLoop {
+        /// The loop target that receives the next item before each pass over the body.
+        index: LoopIndex,
+        /// The items that have not been iterated over yet, consumed from the front.
+        queue: Container<VecDeque<ExprResult>>,
+    },
+    /// Inside a plain nested block, such as the taken branch of an `if`/`elif`/`else`.
+    InBlock,
+    /// The initial frame ran to completion.
+    Finished,
+}
+
+/// The context that allows a [`Pausable`] to be paused and resumed. This represents an individual
+/// [`Frame`] and its current [`PausableState`].
+#[derive(Clone)]
+pub struct PausableToken {
+    frame: Frame,
+    state: PausableState,
+}
+
+impl PausableToken {
+    /// Pair a [`Frame`] with the [`PausableState`] it is executed under.
+    pub(crate) fn new(frame: Frame, state: PausableState) -> Self {
+        Self { frame, state }
+    }
+}
+
+/// The context that allows a [`Pausable`] to be paused and resumed. This represents a stack of
+/// [`PausableToken`] objects.
+pub struct PausableContext(Stack<PausableToken>);
+
+impl PausableContext {
+    /// Build a shared context whose stack holds a single token: `initial_frame` in the
+    /// [`PausableState::Created`] state.
+    pub(crate) fn new(initial_frame: Frame) -> Container<Self> {
+        Container::new(Self(Stack::new(vec![PausableToken::new(
+            initial_frame,
+            PausableState::Created,
+        )])))
+    }
+}
+
+/// Stack operations exposed directly on the shared handle. "Current" always refers to the token
+/// on top of the stack.
+impl Container<PausableContext> {
+    /// Push `context` onto the stack, making its frame and state the current ones.
+    pub(crate) fn push_context(&mut self, context: PausableToken) {
+        self.borrow_mut().0.push(context);
+    }
+
+    /// Pop the current token off the stack and return whatever [`Stack`]'s `pop` yields
+    /// (presumably `None` when the stack is empty -- confirm against `Stack`).
+    pub(crate) fn pop_context(&self) -> Option<PausableToken> {
+        self.borrow_mut().0.pop()
+    }
+
+    /// Overwrite the state of the current token, leaving its frame untouched.
+    pub(crate) fn set_state(&self, state: PausableState) {
+        self.borrow_mut().0.with_top_mut(|context| {
+            context.state = state;
+        });
+    }
+
+    /// Return the next [`Statement`] of the current frame and advance that frame past it.
+    ///
+    /// The result of `with_top_mut` is unwrapped, so this panics if it yields nothing
+    /// (presumably only when the stack is empty).
+    pub(crate) fn next_statement(&self) -> Statement {
+        self.borrow_mut()
+            .0
+            .with_top_mut(|context| context.frame.next_statement())
+            .unwrap()
+    }
+
+    /// Return the frame of the current token by value. Panics if `top()` yields nothing.
+    pub(crate) fn current_frame(&self) -> Frame {
+        self.borrow().0.top().unwrap().frame
+    }
+
+    /// Return the state of the current token by value. Panics if `top()` yields nothing.
+    pub(crate) fn current_state(&self) -> PausableState {
+        self.borrow().0.top().unwrap().state
+    }
+
+    /// Rewind the current frame to its first statement, e.g. for another pass over a loop body.
+    pub(crate) fn restart_frame(&self) {
+        self.borrow_mut().0.with_top_mut(|context| {
+            context.frame.restart();
+        });
+    }
+
+    /// Mark the current token as [`PausableState::Running`].
+    pub(crate) fn start(&self) {
+        self.set_state(PausableState::Running);
+    }
+
+    /// Move the current frame back by one statement so that statement is evaluated again.
+    pub(crate) fn step_back(&self) {
+        self.borrow_mut().0.with_top_mut(|context| {
+            context.frame.step_back();
+        });
+    }
+}
diff --git a/src/treewalk/types/pausable/pausable_trait.rs b/src/treewalk/types/pausable/pausable_trait.rs
new file mode 100644
index 0000000..8f40b27
--- /dev/null
+++ b/src/treewalk/types/pausable/pausable_trait.rs
@@ -0,0 +1,290 @@
+use crate::{
+    core::Container,
+    parser::types::Statement,
+    treewalk::{
+        types::{ExprResult, Function},
+        Interpreter, Scope, StackFrame,
+    },
+    types::errors::InterpreterError,
+};
+
+use super::{Frame, PausableContext, PausableState, PausableToken};
+
+/// This instructs [`Pausable::run_until_pause`] what action should happen next.
+pub enum PausableStepResult { + NoOp, + BreakAndReturn(ExprResult), + Return(ExprResult), + Break, +} + +/// The interface for generators and coroutines, which share the ability to be paused and resumed. +pub trait Pausable { + /// A getter for the [`PausableContext`] of a pausable function. + fn context(&self) -> Container; + + /// A getter for the [`Scope`] of a pausable function. + fn scope(&self) -> Container; + + /// A setter for the [`Scope`] of a pausable function. + fn set_scope(&self, scope: Container); + + /// A getter for the [`Function`] of a pausable function. + fn function(&self) -> Container; + + /// A handle to perform any necessary cleanup once this function returns, including set its + /// return value. + fn finish( + &self, + interpreter: &Interpreter, + result: ExprResult, + ) -> Result; + + /// A handle to invoke the discrete operation of evaluating an individual statement and + /// producing a [`PausableStepResult`] based on the control flow instructions and or the + /// expression return values encountered. + fn handle_step( + &self, + interpreter: &Interpreter, + statement: Statement, + control_flow: bool, + ) -> Result; + + /// The default behavior which selects the next [`Statement`] and manually evaluates any + /// control flow statements. This then calls [`Pausable::handle_step`] to set up any return + /// values based on whether a control flow structure was encountered. + fn step(&self, interpreter: &Interpreter) -> Result { + let statement = self.context().next_statement(); + + // Delegate to the common function for control flow + let encountered_control_flow = + self.execute_control_flow_statement(&statement, interpreter)?; + + self.handle_step(interpreter, statement, encountered_control_flow) + } + + /// The default behavior required to perform the necessary context switching when entering a + /// pausable function. 
+ fn on_entry(&self, interpreter: &Interpreter) { + interpreter.state.push_local(self.scope()); + interpreter + .state + .push_context(StackFrame::new_function(self.function().borrow().clone())); + } + + /// The default behavior required to perform the necessary context switching when exiting a + /// pausable function. + fn on_exit(&self, interpreter: &Interpreter) { + interpreter.state.pop_context(); + if let Some(scope) = interpreter.state.pop_local() { + self.set_scope(scope); + } + } + + /// This function manually executes any control flow statements. Any changes are reflected by + /// invoking [`Container::push_context`] with the new [`Frame`] and + /// [`PausableState`]. + /// + /// This implementation uses a stack-based control flow to remember the next instruction + /// whenever this coroutine is awaited. + /// + /// A boolean is returned indicated whether a control flow statement was encountered. + fn execute_control_flow_statement( + &self, + statement: &Statement, + interpreter: &Interpreter, + ) -> Result { + match statement { + Statement::WhileLoop { body, condition } => { + if interpreter.evaluate_expr(condition)?.as_boolean().ok_or( + InterpreterError::ExpectedBoolean(interpreter.state.call_stack()), + )? { + self.context().push_context(PausableToken::new( + Frame::new(body.clone()), + PausableState::InWhileLoop(condition.clone()), + )); + } + + Ok(true) + } + Statement::IfElse { + if_part, + elif_parts, + else_part, + } => { + if interpreter + .evaluate_expr(&if_part.condition)? + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean( + interpreter.state.call_stack(), + ))? + { + self.context().push_context(PausableToken::new( + Frame::new(if_part.block.clone()), + PausableState::InBlock, + )); + + return Ok(true); + } + + for elif_part in elif_parts { + if interpreter + .evaluate_expr(&elif_part.condition)? + .as_boolean() + .ok_or(InterpreterError::ExpectedBoolean( + interpreter.state.call_stack(), + ))? 
+ { + self.context().push_context(PausableToken::new( + Frame::new(elif_part.block.clone()), + PausableState::InBlock, + )); + + return Ok(true); + } + } + + if let Some(else_body) = else_part { + self.context().push_context(PausableToken::new( + Frame::new(else_body.clone()), + PausableState::InBlock, + )); + } + + Ok(true) + } + Statement::ForInLoop { + index, + iterable, + body, + .. + } => { + let items = interpreter.evaluate_expr(iterable)?.as_list().ok_or( + InterpreterError::ExpectedList(interpreter.state.call_stack()), + )?; + + let mut queue = items.borrow().as_queue(); + + if let Some(item) = queue.pop_front() { + interpreter.state.write_loop_index(index, item); + self.context().push_context(PausableToken::new( + Frame::new(body.clone()), + PausableState::InForLoop { + index: index.clone(), + queue: Container::new(queue), + }, + )); + } + + Ok(true) + } + _ => Ok(false), // only control flow statements are handled here + } + } + + /// Run this [`Pausable`] until it reaches a pause event. + fn run_until_pause(&self, interpreter: &Interpreter) -> Result { + self.on_entry(interpreter); + + let mut result = ExprResult::None; + loop { + match self.context().current_state() { + PausableState::Created => { + self.context().start(); + } + PausableState::Running => { + if self.context().current_frame().is_finished() { + self.context().set_state(PausableState::Finished); + self.on_exit(interpreter); + return self.finish(interpreter, result); + } + + match self.step(interpreter)? 
{ + PausableStepResult::NoOp => {} + PausableStepResult::BreakAndReturn(val) => { + break Ok(val); + } + PausableStepResult::Return(val) => { + result = val; + } + PausableStepResult::Break => { + break Ok(ExprResult::Void); + } + }; + } + PausableState::InForLoop { index, queue } => { + if self.context().current_frame().is_finished() { + let item = queue.borrow_mut().pop_front(); + if let Some(item) = item { + interpreter.state.write_loop_index(&index, item); + self.context().restart_frame(); + } else { + self.context().pop_context(); + continue; + } + } + + match self.step(interpreter)? { + PausableStepResult::NoOp => {} + PausableStepResult::BreakAndReturn(val) => { + break Ok(val); + } + PausableStepResult::Return(val) => { + result = val; + } + PausableStepResult::Break => { + break Ok(ExprResult::Void); + } + }; + } + PausableState::InBlock => { + if self.context().current_frame().is_finished() { + self.context().pop_context(); + continue; + } + + match self.step(interpreter)? { + PausableStepResult::NoOp => {} + PausableStepResult::BreakAndReturn(val) => { + break Ok(val); + } + PausableStepResult::Return(val) => { + result = val; + } + PausableStepResult::Break => { + break Ok(ExprResult::Void); + } + }; + } + PausableState::InWhileLoop(condition) => { + if self.context().current_frame().is_finished() { + self.context().pop_context(); + continue; + } + + match self.step(interpreter)? { + PausableStepResult::NoOp => {} + PausableStepResult::BreakAndReturn(val) => { + break Ok(val); + } + PausableStepResult::Return(val) => { + result = val; + } + PausableStepResult::Break => { + break Ok(ExprResult::Void); + } + }; + + if self.context().current_frame().is_finished() + && interpreter.evaluate_expr(&condition)?.as_boolean().ok_or( + InterpreterError::ExpectedBoolean(interpreter.state.call_stack()), + )? 
+ { + self.context().restart_frame(); + } + } + PausableState::Finished => unreachable!(), + } + } + } +} diff --git a/src/treewalk/types/property.rs b/src/treewalk/types/property.rs new file mode 100644 index 0000000..9d6251c --- /dev/null +++ b/src/treewalk/types/property.rs @@ -0,0 +1,58 @@ +use crate::{ + treewalk::{types::builtins::utils, Interpreter}, + types::errors::InterpreterError, +}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, MemberDescriptor, +}; + +pub struct Property; + +impl Property { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } +} + +pub struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + // The first arg is the class itself, the second arg is the function + utils::validate_args(&args, 2, interpreter.state.call_stack())?; + + let function = args + .get_arg(1) + .as_callable() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))?; + + let class = interpreter + .state + .current_class() + .ok_or(InterpreterError::ExpectedClass( + interpreter.state.call_stack(), + ))?; + + let descriptor = MemberDescriptor::new(class, function); + + Ok(ExprResult::MemberDescriptor(descriptor)) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} diff --git a/src/treewalk/types/range.rs b/src/treewalk/types/range.rs new file mode 100644 index 0000000..be2a942 --- /dev/null +++ b/src/treewalk/types/range.rs @@ -0,0 +1,185 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +#[derive(Clone, PartialEq)] +pub struct Range { + pub start: usize, + pub stop: usize, + pub step: 
usize, +} + +impl Range { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin), Box::new(InitBuiltin)] + } + + fn default() -> Self { + Self { + start: 0, + stop: 0, + step: 1, + } + } +} + +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = RangeIterator; + + fn into_iter(self) -> Self::IntoIter { + RangeIterator::new(self.borrow().clone()) + } +} + +impl Display for Range { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + if self.step == 1 { + write!(f, "range({}, {})", self.start, self.stop) + } else { + write!(f, "range({}, {}, {})", self.start, self.stop, self.step) + } + } +} + +#[derive(Clone)] +pub struct RangeIterator(Range); + +impl RangeIterator { + fn new(range: Range) -> Self { + RangeIterator(range) + } +} + +impl Iterator for RangeIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.0.start < self.0.stop { + let result = self.0.start; + // Modify the start value in the range itself to prep the state for the next time + // `next` is called. + self.0.start += self.0.step; + Some(ExprResult::Integer(Container::new(result as i64))) + } else { + None + } + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + Ok(ExprResult::Range(Container::new(Range::default()))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +struct InitBuiltin; + +impl Callable for InitBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + let range = args + .get_self() + .ok_or(InterpreterError::ExpectedRange( + interpreter.state.call_stack(), + ))? 
+ .as_range() + .ok_or(InterpreterError::ExpectedRange( + interpreter.state.call_stack(), + ))?; + + if args.len() == 1 { + let stop = args + .get_arg(0) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + + range.borrow_mut().stop = *stop.borrow() as usize; + + Ok(ExprResult::Void) + } else if args.len() == 2 { + let start = args + .get_arg(0) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + let stop = args + .get_arg(1) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + + range.borrow_mut().start = *start.borrow() as usize; + range.borrow_mut().stop = *stop.borrow() as usize; + + Ok(ExprResult::Void) + } else if args.len() == 3 { + let start = args + .get_arg(0) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + let stop = args + .get_arg(1) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + let step = args + .get_arg(2) + .as_integer() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + + range.borrow_mut().start = *start.borrow() as usize; + range.borrow_mut().stop = *stop.borrow() as usize; + range.borrow_mut().step = *step.borrow() as usize; + + Ok(ExprResult::Void) + } else { + Err(InterpreterError::WrongNumberOfArguments( + 1, + args.len(), + interpreter.state.call_stack(), + )) + } + } + + fn name(&self) -> String { + Dunder::Init.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/result.rs b/src/treewalk/types/result.rs new file mode 100644 index 0000000..5b30960 --- /dev/null +++ b/src/treewalk/types/result.rs @@ -0,0 +1,713 @@ +use std::fmt; +use std::fmt::{Display, Error, Formatter}; +use std::hash::{Hash, Hasher}; + +use crate::core::{Container, Voidable}; +use 
crate::treewalk::Interpreter; +#[cfg(feature = "c_stdlib")] +use crate::types::cpython::{CPythonClass, CPythonModule, CPythonObject}; +use crate::types::errors::InterpreterError; + +use super::{ + class::InstantiationType, + function::BindingType, + iterators::{ + DictItemsIterator, DictKeysIterator, DictValuesIterator, GeneratorIterator, ListIterator, + RangeIterator, ReversedIterator, StringIterator, ZipIterator, + }, + traits::{Callable, IndexRead, IndexWrite, MemberAccessor, ModuleInterface}, + types::TypeExpr, + utils::{Dunder, ResolvedArguments}, + ByteArray, Bytes, Cell, Class, Code, Coroutine, Dict, DictItems, DictKeys, DictValues, + FrozenSet, Function, List, MappingProxy, MemberDescriptor, Method, Module, Object, Range, Set, + Slice, Str, Super, Traceback, Tuple, Type, +}; + +#[derive(Clone)] +pub enum ExprResult { + /// This represents the return value for statements, which, unlike expressions, do not return a + /// value. + Void, + None, + Ellipsis, + NotImplemented, + Integer(Container), + FloatingPoint(f64), + String(Str), + Class(Container), + Object(Container), + Module(Container), + Super(Container), + /// TODO implement the descriptor protocol for `Dunder::Code` and others. + GetSetDescriptor, + /// TODO implement the descriptor protocol for `Dunder::Globals` and others. + MemberDescriptor(MemberDescriptor), + Function(Container), + Method(Container), + BuiltinFunction(Container>), + BuiltinMethod(Container>), + Generator(Container), + Coroutine(Container), + /// TODO this is a stub, we may need to flesh this out with bytecode if we ever want to support + /// self-modifying code or whatever this is used for. + Code(Container), + Cell(Container), + /// An immutable string of bytes. 
+ Bytes(Container), + ByteArray(Container), + Boolean(bool), + List(Container), + Set(Container), + FrozenSet(Container), + Zip(ZipIterator), + Slice(Slice), + Dict(Container), + DictItems(DictItems), + DictKeys(DictKeys), + DictValues(DictValues), + MappingProxy(Container), + Range(Container), + Tuple(Container), + Exception(Box), + Traceback(Traceback), + Frame, + ListIterator(ListIterator), + ReversedIterator(ReversedIterator), + // this might need a real SetIterator, I'm not sure yet + SetIterator(ListIterator), + DictItemsIterator(DictItemsIterator), + DictKeysIterator(DictKeysIterator), + DictValuesIterator(DictValuesIterator), + RangeIterator(RangeIterator), + // this might need a real TupleIterator, I'm not sure yet + TupleIterator(ListIterator), + StringIterator(StringIterator), + // TODO use actual iterator here + BytesIterator(Vec), + // TODO use actual iterator here + ByteArrayIterator(Vec), + TypeNode(TypeExpr), + #[cfg(feature = "c_stdlib")] + CPythonModule(Container), + #[cfg(feature = "c_stdlib")] + CPythonObject(CPythonObject), + #[cfg(feature = "c_stdlib")] + CPythonClass(CPythonClass), +} + +/// Implement PartialEq manually because Py does not implement PartialEq. 
+impl PartialEq for ExprResult {
+    /// Value equality between two results.
+    ///
+    /// Only the same-variant pairs listed in the `match` are comparable. Every other
+    /// combination compares unequal: two different variants, and also two values of a variant
+    /// that has no arm here (e.g. `Ellipsis`, `Module`, `Method`), even when compared with
+    /// themselves.
+    ///
+    /// # Panics
+    ///
+    /// Panics when both sides are `ExprResult::Object`; see the comment on that arm.
+    fn eq(&self, other: &Self) -> bool {
+        match (self, other) {
+            (ExprResult::Void, ExprResult::Void) => true,
+            (ExprResult::None, ExprResult::None) => true,
+            (ExprResult::Integer(a), ExprResult::Integer(b)) => a == b,
+            (ExprResult::FloatingPoint(a), ExprResult::FloatingPoint(b)) => a == b,
+            // Strings compare by their inner value.
+            (ExprResult::String(a), ExprResult::String(b)) => a.0 == b.0,
+            (ExprResult::Bytes(a), ExprResult::Bytes(b)) => a == b,
+            (ExprResult::ByteArray(a), ExprResult::ByteArray(b)) => a == b,
+            (ExprResult::Boolean(a), ExprResult::Boolean(b)) => a == b,
+            (ExprResult::List(a), ExprResult::List(b)) => a == b,
+            (ExprResult::Set(a), ExprResult::Set(b)) => a == b,
+            (ExprResult::FrozenSet(a), ExprResult::FrozenSet(b)) => a == b,
+            (ExprResult::Dict(a), ExprResult::Dict(b)) => a == b,
+            (ExprResult::MappingProxy(a), ExprResult::MappingProxy(b)) => a == b,
+            (ExprResult::DictItems(a), ExprResult::DictItems(b)) => a == b,
+            (ExprResult::DictKeys(a), ExprResult::DictKeys(b)) => a == b,
+            (ExprResult::DictValues(a), ExprResult::DictValues(b)) => a == b,
+            (ExprResult::Range(a), ExprResult::Range(b)) => a == b,
+            (ExprResult::Tuple(a), ExprResult::Tuple(b)) => a == b,
+            (ExprResult::Function(a), ExprResult::Function(b)) => a == b,
+            (ExprResult::Class(a), ExprResult::Class(b)) => a == b,
+            // Object equality uses `Dunder::Eq` and is handled in
+            // [`Interpreter::evaluate_binary_operation`], so this arm is expected never to run;
+            // reaching it panics.
+            (ExprResult::Object(_), ExprResult::Object(_)) => unreachable!(),
+            (ExprResult::Exception(a), ExprResult::Exception(b)) => a == b,
+            // Builtin methods are equal only when both sides are the same instance.
+            (ExprResult::BuiltinMethod(a), ExprResult::BuiltinMethod(b)) => a.same_identity(b),
+            _ => false,
+        }
+    }
+}
+// `Eq` cannot be derived for this enum because `ExprResult::FloatingPoint` holds an `f64`,
+// which implements only `PartialEq` (NaN is not equal to itself), so the marker impl that
+// follows is written by hand.
+impl Eq for ExprResult {} + +impl Hash for ExprResult { + fn hash(&self, state: &mut H) + where + H: Hasher, + { + if let ExprResult::Set(set) = self { + for i in set.borrow().items.clone() { + i.as_integer().unwrap().borrow().hash(state) + } + } + } +} + +impl ExprResult { + pub fn new( + interpreter: &Interpreter, + class: Container, + arguments: ResolvedArguments, + ) -> Result { + // We have to handle calls to `type()` with only one parameter as a special case because + // this doesn't actually call the `Type::Type` `Dunder::New` method, which expects more + // arguments and would return a new class. Overloading the `Dunder::Init` method + // here on `Type::Type` would also create unintended behaviors. + let object = if class.borrow().is_type(&Type::Type) { + assert_eq!(arguments.len(), 1); + interpreter.state.get_type(&arguments.get_arg(0)) + } else if class.borrow().is_builtin_type() { + let mut new_args = arguments.clone(); + new_args.bind_new(ExprResult::Class(class.clone())); + let object = interpreter.evaluate_new_method( + &ExprResult::Class(class), + &new_args, + InstantiationType::Object, + )?; + + // I don't have a rock solid understanding of why this isn't necessary for super() + // besides the call to `Dunder::New` possibly returning an object that has already been + // initialized. + // + // Function is also weird. This is returned for `Dunder::New` for `Type::Classmethod` + // and `Type::Staticmethod`. + // + // Maybe these two should not be in this clause at all. + if !matches!(object, ExprResult::Super(_)) && !matches!(object, ExprResult::Function(_)) + { + interpreter.evaluate_method(object.clone(), Dunder::Init.value(), &arguments)?; + } + + object + } else { + ExprResult::Object(Object::new(class, interpreter, &arguments)?) 
+ }; + + Ok(object) + } + + fn minimized_display(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + ExprResult::Void => unreachable!(), + ExprResult::None => write!(f, "None"), + ExprResult::Ellipsis => write!(f, "Ellipsis"), + ExprResult::NotImplemented => write!(f, "NotImplemented"), + ExprResult::Super(_) => write!(f, ""), + ExprResult::GetSetDescriptor => write!(f, ""), + ExprResult::MemberDescriptor(m) => write!(f, "{}", m), + ExprResult::Class(c) => write!(f, "{}", c), + ExprResult::Object(o) => write!(f, "{}", o), + ExprResult::Method(m) => write!(f, "{}", m), + ExprResult::Function(func) => write!(f, "{}", func), + ExprResult::Generator(_) => write!(f, ""), + ExprResult::Coroutine(_) => write!(f, ""), + ExprResult::BuiltinFunction(func) => { + write!(f, "", func.borrow().name()) + } + ExprResult::BuiltinMethod(_) => write!(f, ""), + ExprResult::Integer(i) => write!(f, "{}", i), + ExprResult::FloatingPoint(i) => write!(f, "{}", i), + ExprResult::String(s) => write!(f, "{}", s.0), + ExprResult::Bytes(b) => write!(f, "b'{:?}'", b), + ExprResult::ByteArray(b) => write!(f, "bytearray(b'{:?}')", b), + ExprResult::Boolean(b) => { + if *b { + write!(f, "True") + } else { + write!(f, "False") + } + } + ExprResult::List(l) => write!(f, "{}", l), + ExprResult::Set(s) => write!(f, "{}", s), + ExprResult::FrozenSet(s) => write!(f, "{}", s), + ExprResult::Range(r) => write!(f, "{}", r.borrow()), + ExprResult::Tuple(t) => write!(f, "{}", t), + ExprResult::Zip(_) => write!(f, ""), + ExprResult::Slice(s) => write!(f, "{}", s), + ExprResult::Dict(d) => write!(f, "{}", d), + ExprResult::MappingProxy(d) => write!(f, "{}", d), + ExprResult::DictItems(d) => write!(f, "dict_items({})", d), + ExprResult::DictKeys(d) => write!(f, "dict_keys({})", d), + ExprResult::DictValues(d) => write!(f, "dict_values({})", d), + ExprResult::StringIterator(_) => write!(f, ""), + ExprResult::BytesIterator(_) => write!(f, ""), + ExprResult::ByteArrayIterator(_) => write!(f, ""), + 
ExprResult::ListIterator(_) => write!(f, ""), + ExprResult::ReversedIterator(_) => write!(f, ""), + ExprResult::SetIterator(_) => write!(f, ""), + ExprResult::DictItemsIterator(_) => write!(f, ""), + ExprResult::DictKeysIterator(_) => write!(f, ""), + ExprResult::DictValuesIterator(_) => write!(f, ""), + ExprResult::RangeIterator(_) => write!(f, ""), + ExprResult::TupleIterator(_) => write!(f, ""), + ExprResult::Code(_) => write!(f, ""), + ExprResult::Cell(_) => write!(f, ""), + ExprResult::Module(m) => write!(f, "{}", m), + ExprResult::Exception(_) => write!(f, ""), + ExprResult::Traceback(_) => write!(f, ""), + ExprResult::Frame => write!(f, ""), + ExprResult::TypeNode(t) => write!(f, "", t), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonModule(_) => write!(f, ""), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonObject(_) => write!(f, ""), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonClass(_) => write!(f, ""), + } + } + + pub fn try_into_iter(self) -> Option { + match self { + ExprResult::List(list) => Some(ExprResultIterator::List(list.clone().into_iter())), + ExprResult::ListIterator(list_iterator) => { + Some(ExprResultIterator::List(list_iterator)) + } + ExprResult::ReversedIterator(list_iterator) => { + Some(ExprResultIterator::Reversed(list_iterator)) + } + ExprResult::Set(set) => Some(ExprResultIterator::List(set.clone().into_iter())), + ExprResult::FrozenSet(set) => Some(ExprResultIterator::List(set.clone().into_iter())), + ExprResult::Zip(zip) => Some(ExprResultIterator::Zip(Box::new(zip))), + ExprResult::Tuple(list) => Some(ExprResultIterator::List(list.into_iter())), + ExprResult::Dict(dict) => Some(ExprResultIterator::Dict(dict.into_iter())), + ExprResult::DictItems(dict) => Some(ExprResultIterator::DictItems(dict.into_iter())), + ExprResult::Generator(generator) => { + Some(ExprResultIterator::Generator(generator.into_iter())) + } + ExprResult::Range(range) => Some(ExprResultIterator::Range(range.clone().into_iter())), + 
ExprResult::StringIterator(string_iterator) => { + Some(ExprResultIterator::String(string_iterator)) + } + _ => None, + } + } + + /// Check for object identity, as opposed to object value evaluated in `PartialEq` above. + pub fn is(&self, other: &Self) -> bool { + match (self, other) { + (ExprResult::None, ExprResult::None) => true, + (ExprResult::None, _) | (_, ExprResult::None) => false, + (ExprResult::Object(ref a), ExprResult::Object(ref b)) => a.same_identity(b), + _ => unimplemented!(), // Different variants or not both ExprResult::Object + } + } + + pub fn get_type(&self) -> Type { + match self { + ExprResult::Void => unreachable!(), + ExprResult::None => Type::None, + ExprResult::Ellipsis => Type::Ellipsis, + ExprResult::NotImplemented => Type::NotImplemented, + ExprResult::Class(_) => Type::Type, + ExprResult::Object(_) => Type::Object, + ExprResult::Super(_) => Type::Super, + ExprResult::GetSetDescriptor => Type::GetSetDescriptor, + ExprResult::MemberDescriptor(_) => Type::MemberDescriptor, + ExprResult::Method(_) => Type::Method, + ExprResult::Function(_) => Type::Function, + ExprResult::BuiltinFunction(_) => Type::BuiltinFunction, + ExprResult::BuiltinMethod(_) => Type::BuiltinMethod, + ExprResult::Generator(_) => Type::Generator, + ExprResult::Coroutine(_) => Type::Coroutine, + ExprResult::Integer(_) => Type::Int, + ExprResult::FloatingPoint(_) => Type::Float, + ExprResult::Bytes(_) => Type::Bytes, + ExprResult::ByteArray(_) => Type::ByteArray, + ExprResult::Boolean(_) => Type::Bool, + ExprResult::String(_) => Type::Str, + ExprResult::List(_) => Type::List, + ExprResult::Set(_) => Type::Set, + ExprResult::FrozenSet(_) => Type::FrozenSet, + ExprResult::Zip(_) => Type::Zip, + ExprResult::Tuple(_) => Type::Tuple, + ExprResult::Range(_) => Type::Range, + ExprResult::Slice(_) => Type::Slice, + ExprResult::Dict(_) => Type::Dict, + ExprResult::DictItems(_) => Type::DictItems, + ExprResult::DictKeys(_) => Type::DictKeys, + ExprResult::DictValues(_) => 
Type::DictValues, + ExprResult::MappingProxy(_) => Type::MappingProxy, + ExprResult::BytesIterator(_) => Type::BytesIterator, + ExprResult::ByteArrayIterator(_) => Type::ByteArrayIterator, + ExprResult::RangeIterator(_) => Type::RangeIterator, + ExprResult::StringIterator(_) => Type::StringIterator, + ExprResult::ListIterator(_) => Type::ListIterator, + ExprResult::ReversedIterator(_) => Type::ReversedIterator, + ExprResult::SetIterator(_) => Type::SetIterator, + ExprResult::TupleIterator(_) => Type::TupleIterator, + ExprResult::DictItemsIterator(_) => Type::DictItemIterator, + ExprResult::DictKeysIterator(_) => Type::DictKeyIterator, + ExprResult::DictValuesIterator(_) => Type::DictValueIterator, + ExprResult::TypeNode(_) => Type::Type, + ExprResult::Cell(_) => Type::Cell, + ExprResult::Code(_) => Type::Code, + ExprResult::Module(_) => Type::Module, + ExprResult::Exception(_) => Type::Exception, + ExprResult::Traceback(_) => Type::Traceback, + ExprResult::Frame => Type::Frame, + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonModule(_) => Type::Module, + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonObject(_) => unimplemented!(), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonClass(_) => unimplemented!(), + } + } + + pub fn get_class(&self, interpreter: &Interpreter) -> Container { + match self { + ExprResult::Object(o) => o.borrow().class.clone(), + ExprResult::Class(o) => o.clone(), + ExprResult::Super(s) => s.borrow().receiver().get_class(interpreter), + _ => interpreter.state.get_type_class(self.get_type()).clone(), + } + } + + /// Return a reference to an integer if this type supports it. To get the value itself, use + /// `as_integer_val()`. + pub fn as_integer(&self) -> Option> { + match self { + ExprResult::Integer(i) => Some(i.clone()), + ExprResult::String(s) => match s.0.parse::() { + Ok(i) => Some(Container::new(i)), + Err(_) => None, + }, + _ => None, + } + } + + /// Return an integer value if this type supports it. 
To get a reference, use `as_integer()`. + pub fn as_integer_val(&self) -> Option { + self.as_integer().map(|i| *i.borrow()) + } + + pub fn as_fp(&self) -> Option { + match self { + ExprResult::FloatingPoint(i) => Some(*i), + ExprResult::Integer(i) => Some(*i.borrow() as f64), + _ => None, + } + } + + pub fn as_class(&self) -> Option> { + match self { + ExprResult::Class(i) => Some(i.clone()), + // TODO should this use a trait interface? + // #[cfg(feature = "c_stdlib")] + // ExprResult::CPythonClass(i) => Some(i), + _ => None, + } + } + + pub fn as_module(&self) -> Option> { + match self { + ExprResult::Module(i) => Some(Box::new(i.borrow().clone())), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonModule(i) => Some(Box::new(i.borrow().clone())), + _ => None, + } + } + + /// Binding is not needed in the following cases: + /// - the receiver is a module. This is treated more as a namespace than a binding. + /// - the result is not callable + /// - the callable is statically bound + /// - the callable is the `Dunder::New` method for a class + /// - the callable is bound to an instance and the receiver is a class. This will fail + /// downstream, at this point we simply fail to bind it. + /// + /// A super receiver should use the receiver stored in itself. + /// All other callables should be bound to their receiver. 
+ pub fn bind_if_needed(&self, interpreter: &Interpreter, receiver: ExprResult) -> ExprResult { + if receiver.is_module() { + return self.clone(); + } + + match self.as_callable() { + Some(function) => { + let binding_type = function.borrow().binding_type(); + if (binding_type == BindingType::Instance && receiver.is_class()) + || (binding_type == BindingType::Class + && function.borrow().name() == Dunder::New.value()) + || binding_type == BindingType::Static + { + return self.clone(); + } + + let object = if let ExprResult::Super(ref super_) = receiver { + super_.borrow().receiver() + } else if binding_type == BindingType::Class { + // We must explicitly get the class of the receiver for a class method in case + // the class method was invoked off the instance. + ExprResult::Class(receiver.get_class(interpreter)) + } else { + receiver + }; + + ExprResult::Method(Container::new(Method::new(object, function.clone()))) + } + _ => self.clone(), + } + } + + /// Besides `Object` and `Class`, this should only be used immutable access. We need to + /// implement `MemberAccessor` for `Container` rather than `T` for mutable access. + pub fn as_member_accessor(&self, interpreter: &Interpreter) -> Box { + match self { + ExprResult::Object(i) => Box::new(i.clone()), + ExprResult::Class(i) => Box::new(i.clone()), + ExprResult::Function(i) => Box::new(i.borrow().clone()), + ExprResult::Cell(i) => Box::new(i.borrow().clone()), + ExprResult::Module(i) => Box::new(i.borrow().clone()), + ExprResult::Super(i) => Box::new(i.clone()), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonModule(i) => Box::new(i.borrow().clone()), + _ => { + // We need this fallback case for instances of builtin types. + // i.e. [].append + // All attributes fetched off the builtin types not explicitly handled above do not + // support attribute writes, only reads of builtin attributes. + // We could split this into MemberRead versus MemberWrite in the future. 
+ Box::new(interpreter.state.get_type_class(self.get_type()).clone()) + } + } + } + + pub fn as_index_read(&self) -> Option> { + match self { + ExprResult::List(list) => Some(Box::new(list.clone())), + ExprResult::Tuple(tuple) => Some(Box::new(tuple.clone())), + ExprResult::Dict(dict) => Some(Box::new(dict.clone())), + ExprResult::MappingProxy(proxy) => Some(Box::new(proxy.clone())), + ExprResult::String(s) => Some(Box::new(s.clone())), + _ => None, + } + } + + pub fn as_index_write(&self) -> Option> { + match self { + ExprResult::List(list) => Some(Box::new(list.clone())), + ExprResult::Dict(dict) => Some(Box::new(dict.clone())), + _ => None, + } + } + + pub fn as_function(&self) -> Option> { + match self { + ExprResult::Function(i) => Some(i.clone()), + _ => None, + } + } + + pub fn as_callable(&self) -> Option>> { + match self { + ExprResult::Function(i) => { + Some(Container::new(Box::new(i.clone()) as Box)) + } + ExprResult::Method(i) => Some(Container::new(Box::new(i.clone()) as Box)), + ExprResult::BuiltinMethod(i) => Some(i.clone()), + ExprResult::BuiltinFunction(i) => Some(i.clone()), + ExprResult::Class(i) => Some(Container::new(Box::new(i.clone()) as Box)), + #[cfg(feature = "c_stdlib")] + ExprResult::CPythonObject(i) => { + Some(Container::new(Box::new(i.clone()) as Box)) + } + _ => None, + } + } + + pub fn as_generator(&self) -> Option> { + match self { + ExprResult::Generator(i) => Some(i.clone()), + _ => None, + } + } + + pub fn as_coroutine(&self) -> Option> { + match self { + ExprResult::Coroutine(i) => Some(i.clone()), + _ => None, + } + } + + pub fn as_boolean(&self) -> Option { + match self { + ExprResult::Boolean(i) => Some(*i), + ExprResult::List(i) => Some(i.borrow().len() > 0), + ExprResult::String(i) => Some(!i.0.is_empty()), + ExprResult::Integer(i) => Some(*i.borrow() != 0), + _ => None, + } + } + + pub fn as_object(&self) -> Option> { + match self { + ExprResult::Object(i) => Some(i.clone()), + _ => None, + } + } + + pub fn 
as_list(&self) -> Option> { + match self { + ExprResult::List(list) => Some(list.clone()), + ExprResult::Set(set) => Some(set.clone().into()), + ExprResult::Tuple(tuple) => Some(tuple.clone().into()), + ExprResult::Range(range) => Some(range.clone().into()), + ExprResult::Generator(g) => Some(g.clone().into()), + _ => None, + } + } + + pub fn as_set(&self) -> Option> { + match self { + ExprResult::Set(set) => Some(set.clone()), + ExprResult::List(list) => Some(list.clone().into()), + ExprResult::Tuple(tuple) => Some(tuple.clone().into()), + ExprResult::Range(range) => Some(range.clone().into()), + _ => None, + } + } + + pub fn as_dict(&self) -> Option> { + match self { + ExprResult::Dict(i) => Some(i.clone()), + ExprResult::List(list) => { + let di: DictItems = list.clone().into(); + let d: Dict = di.into(); + Some(Container::new(d)) + } + _ => None, + } + } + + pub fn as_range(&self) -> Option> { + match self { + ExprResult::Range(i) => Some(i.clone()), + _ => None, + } + } + + pub fn as_tuple(&self) -> Option> { + match self { + ExprResult::List(i) => Some(i.clone().into()), + ExprResult::Tuple(i) => Some(i.clone()), + ExprResult::Set(set) => Some(set.clone().into()), + ExprResult::Range(range) => Some(range.clone().into()), + _ => None, + } + } + + pub fn as_string(&self) -> Option { + match self { + ExprResult::String(i) => Some(i.0.to_string()), + ExprResult::Integer(i) => Some(i.to_string()), + _ => None, + } + } + + pub fn negated(&self) -> Self { + match self { + ExprResult::FloatingPoint(i) => ExprResult::FloatingPoint(-*i), + ExprResult::Integer(i) => { + let old_val = *i.borrow(); + *i.borrow_mut() = -old_val; + ExprResult::Integer(i.clone()) + } + _ => unreachable!(), + } + } + + pub fn inverted(&self) -> Self { + match self { + ExprResult::Boolean(i) => ExprResult::Boolean(!i), + _ => unreachable!(), + } + } + + pub fn is_integer(&self) -> bool { + self.as_integer().is_some() + } + + pub fn is_fp(&self) -> bool { + self.as_fp().is_some() + } + + pub fn 
is_class(&self) -> bool { + self.as_class().is_some() + } + + pub fn is_module(&self) -> bool { + self.as_module().is_some() + } +} + +impl Voidable for ExprResult { + fn is_void(&self) -> bool { + matches!(self, ExprResult::Void) + } +} + +impl Display for ExprResult { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + self.minimized_display(f) + } +} + +impl fmt::Debug for ExprResult { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + self.minimized_display(f) + } +} + +#[derive(Clone)] +pub enum ExprResultIterator { + List(ListIterator), + Zip(Box), + Reversed(ReversedIterator), + Dict(DictKeysIterator), + DictItems(DictItemsIterator), + Generator(GeneratorIterator), + Range(RangeIterator), + String(StringIterator), +} + +impl ExprResultIterator { + pub fn contains(&mut self, item: ExprResult) -> bool { + for next_item in self.by_ref() { + if next_item == item { + return true; + } + } + + false + } +} + +impl Iterator for ExprResultIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + match self { + ExprResultIterator::List(i) => i.next(), + ExprResultIterator::Zip(i) => i.next(), + ExprResultIterator::Reversed(i) => i.next(), + ExprResultIterator::Dict(i) => i.next(), + ExprResultIterator::DictItems(i) => i.next(), + ExprResultIterator::Generator(i) => i.next(), + ExprResultIterator::Range(i) => i.next(), + ExprResultIterator::String(i) => i.next(), + } + } +} + +impl IntoIterator for ExprResult { + type Item = ExprResult; + type IntoIter = ExprResultIterator; + + fn into_iter(self) -> Self::IntoIter { + let type_ = &self.get_type(); + self.try_into_iter() + .unwrap_or_else(|| panic!("attempted to call IntoIterator on a {}!", type_.value())) + } +} diff --git a/src/treewalk/types/reversed.rs b/src/treewalk/types/reversed.rs new file mode 100644 index 0000000..4f4eac1 --- /dev/null +++ b/src/treewalk/types/reversed.rs @@ -0,0 +1,77 @@ +use crate::{ + core::{Container, Storable}, + treewalk::{types::builtins::utils, 
Interpreter}, + types::errors::InterpreterError, +}; + +use super::{ + function::BindingType, + traits::{Callable, IndexRead}, + utils::{Dunder, ResolvedArguments}, + ExprResult, List, +}; + +#[derive(Clone)] +pub struct ReversedIterator { + list_ref: Container, + current_index: usize, +} + +impl ReversedIterator { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } + + pub fn new(list_ref: Container) -> Self { + let current_index = list_ref.borrow().len(); + Self { + list_ref, + current_index, + } + } +} + +impl Iterator for ReversedIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + if self.current_index == 0 { + None + } else { + self.current_index -= 1; + self.list_ref + .get(&ExprResult::Integer((self.current_index as i64).store())) + } + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 2, interpreter.state.call_stack())?; + + let list = args + .get_arg(1) + .as_list() + .ok_or(InterpreterError::ExpectedList( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::ReversedIterator(ReversedIterator::new( + list.clone(), + ))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/set.rs b/src/treewalk/types/set.rs new file mode 100644 index 0000000..0e1941d --- /dev/null +++ b/src/treewalk/types/set.rs @@ -0,0 +1,187 @@ +use std::{ + collections::HashSet, + fmt::{Display, Error, Formatter}, +}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + builtins::utils, + function::BindingType, + iterators::ListIterator, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, FrozenSet, List, Range, Tuple, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct Set { + pub items: HashSet, +} + +impl Set { + pub fn 
get_methods() -> Vec> { + vec![ + Box::new(NewBuiltin), + Box::new(InitBuiltin), + Box::new(AddBuiltin), + ] + } + + pub fn new(items: HashSet) -> Self { + Self { items } + } + + pub fn default() -> Self { + Self { + items: HashSet::new(), + } + } + + pub fn add(&mut self, item: ExprResult) -> bool { + self.items.insert(item) + } +} + +impl From> for Container { + fn from(list: Container) -> Container { + Container::new(Set::new(list.into_iter().collect())) + } +} + +impl From> for Container { + fn from(tuple: Container) -> Container { + Container::new(Set::new(tuple.into_iter().collect())) + } +} + +impl From> for Container { + fn from(range: Container) -> Container { + Container::new(Set::new(range.into_iter().collect())) + } +} + +impl From> for Container { + fn from(frozenset: Container) -> Container { + Container::new(Set::new(frozenset.borrow().clone().items)) + } +} + +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = ListIterator; + + fn into_iter(self) -> Self::IntoIter { + ListIterator::new(self.into()) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = ListIterator::new(self.clone().into()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "{{{}}}", items) + } +} + +struct NewBuiltin; +struct InitBuiltin; +struct AddBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + Ok(ExprResult::Set(Container::new(Set::default()))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for InitBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + let output_set = args + .get_self() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))? 
+ .as_set() + .ok_or(InterpreterError::ExpectedSet( + interpreter.state.call_stack(), + ))?; + + if args.is_empty() { + Ok(ExprResult::Void) + } else if args.len() == 1 { + let input_set = args + .get_arg(0) + .as_set() + .ok_or(InterpreterError::ExpectedSet( + interpreter.state.call_stack(), + ))?; + + *output_set.borrow_mut() = input_set.borrow().clone(); + Ok(ExprResult::Void) + } else { + Err(InterpreterError::WrongNumberOfArguments( + 1, + args.len(), + interpreter.state.call_stack(), + )) + } + } + + fn name(&self) -> String { + Dunder::Init.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} + +impl Callable for AddBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let set = args + .get_self() + .ok_or(InterpreterError::ExpectedSet( + interpreter.state.call_stack(), + ))? + .as_set() + .ok_or(InterpreterError::ExpectedSet( + interpreter.state.call_stack(), + ))?; + + let result = set.borrow_mut().add(args.get_arg(0)); + + Ok(ExprResult::Boolean(result)) + } + + fn name(&self) -> String { + "add".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/slice.rs b/src/treewalk/types/slice.rs new file mode 100644 index 0000000..9e686ab --- /dev/null +++ b/src/treewalk/types/slice.rs @@ -0,0 +1,185 @@ +use crate::{ + parser::types::{Expr, ParsedSliceParams}, + treewalk::Interpreter, + types::errors::InterpreterError, +}; +use std::fmt::{Display, Error, Formatter}; + +use super::{ + builtins::utils::validate_args, + function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +#[derive(Clone)] +pub struct Slice { + pub start: Option, + pub stop: Option, + pub step: Option, +} + +impl Slice { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } + + pub fn new(start: Option, stop: Option, 
step: Option) -> Self { + Self { start, stop, step } + } + + pub fn resolve( + interpreter: &Interpreter, + parsed_params: &ParsedSliceParams, + ) -> Result { + let evaluate_to_integer = + |expr_option: &Option>| -> Result, InterpreterError> { + match expr_option { + Some(expr) => { + let integer = interpreter + .evaluate_expr(expr)? + .as_integer_val() + .ok_or_else(|| { + InterpreterError::ExpectedInteger(interpreter.state.call_stack()) + })?; + Ok(Some(integer)) + } + None => Ok(None), + } + }; + + let start = evaluate_to_integer(&parsed_params.start)?; + let stop = evaluate_to_integer(&parsed_params.stop)?; + let step = evaluate_to_integer(&parsed_params.step)?; + + Ok(Self { start, stop, step }) + } + + /// Adjusting start and stop according to Python's slicing rules of negative indices + /// wrapping around the iterable. + fn adjust_params(slice: &Slice, len: i64) -> (i64, i64, i64) { + let start = slice.start.unwrap_or(0); + let stop = slice.stop.unwrap_or(len); + let step = slice.step.unwrap_or(1); + + let start = if start < 0 { len + start } else { start }; + let stop = if stop < 0 { len + stop } else { stop }; + + let start = start.clamp(0, len); + let stop = stop.clamp(0, len); + + (start, stop, step) + } + + pub fn slice(slice: &Slice, len: i64, fetch: impl Fn(i64) -> Option) -> Vec { + let (start, stop, step) = Self::adjust_params(slice, len); + + let mut result = Vec::new(); + match step.cmp(&0) { + std::cmp::Ordering::Greater => { + let mut i = start; + while i < stop { + if let Some(item) = fetch(i) { + result.push(item); + } + i += step; + } + } + std::cmp::Ordering::Less => { + let mut i = stop - 1; + while i >= start { + if let Some(item) = fetch(i) { + result.push(item); + } + i += step; + } + } + std::cmp::Ordering::Equal => panic!("slice step cannot be zero"), + } + + result + } +} + +impl Display for Slice { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let format_val = + |val: &Option| val.map_or("None".to_string(), |number| 
number.to_string()); + + let start = format_val(&self.start); + let stop = format_val(&self.stop); + let step = format_val(&self.step); + + write!(f, "slice({}, {}, {})", start, stop, step) + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + if args.len() == 2 { + let stop = + args.get_arg(1) + .as_integer_val() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + Ok(ExprResult::Slice(Slice::new(None, Some(stop), None))) + } else if args.len() == 3 { + let start = + args.get_arg(1) + .as_integer_val() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + let stop = + args.get_arg(2) + .as_integer_val() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + Ok(ExprResult::Slice(Slice::new(Some(start), Some(stop), None))) + } else if args.len() == 4 { + let start = + args.get_arg(1) + .as_integer_val() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + let stop = + args.get_arg(2) + .as_integer_val() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + let step = + args.get_arg(3) + .as_integer_val() + .ok_or(InterpreterError::ExpectedInteger( + interpreter.state.call_stack(), + ))?; + Ok(ExprResult::Slice(Slice::new( + Some(start), + Some(stop), + Some(step), + ))) + } else { + validate_args(&args, 1, interpreter.state.call_stack())?; + unreachable!() + } + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/staticmethod.rs b/src/treewalk/types/staticmethod.rs new file mode 100644 index 0000000..7a0f9f0 --- /dev/null +++ b/src/treewalk/types/staticmethod.rs @@ -0,0 +1,47 @@ +use crate::{treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + builtins::utils, + 
function::BindingType, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +pub struct Staticmethod; + +impl Staticmethod { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } +} + +pub struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + // The first arg is the class itself, the second arg is the function + utils::validate_args(&args, 2, interpreter.state.call_stack())?; + + let function = args + .get_arg(1) + .as_function() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))?; + function.borrow_mut().binding_type = BindingType::Static; + Ok(ExprResult::Function(function)) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} diff --git a/src/treewalk/types/str.rs b/src/treewalk/types/str.rs new file mode 100644 index 0000000..74738f2 --- /dev/null +++ b/src/treewalk/types/str.rs @@ -0,0 +1,93 @@ +use crate::{treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::{Callable, IndexRead}, + utils::ResolvedArguments, + ExprResult, Slice, +}; + +#[derive(Clone)] +pub struct Str(pub String); + +impl Str { + pub fn get_methods() -> Vec> { + vec![Box::new(JoinBuiltin)] + } + + pub fn new(str: String) -> Self { + Self(str) + } + + pub fn slice(&self, slice: &Slice) -> Self { + let len = self.0.chars().count() as i64; + + let sliced_string = Slice::slice(slice, len, |i| { + self.0.chars().nth(i as usize).map(|c| c.to_string()) + }) + .join(""); + + Str::new(sliced_string) + } +} + +impl IndexRead for Str { + fn get(&self, key: &ExprResult) -> Option { + match key { + ExprResult::Integer(i) => self + .0 + .chars() + .nth(*i.borrow() as usize) + .map(|c| c.to_string()) + .map(Str::new) + .map(ExprResult::String), + ExprResult::Slice(s) => 
Some(ExprResult::String(self.slice(s))), + _ => None, + } + } +} + +struct JoinBuiltin; + +impl Callable for JoinBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + unimplemented!() + } + + fn name(&self) -> String { + "join".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} + +#[derive(Clone)] +pub struct StringIterator { + string: String, + position: usize, +} + +impl StringIterator { + pub fn new(string: Str) -> Self { + Self { + string: string.0.clone(), + position: 0, + } + } +} + +impl Iterator for StringIterator { + type Item = ExprResult; + + fn next(&mut self) -> Option { + let result = self.string[self.position..].chars().next()?; + self.position += result.len_utf8(); + Some(ExprResult::String(Str::new(result.to_string()))) + } +} diff --git a/src/treewalk/types/super_type.rs b/src/treewalk/types/super_type.rs new file mode 100644 index 0000000..388acd6 --- /dev/null +++ b/src/treewalk/types/super_type.rs @@ -0,0 +1,70 @@ +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + traits::{Callable, MemberAccessor}, + utils::{Dunder, ResolvedArguments}, + ExprResult, +}; + +#[derive(Debug, Clone)] +pub struct Super { + receiver: ExprResult, +} + +impl Super { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } + + pub fn new(receiver: ExprResult) -> Self { + Self { receiver } + } + + pub fn receiver(&self) -> ExprResult { + self.receiver.clone() + } +} + +impl MemberAccessor for Container { + fn get(&self, interpreter: &Interpreter, name: &str) -> Option { + let class = &self.borrow().receiver().get_class(interpreter); + + // Retrieve the MRO for the class, excluding the class itself + for parent_class in class.super_mro() { + if let Some(result) = parent_class.get(interpreter, name) { + return Some(result); + } + } + + None + } + + fn delete(&mut self, _name: &str) -> Option { + 
unimplemented!() + } + + fn insert(&mut self, _name: &str, _value: ExprResult) { + unimplemented!() + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + interpreter.evaluate_super() + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/treewalk/types/traits.rs b/src/treewalk/types/traits.rs new file mode 100644 index 0000000..bea285b --- /dev/null +++ b/src/treewalk/types/traits.rs @@ -0,0 +1,103 @@ +use std::{ + any::Any, + fmt::{Debug, Error, Formatter}, +}; + +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::{BindingType, FunctionType}, + utils::ResolvedArguments, + Class, ExprResult, +}; + +pub trait Callable { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result; + + fn name(&self) -> String; + + fn binding_type(&self) -> BindingType; + + fn function_type(&self) -> FunctionType { + FunctionType::Regular + } + + /// This stub exists so that we can downcast to `Container`. + fn as_any(&self) -> &dyn Any { + unreachable!() + } + + /// A callable will not have a receiver by default, but certain types (`Method`) can have them. + fn receiver(&self) -> Option { + None + } +} + +impl Debug for dyn Callable { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "<{}>", self.name()) + } +} + +// This probably isn't right, this is just so we can store these in a hash inside a class. +impl PartialEq for dyn Callable { + fn eq(&self, other: &Self) -> bool { + self.name() == other.name() + } +} + +pub trait MemberAccessor { + /// A pointer to the `Interpreter` is sometimes not needed, but is required to evalute method + /// calls for descriptors. 
+ fn get(&self, interpreter: &Interpreter, name: &str) -> Option; + fn insert(&mut self, name: &str, value: ExprResult); + fn delete(&mut self, name: &str) -> Option; +} + +pub trait IndexRead { + fn get(&self, index: &ExprResult) -> Option; +} + +pub trait IndexWrite { + fn insert(&mut self, index: &ExprResult, value: ExprResult); + fn delete(&mut self, index: &ExprResult) -> Option; +} + +// pub trait IndexAccessor: IndexRead + IndexWrite {} + +pub trait ModuleInterface { + fn get(&self, interpreter: &Interpreter, name: &str) -> Option; + fn dir(&self) -> Vec; +} + +/// Provide an interface to resolve a dynamic attribute. For example `type.__dict__`. This is +/// conceptually similar to `Callable`. The difference lies in the resolution time. A `Callable` is +/// stored in a symbol table and resolved when it is invoked, whereas an `AttributeResolver` must +/// be resolved when it is accessed as a member (via a `MemberAccessor`). Hence, the need for a +/// separate mechanism. +/// +/// This is only necessary for dynamic attributes defined on a class. For those on an object, we +/// get a reference back to their underlying struct and can implement dynamic attributes through +/// the struct's `MemberAccessor`. On a class, we must register these with the type class before +/// runtime. +pub trait AttributeResolver { + fn resolve(&self, class: Container) -> ExprResult; + fn name(&self) -> &'static str; +} + +impl Debug for dyn AttributeResolver { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "", self.name()) + } +} + +// This probably isn't right, this is just so we can store these in a hash inside a class. 
+impl PartialEq for dyn AttributeResolver { + fn eq(&self, other: &Self) -> bool { + self.name() == other.name() + } +} diff --git a/src/treewalk/types/tuple.rs b/src/treewalk/types/tuple.rs new file mode 100644 index 0000000..1600286 --- /dev/null +++ b/src/treewalk/types/tuple.rs @@ -0,0 +1,157 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::{ + core::{Container, Storable}, + treewalk::Interpreter, + types::errors::InterpreterError, +}; + +use super::{ + builtins::utils, + function::BindingType, + iterators::ListIterator, + traits::{Callable, IndexRead}, + utils::{Dunder, ResolvedArguments}, + ExprResult, List, Range, Set, +}; + +#[derive(Debug, PartialEq, Clone)] +pub struct Tuple { + items: Vec, +} + +impl Tuple { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin), Box::new(InitBuiltin)] + } + + pub fn default() -> Self { + Self { items: vec![] } + } + + pub fn new(items: Vec) -> Self { + Self { items } + } + + pub fn raw(&self) -> Vec { + self.items.clone() + } +} + +impl Container { + pub fn first(&self) -> ExprResult { + self.get(&ExprResult::Integer(0.store())).unwrap() + } + + pub fn second(&self) -> ExprResult { + self.get(&ExprResult::Integer(1.store())).unwrap() + } +} + +impl IndexRead for Container { + fn get(&self, index: &ExprResult) -> Option { + let i = index.as_integer_val()?; + self.borrow().items.get(i as usize).cloned() + } +} + +impl From> for Container { + fn from(set: Container) -> Container { + // Calling `into_iter()` directly off the `Set` results in a stack overflow. 
+ //let mut items: Vec = set.into_iter().collect(); + let mut items: Vec = set.borrow().items.clone().into_iter().collect(); + items.sort_by_key(|x| *x.as_integer().unwrap().borrow()); + Container::new(Tuple::new(items)) + } +} + +impl From> for Container { + fn from(list: Container) -> Container { + Container::new(Tuple::new(list.into_iter().collect())) + } +} + +impl From> for Container { + fn from(range: Container) -> Container { + Container::new(Tuple::new(range.into_iter().collect())) + } +} + +impl Display for Container { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + let items = ListIterator::new(self.clone().into()) + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "({})", items) + } +} + +impl IntoIterator for Container { + type Item = ExprResult; + type IntoIter = ListIterator; + + fn into_iter(self) -> Self::IntoIter { + ListIterator::new(self.into()) + } +} + +struct NewBuiltin; +struct InitBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + Ok(ExprResult::Tuple(Container::new(Tuple::default()))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +impl Callable for InitBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 1, interpreter.state.call_stack())?; + + let output = args + .get_self() + .ok_or(InterpreterError::ExpectedFunction( + interpreter.state.call_stack(), + ))? 
+ .as_tuple() + .ok_or(InterpreterError::ExpectedTuple( + interpreter.state.call_stack(), + ))?; + + let input = args + .get_arg(0) + .as_tuple() + .ok_or(InterpreterError::ExpectedTuple( + interpreter.state.call_stack(), + ))?; + + *output.borrow_mut() = input.borrow().clone(); + + Ok(ExprResult::Void) + } + + fn name(&self) -> String { + Dunder::Init.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/type.rs b/src/treewalk/types/type.rs new file mode 100644 index 0000000..4f9b9bf --- /dev/null +++ b/src/treewalk/types/type.rs @@ -0,0 +1,136 @@ +use crate::{ + core::Container, + treewalk::{Interpreter, Scope}, + types::errors::InterpreterError, +}; + +use super::{ + builtins::utils, + function::BindingType, + traits::{AttributeResolver, Callable}, + utils::{Dunder, ResolvedArguments}, + Class, ExprResult, MappingProxy, Tuple, Type, +}; + +/// This represents the callable class `type` in Python. For an enum of all the builtin types, see +/// `types::interpreter::Type`. 
+pub struct TypeClass; + +impl TypeClass { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin), Box::new(StrBuiltin)] + } + + pub fn get_dynamic_attributes() -> Vec> { + vec![Box::new(DictAttribute), Box::new(MroAttribute)] + } +} + +struct DictAttribute; +struct MroAttribute; + +impl AttributeResolver for DictAttribute { + fn resolve(&self, class: Container) -> ExprResult { + ExprResult::MappingProxy(Container::new(MappingProxy::new( + class.borrow().scope.as_dict(), + ))) + } + + fn name(&self) -> &'static str { + Dunder::Dict.value() + } +} + +impl AttributeResolver for MroAttribute { + fn resolve(&self, class: Container) -> ExprResult { + let mro = class.mro().iter().cloned().map(ExprResult::Class).collect(); + ExprResult::Tuple(Container::new(Tuple::new(mro))) + } + + fn name(&self) -> &'static str { + Dunder::Mro.value() + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + utils::validate_args(&args, 4, interpreter.state.call_stack())?; + + let mcls = args + .get_arg(0) + .as_class() + .ok_or(InterpreterError::ExpectedClass( + interpreter.state.call_stack(), + ))?; + let name = args + .get_arg(1) + .as_string() + .ok_or(InterpreterError::ExpectedString( + interpreter.state.call_stack(), + ))?; + // Default to the `Type::Object` class. + let parent_classes = args + .get_arg(2) + .as_tuple() + .ok_or(InterpreterError::ExpectedTuple( + interpreter.state.call_stack(), + ))? 
+ .into_iter() + .map(|c| c.as_class().unwrap()) + .collect::>>(); + + let parent_classes = if parent_classes.is_empty() { + vec![interpreter.state.get_type_class(Type::Object)] + } else { + parent_classes + }; + + let namespace = args + .get_arg(3) + .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + + Ok(ExprResult::Class(Class::new_base( + name, + parent_classes, + Some(mcls), + Scope::from_dict(namespace.clone().borrow().clone().into()), + ))) + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} + +struct StrBuiltin; + +impl Callable for StrBuiltin { + fn call( + &self, + _interpreter: &Interpreter, + _args: ResolvedArguments, + ) -> Result { + unimplemented!() + } + + fn name(&self) -> String { + Dunder::Str.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Instance + } +} diff --git a/src/treewalk/types/types.rs b/src/treewalk/types/types.rs new file mode 100644 index 0000000..ca48d82 --- /dev/null +++ b/src/treewalk/types/types.rs @@ -0,0 +1,203 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::parser::types::TypeNode; + +/// These are all of the builtin types. For the class 'type', see `TypeClass`. +#[derive(Debug, PartialEq, Clone, Hash, Eq)] +pub enum Type { + #[allow(clippy::enum_variant_names)] + Type, + // This is a hack to avoid a circular referece from Type to itself. We do the same thing with + // `ObjectMeta`. 
+ #[allow(clippy::enum_variant_names)] + TypeMeta, + ObjectMeta, + Object, + Super, + GetSetDescriptor, + MemberDescriptor, + Method, + Function, + BuiltinFunction, + BuiltinMethod, + Generator, + Coroutine, + Ellipsis, + None, + NotImplemented, + Bool, + Int, + Float, + Str, + List, + Set, + FrozenSet, + Zip, + Tuple, + Range, + Slice, + Bytes, + ByteArray, + Memoryview, + Dict, + DictItems, + DictKeys, + DictValues, + MappingProxy, + StringIterator, + ListIterator, + ReversedIterator, + SetIterator, + TupleIterator, + DictItemIterator, + DictKeyIterator, + DictValueIterator, + BytesIterator, + ByteArrayIterator, + RangeIterator, + Exception, + Traceback, + Frame, + Module, + Cell, + Code, + Classmethod, + Staticmethod, + Property, +} + +static TYPE_MAPPINGS: &[(Type, &str)] = &[ + (Type::Type, "type"), + (Type::TypeMeta, "type"), + (Type::Object, "object"), + (Type::ObjectMeta, "object"), + (Type::Super, "super"), + (Type::GetSetDescriptor, "getset_descriptor"), + (Type::MemberDescriptor, "member_descriptor"), + (Type::Method, "method"), + (Type::Function, "function"), + (Type::BuiltinFunction, "builtin_function_or_method"), + (Type::BuiltinMethod, "builtin_function_or_method"), + (Type::Generator, "generator"), + (Type::Coroutine, "coroutine"), + (Type::Ellipsis, "ellipsis"), + (Type::None, "NoneType"), + (Type::NotImplemented, "NotImplementedType"), + (Type::Bool, "bool"), + (Type::Int, "int"), + (Type::Float, "float"), + (Type::Str, "str"), + (Type::List, "list"), + (Type::Set, "set"), + (Type::FrozenSet, "frozenset"), + (Type::Zip, "zip"), + (Type::Tuple, "tuple"), + (Type::Range, "range"), + (Type::Slice, "slice"), + (Type::Bytes, "bytes"), + (Type::ByteArray, "bytearray"), + (Type::Memoryview, "memoryview"), + (Type::Dict, "dict"), + (Type::DictItems, "dict_items"), + (Type::DictKeys, "dict_keys"), + (Type::DictValues, "dict_values"), + (Type::MappingProxy, "mappingproxy"), + (Type::StringIterator, "string_iterator"), + (Type::ListIterator, 
"list_iterator"), + // The builtin keyword here is different than the type + // string: "list_reverseiterator" + (Type::ReversedIterator, "reversed"), + (Type::SetIterator, "set_iterator"), + (Type::TupleIterator, "tuple_iterator"), + (Type::DictItemIterator, "dict_itemiterator"), + (Type::DictKeyIterator, "dict_keyiterator"), + (Type::DictValueIterator, "dict_valueiterator"), + (Type::BytesIterator, "bytes_iterator"), + (Type::ByteArrayIterator, "byte_array_iterator"), + (Type::RangeIterator, "range_iterator"), + (Type::Exception, "Exception"), + (Type::Traceback, "traceback"), + (Type::Frame, "frame"), + (Type::Module, "module"), + (Type::Cell, "cell"), + (Type::Code, "code"), + (Type::Classmethod, "classmethod"), + (Type::Staticmethod, "staticmethod"), + (Type::Property, "property"), +]; + +impl Type { + pub fn value(&self) -> &'static str { + TYPE_MAPPINGS + .iter() + .find_map( + |(variant, name)| { + if variant == self { + Some(name) + } else { + None + } + }, + ) + .expect("Invalid Type variant") + } +} + +impl Display for Type { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + write!(f, "{}", self.value()) + } +} + +#[derive(Debug, Clone)] +pub enum TypeExpr { + Generic { + base: Box, + parameters: Vec, + }, + Union(Vec), + Ellipsis, + Int, + List, + Str, +} + +/// Convert from `TypeNode`, which is used by the parser, to `TypeExpr`, which is used by the +/// interpreter. 
+fn convert_to_type_expr(type_node: &TypeNode) -> TypeExpr { + match type_node { + TypeNode::Generic { + base_type, + parameters, + } => { + let base_expr = match base_type.as_str() { + "list" => TypeExpr::List, + _ => unimplemented!(), + }; + + let param_exprs = parameters.iter().map(convert_to_type_expr).collect(); + + TypeExpr::Generic { + base: Box::new(base_expr), + parameters: param_exprs, + } + } + TypeNode::Union(parameters) => { + let param_exprs = parameters.iter().map(convert_to_type_expr).collect(); + + TypeExpr::Union(param_exprs) + } + TypeNode::Basic(type_str) => match type_str.as_str() { + "int" => TypeExpr::Int, + "str" => TypeExpr::Str, + "..." => TypeExpr::Ellipsis, + _ => unimplemented!(), + }, + } +} + +impl From<&TypeNode> for TypeExpr { + fn from(type_node: &TypeNode) -> TypeExpr { + convert_to_type_expr(type_node) + } +} diff --git a/src/treewalk/types/utils/dunder.rs b/src/treewalk/types/utils/dunder.rs new file mode 100644 index 0000000..89f2db4 --- /dev/null +++ b/src/treewalk/types/utils/dunder.rs @@ -0,0 +1,78 @@ +#[derive(PartialEq, Clone)] +pub enum Dunder { + // Methods + New, + Init, + Contains, + Eq, + Ne, + Enter, + Exit, + // Attributes + Code, + Globals, + Closure, + Dict, + Mro, + Str, + Traceback, + #[cfg(feature = "c_stdlib")] + Class, +} + +static DUNDER_MAPPINGS: &[(Dunder, &str)] = &[ + (Dunder::New, "__new__"), + (Dunder::Init, "__init__"), + (Dunder::Contains, "__contains__"), + (Dunder::Eq, "__eq__"), + (Dunder::Ne, "__ne__"), + (Dunder::Enter, "__enter__"), + (Dunder::Exit, "__exit__"), + (Dunder::Code, "__code__"), + (Dunder::Globals, "__globals__"), + (Dunder::Closure, "__closure__"), + (Dunder::Dict, "__dict__"), + (Dunder::Mro, "__mro__"), + (Dunder::Str, "__str__"), + (Dunder::Traceback, "__traceback__"), + #[cfg(feature = "c_stdlib")] + (Dunder::Class, "__class__"), +]; + +impl Dunder { + pub fn value(&self) -> &'static str { + DUNDER_MAPPINGS + .iter() + .find_map( + |(variant, name)| { + if variant == self { + 
Some(name) + } else { + None + } + }, + ) + .expect("Invalid Dunder variant") + } +} + +impl From<&str> for Dunder { + fn from(s: &str) -> Self { + DUNDER_MAPPINGS + .iter() + .find_map(|(variant, name)| { + if *name == s { + Some(variant.to_owned()) + } else { + None + } + }) + .expect("Unknown dunder method") + } +} + +impl From for String { + fn from(value: Dunder) -> Self { + value.value().to_string() + } +} diff --git a/src/treewalk/types/utils/environment_frame.rs b/src/treewalk/types/utils/environment_frame.rs new file mode 100644 index 0000000..3dfcc1d --- /dev/null +++ b/src/treewalk/types/utils/environment_frame.rs @@ -0,0 +1,41 @@ +use crate::{ + core::Container, + treewalk::{types::ExprResult, Scope}, +}; + +/// This implements lexical scoping necessary to support closures. +#[derive(Debug, PartialEq, Clone)] +pub struct EnvironmentFrame { + pub scope: Container, + parent: Option>>, +} + +impl EnvironmentFrame { + pub fn new(scope: Container, parent: Option>>) -> Self { + Self { scope, parent } + } + + /// This reads up the lexical scoping stack (as opposed to the runtime stack) to see if any + /// enclosing frames contain the value in scope. + pub fn read(&self, name: &str) -> Option { + match self.scope.borrow().get(name) { + Some(value) => Some(value.clone()), + None => match &self.parent { + Some(parent) => parent.borrow().read(name), + None => None, + }, + } + } + + /// Writes a value to the variable in the closest enclosing scope where it is defined. + /// If the variable is not found in any enclosing scopes, an error is thrown. 
+ pub fn write(&mut self, name: &str, value: ExprResult) { + if self.scope.borrow().get(name).is_some() { + self.scope.borrow_mut().insert(name, value); + } else if let Some(parent) = &mut self.parent { + parent.borrow_mut().write(name, value); + } else { + panic!("not found!"); + } + } +} diff --git a/src/treewalk/types/utils/mod.rs b/src/treewalk/types/utils/mod.rs new file mode 100644 index 0000000..bc2d153 --- /dev/null +++ b/src/treewalk/types/utils/mod.rs @@ -0,0 +1,7 @@ +mod dunder; +mod environment_frame; +mod resolved_args; + +pub use dunder::Dunder; +pub use environment_frame::EnvironmentFrame; +pub use resolved_args::ResolvedArguments; diff --git a/src/treewalk/types/utils/resolved_args.rs b/src/treewalk/types/utils/resolved_args.rs new file mode 100644 index 0000000..ea83da1 --- /dev/null +++ b/src/treewalk/types/utils/resolved_args.rs @@ -0,0 +1,158 @@ +use std::collections::HashMap; + +use crate::{ + parser::types::ParsedArguments, + treewalk::{ + types::{ExprResult, Str}, + Interpreter, + }, + types::errors::InterpreterError, +}; + +/// Represents the fully resolved parameter state for all positional and keyword arguments. +/// +/// For the unresolved state, see [`ParsedArguments`]. 
+#[derive(Debug, Clone)] +pub struct ResolvedArguments { + bound_val: Option, + args: Vec, + kwargs: HashMap, +} + +impl ResolvedArguments { + pub fn default() -> Self { + Self { + bound_val: None, + args: vec![], + kwargs: HashMap::new(), + } + } + + pub fn from( + interpreter: &Interpreter, + arguments: &ParsedArguments, + ) -> Result { + let mut arg_values = arguments + .args + .iter() + .map(|arg| interpreter.evaluate_expr(arg)) + .collect::, _>>()?; + let mut kwarg_values: HashMap = arguments + .kwargs + .iter() + .map(|(key, value)| { + Ok(( + ExprResult::String(Str::new(key.into())), + interpreter.evaluate_expr(value)?, + )) + }) + .collect::, _>>()?; + + let mut second_arg_values = if let Some(ref args_var) = arguments.args_var { + let args_var_value = interpreter.evaluate_expr(args_var)?; + let args = args_var_value + .as_tuple() + .ok_or(InterpreterError::ExpectedTuple( + interpreter.state.call_stack(), + ))?; + args.clone().borrow().raw() + } else { + vec![] + }; + arg_values.append(&mut second_arg_values); + + let second_kwarg_values = if let Some(ref kwargs_var) = arguments.kwargs_var { + let kwargs_var_value = interpreter.evaluate_expr(kwargs_var)?; + let kwargs_dict = kwargs_var_value + .as_dict() + .ok_or(InterpreterError::ExpectedDict( + interpreter.state.call_stack(), + ))?; + kwargs_dict.clone().borrow().raw() + } else { + HashMap::new() + }; + + for (key, value) in second_kwarg_values { + if kwarg_values.insert(key.clone(), value).is_some() { + return Err(InterpreterError::KeyError( + key.to_string(), + interpreter.state.call_stack(), + )); + } + } + + Ok(Self::new(arg_values, kwarg_values)) + } + + pub fn new(args: Vec, kwargs: HashMap) -> Self { + Self { + bound_val: None, + args, + kwargs, + } + } + + pub fn add_arg(&mut self, arg: ExprResult) -> Self { + self.args.push(arg); + self.clone() + } + + pub fn bind(&mut self, val: ExprResult) { + self.bound_val = Some(val); + } + + /// The `Dunder::New` method expects the class to be passed in as 
the first argument but in + /// an unbound way. + pub fn bind_new(&mut self, val: ExprResult) { + self.args.insert(0, val); + } + + pub fn get_self(&self) -> Option { + self.bound_val.clone() + } + + /// Access a positional argument by index. Bound arguments are not included in this, use + /// `get_self` for those. + pub fn get_arg(&self, index: usize) -> ExprResult { + self.args[index].clone() + } + + /// Return a `Dict` of all the keyword arguments. + pub fn get_kwargs(&self) -> HashMap { + self.kwargs.clone() + } + + /// Access a keyword argument by key. + pub fn get_kwarg(&self, key: &ExprResult) -> Option { + self.kwargs.get(key).cloned() + } + + pub fn len(&self) -> usize { + self.args.len() + } + + pub fn is_empty(&self) -> bool { + self.args.is_empty() + } + + pub fn iter_args(&self) -> std::slice::Iter<'_, ExprResult> { + self.args.iter() + } + + pub fn bound_len(&self) -> usize { + self.bound_args().len() + } + + /// When we are loading a symbol table for a new scope, we must join the bound object with + /// the positional arguments since the function signature will expect the bound object. 
+ pub fn bound_args(&self) -> Vec { + let mut base = if let Some(bound) = self.bound_val.clone() { + vec![bound] + } else { + vec![] + }; + base.append(&mut self.args.clone()); + base + } +} diff --git a/src/treewalk/types/zip.rs b/src/treewalk/types/zip.rs new file mode 100644 index 0000000..88986df --- /dev/null +++ b/src/treewalk/types/zip.rs @@ -0,0 +1,110 @@ +use crate::{core::Container, treewalk::Interpreter, types::errors::InterpreterError}; + +use super::{ + function::BindingType, + iterators::{ExprResultIterator, ListIterator}, + traits::Callable, + utils::{Dunder, ResolvedArguments}, + ExprResult, List, Str, Tuple, +}; + +#[derive(Clone)] +pub struct ZipIterator(Vec); + +impl ZipIterator { + pub fn get_methods() -> Vec> { + vec![Box::new(NewBuiltin)] + } + + pub fn new(items: Vec) -> Self { + Self(items) + } + + pub fn default() -> Self { + Self(vec![ExprResultIterator::List(ListIterator::new( + Container::new(List::new(vec![])), + ))]) + } +} + +impl Iterator for ZipIterator { + type Item = ExprResult; + + /// Return the next item from each of the composite iterators in a tuple until the shortest + /// iterator has been exhausted, then return `None`. + fn next(&mut self) -> Option { + // Advance all the composite iterators + let results = self + .0 + .iter_mut() + .map(|i| i.next()) + .collect::>>(); + + if results.iter().all(|r| r.is_some()) { + let r = results + .iter() + .map(|i| i.clone().unwrap()) + .collect::>(); + Some(ExprResult::Tuple(Container::new(Tuple::new(r)))) + } else { + None + } + } +} + +struct NewBuiltin; + +impl Callable for NewBuiltin { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + // The default behavior will stop zipping when the shortest iterator is exhausted, + // which matches default behavior from Python. Using strict=True causes this to throw an + // exception instead. 
+ if args.len() == 1 { + Ok(ExprResult::Zip(ZipIterator::default())) + } else if args.len() >= 3 { + // The first arg is the class, so we must consume it before beginning the zip + // operation. + let mut iter = args.iter_args(); + iter.next(); + + let iters = iter + .map(|a| a.clone().into_iter()) + .collect::>(); + + if args + .get_kwarg(&ExprResult::String(Str::new("strict".to_string()))) + .is_some_and(|k| k == ExprResult::Boolean(true)) + { + let lengths = iters + .iter() + .map(|i| i.clone().count()) + .collect::>(); + let all_equal = lengths.is_empty() || lengths.iter().all(|&x| x == lengths[0]); + + if !all_equal { + return Err(InterpreterError::RuntimeError); + } + } + + Ok(ExprResult::Zip(ZipIterator::new(iters))) + } else { + Err(InterpreterError::WrongNumberOfArguments( + 2, + args.len(), + interpreter.state.call_stack(), + )) + } + } + + fn name(&self) -> String { + Dunder::New.value().into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Static + } +} diff --git a/src/types/cpython.rs b/src/types/cpython.rs new file mode 100644 index 0000000..6eab1f5 --- /dev/null +++ b/src/types/cpython.rs @@ -0,0 +1,201 @@ +use pyo3::prelude::Python; +use pyo3::types::{PyAny, PyCFunction, PyDict, PyModule, PyTuple}; +use pyo3::{pyclass, IntoPy, Py, PyObject, PyResult, ToPyObject}; + +use crate::treewalk::{ + types::{ + function::BindingType, + traits::{Callable, MemberAccessor, ModuleInterface}, + utils::{Dunder, ResolvedArguments}, + ExprResult, + }, + Interpreter, +}; +use crate::types::errors::InterpreterError; + +#[derive(Clone)] +pub struct CPythonModule { + _name: String, + pymodule: PyObject, +} + +impl CPythonModule { + pub fn new(name: &str) -> Self { + pyo3::prepare_freethreaded_python(); + let pymodule = Python::with_gil(|py| PyModule::import(py, name).expect("failed").into()); + + Self { + _name: name.into(), + pymodule, + } + } +} + +impl ModuleInterface for CPythonModule { + fn get(&self, interpreter: &Interpreter, name: &str) -> Option 
{ + MemberAccessor::get(self, interpreter, name) + } + + fn dir(&self) -> Vec { + Python::with_gil(|py| { + let module = self.pymodule.as_ref(py).downcast::().unwrap(); + let dir_list = module.dir(); + dir_list + .iter() + .map(|item| item.extract::().unwrap()) + .collect() + }) + } +} + +impl MemberAccessor for CPythonModule { + fn get(&self, _interpreter: &Interpreter, name: &str) -> Option { + Python::with_gil(|py| { + let module = self.pymodule.as_ref(py).downcast::().ok(); + + if let Some(module) = module { + match module.getattr(name) { + Ok(py_attr) => Some(utils::from_pyobject(py, py_attr)), + Err(_) => None, + } + } else { + None + } + }) + } + + fn delete(&mut self, _name: &str) -> Option { + unimplemented!(); + } + + fn insert(&mut self, _name: &str, _value: ExprResult) { + unimplemented!(); + } +} + +impl Callable for CPythonObject { + fn call( + &self, + interpreter: &Interpreter, + args: ResolvedArguments, + ) -> Result { + Python::with_gil(|py| { + let py_attr: &PyAny = self.py_object.as_ref(py); + if py_attr.is_callable() { + if args.is_empty() { + let result = py_attr + .call0() + .map_err(|_| InterpreterError::RuntimeError)?; + Ok(utils::from_pyobject(py, result)) + } else if args.get_kwargs().is_empty() { + let result = py_attr.call1(utils::to_args(py, args)); + let result = result.map_err(|_| InterpreterError::RuntimeError)?; + Ok(utils::from_pyobject(py, result)) + } else { + // Need to use py_attr.call() here + unimplemented!() + } + } else { + Err(InterpreterError::FunctionNotFound( + self.name(), + interpreter.state.call_stack(), + )) + } + }) + } + + // I don't think we ever need this, which means we may be modeling something poorly here. 
+ fn name(&self) -> String { + "".into() + } + + fn binding_type(&self) -> BindingType { + BindingType::Class + } +} + +#[pyclass(weakref)] +struct TestClass; + +impl ToPyObject for ExprResult { + fn to_object(&self, py: Python) -> PyObject { + match self { + ExprResult::Integer(val) => val.borrow().to_object(py), + ExprResult::String(s) => s.0.as_str().to_object(py), + ExprResult::Function(_) => { + // This still doesn't actually do anything. + let callback = + |_args: &PyTuple, _kwargs: Option<&PyDict>| -> PyResult<_> { Ok(true) }; + let py_cfunc = PyCFunction::new_closure(py, None, None, callback).unwrap(); + py_cfunc.to_object(py) + } + ExprResult::Class(_) => Py::new(py, TestClass {}).unwrap().to_object(py), + _ => unimplemented!(), + } + } +} + +#[derive(Clone)] +#[allow(dead_code)] +pub struct CPythonClass(PyObject); + +#[derive(Clone)] +pub struct CPythonObject { + py_object: PyObject, +} + +impl CPythonObject { + pub fn new(py_object: PyObject) -> Self { + Self { py_object } + } +} + +impl CPythonObject { + pub fn get_type(&self) -> ExprResult { + Python::with_gil(|py| { + let obj_ref: &PyAny = self.py_object.as_ref(py); + let obj_type = obj_ref.getattr(Dunder::Class.value()).unwrap(); + ExprResult::CPythonClass(CPythonClass(obj_type.into())) + }) + } +} + +pub mod utils { + use crate::{ + core::Container, + treewalk::{types::Str, Scope}, + }; + + use super::*; + + pub fn from_pyobject(py: Python, py_obj: &PyAny) -> ExprResult { + if let Ok(value) = py_obj.extract::() { + ExprResult::Integer(Container::new(value)) + } else if let Ok(value) = py_obj.extract::() { + ExprResult::FloatingPoint(value) + } else if let Ok(value) = py_obj.extract::<&str>() { + ExprResult::String(Str::new(value.to_string())) + } else { + ExprResult::CPythonObject(CPythonObject::new(py_obj.into_py(py))) + } + } + + pub fn to_args(py: Python, args: ResolvedArguments) -> &PyTuple { + let args = args + .iter_args() + .map(|a| a.to_object(py)) + .collect::>(); + PyTuple::new(py, 
args.iter().map(|item| item.as_ref(py))) + } + + pub fn init_scope(scope: &mut Scope) -> &mut Scope { + for module_str in vec!["itertools", "sys", "time", "_thread", "_weakref"].iter() { + scope.insert( + module_str, + ExprResult::CPythonModule(Container::new(CPythonModule::new(module_str))), + ); + } + + scope + } +} diff --git a/src/types/errors.rs b/src/types/errors.rs new file mode 100644 index 0000000..71e0c98 --- /dev/null +++ b/src/types/errors.rs @@ -0,0 +1,293 @@ +use std::fmt::{Display, Error, Formatter}; + +use crate::bytecode_vm::types::{CompilerError, VmError}; +use crate::core::{log, LogLevel}; +use crate::lexer::types::Token; +use crate::parser::types::{ExceptionLiteral, HandledException}; +use crate::treewalk::{types::ExprResult, CallStack}; + +#[derive(Debug, PartialEq, Clone)] +pub enum MemphisError { + Parser(ParserError), + Interpreter(InterpreterError), + Compiler(CompilerError), + Vm(VmError), +} + +#[derive(Debug, PartialEq, Clone)] +pub enum LexerError { + UnexpectedCharacter(char), +} + +#[derive(Debug, PartialEq, Clone)] +pub enum ParserError { + ExpectedToken(Token, Token), + UnexpectedToken(Token), + ExpectedException(String), + SyntaxError, +} + +#[derive(Debug, PartialEq, Clone)] +pub enum InterpreterError { + Exception(CallStack), + TypeError(Option, CallStack), + KeyError(String, CallStack), + ValueError(String, CallStack), + AttributeError(String, String, CallStack), + VariableNotFound(String, CallStack), + FunctionNotFound(String, CallStack), + MethodNotFound(String, CallStack), + ClassNotFound(String, CallStack), + ModuleNotFound(String, CallStack), + DivisionByZero(String, CallStack), + ExpectedVariable(CallStack), + ExpectedString(CallStack), + ExpectedInteger(CallStack), + ExpectedList(CallStack), + ExpectedTuple(CallStack), + ExpectedRange(CallStack), + ExpectedSet(CallStack), + ExpectedDict(CallStack), + ExpectedFloatingPoint(CallStack), + ExpectedBoolean(CallStack), + ExpectedObject(CallStack), + ExpectedClass(CallStack), + 
ExpectedFunction(CallStack), + ExpectedIterable(CallStack), + ExpectedCoroutine(CallStack), + WrongNumberOfArguments(usize, usize, CallStack), + StopIteration(CallStack), + AssertionError(CallStack), + MissingContextManagerProtocol(CallStack), + // TODO where this is used should really be moved into the parser but we currently don't have + // enough scope context during that stage to do so. + SyntaxError(CallStack), + RuntimeError, + EncounteredReturn(ExprResult), + EncounteredRaise, + EncounteredAwait, + EncounteredSleep, + EncounteredBreak, + EncounteredContinue, +} + +impl InterpreterError { + /// When an `InterpreterError` is thrown inside a try-except block, this method is used to + /// determine whether a given except clause should be run. It does this my mapping + /// `InterpreterError` variants (from the interpreter) to `ExceptionLiteral` variants from the + /// parser. + pub fn matches_except_clause(&self, handled_exceptions: &[HandledException]) -> bool { + if handled_exceptions.is_empty() { + return true; + } + + for literal in handled_exceptions.iter().map(|e| &e.literal) { + if literal == &ExceptionLiteral::Exception { + return true; + } + + let found_literal: Result = self.clone().try_into(); + + if found_literal.is_err() { + log(LogLevel::Warn, || { + format!("Unmatched exception type!\n{}", self) + }); + continue; + } else if let Ok(found_literal) = found_literal { + return &found_literal == literal; + } + } + + false + } +} + +impl Display for MemphisError { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + MemphisError::Parser(e) => write!(f, "{}", e), + MemphisError::Interpreter(e) => write!(f, "{}", e), + MemphisError::Vm(_) => unimplemented!(), + MemphisError::Compiler(_) => unimplemented!(), + } + } +} + +impl Display for LexerError { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + LexerError::UnexpectedCharacter(c) => write!(f, "Unexpected character: {}", c), + } + } +} + +impl Display for 
ParserError { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + ParserError::ExpectedToken(expected, found) => { + write!(f, "Expected token {:?}, found {:?}", expected, found) + } + ParserError::UnexpectedToken(token) => { + write!(f, "Unexpected token \"{:?}\"", token) + } + ParserError::ExpectedException(s) => { + write!(f, "Expected exception: \"{:?}\" is not defined", s) + } + ParserError::SyntaxError => { + write!(f, "SyntaxError") + } + } + } +} + +impl Display for InterpreterError { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + InterpreterError::Exception(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Exception!") + } + InterpreterError::TypeError(message, call_stack) => { + write!(f, "{}", call_stack)?; + match message { + Some(message) => write!(f, "TypeError: {}", message), + None => write!(f, "TypeError"), + } + } + InterpreterError::KeyError(key, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "KeyError: '{}'", key) + } + InterpreterError::ValueError(message, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "ValueError: '{}'", message) + } + InterpreterError::AttributeError(class_name, field, call_stack) => { + write!(f, "{}", call_stack)?; + write!( + f, + "AttributeError: '{}' object has no attribute '{}'", + class_name, field + ) + } + InterpreterError::VariableNotFound(name, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Variable \"{}\" not found", name) + } + InterpreterError::DivisionByZero(message, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "ZeroDivisionError: {}", message) + } + InterpreterError::ClassNotFound(name, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Class \"{}\" not found", name) + } + InterpreterError::MethodNotFound(name, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Method \"{}\" not found", name) + } + InterpreterError::ModuleNotFound(name, call_stack) => { + write!(f, 
"{}", call_stack)?; + write!(f, "Module \"{}\" not found", name) + } + InterpreterError::FunctionNotFound(name, call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Function \"{}\" not found", name) + } + InterpreterError::ExpectedVariable(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected variable") + } + InterpreterError::ExpectedString(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected string") + } + InterpreterError::ExpectedInteger(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected integer") + } + InterpreterError::ExpectedList(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected list") + } + InterpreterError::ExpectedTuple(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected tuple") + } + InterpreterError::ExpectedRange(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected range") + } + InterpreterError::ExpectedSet(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected set") + } + InterpreterError::ExpectedDict(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected dict") + } + InterpreterError::ExpectedFloatingPoint(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected floating point") + } + InterpreterError::ExpectedBoolean(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected boolean") + } + InterpreterError::ExpectedObject(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected object") + } + InterpreterError::ExpectedClass(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected class") + } + InterpreterError::ExpectedFunction(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected function") + } + InterpreterError::ExpectedIterable(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected iterable") + } + InterpreterError::ExpectedCoroutine(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "Expected 
coroutine") + } + InterpreterError::WrongNumberOfArguments(expected, found, call_stack) => { + write!(f, "{}", call_stack)?; + write!( + f, + "Wrong number of arguments, expected {} found {}", + expected, found + ) + } + InterpreterError::StopIteration(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "StopIteration") + } + InterpreterError::AssertionError(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "AssertionError") + } + InterpreterError::MissingContextManagerProtocol(call_stack) => { + write!(f, "{}", call_stack)?; + write!(f, "object does not support the context manager protocol") + } + InterpreterError::SyntaxError(call_stack) => { + write!(f, "{}", call_stack) + } + InterpreterError::RuntimeError => { + write!(f, "RuntimeError") + } + InterpreterError::EncounteredReturn(_) + | InterpreterError::EncounteredRaise + | InterpreterError::EncounteredAwait + | InterpreterError::EncounteredSleep + | InterpreterError::EncounteredBreak + | InterpreterError::EncounteredContinue => { + unreachable!() + } + } + } +} diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..6fda2f4 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,3 @@ +#[cfg(feature = "c_stdlib")] +pub mod cpython; +pub mod errors; diff --git a/tests/checks.rs b/tests/checks.rs new file mode 100644 index 0000000..223d23f --- /dev/null +++ b/tests/checks.rs @@ -0,0 +1,49 @@ +use std::io::{Error, ErrorKind, Result}; +use std::process::{Command, Stdio}; + +fn run_fmt() -> Result<bool> { + let mut memphis_cmd = Command::new("cargo") + .arg("fmt") + .arg("--check") + .stdout(Stdio::piped()) + .spawn()?; + + let status = memphis_cmd.wait()?; + if !status.success() { + return Err(Error::new(ErrorKind::Other, "Command failed")); + } + + Ok(status.success()) +} + +fn run_clippy() -> Result<bool> { + let mut memphis_cmd = Command::new("cargo") + .arg("clippy") + .env("RUSTFLAGS", "-D warnings") + .stdout(Stdio::piped()) + .spawn()?; + + let status = memphis_cmd.wait()?; +
if !status.success() { + return Err(Error::new(ErrorKind::Other, "Command failed")); + } + + Ok(status.success()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn clippy() { + let success = run_clippy().unwrap(); + assert_eq!(success, true); + } + + #[test] + fn fmt() { + let success = run_fmt().unwrap(); + assert_eq!(success, true); + } +} diff --git a/tests/crosscheck_assignment.rs b/tests/crosscheck_assignment.rs new file mode 100644 index 0000000..979e40d --- /dev/null +++ b/tests/crosscheck_assignment.rs @@ -0,0 +1,50 @@ +use std::collections::HashMap; + +use memphis::crosscheck::{BytecodeVmAdapter, InterpreterTest, TestValue, TreewalkAdapter}; + +fn run_test<T: InterpreterTest>(interpreter: &T) { + let input = r#" +a = 5 - 3 +"#; + interpreter.assert_var_expected(input, "a", TestValue::Integer(2)); + + let input = r#" +a = "Hello World" +"#; + interpreter.assert_var_expected(input, "a", TestValue::String("Hello World".into())); + + let input = r#" +a = 5 - 3 +b = 10 +c = None +"#; + interpreter.assert_vars_expected( + input, + HashMap::from([ + ("a", TestValue::Integer(2)), + ("b", TestValue::Integer(10)), + ("c", TestValue::None), + ]), + ); + + let input = r#" +a = 5 - 3 +b = 10 + a +"#; + interpreter.assert_vars_expected( + input, + HashMap::from([("a", TestValue::Integer(2)), ("b", TestValue::Integer(12))]), + ); +} + +#[test] +fn test_treewalk_assignment() { + let interpreter = TreewalkAdapter {}; + run_test(&interpreter); +} + +#[test] +fn test_bytecode_vm_assignment() { + let interpreter = BytecodeVmAdapter {}; + run_test(&interpreter); +} diff --git a/tests/crosscheck_control_flow.rs b/tests/crosscheck_control_flow.rs new file mode 100644 index 0000000..c2b1287 --- /dev/null +++ b/tests/crosscheck_control_flow.rs @@ -0,0 +1,39 @@ +use memphis::crosscheck::{BytecodeVmAdapter, InterpreterTest, TestValue, TreewalkAdapter}; + +fn run_test<T: InterpreterTest>(interpreter: &T) { + let input = r#" +i = 0 +n = 4 +while i < n: + i = i + 1 +"#; + interpreter.assert_var_expected(input,
"i", TestValue::Integer(4)); + + let input = r#" +i = 0 +if i < 10: + a = -1 +"#; + interpreter.assert_var_expected(input, "a", TestValue::Integer(-1)); + + let input = r#" +i = 0 +if i > 10: + a = -1 +else: + a = 3 +"#; + interpreter.assert_var_expected(input, "a", TestValue::Integer(3)); +} + +#[test] +fn test_treewalk_control_flow() { + let interpreter = TreewalkAdapter {}; + run_test(&interpreter); +} + +#[test] +fn test_bytecode_vm_control_flow() { + let interpreter = BytecodeVmAdapter {}; + run_test(&interpreter); +} diff --git a/tests/crosscheck_expressions.rs b/tests/crosscheck_expressions.rs new file mode 100644 index 0000000..da386f3 --- /dev/null +++ b/tests/crosscheck_expressions.rs @@ -0,0 +1,67 @@ +use memphis::crosscheck::{BytecodeVmAdapter, InterpreterTest, TestValue, TreewalkAdapter}; + +fn run_binary_expression_test<T: InterpreterTest>(interpreter: &T) { + let input = "2 + 2"; + interpreter.assert_expr_expected(input, TestValue::Integer(4)); + + let input = "2 / 2"; + interpreter.assert_expr_expected(input, TestValue::Integer(1)); + + let input = "4 < 5"; + interpreter.assert_expr_expected(input, TestValue::Boolean(true)); + + let input = "4 > 5"; + interpreter.assert_expr_expected(input, TestValue::Boolean(false)); + + let input = "4 * (2 + 3)"; + interpreter.assert_expr_expected(input, TestValue::Integer(20)); +} + +fn run_unary_expression_test<T: InterpreterTest>(interpreter: &T) { + let input = "-2"; + interpreter.assert_expr_expected(input, TestValue::Integer(-2)); + + let input = "-(-2)"; + interpreter.assert_expr_expected(input, TestValue::Integer(2)); + + let input = "+5"; + interpreter.assert_expr_expected(input, TestValue::Integer(5)); + + let input = "+(-5)"; + interpreter.assert_expr_expected(input, TestValue::Integer(-5)); + + let input = "not True"; + interpreter.assert_expr_expected(input, TestValue::Boolean(false)); + + let input = "not False"; + interpreter.assert_expr_expected(input, TestValue::Boolean(true)); + + let input = "~0b1101"; +
interpreter.assert_expr_expected(input, TestValue::Integer(-14)); + + // TODO test unpacking here +} + +#[test] +fn test_treewalk_binary_expression() { + let interpreter = TreewalkAdapter {}; + run_binary_expression_test(&interpreter); +} + +#[test] +fn test_bytecode_vm_binary_expression() { + let interpreter = BytecodeVmAdapter {}; + run_binary_expression_test(&interpreter); +} + +#[test] +fn test_treewalk_unary_expression() { + let interpreter = TreewalkAdapter {}; + run_unary_expression_test(&interpreter); +} + +#[test] +fn test_bytecode_vm_unary_expression() { + let interpreter = BytecodeVmAdapter {}; + run_unary_expression_test(&interpreter); +} diff --git a/tests/crosscheck_function_call.rs b/tests/crosscheck_function_call.rs new file mode 100644 index 0000000..138cdec --- /dev/null +++ b/tests/crosscheck_function_call.rs @@ -0,0 +1,32 @@ +use memphis::crosscheck::{BytecodeVmAdapter, InterpreterTest, TestValue, TreewalkAdapter}; + +fn run_test<T: InterpreterTest>(interpreter: &T) { + let input = r#" +def foo(a, b): + return a + b + +a = foo(2, 9) +"#; + interpreter.assert_var_expected(input, "a", TestValue::Integer(11)); + + let input = r#" +def foo(a, b): + c = 9 + return a + b + c + +a = foo(2, 9) +"#; + interpreter.assert_var_expected(input, "a", TestValue::Integer(20)); +} + +#[test] +fn test_treewalk_function_call() { + let interpreter = TreewalkAdapter {}; + run_test(&interpreter); +} + +#[test] +fn test_bytecode_vm_function_call() { + let interpreter = BytecodeVmAdapter {}; + run_test(&interpreter); +} diff --git a/tests/crosscheck_method_call.rs b/tests/crosscheck_method_call.rs new file mode 100644 index 0000000..66df412 --- /dev/null +++ b/tests/crosscheck_method_call.rs @@ -0,0 +1,25 @@ +use memphis::crosscheck::{BytecodeVmAdapter, InterpreterTest, TestValue, TreewalkAdapter}; + +fn run_test<T: InterpreterTest>(interpreter: &T) { + let input = r#" +class Foo: + def bar(self): + return 4 + +f = Foo() +b = f.bar() +"#; + interpreter.assert_var_expected(input, "b", TestValue::Integer(4));
+} + +#[test] +fn test_treewalk_method_call() { + let interpreter = TreewalkAdapter {}; + run_test(&interpreter); +} + +#[test] +fn test_bytecode_vm_method_call() { + let interpreter = BytecodeVmAdapter {}; + run_test(&interpreter); +} diff --git a/tests/integration.rs b/tests/integration.rs new file mode 100644 index 0000000..145e54e --- /dev/null +++ b/tests/integration.rs @@ -0,0 +1,54 @@ +use std::process::Command; + +fn test_script(script: &'static str) { + let output = Command::new("target/debug/memphis") + .arg(script) + .output() + .expect("Failed to run test script"); + + if !output.status.success() { + panic!("Running script {} failed.", script); + } + + let expected = Command::new("python3") + .arg(script) + .output() + .expect("Failed to run test script"); + + if !expected.status.success() { + panic!("Running script {} failed.", script); + } + + assert_eq!( + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&expected.stdout), + "Running script {} produced unexpected output.", + script + ); +} + +#[cfg(test)] +mod integration_tests { + use super::*; + + fn ui_tests() -> Vec<&'static str> { + vec![ + "examples/test.py", + "examples/async/a.py", + "examples/async/b.py", + "examples/async/c.py", + "examples/async/d.py", + "examples/exceptions.py", + "examples/context_manager.py", + "examples/new_method.py", + "examples/builtins.py", + ] + } + + #[test] + fn run_scripts() { + for ui_test in ui_tests() { + test_script(ui_test); + } + } +} diff --git a/tests/other_backends.rs b/tests/other_backends.rs new file mode 100644 index 0000000..5eea79d --- /dev/null +++ b/tests/other_backends.rs @@ -0,0 +1,76 @@ +use std::process::Command; + +/// Compare the output of memphis running the Engine::BytecodeVm against python3. 
+fn run_script_and_compare(script: &'static str) { + let output = Command::new("target/debug/memphis") + .arg(script) + .arg("vm") + .output() + .expect("Failed to run test script"); + + if !output.status.success() { + panic!("Running script {} failed.", script); + } + + let expected = Command::new("python3") + .arg(script) + .output() + .expect("Failed to run test script"); + + if !expected.status.success() { + panic!("Running script {} failed.", script); + } + + assert_eq!( + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&expected.stdout), + "Running script {} produced unexpected output.", + script + ); +} + +#[cfg(test)] +mod bytecode_vm_tests { + use super::*; + + fn ui_tests() -> Vec<&'static str> { + vec!["examples/loop_perf.py"] + } + + #[test] + fn run_scripts() { + for ui_test in ui_tests() { + run_script_and_compare(ui_test); + } + } +} + +#[cfg(feature = "llvm_backend")] +#[cfg(test)] +mod llvm_backend_tests { + use super::*; + + /// Run memphis with the Engine::LlvmBackend engine and just confirm it doesn't fail. 
+ fn run_script(script: &'static str) { + let output = Command::new("target/debug/memphis") + .arg(script) + .arg("llvm") + .output() + .expect("Failed to run test script"); + + if !output.status.success() { + panic!("Running script {} failed.", script); + } + } + + fn ui_tests() -> Vec<&'static str> { + vec!["examples/loop_perf.py"] + } + + #[test] + fn run_scripts() { + for ui_test in ui_tests() { + run_script(ui_test); + } + } +} diff --git a/tests/repl.rs b/tests/repl.rs new file mode 100644 index 0000000..52f9d39 --- /dev/null +++ b/tests/repl.rs @@ -0,0 +1,32 @@ +use std::io::{Error, ErrorKind, Result}; +use std::process::{Command, Stdio}; + +fn run_repl_with_pipe() -> Result<bool> { + let cat_cmd = Command::new("cat") + .arg("examples/repl.py") + .stdout(Stdio::piped()) + .spawn()?; + + let mut memphis_cmd = Command::new("target/debug/memphis") + .stdin(cat_cmd.stdout.unwrap()) + .stdout(Stdio::piped()) + .spawn()?; + + let status = memphis_cmd.wait()?; + if !status.success() { + return Err(Error::new(ErrorKind::Other, "Command failed")); + } + + Ok(status.success()) +} + +#[cfg(test)] +mod repl_tests { + use super::*; + + #[test] + fn run_repl() { + let success = run_repl_with_pipe().unwrap(); + assert_eq!(success, true); + } +}