diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 2d756c0..3ec1622 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,7 +19,9 @@ jobs: # Each feature flag combination features: - "" + - "stdlib" - "c_stdlib" + - "stdlib c_stdlib" - "repl" - "c_stdlib repl" diff --git a/Cargo.toml b/Cargo.toml index f257348..8072cd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,12 +4,29 @@ version = "0.1.0" edition = "2021" license = "LGPL-3.0-or-later" +[lib] +# cdylib: compiles the code as a C-compatible dynamic library. Used when you want to link Rust code +# with other languages (like C or C++), or when you need to produce WebAssembly (Wasm) for use in +# JavaScript environments. When targeting Wasm, the `cdylib` crate type is required because it +# directs the Rust compiler to produce a binary compatible with the Wasm runtime, without including +# Rust-specific metadata and symbols that would otherwise be generated. + +# rlib: compiles code as a Rust-specific library. The default library type that includes all Rust +# metadata, allowing for full compatibility when linking with other Rust crates. If you're only +# using your library in other Rust code, you don’t usually need to specify crate-type = ["rlib"] +# explicitly, as this is the default. We only need it here because we specified `cdylib`. +crate-type = ["cdylib", "rlib"] + [dependencies] pyo3 = { version = "0.20.3", optional = true } inkwell = { version = "0.4.0", features = ["llvm17-0"], optional = true } crossterm = { version = "0.28.1", optional = true } +wasm-bindgen = { version = "0.2", optional = true } +console_error_panic_hook = { version = "0.1", optional = true } [features] c_stdlib = ["pyo3"] +stdlib = [] llvm_backend = ["inkwell"] repl = ["crossterm"] +wasm = ["wasm-bindgen", "console_error_panic_hook"] diff --git a/docs/DEVELOPING.md b/docs/DEVELOPING.md index d4a9351..7c048ea 100644 --- a/docs/DEVELOPING.md +++ b/docs/DEVELOPING.md @@ -14,8 +14,12 @@ cargo run --features repl ## Feature Flags Feature flags are needed to enable C stdlib or REPL support (or the experimental LLVM backend). ```bash +# if examples/test.py depends on stdlib features +cargo run --features stdlib examples/api.py # if examples/test.py depends on stdlib features implemented in C -cargo run --features c_stdlib examples/test.py +cargo run --features c_stdlib examples/api.py +# it's common to use these together to get as much of the stdlib support as we currently offer +cargo run --features stdlib,c_stdlib examples/api.py # script to run all combinations of feature flags ./test_features.sh @@ -36,3 +40,14 @@ sudo flamegraph -v -o tw.svg -- target/debug/memphis examples/loop_perf.py sudo flamegraph -v -o vm.svg -- MEMPHIS_ENGINE=vm target/debug/memphis examples/loop_perf.py sudo flamegraph -v -o llvm.svg -- MEMPHIS_ENGINE=llvm_backend target/debug/memphis examples/loop_perf.py ``` + +## WebAssembly +```bash +cargo install wasm-pack + +# build for the wasm target - we must specify a feature flag because our wasm_bindgen interface +# is behind the wasm feature flag +wasm-pack build --target web --out-dir wasm_ui/pkg -- --features wasm + +# then load wasm_ui/index.html in a browser +``` diff --git a/src/crosscheck/test_value.rs b/src/crosscheck/test_value.rs index 505d058..c4d77c7 100644 --- a/src/crosscheck/test_value.rs +++ b/src/crosscheck/test_value.rs @@ -1,3 +1,5 @@ +use std::fmt::{Display, Formatter, Result}; + use crate::bytecode_vm::types::Value; use crate::treewalk::types::ExprResult; @@ -10,6 +12,25 @@ pub enum TestValue { Integer(i64), String(String), Boolean(bool), + List(Vec), +} + +impl Display for TestValue { + fn fmt(&self, f: &mut Formatter<'_>) -> Result { + match self { + TestValue::None => write!(f, "None"), + TestValue::Integer(i) => write!(f, "{}", i), + TestValue::List(i) => { + let items = i + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", "); + write!(f, "[{}]", items) + } + _ => unimplemented!(), + } + } } impl From for TestValue { @@ -38,6 +59,13 @@ impl From for TestValue { TestValue::String(value.as_string().expect("failed to get string")) } ExprResult::Boolean(val) => TestValue::Boolean(val), + ExprResult::List(i) => { + let items = i + .into_iter() + .map(|item| item.into()) + .collect::>(); + TestValue::List(items) + } _ => unimplemented!( "Conversion to TestValue not implemented for type '{}'", value.get_type() diff --git a/src/lib.rs b/src/lib.rs index 22acdbb..c910c80 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,3 +17,27 @@ pub enum Engine { #[cfg(feature = "llvm_backend")] LlvmBackend, } + +#[cfg(feature = "wasm")] +mod wasm { + use console_error_panic_hook::set_once; + use wasm_bindgen::prelude::wasm_bindgen; + + use super::*; + use crosscheck::{InterpreterTest, TreewalkAdapter}; + + // Export a function to JavaScript + #[wasm_bindgen] + pub fn greet() -> String { + "Hello from WebAssembly!".to_string() + } + + #[wasm_bindgen] + pub fn evaluate(code: String) -> String { + // Set the panic hook for better error messages in the browser console + set_once(); + + let result = TreewalkAdapter.execute(&code); + format!("{}", result) + } +} diff --git a/src/treewalk/mod.rs b/src/treewalk/mod.rs index 0a4400e..a7ef3cb 100644 --- a/src/treewalk/mod.rs +++ b/src/treewalk/mod.rs @@ -8,6 +8,8 @@ mod module_loader; mod scope; mod scope_manager; mod state; +#[cfg(feature = "stdlib")] +mod stdlib; mod type_registry; pub mod types; pub mod typing; diff --git a/src/treewalk/module_loader.rs b/src/treewalk/module_loader.rs index e66014c..31503d0 100644 --- a/src/treewalk/module_loader.rs +++ b/src/treewalk/module_loader.rs @@ -1,53 +1,21 @@ -use std::collections::{HashMap, HashSet}; -use std::env; -use std::fs; -use std::io::{self, ErrorKind}; -use std::path::{Path, PathBuf}; -use std::process::Command; -use std::str; - -use crate::core::{log, Container, LogLevel}; -use crate::parser::types::ImportPath; - +use std::{ + collections::{HashMap, HashSet}, + fs, + path::{Path, PathBuf}, + str, +}; + +use crate::{ + core::{log, Container, LogLevel}, + parser::types::ImportPath, +}; + +#[cfg(feature = "stdlib")] +use super::stdlib::Stdlib; #[cfg(feature = "c_stdlib")] use super::types::cpython::CPythonModule; use super::types::{utils::Dunder, Module}; -fn lookup_python_site_packages(command: &str) -> Vec { - let output = Command::new("python3") - .args(["-c", command]) - .output() - .expect("Failed to retrieve Python site-packages path"); - - if !output.status.success() { - panic!("Failed to retrieve Python site-packages path"); - } - - let output_str = str::from_utf8(&output.stdout) - .map_err(|e| io::Error::new(ErrorKind::InvalidData, e)) - .expect("Failed to retrieve Python site-packages path"); - - output_str.lines().map(PathBuf::from).collect() -} - -fn init_paths() -> Vec { - // The location of any "standard-lib" modules we add ourselves. This refers to the lib - // directory of this repository. - let mut paths = vec![PathBuf::from("./lib".to_string())]; - - // This is the location of packages installed by pip, i.e. pendulum. - // TODO can we get rid of this in favor of sys.path below? - let mut site_packages = - lookup_python_site_packages("import site; print('\\n'.join(site.getsitepackages()))"); - paths.append(&mut site_packages); - - // This seems to have some overlap with the site-packages above, yet it contains the full set - // of paths including standard lib items, i.e. argparse. - let mut sys_path = lookup_python_site_packages("import sys; print('\\n'.join(sys.path))"); - paths.append(&mut sys_path); - paths -} - #[derive(Debug, PartialEq, Clone)] pub struct LoadedModule { name: Option, @@ -108,9 +76,6 @@ impl LoadedModule { } pub struct ModuleLoader { - /// The [`PathBuf`] representing the directory from which memphis was invoked. - run_dir: PathBuf, - /// The list of directories searched during each import. This will be seeded with the location /// of the Python stdlib present on the host system. paths: Vec, @@ -146,11 +111,11 @@ pub struct ModuleLoader { impl ModuleLoader { pub fn new() -> Self { - let run_dir = env::current_dir().expect("Failed to get current directory"); - Self { - run_dir, - paths: init_paths(), + #[cfg(feature = "stdlib")] + paths: Stdlib::init().paths().to_vec(), + #[cfg(not(feature = "stdlib"))] + paths: vec![], fs_cache: HashMap::default(), not_found_cache: HashSet::default(), module_cache: HashMap::default(), @@ -175,7 +140,7 @@ impl ModuleLoader { log(LogLevel::Debug, || { format!("Loading: {}", filepath.display()) }); - Some(LoadedModule::new(name, self.run_dir.join(filepath), text)) + Some(LoadedModule::new(name, filepath, text)) } else { None } @@ -208,7 +173,7 @@ impl ModuleLoader { // get back to the directory. We could change this in the future, but this seemed // cleaner for the caller to provide. Some(p) => up_n_levels(&p, &(level + 1)), - None => up_n_levels(&self.run_dir, level), + None => up_n_levels(&PathBuf::from("."), level), }; expand_path(base_path.as_ref()?, path_segments) diff --git a/src/treewalk/stdlib.rs b/src/treewalk/stdlib.rs new file mode 100644 index 0000000..bfda798 --- /dev/null +++ b/src/treewalk/stdlib.rs @@ -0,0 +1,51 @@ +use std::{ + io::{self, Error, ErrorKind}, + path::PathBuf, + process::Command, + str, +}; + +/// A holder for the paths we can later search for stdlib modules written in Python. +pub struct Stdlib { + paths: Vec, +} + +impl Stdlib { + pub fn init() -> Self { + // The location of any "standard-lib" modules we add ourselves. This refers to the lib + // directory of this repository. + let mut paths = vec![PathBuf::from("./lib".to_string())]; + + // This is the location of packages installed by pip, i.e. pendulum. + // TODO can we get rid of this in favor of sys.path below? + let mut site_packages = + run_in_python("import site; print('\\n'.join(site.getsitepackages()))") + .expect("Failed to get site packages path"); + paths.append(&mut site_packages); + + // This seems to have some overlap with the site-packages above, yet it contains the full set + // of paths including standard lib items, i.e. argparse. + let mut sys_path = run_in_python("import sys; print('\\n'.join(sys.path))") + .expect("Failed to get sys path"); + paths.append(&mut sys_path); + Self { paths } + } + + pub fn paths(&self) -> &[PathBuf] { + &self.paths + } +} + +/// Run a provided command on Python on the host machine. +fn run_in_python(command: &str) -> io::Result> { + let output = Command::new("python3").args(["-c", command]).output()?; + + if !output.status.success() { + return Err(Error::new(ErrorKind::Other, "Failed to run command")); + } + + let output_str = + str::from_utf8(&output.stdout).map_err(|e| Error::new(ErrorKind::InvalidData, e))?; + + Ok(output_str.lines().map(PathBuf::from).collect()) +} diff --git a/test_features.sh b/test_features.sh index 5e524a8..65109f9 100755 --- a/test_features.sh +++ b/test_features.sh @@ -3,6 +3,7 @@ # Feature combinations to test features=( "" + "stdlib" "c_stdlib" "repl" "c_stdlib repl" diff --git a/tests/crosscheck_assignment.rs b/tests/crosscheck_assignment.rs index 979e40d..4a079a5 100644 --- a/tests/crosscheck_assignment.rs +++ b/tests/crosscheck_assignment.rs @@ -39,12 +39,10 @@ b = 10 + a #[test] fn test_treewalk_assignment() { - let interpreter = TreewalkAdapter {}; - run_test(&interpreter); + run_test(&TreewalkAdapter); } #[test] fn test_bytecode_vm_assignment() { - let interpreter = BytecodeVmAdapter {}; - run_test(&interpreter); + run_test(&BytecodeVmAdapter); } diff --git a/tests/crosscheck_control_flow.rs b/tests/crosscheck_control_flow.rs index c2b1287..42f8b6c 100644 --- a/tests/crosscheck_control_flow.rs +++ b/tests/crosscheck_control_flow.rs @@ -28,12 +28,10 @@ else: #[test] fn test_treewalk_control_flow() { - let interpreter = TreewalkAdapter {}; - run_test(&interpreter); + run_test(&TreewalkAdapter); } #[test] fn test_bytecode_vm_control_flow() { - let interpreter = BytecodeVmAdapter {}; - run_test(&interpreter); + run_test(&BytecodeVmAdapter); } diff --git a/tests/crosscheck_expressions.rs b/tests/crosscheck_expressions.rs index da386f3..44430c2 100644 --- a/tests/crosscheck_expressions.rs +++ b/tests/crosscheck_expressions.rs @@ -44,24 +44,20 @@ fn run_unary_expression_test(interpreter: &T) { #[test] fn test_treewalk_binary_expression() { - let interpreter = TreewalkAdapter {}; - run_binary_expression_test(&interpreter); + run_binary_expression_test(&TreewalkAdapter); } #[test] fn test_bytecode_vm_binary_expression() { - let interpreter = BytecodeVmAdapter {}; - run_binary_expression_test(&interpreter); + run_binary_expression_test(&BytecodeVmAdapter); } #[test] fn test_treewalk_unary_expression() { - let interpreter = TreewalkAdapter {}; - run_unary_expression_test(&interpreter); + run_unary_expression_test(&TreewalkAdapter); } #[test] fn test_bytecode_vm_unary_expression() { - let interpreter = BytecodeVmAdapter {}; - run_unary_expression_test(&interpreter); + run_unary_expression_test(&BytecodeVmAdapter); } diff --git a/tests/crosscheck_function_call.rs b/tests/crosscheck_function_call.rs index 138cdec..51eccd6 100644 --- a/tests/crosscheck_function_call.rs +++ b/tests/crosscheck_function_call.rs @@ -21,12 +21,10 @@ a = foo(2, 9) #[test] fn test_treewalk_function_call() { - let interpreter = TreewalkAdapter {}; - run_test(&interpreter); + run_test(&TreewalkAdapter); } #[test] fn test_bytecode_vm_function_call() { - let interpreter = BytecodeVmAdapter {}; - run_test(&interpreter); + run_test(&BytecodeVmAdapter); } diff --git a/tests/crosscheck_method_call.rs b/tests/crosscheck_method_call.rs index 4f4fc2d..4f1a8c3 100644 --- a/tests/crosscheck_method_call.rs +++ b/tests/crosscheck_method_call.rs @@ -27,12 +27,10 @@ b = f.bar() #[test] fn test_treewalk_method_call() { - let interpreter = TreewalkAdapter {}; - run_test(&interpreter); + run_test(&TreewalkAdapter); } #[test] fn test_bytecode_vm_method_call() { - let interpreter = BytecodeVmAdapter {}; - run_test(&interpreter); + run_test(&BytecodeVmAdapter); } diff --git a/wasm_ui/index.html b/wasm_ui/index.html new file mode 100644 index 0000000..fcec854 --- /dev/null +++ b/wasm_ui/index.html @@ -0,0 +1,21 @@ + + + + + Wasm Test + + + + +