Prepare crates for publishing #13

Merged
merged 6 commits into from
Jan 13, 2024
17 changes: 11 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -3,16 +3,21 @@ members = ["crates/*"]
resolver = "2"

[workspace.package]
version = "0.0.1"
rust-version = "1.70"
edition = "2021"
license = "Apache-2.0"
authors = ["OpenQASM3 parser team"]
readme = "README.md"
keywords = ["QASM", "openqasm3", "parser"]
categories = ["parser-implementations"]
repository = "https://github.com/Qiskit/openqasm3_parser"

[workspace.dependencies]
# local crates
lexer = { path = "./crates/lexer", version = "0.0.0" }
parser = { path = "./crates/parser", version = "0.1.0" }
oq3_syntax = { path = "./crates/oq3_syntax", version = "0.0.0" }
semantics = { path = "./crates/semantics", version = "0.0.0" }
source_file = { path = "./crates/source_file", version = "0.0.0" }
sourcegen = { path = "./crates/sourcegen", version = "0.0.0" }
oq3_lexer = { path = "crates/oq3_lexer", version = "0.0.1" }
oq3_parser = { path = "crates/oq3_parser", version = "0.0.1" }
oq3_syntax = { path = "crates/oq3_syntax", version = "0.0.1" }
oq3_semantics = { path = "crates/oq3_semantics", version = "0.0.1" }
oq3_sourcegen = { path = "crates/oq3_sourcegen", version = "0.0.1" }
oq3_source_file = { path = "crates/oq3_source_file", version = "0.0.1" }
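
With the `[workspace.package]` and `[workspace.dependencies]` tables above, each member crate inherits shared settings instead of repeating them. A minimal sketch of the resulting member manifest pattern (mirroring the actual manifests added later in this PR, e.g. `crates/oq3_parser/Cargo.toml`):

```toml
# Sketch of a member crate manifest under this workspace layout.
[package]
name = "oq3_parser"
version.workspace = true   # resolves to 0.0.1 from [workspace.package]
edition.workspace = true   # resolves to 2021

[dependencies]
# path and version are taken from [workspace.dependencies] above
oq3_lexer.workspace = true
```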
15 changes: 7 additions & 8 deletions README.md
@@ -28,23 +28,22 @@ For instance, I hope to soon replace "I" with "we".

The first three crates are based on tools for `rust` and `rust-analyzer`.

* [lexer](./crates/lexer) -- A lightly modified version of the `rustc` (the rust compiler) lexer.
* [parser](./crates/parser) -- Ingests output of `lexer` and outputs a concrete syntax tree.
* [oq3_lexer](./crates/oq3_lexer) -- A lightly modified version of the `rustc` (the rust compiler) lexer.
* [oq3_parser](./crates/oq3_parser) -- Ingests output of `lexer` and outputs a concrete syntax tree.
* [oq3_syntax](./crates/oq3_syntax) -- Ingests output of `parser` and outputs an abstract syntax tree (AST).
The rust-analyzer [documentation](#design) sometimes refers to this AST by something like "typed AST".
This can be confusing. It does not mean that semantic
analysis has been performed and OQ3 types have been assigned to all expressions. It means that the rust type system is
used to encode syntactic elements, in contrast to some lower representations in the same crate.
* [semantics](./crates/semantics) -- Performs [semantic analysis](https://en.wikipedia.org/wiki/Compiler#Front_end)
* [semantics](./crates/oq3_semantics) -- Performs [semantic analysis](https://en.wikipedia.org/wiki/Compiler#Front_end)
and outputs an [abstract semantic graph (ASG)](https://en.wikipedia.org/wiki/Abstract_semantic_graph)
There are other names for this structure, but "ASG" is convenient.
* [source_file](./crates/source_file) -- A higher-level interface to the syntactic AST. This sits between the syntactic AST and
* [source_file](./crates/oq3_source_file) -- A higher-level interface to the syntactic AST. This sits between the syntactic AST and
semantic ASG. This crate manages the main source file and included source files.
* [ast_pyo3](./crates/source_file) Experimental code. It will not be used in its current form.

#### Supporting crates

* [sourcegen](./crates/sourcegen) -- supports code generation. This is a very small crate that is copied here because the external
* [sourcegen](./crates/oq3_sourcegen) -- supports code generation. This is a very small crate that is copied here because the external
crate has a bug.

### Warning !
@@ -55,14 +54,14 @@ the test system (you read correctly). If possible, we plan to change this to a m
### Using this front end

A reminder: A front end is not of much use unless you have a back end. Examples showing the entry points and how to use them
can be found in [./crates/semantics/examples/semdemo.rs](./crates/semantics/examples/semdemo.rs).
can be found in [./crates/oq3_semantics/examples/semdemo.rs](./crates/oq3_semantics/examples/semdemo.rs).

```shell
shell> export QASM3_PATH=./crates/semantics/examples/qasm/
shell> cargo run --example semdemo -- semantic scratch1.qasm
```

Replace `scratch1.qasm` with some file found in [./crates/semantics/examples/qasm/](./crates/semantics/examples/qasm/).
Replace `scratch1.qasm` with some file found in [./crates/oq3_semantics/examples/qasm/](./crates/oq3_semantics/examples/qasm/).

#### Search path

4 changes: 2 additions & 2 deletions codegen_scripts/cleangenerated.sh
@@ -9,8 +9,8 @@
# are not touched by this script.

cd ..
rm crates/parser/src/syntax_kind/_syntax_kind_enum.rs
rm crates/parser/src/syntax_kind/syntax_kind_enum.rs.~*
rm crates/oq3_parser/src/syntax_kind/_syntax_kind_enum.rs
rm crates/oq3_parser/src/syntax_kind/syntax_kind_enum.rs.~*

rm crates/oq3_syntax/src/ast/generated/_new_nodes.rs
rm crates/oq3_syntax/src/ast/generated/nodes.rs.~*
2 changes: 1 addition & 1 deletion codegen_scripts/cpgenerated.sh
@@ -7,5 +7,5 @@
# Copy the generated code from the temporary files to which it is written
# to its final location where it will be compiled into the library.

cd .. && cp -a --backup=t crates/parser/src/syntax_kind/_syntax_kind_enum.rs crates/parser/src/syntax_kind/syntax_kind_enum.rs
cd .. && cp -a --backup=t crates/oq3_parser/src/syntax_kind/_syntax_kind_enum.rs crates/oq3_parser/src/syntax_kind/syntax_kind_enum.rs

8 changes: 4 additions & 4 deletions codegen_scripts/mkgenerated.sh
@@ -13,16 +13,16 @@
# This should be enough for codegen.

# But you still need to add the things to the parser grammar
# 4 You might need to update crates/parser/src/grammar/expressions/atom.rs
# Or crates/parser/src/grammar/items.rs (eg. for `gate`)
# 4 You might need to update crates/oq3_parser/src/grammar/expressions/atom.rs
# Or crates/oq3_parser/src/grammar/items.rs (eg. for `gate`)
# Or other grammar files

# Generated files are not given their final names in order not to clobber existing generated code
# Here are the temporary filenames and the final filenames
# You have to copy them manually.
# crates/oq3_syntax/src/ast/generated/_new_tokens.rs --> tokens.rs
# crates/oq3_syntax/src/ast/generated/_new_nodes.rs --> nodes.rs
# crates/parser/src/syntax_kind/_generated.rs --> generated.rs
# crates/oq3_parser/src/syntax_kind/_generated.rs --> generated.rs

# Update: Running this script now seems robust. Originally all codegen was done
# by a single test. I split it into two tests and run each of them twice.
@@ -42,6 +42,6 @@
# Don't know why, but code gen will fail otherwise.

cd ..
rustfmt crates/parser/src/syntax_kind/_syntax_kind_enum.rs
rustfmt crates/oq3_parser/src/syntax_kind/_syntax_kind_enum.rs
rustfmt crates/oq3_syntax/src/ast/generated/_new_nodes.rs
rustfmt crates/oq3_syntax/src/ast/generated/_new_tokens.rs
15 changes: 0 additions & 15 deletions crates/lexer/Cargo.toml

This file was deleted.

23 changes: 23 additions & 0 deletions crates/oq3_lexer/Cargo.toml
@@ -0,0 +1,23 @@
[package]
name = "oq3_lexer"
description = ""
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
readme.workspace = true
keywords.workspace = true
categories.workspace = true
repository.workspace = true

[dependencies]
unicode-xid = "0.2.0"

[dependencies.unicode-properties]
version = "0.1.0"
default-features = false
features = ["emoji"]

[dev-dependencies]
expect-test = "1.4.0"
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
23 changes: 23 additions & 0 deletions crates/oq3_parser/Cargo.toml
@@ -0,0 +1,23 @@
[package]
name = "oq3_parser"
description = ""
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
readme.workspace = true
keywords.workspace = true
categories.workspace = true
repository.workspace = true

[lib]
doctest = false

[dependencies]
oq3_lexer.workspace = true
drop_bomb = "0.1.5"
limit = { version = "0.0.188", package = "ra_ap_limit" }

[dev-dependencies]
expect-test = "1.4.0"
File renamed without changes.
File renamed without changes.
File renamed without changes.
104 changes: 52 additions & 52 deletions crates/parser/src/lexed_str.rs → crates/oq3_parser/src/lexed_str.rs
@@ -6,7 +6,7 @@
//! Note that `str` does *not* refer to a string in the target language.
//! Note that strictly speaking the parser in this crate is not required to work
//! on tokens which originated from text. Macros, eg, can synthesize tokens out
//! of thin air. So, ideally, lexer should be an orthogonal crate. It is however
//! of thin air. So, ideally, oq3_lexer should be an orthogonal crate. It is however
//! convenient to include a text-based lexer here!
//!
//! Note that these tokens, unlike the tokens we feed into the parser, do
@@ -35,7 +35,7 @@ impl<'a> LexedStr<'a> {
pub fn new(text: &'a str) -> LexedStr<'a> {
let mut conv = Converter::new(text);

for token in lexer::tokenize(&text[conv.offset..]) {
for token in oq3_lexer::tokenize(&text[conv.offset..]) {
let token_text = &text[conv.offset..][..token.len as usize];
conv.extend_token(&token.kind, token_text);
}
@@ -145,23 +145,23 @@ impl<'a> Converter<'a> {
}
}

fn extend_token(&mut self, kind: &lexer::TokenKind, token_text: &str) {
fn extend_token(&mut self, kind: &oq3_lexer::TokenKind, token_text: &str) {
let (err, syntax_kind, text_len) = inner_extend_token(kind, token_text);
let err = if err.is_empty() { None } else { Some(err) };
self.push(syntax_kind, text_len, err);
}
}

fn extend_literal_func(len: usize, kind: &lexer::LiteralKind) -> (&str, SyntaxKind, usize) {
fn extend_literal_func(len: usize, kind: &oq3_lexer::LiteralKind) -> (&str, SyntaxKind, usize) {
let mut err = "";
let syntax_kind = match *kind {
lexer::LiteralKind::Int { empty_int, base: _ } => {
oq3_lexer::LiteralKind::Int { empty_int, base: _ } => {
if empty_int {
err = "Missing digits after the integer base prefix";
}
INT_NUMBER
}
lexer::LiteralKind::Float {
oq3_lexer::LiteralKind::Float {
empty_exponent,
base: _,
} => {
@@ -170,41 +170,41 @@ fn extend_literal_func(len: usize, kind: &lexer::LiteralKind) -> (&str, SyntaxKi
}
FLOAT_NUMBER
}
lexer::LiteralKind::TimingInt { empty_int, base } => {
oq3_lexer::LiteralKind::TimingInt { empty_int, base } => {
if empty_int {
err = "Missing digits after the integer base prefix";
}
if base != lexer::Base::Decimal {
if base != oq3_lexer::Base::Decimal {
err = "Base of timing integer literal is not decimal";
}
TIMING_INT_NUMBER
}
lexer::LiteralKind::TimingFloat {
oq3_lexer::LiteralKind::TimingFloat {
empty_exponent,
base,
} => {
if empty_exponent {
err = "Missing digits after the exponent symbol";
}
if base != lexer::Base::Decimal {
if base != oq3_lexer::Base::Decimal {
err = "Base of timing float literal is not decimal";
}
TIMING_FLOAT_NUMBER
}
lexer::LiteralKind::SimpleFloat => SIMPLE_FLOAT_NUMBER,
lexer::LiteralKind::Byte { terminated } => {
oq3_lexer::LiteralKind::SimpleFloat => SIMPLE_FLOAT_NUMBER,
oq3_lexer::LiteralKind::Byte { terminated } => {
if !terminated {
err = "Missing trailing `'` symbol to terminate the byte literal";
}
BYTE
}
lexer::LiteralKind::Str { terminated } => {
oq3_lexer::LiteralKind::Str { terminated } => {
if !terminated {
err = "Missing trailing `\"` symbol to terminate the string literal";
}
STRING
}
lexer::LiteralKind::BitStr {
oq3_lexer::LiteralKind::BitStr {
terminated,
consecutive_underscores,
} => {
@@ -223,7 +223,7 @@ fn extend_literal_func(len: usize, kind: &lexer::LiteralKind) -> (&str, SyntaxKi
}

fn inner_extend_token<'a>(
kind: &'a lexer::TokenKind,
kind: &'a oq3_lexer::TokenKind,
token_text: &str,
) -> (&'a str, SyntaxKind, usize) {
// A note on an intended tradeoff:
Expand All @@ -234,65 +234,65 @@ fn inner_extend_token<'a>(

let syntax_kind = {
match kind {
lexer::TokenKind::LineComment => COMMENT,
lexer::TokenKind::BlockComment { terminated } => {
oq3_lexer::TokenKind::LineComment => COMMENT,
oq3_lexer::TokenKind::BlockComment { terminated } => {
if !terminated {
err = "Missing trailing `*/` symbols to terminate the block comment";
}
COMMENT
}

lexer::TokenKind::Whitespace => WHITESPACE,
lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE,
oq3_lexer::TokenKind::Whitespace => WHITESPACE,
oq3_lexer::TokenKind::Ident if token_text == "_" => UNDERSCORE,

// If it looks like an identifier, first check whether it is a keyword.
lexer::TokenKind::Ident => SyntaxKind::from_keyword(token_text)
oq3_lexer::TokenKind::Ident => SyntaxKind::from_keyword(token_text)
.unwrap_or(SyntaxKind::from_scalar_type(token_text).unwrap_or(IDENT)),

// um, this does not look correct
lexer::TokenKind::HardwareIdent => {
oq3_lexer::TokenKind::HardwareIdent => {
SyntaxKind::from_keyword(token_text).unwrap_or(HARDWAREIDENT)
}

lexer::TokenKind::InvalidIdent => {
oq3_lexer::TokenKind::InvalidIdent => {
err = "Ident contains invalid characters";
IDENT
}

lexer::TokenKind::Literal { kind, .. } => {
oq3_lexer::TokenKind::Literal { kind, .. } => {
// self.extend_literal(token_text.len(), kind);
return extend_literal_func(token_text.len(), kind);
}

lexer::TokenKind::Semi => T![;],
lexer::TokenKind::Comma => T![,],
lexer::TokenKind::Dot => T![.],
lexer::TokenKind::OpenParen => T!['('],
lexer::TokenKind::CloseParen => T![')'],
lexer::TokenKind::OpenBrace => T!['{'],
lexer::TokenKind::CloseBrace => T!['}'],
lexer::TokenKind::OpenBracket => T!['['],
lexer::TokenKind::CloseBracket => T![']'],
lexer::TokenKind::At => T![@],
lexer::TokenKind::Pound => T![#],
lexer::TokenKind::Tilde => T![~],
lexer::TokenKind::Question => T![?],
lexer::TokenKind::Colon => T![:],
lexer::TokenKind::Dollar => T![$],
lexer::TokenKind::Eq => T![=],
lexer::TokenKind::Bang => T![!],
lexer::TokenKind::Lt => T![<],
lexer::TokenKind::Gt => T![>],
lexer::TokenKind::Minus => T![-],
lexer::TokenKind::And => T![&],
lexer::TokenKind::Or => T![|],
lexer::TokenKind::Plus => T![+],
lexer::TokenKind::Star => T![*],
lexer::TokenKind::Slash => T![/],
lexer::TokenKind::Caret => T![^],
lexer::TokenKind::Percent => T![%],
lexer::TokenKind::Unknown => ERROR,
lexer::TokenKind::Eof => EOF,
oq3_lexer::TokenKind::Semi => T![;],
oq3_lexer::TokenKind::Comma => T![,],
oq3_lexer::TokenKind::Dot => T![.],
oq3_lexer::TokenKind::OpenParen => T!['('],
oq3_lexer::TokenKind::CloseParen => T![')'],
oq3_lexer::TokenKind::OpenBrace => T!['{'],
oq3_lexer::TokenKind::CloseBrace => T!['}'],
oq3_lexer::TokenKind::OpenBracket => T!['['],
oq3_lexer::TokenKind::CloseBracket => T![']'],
oq3_lexer::TokenKind::At => T![@],
oq3_lexer::TokenKind::Pound => T![#],
oq3_lexer::TokenKind::Tilde => T![~],
oq3_lexer::TokenKind::Question => T![?],
oq3_lexer::TokenKind::Colon => T![:],
oq3_lexer::TokenKind::Dollar => T![$],
oq3_lexer::TokenKind::Eq => T![=],
oq3_lexer::TokenKind::Bang => T![!],
oq3_lexer::TokenKind::Lt => T![<],
oq3_lexer::TokenKind::Gt => T![>],
oq3_lexer::TokenKind::Minus => T![-],
oq3_lexer::TokenKind::And => T![&],
oq3_lexer::TokenKind::Or => T![|],
oq3_lexer::TokenKind::Plus => T![+],
oq3_lexer::TokenKind::Star => T![*],
oq3_lexer::TokenKind::Slash => T![/],
oq3_lexer::TokenKind::Caret => T![^],
oq3_lexer::TokenKind::Percent => T![%],
oq3_lexer::TokenKind::Unknown => ERROR,
oq3_lexer::TokenKind::Eof => EOF,
}
};
(err, syntax_kind, token_text.len())
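
The identifier branch above encodes a deliberate lookup order: an identifier-shaped token is first tried against the keyword table, then against the scalar type names, and only then falls back to a plain `IDENT`. A self-contained sketch of that pattern follows; the enum and the lookup functions here are illustrative stand-ins, not the crate's actual `SyntaxKind` API, which dispatches over generated tables.

```rust
// Stand-in for SyntaxKind; the real crate generates this enum.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Kind {
    GateKw, // a keyword, e.g. `gate`
    IntTy,  // a scalar type name, e.g. `int`
    Ident,  // anything else identifier-shaped
}

// Stand-in for SyntaxKind::from_keyword.
fn from_keyword(text: &str) -> Option<Kind> {
    match text {
        "gate" => Some(Kind::GateKw),
        _ => None,
    }
}

// Stand-in for SyntaxKind::from_scalar_type.
fn from_scalar_type(text: &str) -> Option<Kind> {
    match text {
        "int" => Some(Kind::IntTy),
        _ => None,
    }
}

// Mirrors the lookup order in `inner_extend_token`:
// keyword first, then scalar type, then plain identifier.
fn classify_ident(text: &str) -> Kind {
    from_keyword(text).unwrap_or(from_scalar_type(text).unwrap_or(Kind::Ident))
}

fn main() {
    assert_eq!(classify_ident("gate"), Kind::GateKw);
    assert_eq!(classify_ident("int"), Kind::IntTy);
    assert_eq!(classify_ident("theta"), Kind::Ident);
    println!("ok");
}
```

The nesting of `unwrap_or` means a token can never shadow a keyword: `gate` always lexes as a keyword kind even though it is also identifier-shaped.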
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.