diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 34908b6eec86..10a909019fbd 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -192,7 +192,8 @@ jobs:
g++-4.8 \
gcc-4.8-multilib \
g++-4.8-multilib \
- dejagnu
+ dejagnu \
+ cargo
- name: Configure
run: |
diff --git a/.gitignore b/.gitignore
index 88b8aa27a882..b1c6625d645c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,3 +76,4 @@ test.code-workspace
gcc/rust/test3-tiny/*
.clang-format.swap
+libgrust/*/target/
diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in
index 4d6460187924..7c8ab6e78464 100644
--- a/gcc/rust/Make-lang.in
+++ b/gcc/rust/Make-lang.in
@@ -54,6 +54,8 @@ GCCRS_D_OBJS = \
rust/rustspec.o \
$(END)
+LIBS += -ldl -lpthread
+
gccrs$(exeext): $(GCCRS_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a $(LIBDEPS)
+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
$(GCCRS_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a \
@@ -100,6 +102,7 @@ GRS_OBJS = \
rust/rust-proc-macro-invoc-lexer.o \
rust/rust-macro-substitute-ctx.o \
rust/rust-macro-builtins.o \
+ rust/rust-fmt.o \
rust/rust-hir.o \
rust/rust-hir-map.o \
rust/rust-attributes.o \
@@ -208,14 +211,14 @@ RUST_ALL_OBJS = $(GRS_OBJS) $(RUST_TARGET_OBJS)
rust_OBJS = $(RUST_ALL_OBJS) rust/rustspec.o
-RUST_LDFLAGS = $(LDFLAGS) -L./../libgrust/libproc_macro_internal
-RUST_LIBDEPS = $(LIBDEPS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a
+RUST_LDFLAGS = $(LDFLAGS) -L./../libgrust/libproc_macro_internal -L./../libgrust/librustc_format_parser/
+RUST_LIBDEPS = $(LIBDEPS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a rust/libformat_parser.a
# The compiler itself is called crab1
crab1$(exeext): $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(RUST_LIBDEPS) $(rust.prev)
@$(call LINK_PROGRESS,$(INDEX.rust),start)
+$(LLINKER) $(ALL_LINKERFLAGS) $(RUST_LDFLAGS) -o $@ \
- $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a $(BACKENDLIBS)
+ $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) ../libgrust/libproc_macro_internal/libproc_macro_internal.a rust/libformat_parser.a $(BACKENDLIBS)
@$(call LINK_PROGRESS,$(INDEX.rust),end)
# Build hooks.
@@ -401,6 +404,13 @@ rust/%.o: rust/lex/%.cc
$(COMPILE) $(RUST_CXXFLAGS) $(RUST_INCLUDES) $<
$(POSTCOMPILE)
+# Pattern rule with no prerequisites whose recipe only echoes the target name;
+# it neither creates nor modifies the file.  Presumably kept so that make has
+# a rule for the Cargo.toml prerequisites below -- confirm it is still needed.
+%.toml:
+	echo $@
+
+# Static library built by cargo from libgrust/libformat_parser and copied next
+# to the other rust/ objects.  The prerequisites list both the top-level crate
+# and its `generic_format_parser' workspace member (a path dependency of the
+# top-level crate), so that editing either one triggers a rebuild.
+rust/libformat_parser.a: $(srcdir)/../libgrust/libformat_parser/Cargo.toml \
+  $(wildcard $(srcdir)/../libgrust/libformat_parser/src/*.rs) \
+  $(srcdir)/../libgrust/libformat_parser/generic_format_parser/Cargo.toml \
+  $(wildcard $(srcdir)/../libgrust/libformat_parser/generic_format_parser/src/*.rs)
+	cargo build --manifest-path $(srcdir)/../libgrust/libformat_parser/Cargo.toml --release # FIXME: Not always release, right?
+	cp $(srcdir)/../libgrust/libformat_parser/target/release/liblibformat_parser.a $@
+
# build all rust/parse files in rust folder, add cross-folder includes
rust/%.o: rust/parse/%.cc
$(COMPILE) $(RUST_CXXFLAGS) $(RUST_INCLUDES) $<
diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
new file mode 100644
index 000000000000..511e94740c5e
--- /dev/null
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -0,0 +1,43 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#include "rust-fmt.h"
+#include "rust-diagnostics.h"
+
+namespace Rust {
+namespace Fmt {
+
+// Parses TO_PARSE through the `collect_pieces' FFI entry point and wraps the
+// returned slice, together with the string it was parsed from, in a Pieces.
+//
+// NOTE(review): `collect_pieces' is given the buffer owned by TO_PARSE, and
+// TO_PARSE is moved into the returned object afterwards.  If the returned
+// pieces keep pointers into that buffer, moving a std::string may relocate
+// its bytes -- confirm against the Rust side.
+Pieces
+Pieces::collect (std::string &&to_parse)
+{
+  auto piece_slice = collect_pieces (to_parse.c_str ());
+
+  // `len' is a size_t: cast it so that the argument really is the
+  // `unsigned long' that `%lu' expects, whatever the width of size_t.
+  rust_debug ("[ARTHUR] %p, %lu", (const void *) piece_slice.base_ptr,
+              (unsigned long) piece_slice.len);
+
+  // this performs multiple copies, can we avoid them maybe?
+  // auto pieces = std::vector (piece_slice.base_ptr,
+  // piece_slice.base_ptr + piece_slice.len);
+
+  return Pieces (piece_slice, std::move (to_parse));
+}
+
+// Hands the wrapped slice back to the `destroy_pieces' FFI entry point.
+Pieces::~Pieces () { destroy_pieces (slice); }
+
+} // namespace Fmt
+} // namespace Rust
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
new file mode 100644
index 000000000000..0bf9695bb6d2
--- /dev/null
+++ b/gcc/rust/ast/rust-fmt.h
@@ -0,0 +1,269 @@
+// Copyright (C) 2023-2024 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef RUST_FMT_H
+#define RUST_FMT_H
+
+#include "rust-system.h"
+
+// FIXME: How to encode Option?
+
+namespace Rust {
+namespace Fmt {
+
+// Empty placeholder used in the positions where the Rust-side structures of
+// generic_format_parser hold a `&str` (`Piece::String`, `ArgumentNamed`,
+// `CountIsName` and `FormatSpec::ty`).
+// NOTE(review): an empty C++ struct presumably does not have the layout of a
+// Rust string slice -- confirm before reading any field of this type.
+struct RustHamster
+{
+ // hehe
+};
+
+/// Enum of alignments which are supported.
+// The variants appear in the same order as in the Rust-side `Alignment` enum
+// of generic_format_parser.
+enum class Alignment
+{
+ /// The value will be aligned to the left.
+ AlignLeft,
+ /// The value will be aligned to the right.
+ AlignRight,
+ /// The value will be aligned in the center.
+ AlignCenter,
+ /// The value will take on a default alignment.
+ AlignUnknown,
+};
+
+/// Enum for the debug hex flags.
+// The variants appear in the same order as in the Rust-side `DebugHex` enum
+// of generic_format_parser.
+enum class DebugHex
+{
+ /// The `x` flag in `{:x?}`.
+ Lower,
+ /// The `X` flag in `{:X?}`.
+ Upper,
+};
+
+/// Enum for the sign flags.
+// The variants appear in the same order as in the Rust-side `Sign` enum of
+// generic_format_parser.
+enum class Sign
+{
+ /// The `+` flag.
+ Plus,
+ /// The `-` flag.
+ Minus,
+};
+
+/// Enum describing where an argument for a format can be located.
+// C++ counterpart of the Rust-side `Position` enum: a `tag` plus a union with
+// one member per variant.  Presumably `tag` tells which union member is
+// meaningful -- TODO confirm against the code producing these values.
+struct Position
+{
+ enum class Tag
+ {
+ /// The argument is implied to be located at an index
+ ArgumentImplicitlyIs,
+ /// The argument is located at a specific index given in the format,
+ ArgumentIs,
+ /// The argument has a name.
+ ArgumentNamed,
+ };
+
+ // Payload of `ArgumentImplicitlyIs` (a `usize` on the Rust side).
+ struct ArgumentImplicitlyIs_Body
+ {
+ size_t _0;
+ };
+
+ // Payload of `ArgumentIs` (a `usize` on the Rust side).
+ struct ArgumentIs_Body
+ {
+ size_t _0;
+ };
+
+ // Payload of `ArgumentNamed` (a `&str` on the Rust side, see RustHamster).
+ struct ArgumentNamed_Body
+ {
+ RustHamster _0;
+ };
+
+ Tag tag;
+ union
+ {
+ ArgumentImplicitlyIs_Body argument_implicitly_is;
+ ArgumentIs_Body argument_is;
+ ArgumentNamed_Body argument_named;
+ };
+};
+
+/// Range inside of a `Span` used for diagnostics when we only have access to
+/// relative positions.
+// Same two fields, in the same order, as the Rust-side `InnerSpan` struct.
+struct InnerSpan
+{
+ size_t start;
+ size_t end;
+};
+
+/// A count is used for the precision and width parameters of an integer, and
+/// can reference either an argument or a literal integer.
+// C++ counterpart of the Rust-side `Count` enum: a `tag` plus a union with one
+// member per payload-carrying variant.  `CountImplied` carries no payload and
+// therefore has no union member.
+struct Count
+{
+ enum class Tag
+ {
+ /// The count is specified explicitly.
+ CountIs,
+ /// The count is specified by the argument with the given name.
+ CountIsName,
+ /// The count is specified by the argument at the given index.
+ CountIsParam,
+ /// The count is specified by a star (like in `{:.*}`) that refers to the
+ /// argument at the given index.
+ CountIsStar,
+ /// The count is implied and cannot be explicitly specified.
+ CountImplied,
+ };
+
+ // Payload of `CountIs`.
+ struct CountIs_Body
+ {
+ size_t _0;
+ };
+
+ // Payload of `CountIsName`: the name (a `&str` on the Rust side, see
+ // RustHamster) followed by its span.
+ struct CountIsName_Body
+ {
+ RustHamster _0;
+ InnerSpan _1;
+ };
+
+ // Payload of `CountIsParam`.
+ struct CountIsParam_Body
+ {
+ size_t _0;
+ };
+
+ // Payload of `CountIsStar`.
+ struct CountIsStar_Body
+ {
+ size_t _0;
+ };
+
+ Tag tag;
+ union
+ {
+ CountIs_Body count_is;
+ CountIsName_Body count_is_name;
+ CountIsParam_Body count_is_param;
+ CountIsStar_Body count_is_star;
+ };
+};
+
+/// Specification for the formatting of an argument in the format string.
+// The pointer members stand where the Rust-side `FormatSpec` has `Option`
+// fields; presumably a null pointer encodes `None` -- TODO confirm (see the
+// "How to encode Option?" FIXME at the top of this header).
+struct FormatSpec
+{
+ /// Optionally specified character to fill alignment with.
+ const uint32_t *fill;
+ /// Span of the optionally specified fill character.
+ const InnerSpan *fill_span;
+ /// Optionally specified alignment.
+ Alignment align;
+ /// The `+` or `-` flag.
+ const Sign *sign;
+ /// The `#` flag.
+ bool alternate;
+ /// The `0` flag.
+ bool zero_pad;
+ /// The `x` or `X` flag. (Only for `Debug`.)
+ const DebugHex *debug_hex;
+ /// The integer precision to use.
+ Count precision;
+ /// The span of the precision formatting flag (for diagnostics).
+ const InnerSpan *precision_span;
+ /// The string width requested for the resulting format.
+ Count width;
+ /// The span of the width formatting flag (for diagnostics).
+ const InnerSpan *width_span;
+ /// The descriptor string representing the name of the format desired for
+ /// this argument, this can be empty or any number of characters, although
+ /// it is required to be one word.
+ RustHamster ty;
+ /// The span of the descriptor string (for diagnostics).
+ const InnerSpan *ty_span;
+};
+
+/// Representation of an argument specification.
+// Same three fields, in the same order, as the Rust-side `Argument` struct.
+struct Argument
+{
+ /// Where to find this argument
+ Position position;
+ /// The span of the position indicator. Includes any whitespace in implicit
+ /// positions (`{ }`).
+ InnerSpan position_span;
+ /// How to format the argument
+ FormatSpec format;
+};
+
+/// A piece is a portion of the format string which represents the next part
+/// to emit. These are emitted as a stream by the `Parser` class.
+// C++ counterpart of the Rust-side `Piece` enum: a `tag` plus a union with one
+// member per variant.
+struct Piece
+{
+ enum class Tag
+ {
+ /// A literal string which should directly be emitted
+ String,
+ /// This describes that formatting should process the next argument (as
+ /// specified inside) for emission.
+ NextArgument,
+ };
+
+ // Payload of `String` (a `&str` on the Rust side, see RustHamster).
+ struct String_Body
+ {
+ RustHamster _0;
+ };
+
+ // Payload of `NextArgument`: a pointer where the Rust side holds a boxed
+ // `Argument`.
+ struct NextArgument_Body
+ {
+ const Argument *_0;
+ };
+
+ Tag tag;
+ union
+ {
+ String_Body string;
+ NextArgument_Body next_argument;
+ };
+};
+
+// Value returned by `collect_pieces` and consumed by `destroy_pieces`: a base
+// pointer to `Piece` elements plus `len` and `cap` counts.
+// NOTE(review): presumably the raw parts of a Rust `Vec` of pieces -- confirm
+// against the Rust definition of `collect_pieces`.
+struct PieceSlice
+{
+ const Piece *base_ptr;
+ size_t len;
+ size_t cap;
+};
+
+// Entry points with C linkage; their definitions are not part of this header
+// (presumably provided by the libformat_parser static library -- confirm).
+extern "C" {
+
+// Parses INPUT, a NUL-terminated string, and returns the resulting pieces.
+PieceSlice
+collect_pieces (const char *input);
+
+// Releases a slice previously obtained from `collect_pieces`.
+void destroy_pieces (PieceSlice);
+
+} // extern "C"
+
+// Owning wrapper around a PieceSlice together with the string it was parsed
+// from.  Instances are created through `collect`; the destructor passes the
+// slice to `destroy_pieces`.
+// NOTE(review): copy construction and assignment are left implicit while the
+// destructor releases `slice`, so a copied Pieces would presumably hand the
+// same slice to `destroy_pieces` twice -- consider deleting the copy
+// operations or defining move operations.
+struct Pieces
+{
+ // Parses TO_PARSE and takes ownership of both the string and the result.
+ static Pieces collect (std::string &&to_parse);
+ ~Pieces ();
+
+private:
+ Pieces (PieceSlice slice, std::string &&to_parse)
+ : slice (slice), to_parse (std::move (to_parse))
+ {}
+
+ // Slice returned by `collect_pieces`.
+ PieceSlice slice;
+ // The string the slice was parsed from.
+ std::string to_parse;
+};
+
+} // namespace Fmt
+} // namespace Rust
+
+#endif // !RUST_FMT_H
diff --git a/gcc/rust/expand/rust-macro-builtins.cc b/gcc/rust/expand/rust-macro-builtins.cc
index 71da575563db..2af05a5e3777 100644
--- a/gcc/rust/expand/rust-macro-builtins.cc
+++ b/gcc/rust/expand/rust-macro-builtins.cc
@@ -16,6 +16,8 @@
// along with GCC; see the file COPYING3. If not see
// .
+#include "libproc_macro_internal/tokenstream.h"
+#include "rust-token-converter.h"
#include "rust-system.h"
#include "rust-macro-builtins.h"
#include "rust-ast-fragment.h"
@@ -30,6 +32,7 @@
#include "rust-parse.h"
#include "rust-session-manager.h"
#include "rust-attribute-values.h"
+#include "rust-fmt.h"
namespace Rust {
@@ -89,8 +92,8 @@ std::unordered_map
{"env", MacroBuiltin::env_handler},
{"cfg", MacroBuiltin::cfg_handler},
{"include", MacroBuiltin::include_handler},
+ {"format_args", MacroBuiltin::format_args_handler},
/* Unimplemented macro builtins */
- {"format_args", MacroBuiltin::sorry},
{"option_env", MacroBuiltin::sorry},
{"format_args_nl", MacroBuiltin::sorry},
{"concat_idents", MacroBuiltin::sorry},
@@ -942,6 +945,34 @@ MacroBuiltin::stringify_handler (location_t invoc_locus,
return AST::Fragment ({node}, std::move (token));
}
+// Handler registered for the `format_args` builtin macro.  For now it only
+// feeds the invocation's tokens, re-serialised as text, to the format-string
+// parser; the parsed pieces are discarded and an empty fragment is returned.
+// INVOC_LOCUS is currently unused.
+tl::optional
+MacroBuiltin::format_args_handler (location_t invoc_locus,
+ AST::MacroInvocData &invoc)
+{
+ // Flatten the delimited token tree and drop its first and last tokens
+ // (presumably the opening and closing delimiters -- TODO confirm).
+ auto tokens = invoc.get_delim_tok_tree ().to_token_stream ();
+ tokens.erase (tokens.begin ());
+ tokens.pop_back ();
+
+ // Join the textual form of the remaining tokens, each followed by a space.
+ std::stringstream stream;
+ for (const auto &tok : tokens)
+ stream << tok->as_string () << ' ';
+
+ rust_debug ("[ARTHU]: `%s`", stream.str ().c_str ());
+
+ // FIXME: We need to handle this
+ // // if it is not a literal, it's an eager macro invocation - return it
+ // if (!fmt_expr->is_literal ())
+ // {
+ // auto token_tree = invoc.get_delim_tok_tree ();
+ // return AST::Fragment ({AST::SingleASTNode (std::move (fmt_expr))},
+ // token_tree.to_token_stream ());
+ // }
+
+ // The result is not used yet: `pieces` is only constructed and destroyed.
+ auto pieces = Fmt::Pieces::collect (stream.str ());
+
+ return AST::Fragment::create_empty ();
+}
+
tl::optional
MacroBuiltin::sorry (location_t invoc_locus, AST::MacroInvocData &invoc)
{
diff --git a/gcc/rust/expand/rust-macro-builtins.h b/gcc/rust/expand/rust-macro-builtins.h
index 6a84a8b86f68..f9ab3fc3698e 100644
--- a/gcc/rust/expand/rust-macro-builtins.h
+++ b/gcc/rust/expand/rust-macro-builtins.h
@@ -157,6 +157,9 @@ class MacroBuiltin
static tl::optional line_handler (location_t invoc_locus,
AST::MacroInvocData &invoc);
+ // Handler for the `format_args` builtin macro.
+ static tl::optional
+ format_args_handler (location_t invoc_locus, AST::MacroInvocData &invoc);
+
static tl::optional sorry (location_t invoc_locus,
AST::MacroInvocData &invoc);
diff --git a/libgrust/libformat_parser/Cargo.lock b/libgrust/libformat_parser/Cargo.lock
new file mode 100644
index 000000000000..65e48263c71a
--- /dev/null
+++ b/libgrust/libformat_parser/Cargo.lock
@@ -0,0 +1,30 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "generic_format_parser"
+version = "0.1.0"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.152"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
+
+[[package]]
+name = "libformat_parser"
+version = "0.1.0"
+dependencies = [
+ "generic_format_parser",
+ "libc",
+]
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
diff --git a/libgrust/libformat_parser/Cargo.toml b/libgrust/libformat_parser/Cargo.toml
new file mode 100644
index 000000000000..0fcfa3e89a4c
--- /dev/null
+++ b/libgrust/libformat_parser/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "libformat_parser"
+version = "0.1.0"
+edition = "2021"
+
+[workspace]
+
+members = [
+ "generic_format_parser",
+]
+
+[dependencies]
+libc = "0.2"
+generic_format_parser = { path = "generic_format_parser" }
+
+[lib]
+# `crate-type` is the documented spelling of this key; the underscore form
+# `crate_type` is a deprecated alias.  `staticlib` produces the `.a` archive
+# linked into the compiler; `rlib` keeps the crate usable from the test binary.
+crate-type = ["staticlib", "rlib"]
+
+[[bin]]
+name = "format_parser_test"
+path = "src/bin.rs"
diff --git a/libgrust/libformat_parser/cbindgen.toml b/libgrust/libformat_parser/cbindgen.toml
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/libgrust/libformat_parser/generic_format_parser/Cargo.toml b/libgrust/libformat_parser/generic_format_parser/Cargo.toml
new file mode 100644
index 000000000000..34577038cbed
--- /dev/null
+++ b/libgrust/libformat_parser/generic_format_parser/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "generic_format_parser"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+unicode-xid = "0.2.0"
diff --git a/libgrust/libformat_parser/generic_format_parser/src/lib.rs b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
new file mode 100644
index 000000000000..8062bf9e5cec
--- /dev/null
+++ b/libgrust/libformat_parser/generic_format_parser/src/lib.rs
@@ -0,0 +1,1116 @@
+//! Macro support for format strings
+//!
+//! These structures are used when parsing format strings for the compiler.
+//! Parsing does not happen at runtime: structures of `std::fmt::rt` are
+//! generated instead.
+
+#![doc(
+ html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/",
+ html_playground_url = "https://play.rust-lang.org/",
+ test(attr(deny(warnings)))
+)]
+#![deny(rustc::untranslatable_diagnostic)]
+#![deny(rustc::diagnostic_outside_of_impl)]
+// WARNING: We want to be able to build this crate with a stable compiler,
+// so no `#![feature]` attributes should be added!
+
+/// Returns `true` when `c` is an underscore or has the Unicode `XID_Start`
+/// property.
+fn is_id_start(c: char) -> bool {
+    match c {
+        '_' => true,
+        other => unicode_xid::UnicodeXID::is_xid_start(other),
+    }
+}
+
+/// Returns `true` when `c` has the Unicode `XID_Continue` property.
+fn is_id_continue(c: char) -> bool {
+    <char as unicode_xid::UnicodeXID>::is_xid_continue(c)
+}
+
+// Workaround for Ubuntu 18.04. The default Rust package is 1.65 (and unlikely to change I assume?), but the
+// generic format parser library uses `is_some_and` which was introduced in 1.70. So this is a reimplementation,
+// directly taken from the standard library sources
+/// Extension trait providing `is_some_and` for toolchains whose standard
+/// library does not have it yet. `T` is the type wrapped by the option.
+trait IsSomeAnd<T> {
+    /// Returns `true` if there is a value and `f` returns `true` for it.
+    fn is_some_and(self, f: impl FnOnce(T) -> bool) -> bool;
+}
+
+impl<T> IsSomeAnd<T> for Option<T> {
+    fn is_some_and(self, f: impl FnOnce(T) -> bool) -> bool {
+        match self {
+            None => false,
+            Some(x) => f(x),
+        }
+    }
+}
+
+// use rustc_lexer::unescape;
+pub use Alignment::*;
+pub use Count::*;
+pub use Piece::*;
+pub use Position::*;
+
+use std::iter;
+use std::str;
+use std::string;
+
+// Note: copied from rustc_span
+/// A `start`/`end` pair of positions inside of a `Span`, used for diagnostics
+/// when only relative positions are available.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct InnerSpan {
+    pub start: usize,
+    pub end: usize,
+}
+
+impl InnerSpan {
+    /// Builds the span going from `start` to `end`.
+    pub fn new(start: usize, end: usize) -> InnerSpan {
+        Self { start, end }
+    }
+}
+
+/// Records, for a character whose width differs from its source code
+/// representation, where it is and how wide it was before and after.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct InnerWidthMapping {
+    /// Index of the character in the source
+    pub position: usize,
+    /// The inner width in characters
+    pub before: usize,
+    /// The transformed width in characters
+    pub after: usize,
+}
+
+impl InnerWidthMapping {
+    /// Builds a mapping from its three components.
+    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
+        Self { position, before, after }
+    }
+}
+
+/// Whether the input string is a literal. If yes, it contains the inner width mappings.
+#[derive(Clone, PartialEq, Eq)]
+enum InputStringKind {
+    /// The input is not a literal; no width mappings are available.
+    NotALiteral,
+    /// The input is a literal, with the width mappings computed for it.
+    Literal {
+        width_mappings: Vec<InnerWidthMapping>,
+    },
+}
+
+/// The type of format string that we are parsing.
+// `Parser::argument` dispatches on this value to pick between `format()` and
+// `inline_asm()` when parsing a format specification.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum ParseMode {
+ /// A normal format string as per `format_args!`.
+ Format,
+ /// An inline assembly template string for `asm!`.
+ InlineAsm,
+}
+
+/// Newtype around a `usize` offset; two of them delimit an `InnerSpan`.
+#[derive(Copy, Clone)]
+struct InnerOffset(usize);
+
+impl InnerOffset {
+    /// Builds the `InnerSpan` that starts at `self` and stops at `end`.
+    fn to(self, end: InnerOffset) -> InnerSpan {
+        let InnerOffset(first) = self;
+        let InnerOffset(last) = end;
+        InnerSpan::new(first, last)
+    }
+}
+
+/// A piece is a portion of the format string which represents the next part
+/// to emit. These are emitted as a stream by the `Parser` class.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Piece<'a> {
+    /// A literal string which should directly be emitted
+    String(&'a str),
+    /// This describes that formatting should process the next argument (as
+    /// specified inside) for emission.
+    NextArgument(Box<Argument<'a>>),
+}
+
+/// Representation of an argument specification.
+// Built by `Parser::argument` from the text found between a pair of braces.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct Argument<'a> {
+ /// Where to find this argument
+ pub position: Position<'a>,
+ /// The span of the position indicator. Includes any whitespace in implicit
+ /// positions (`{ }`).
+ pub position_span: InnerSpan,
+ /// How to format the argument
+ pub format: FormatSpec<'a>,
+}
+
+/// Specification for the formatting of an argument in the format string.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub struct FormatSpec<'a> {
+    /// Optionally specified character to fill alignment with.
+    pub fill: Option<char>,
+    /// Span of the optionally specified fill character.
+    pub fill_span: Option<InnerSpan>,
+    /// Optionally specified alignment.
+    pub align: Alignment,
+    /// The `+` or `-` flag.
+    pub sign: Option<Sign>,
+    /// The `#` flag.
+    pub alternate: bool,
+    /// The `0` flag.
+    pub zero_pad: bool,
+    /// The `x` or `X` flag. (Only for `Debug`.)
+    pub debug_hex: Option<DebugHex>,
+    /// The integer precision to use.
+    pub precision: Count<'a>,
+    /// The span of the precision formatting flag (for diagnostics).
+    pub precision_span: Option<InnerSpan>,
+    /// The string width requested for the resulting format.
+    pub width: Count<'a>,
+    /// The span of the width formatting flag (for diagnostics).
+    pub width_span: Option<InnerSpan>,
+    /// The descriptor string representing the name of the format desired for
+    /// this argument, this can be empty or any number of characters, although
+    /// it is required to be one word.
+    pub ty: &'a str,
+    /// The span of the descriptor string (for diagnostics).
+    pub ty_span: Option<InnerSpan>,
+}
+
+/// Enum describing where an argument for a format can be located.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Position<'a> {
+    /// The argument is implied to be located at an index
+    ArgumentImplicitlyIs(usize),
+    /// The argument is located at a specific index given in the format,
+    ArgumentIs(usize),
+    /// The argument has a name.
+    ArgumentNamed(&'a str),
+}
+
+impl Position<'_> {
+    /// Returns the index carried by the two index-based variants, and `None`
+    /// for a named argument.
+    pub fn index(&self) -> Option<usize> {
+        match self {
+            Self::ArgumentIs(i) | Self::ArgumentImplicitlyIs(i) => Some(*i),
+            Self::ArgumentNamed(_) => None,
+        }
+    }
+}
+
+/// Enum of alignments which are supported.
+// The C++ header rust-fmt.h declares `Rust::Fmt::Alignment` with the same
+// variants in the same order.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Alignment {
+ /// The value will be aligned to the left.
+ AlignLeft,
+ /// The value will be aligned to the right.
+ AlignRight,
+ /// The value will be aligned in the center.
+ AlignCenter,
+ /// The value will take on a default alignment.
+ AlignUnknown,
+}
+
+/// Enum for the sign flags.
+// The C++ header rust-fmt.h declares `Rust::Fmt::Sign` with the same variants
+// in the same order.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Sign {
+ /// The `+` flag.
+ Plus,
+ /// The `-` flag.
+ Minus,
+}
+
+/// Enum for the debug hex flags.
+// The C++ header rust-fmt.h declares `Rust::Fmt::DebugHex` with the same
+// variants in the same order.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum DebugHex {
+ /// The `x` flag in `{:x?}`.
+ Lower,
+ /// The `X` flag in `{:X?}`.
+ Upper,
+}
+
+/// A count is used for the precision and width parameters of an integer, and
+/// can reference either an argument or a literal integer.
+// The C++ header rust-fmt.h declares `Rust::Fmt::Count` with one tag per
+// variant listed here, in the same order.
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Count<'a> {
+ /// The count is specified explicitly.
+ CountIs(usize),
+ /// The count is specified by the argument with the given name.
+ CountIsName(&'a str, InnerSpan),
+ /// The count is specified by the argument at the given index.
+ CountIsParam(usize),
+ /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
+ CountIsStar(usize),
+ /// The count is implied and cannot be explicitly specified.
+ CountImplied,
+}
+
+/// An error recorded by the parser while going through a format string.
+pub struct ParseError {
+    /// Main message of the error.
+    pub description: string::String,
+    /// Optional additional note.
+    pub note: Option<string::String>,
+    /// Label attached to `span`.
+    pub label: string::String,
+    /// Location the error refers to.
+    pub span: InnerSpan,
+    /// Optional second label, together with the location it applies to.
+    pub secondary_label: Option<(string::String, InnerSpan)>,
+    /// Suggested fix, if any.
+    pub suggestion: Suggestion,
+}
+
+/// Fix attached to a `ParseError`.
+pub enum Suggestion {
+ /// No fix is suggested.
+ None,
+ /// Replace inline argument with positional argument:
+ /// `format!("{foo.bar}")` -> `format!("{}", foo.bar)`
+ UsePositional,
+ /// Remove `r#` from identifier:
+ /// `format!("{r#foo}")` -> `format!("{foo}")`
+ RemoveRawIdent(InnerSpan),
+}
+
+/// The parser structure for interpreting the input format string. This is
+/// modeled as an iterator over `Piece` structures to form a stream of tokens
+/// being output.
+///
+/// This is a recursive-descent parser for the sake of simplicity, and if
+/// necessary there's probably lots of room for improvement performance-wise.
+pub struct Parser<'a> {
+    /// Which kind of format string is being parsed
+    mode: ParseMode,
+    /// The whole format string
+    input: &'a str,
+    /// Peekable cursor over the `(byte offset, char)` pairs of `input`
+    cur: iter::Peekable<str::CharIndices<'a>>,
+    /// Error messages accumulated during parsing
+    pub errors: Vec<ParseError>,
+    /// Current position of implicit positional argument pointer
+    pub curarg: usize,
+    /// `Some(raw count)` when the string is "raw", used to position spans correctly
+    style: Option<usize>,
+    /// Start and end byte offset of every successfully parsed argument
+    pub arg_places: Vec<InnerSpan>,
+    /// Characters whose length has been changed from their in-code representation
+    width_map: Vec<InnerWidthMapping>,
+    /// Span of the last opening brace seen, used for error reporting
+    last_opening_brace: Option<InnerSpan>,
+    /// Whether the source string comes from `println!` as opposed to `format!` or `print!`
+    append_newline: bool,
+    /// Whether this formatting string was written directly in the source. This controls whether we
+    /// can use spans to refer into it and give better error messages.
+    /// N.B: This does _not_ control whether implicit argument captures can be used.
+    pub is_source_literal: bool,
+    /// Start position of the current line.
+    cur_line_start: usize,
+    /// Start and end byte offset of every line of the format string. Excludes
+    /// newline characters and leading whitespace.
+    pub line_spans: Vec<InnerSpan>,
+}
+
+impl<'a> Iterator for Parser<'a> {
+    type Item = Piece<'a>;
+
+    /// Produces the next piece of the format string: a literal run, an
+    /// argument, or `None` at the end of the input or after an unmatched `}`.
+    fn next(&mut self) -> Option<Piece<'a>> {
+        if let Some(&(pos, c)) = self.cur.peek() {
+            match c {
+                '{' => {
+                    // Remember the previous brace so that it can be restored
+                    // if this one turns out to be an escaped `{{`.
+                    let curr_last_brace = self.last_opening_brace;
+                    let byte_pos = self.to_span_index(pos);
+                    let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
+                    self.last_opening_brace = Some(byte_pos.to(lbrace_end));
+                    self.cur.next();
+                    if self.consume('{') {
+                        self.last_opening_brace = curr_last_brace;
+
+                        // Escaped brace: emit a literal starting at the second `{`.
+                        Some(String(self.string(pos + 1)))
+                    } else {
+                        let arg = self.argument(lbrace_end);
+                        if let Some(rbrace_pos) = self.consume_closing_brace(&arg) {
+                            if self.is_source_literal {
+                                let lbrace_byte_pos = self.to_span_index(pos);
+                                let rbrace_byte_pos = self.to_span_index(rbrace_pos);
+
+                                let width = self.to_span_width(rbrace_pos);
+
+                                self.arg_places.push(
+                                    lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
+                                );
+                            }
+                        } else {
+                            // No closing brace: pick a suggestion based on the
+                            // character the parser stopped at.
+                            if let Some(&(_, maybe)) = self.cur.peek() {
+                                if maybe == '?' {
+                                    self.suggest_format();
+                                } else {
+                                    self.suggest_positional_arg_instead_of_captured_arg(arg);
+                                }
+                            }
+                        }
+                        Some(NextArgument(Box::new(arg)))
+                    }
+                }
+                '}' => {
+                    self.cur.next();
+                    if self.consume('}') {
+                        // Escaped brace: emit a literal starting at the second `}`.
+                        Some(String(self.string(pos + 1)))
+                    } else {
+                        let err_pos = self.to_span_index(pos);
+                        self.err_with_note(
+                            "unmatched `}` found",
+                            "unmatched `}`",
+                            "if you intended to print `}`, you can escape it using `}}`",
+                            err_pos.to(err_pos),
+                        );
+                        None
+                    }
+                }
+                _ => Some(String(self.string(pos))),
+            }
+        } else {
+            // End of input: record the span of the last line, unless it is
+            // already the most recently recorded one.
+            if self.is_source_literal {
+                let span = self.span(self.cur_line_start, self.input.len());
+                if self.line_spans.last() != Some(&span) {
+                    self.line_spans.push(span);
+                }
+            }
+            None
+        }
+    }
+}
+
+impl<'a> Parser<'a> {
+    /// Creates a new parser for the given format string
+    ///
+    /// `style`, `append_newline` and `mode` are stored as given. `snippet`
+    /// is only passed on to `find_width_map_from_snippet`, whose result
+    /// decides whether the input is treated as a source literal.
+    pub fn new(
+        s: &'a str,
+        style: Option<usize>,
+        snippet: Option<string::String>,
+        append_newline: bool,
+        mode: ParseMode,
+    ) -> Parser<'a> {
+        let input_string_kind = find_width_map_from_snippet(s, snippet, style);
+        let (width_map, is_source_literal) = match input_string_kind {
+            InputStringKind::Literal { width_mappings } => (width_mappings, true),
+            InputStringKind::NotALiteral => (Vec::new(), false),
+        };
+
+        Parser {
+            mode,
+            input: s,
+            cur: s.char_indices().peekable(),
+            errors: vec![],
+            curarg: 0,
+            style,
+            arg_places: vec![],
+            width_map,
+            last_opening_brace: None,
+            append_newline,
+            is_source_literal,
+            cur_line_start: 0,
+            line_spans: vec![],
+        }
+    }
+
+    /// Notifies of an error. The message doesn't actually need to be of type
+    /// String, but I think it does when this eventually uses conditions so it
+    /// might as well start using it now.
+    ///
+    /// The error is appended to `self.errors` without a note, secondary
+    /// label or suggestion.
+    fn err<S1: Into<string::String>, S2: Into<string::String>>(
+        &mut self,
+        description: S1,
+        label: S2,
+        span: InnerSpan,
+    ) {
+        self.errors.push(ParseError {
+            description: description.into(),
+            note: None,
+            label: label.into(),
+            span,
+            secondary_label: None,
+            suggestion: Suggestion::None,
+        });
+    }
+
+    /// Notifies of an error. The message doesn't actually need to be of type
+    /// String, but I think it does when this eventually uses conditions so it
+    /// might as well start using it now.
+    ///
+    /// Same as `err`, except that the recorded error also carries `note`.
+    fn err_with_note<
+        S1: Into<string::String>,
+        S2: Into<string::String>,
+        S3: Into<string::String>,
+    >(
+        &mut self,
+        description: S1,
+        label: S2,
+        note: S3,
+        span: InnerSpan,
+    ) {
+        self.errors.push(ParseError {
+            description: description.into(),
+            note: Some(note.into()),
+            label: label.into(),
+            span,
+            secondary_label: None,
+            suggestion: Suggestion::None,
+        });
+    }
+
+    /// Consumes `c` if it is the next character and reports whether it did:
+    /// `true` when the character was consumed, `false` (with the iterator
+    /// left untouched) otherwise.
+    fn consume(&mut self, c: char) -> bool {
+        match self.consume_pos(c) {
+            Some(_) => true,
+            None => false,
+        }
+    }
+
+    /// Optionally consumes the specified character. If the character is not at
+    /// the current position, then the current iterator isn't moved and `None` is
+    /// returned, otherwise the character is consumed and the current position is
+    /// returned.
+    fn consume_pos(&mut self, c: char) -> Option<usize> {
+        match self.cur.peek() {
+            Some(&(pos, maybe)) if maybe == c => {
+                self.cur.next();
+                Some(pos)
+            }
+            _ => None,
+        }
+    }
+
+ /// Adjusts `pos` using the entries of `width_map`, walking them in order
+ /// and stopping at the first entry that matches neither case below.
+ fn remap_pos(&self, mut pos: usize) -> InnerOffset {
+ for width in &self.width_map {
+ if pos > width.position {
+ // Past this entry: shift by the difference between its two widths.
+ pos += width.before - width.after;
+ } else if pos == width.position && width.after == 0 {
+ // Exactly on an entry whose transformed width is zero.
+ pos += width.before;
+ } else {
+ break;
+ }
+ }
+
+ InnerOffset(pos)
+ }
+
+    /// Converts a position inside `input` into an offset usable in spans.
+    fn to_span_index(&self, pos: usize) -> InnerOffset {
+        // Raw string case: `style` holds the number of `#` in
+        // r###"..."###, and one more is added for the `r` itself.
+        let raw_prefix = match self.style {
+            Some(hashes) => hashes + 1,
+            None => 0,
+        };
+        let InnerOffset(remapped) = self.remap_pos(pos);
+        InnerOffset(raw_prefix + remapped + 1)
+    }
+
+    /// Returns the `before` width of the `width_map` entry located at the
+    /// remapped `pos`, or 1 when there is no such entry.
+    fn to_span_width(&self, pos: usize) -> usize {
+        let InnerOffset(remapped) = self.remap_pos(pos);
+        self.width_map
+            .iter()
+            .find(|mapping| mapping.position == remapped)
+            .map_or(1, |mapping| mapping.before)
+    }
+
+    /// Builds the span delimited by the span indices of `start_pos` and
+    /// `end_pos`.
+    fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
+        let first = self.to_span_index(start_pos);
+        first.to(self.to_span_index(end_pos))
+    }
+
+    /// Forces consumption of a closing `}`, after skipping whitespace. If the
+    /// brace is found, its position is returned; otherwise an error is
+    /// recorded in `self.errors` and `None` is returned.
+    ///
+    /// `arg` is the argument parsed just before; its fill character is used
+    /// to choose the note attached to the error.
+    fn consume_closing_brace(&mut self, arg: &Argument<'_>) -> Option<usize> {
+        self.ws();
+
+        let pos;
+        let description;
+
+        if let Some(&(peek_pos, maybe)) = self.cur.peek() {
+            if maybe == '}' {
+                self.cur.next();
+                return Some(peek_pos);
+            }
+
+            pos = peek_pos;
+            description = format!("expected `'}}'`, found `{maybe:?}`");
+        } else {
+            description = "expected `'}'` but string was terminated".to_owned();
+            // point at closing `"`
+            pos = self.input.len() - if self.append_newline { 1 } else { 0 };
+        }
+
+        let pos = self.to_span_index(pos);
+
+        let label = "expected `'}'`".to_owned();
+        let (note, secondary_label) = if arg.format.fill == Some('}') {
+            (
+                Some("the character `'}'` is interpreted as a fill character because of the `:` that precedes it".to_owned()),
+                arg.format.fill_span.map(|sp| ("this is not interpreted as a formatting closing brace".to_owned(), sp)),
+            )
+        } else {
+            (
+                Some("if you intended to print `{`, you can escape it using `{{`".to_owned()),
+                self.last_opening_brace
+                    .map(|sp| ("because of this opening brace".to_owned(), sp)),
+            )
+        };
+
+        self.errors.push(ParseError {
+            description,
+            note,
+            label,
+            span: pos.to(pos),
+            secondary_label,
+            suggestion: Suggestion::None,
+        });
+
+        None
+    }
+
+    /// Skips whitespace: advances the cursor until the next character is not
+    /// whitespace or the input is exhausted.
+    fn ws(&mut self) {
+        loop {
+            match self.cur.peek() {
+                Some(&(_, c)) if c.is_whitespace() => {
+                    self.cur.next();
+                }
+                _ => break,
+            }
+        }
+    }
+
+ /// Parses all of a string which is to be considered a "raw literal" in a
+ /// format string. This is everything outside of the braces.
+ ///
+ /// Returns the slice of the input going from `start` up to the next `{` or
+ /// `}` (which is left unconsumed), or up to the end of the input.
+ fn string(&mut self, start: usize) -> &'a str {
+ // we may not consume the character, peek the iterator
+ while let Some(&(pos, c)) = self.cur.peek() {
+ match c {
+ '{' | '}' => {
+ return &self.input[start..pos];
+ }
+ // For source literals, a newline closes the current line span and
+ // the next line starts right after it.
+ '\n' if self.is_source_literal => {
+ self.line_spans.push(self.span(self.cur_line_start, pos));
+ self.cur_line_start = pos + 1;
+ self.cur.next();
+ }
+ _ => {
+ // Whitespace found exactly at the current line start moves that
+ // start forward, so leading whitespace is left out of line spans.
+ if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() {
+ self.cur_line_start = pos + c.len_utf8();
+ }
+ self.cur.next();
+ }
+ }
+ }
+ &self.input[start..self.input.len()]
+ }
+
+ /// Parses an `Argument` structure, or what's contained within braces inside the format string.
+ ///
+ /// `start` is the offset used as the beginning of the returned argument's
+ /// `position_span`.
+ fn argument(&mut self, start: InnerOffset) -> Argument<'a> {
+ let pos = self.position();
+
+ // The position span ends at the next non-whitespace character, looked
+ // up on a clone of the cursor so that nothing is consumed; if there is
+ // none, the span is empty and sits at `start`.
+ let end = self
+ .cur
+ .clone()
+ .find(|(_, ch)| !ch.is_whitespace())
+ .map_or(start, |(end, _)| self.to_span_index(end));
+ let position_span = start.to(end);
+
+ let format = match self.mode {
+ ParseMode::Format => self.format(),
+ ParseMode::InlineAsm => self.inline_asm(),
+ };
+
+ // Resolve position after parsing format spec.
+ let pos = match pos {
+ Some(position) => position,
+ None => {
+ // Implicit position: take the current implicit index and advance it.
+ let i = self.curarg;
+ self.curarg += 1;
+ ArgumentImplicitlyIs(i)
+ }
+ };
+
+ Argument {
+ position: pos,
+ position_span,
+ format,
+ }
+ }
+
+ /// Parses a positional argument for a format. This could either be an
+ /// integer index of an argument, a named argument, or a blank string.
+ /// Returns `Some(parsed_position)` if the position is not implicitly
+ /// consuming a macro argument, `None` if it's the case.
+ fn position(&mut self) -> Option> {
+ if let Some(i) = self.integer() {
+ Some(ArgumentIs(i))
+ } else {
+ match self.cur.peek() {
+ Some(&(lo, c)) if is_id_start(c) => {
+ let word = self.word();
+
+ // Recover from `r#ident` in format strings.
+ // FIXME: use a let chain
+ if word == "r" {
+ if let Some((pos, '#')) = self.cur.peek() {
+ if self.input[pos + 1..]
+ .chars()
+ .next()
+ .is_some_and(is_id_start)
+ {
+ self.cur.next(); // consume the `#`
+ let word = self.word(); // the identifier that follows `r#`
+ let prefix_span = self.span(lo, lo + 2); // covers the two bytes of `r#`
+ let full_span = self.span(lo, lo + 2 + word.len());
+ self.errors.insert(0, ParseError {
+ description: "raw identifiers are not supported".to_owned(),
+ note: Some("identifiers in format strings can be keywords and don't need to be prefixed with `r#`".to_string()),
+ label: "raw identifier used here".to_owned(),
+ span: full_span,
+ secondary_label: None,
+ suggestion: Suggestion::RemoveRawIdent(prefix_span),
+ });
+ return Some(ArgumentNamed(word)); // continue as if the prefix had not been written
+ }
+ }
+ }
+
+ Some(ArgumentNamed(word))
+ }
+
+ // This is an `ArgumentNext`.
+ // Record the fact and do the resolution after parsing the
+ // format spec, to make things like `{:.*}` work.
+ _ => None,
+ }
+ }
+ }
+
+    /// Byte offset of the character the cursor would yield next, or the length
+    /// of the input once every character has been consumed.
+    fn current_pos(&mut self) -> usize {
+        self.cur
+            .peek()
+            .map_or(self.input.len(), |&(offset, _)| offset)
+    }
+
+ /// Parses a format specifier at the current position, returning all of the
+ /// relevant information in the `FormatSpec` struct.
+ fn format(&mut self) -> FormatSpec<'a> {
+ let mut spec = FormatSpec {
+ fill: None,
+ fill_span: None,
+ align: AlignUnknown,
+ sign: None,
+ alternate: false,
+ zero_pad: false,
+ debug_hex: None,
+ precision: CountImplied,
+ precision_span: None,
+ width: CountImplied,
+ width_span: None,
+ ty: &self.input[..0], // empty slice of the input, so `ty` still carries the `'a` lifetime
+ ty_span: None,
+ };
+ if !self.consume(':') {
+ return spec; // no `:` means no spec was written: everything keeps its default
+ }
+
+ // fill character
+ if let Some(&(idx, c)) = self.cur.peek() {
+ if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) { // only a fill if an alignment follows it
+ spec.fill = Some(c);
+ spec.fill_span = Some(self.span(idx, idx + 1));
+ self.cur.next();
+ }
+ }
+ // Alignment
+ if self.consume('<') {
+ spec.align = AlignLeft;
+ } else if self.consume('>') {
+ spec.align = AlignRight;
+ } else if self.consume('^') {
+ spec.align = AlignCenter;
+ }
+ // Sign flags
+ if self.consume('+') {
+ spec.sign = Some(Sign::Plus);
+ } else if self.consume('-') {
+ spec.sign = Some(Sign::Minus);
+ }
+ // Alternate marker
+ if self.consume('#') {
+ spec.alternate = true;
+ }
+ // Width and precision
+ let mut havewidth = false;
+
+ if self.consume('0') {
+ // small ambiguity with '0$' as a format string. In theory this is a
+ // '0' flag and then an ill-formatted format string with just a '$'
+ // and no count, but this is better if we instead interpret this as
+ // no '0' flag and '0$' as the width instead.
+ if let Some(end) = self.consume_pos('$') {
+ spec.width = CountIsParam(0);
+ spec.width_span = Some(self.span(end - 1, end + 1)); // covers both the `0` and the `$`
+ havewidth = true;
+ } else {
+ spec.zero_pad = true;
+ }
+ }
+
+ if !havewidth {
+ let start = self.current_pos();
+ spec.width = self.count(start);
+ if spec.width != CountImplied {
+ let end = self.current_pos();
+ spec.width_span = Some(self.span(start, end));
+ }
+ }
+
+ if let Some(start) = self.consume_pos('.') {
+ if self.consume('*') {
+ // Resolve `CountIsNextParam`.
+ // We can do this immediately as `position` is resolved later.
+ let i = self.curarg;
+ self.curarg += 1;
+ spec.precision = CountIsStar(i);
+ } else {
+ spec.precision = self.count(start + 1); // the count begins right after the `.`
+ }
+ let end = self.current_pos();
+ spec.precision_span = Some(self.span(start, end)); // the span includes the leading `.`
+ }
+
+ let ty_span_start = self.current_pos();
+ // Optional radix followed by the actual format specifier
+ if self.consume('x') {
+ if self.consume('?') {
+ spec.debug_hex = Some(DebugHex::Lower);
+ spec.ty = "?";
+ } else {
+ spec.ty = "x";
+ }
+ } else if self.consume('X') {
+ if self.consume('?') {
+ spec.debug_hex = Some(DebugHex::Upper);
+ spec.ty = "?";
+ } else {
+ spec.ty = "X";
+ }
+ } else if self.consume('?') {
+ spec.ty = "?";
+ } else {
+ spec.ty = self.word();
+ if !spec.ty.is_empty() {
+ let ty_span_end = self.current_pos();
+ spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); // only set on this branch
+ }
+ }
+ spec
+ }
+
+    /// Parses the template modifier of an inline assembly operand at the current
+    /// position. Only the `ty` and `ty_span` fields of the returned `FormatSpec`
+    /// are ever filled in here; every other field keeps its initial value.
+    fn inline_asm(&mut self) -> FormatSpec<'a> {
+        let mut modifier = FormatSpec {
+            fill: None,
+            fill_span: None,
+            align: AlignUnknown,
+            sign: None,
+            alternate: false,
+            zero_pad: false,
+            debug_hex: None,
+            precision: CountImplied,
+            precision_span: None,
+            width: CountImplied,
+            width_span: None,
+            ty: &self.input[..0],
+            ty_span: None,
+        };
+
+        // A modifier is only looked for after a `:`.
+        if self.consume(':') {
+            let word_start = self.current_pos();
+            modifier.ty = self.word();
+            if !modifier.ty.is_empty() {
+                let word_end = self.current_pos();
+                modifier.ty_span = Some(self.span(word_start, word_end));
+            }
+        }
+        modifier
+    }
+
+ /// Parses a `Count` parameter at the current position. This does not check
+ /// for 'CountIsNextParam' because that is only used in precision, not
+ /// width.
+ fn count(&mut self, start: usize) -> Count<'a> {
+ if let Some(i) = self.integer() {
+ if self.consume('$') {
+ CountIsParam(i) // `N$`: the count is taken from argument N
+ } else {
+ CountIs(i) // plain `N`: a literal count
+ }
+ } else {
+ let tmp = self.cur.clone(); // saved so the cursor can be rewound below
+ let word = self.word();
+ if word.is_empty() {
+ self.cur = tmp;
+ CountImplied
+ } else if let Some(end) = self.consume_pos('$') {
+ let name_span = self.span(start, end); // from `start` up to the position of the `$`
+ CountIsName(word, name_span)
+ } else {
+ self.cur = tmp; // a word without `$` is not a count: rewind and leave it to the caller
+ CountImplied
+ }
+ }
+ }
+
+ /// Parses a word starting at the current position: one `is_id_start` character followed by
+ /// any number of `is_id_continue` characters. Returns `""` if no word starts here.
+ fn word(&mut self) -> &'a str {
+ let start = match self.cur.peek() {
+ Some(&(pos, c)) if is_id_start(c) => {
+ self.cur.next();
+ pos
+ }
+ _ => {
+ return ""; // nothing is consumed when the cursor is not on a word start
+ }
+ };
+ let mut end = None;
+ while let Some(&(pos, c)) = self.cur.peek() {
+ if is_id_continue(c) {
+ self.cur.next();
+ } else {
+ end = Some(pos); // first character that is not part of the word
+ break;
+ }
+ }
+ let end = end.unwrap_or(self.input.len()); // the word ran to the end of the input
+ let word = &self.input[start..end];
+ if word == "_" {
+ self.err_with_note( // a lone `_` is reported, but still returned to the caller below
+ "invalid argument name `_`",
+ "invalid argument name",
+ "argument name cannot be a single underscore",
+ self.span(start, end),
+ );
+ }
+ word
+ }
+
+ fn integer(&mut self) -> Option { // parses a run of decimal digits; `None` if the cursor is not on a digit
+ let mut cur: usize = 0;
+ let mut found = false;
+ let mut overflow = false;
+ let start = self.current_pos();
+ while let Some(&(_, c)) = self.cur.peek() {
+ if let Some(i) = c.to_digit(10) {
+ let (tmp, mul_overflow) = cur.overflowing_mul(10);
+ let (tmp, add_overflow) = tmp.overflowing_add(i as usize);
+ if mul_overflow || add_overflow {
+ overflow = true;
+ }
+ cur = tmp; // keeps the wrapped value on overflow; the error is reported below
+ found = true;
+ self.cur.next();
+ } else {
+ break;
+ }
+ }
+ // Every digit is consumed even after an overflow, so the error span covers the whole number.
+ if overflow {
+ let end = self.current_pos();
+ let overflowed_int = &self.input[start..end];
+ self.err(
+ format!(
+ "integer `{}` does not fit into the type `usize` whose range is `0..={}`",
+ overflowed_int,
+ usize::MAX
+ ),
+ "integer out of range for `usize`",
+ self.span(start, end),
+ );
+ }
+ // On overflow the (wrapped) value is still returned alongside the recorded error.
+ found.then_some(cur)
+ }
+
+ fn suggest_format(&mut self) { // reports `?:` written where `:?` was expected
+ if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) { // both calls always run
+ let word = self.word();
+ let _end = self.current_pos(); // currently unused
+ let pos = self.to_span_index(pos); // position of the `?`
+ self.errors.insert( // inserted at the front of the error list
+ 0,
+ ParseError {
+ description: "expected format parameter to occur after `:`".to_owned(),
+ note: Some(format!(
+ "`?` comes after `:`, try `{}:{}` instead",
+ word, "?"
+ )),
+ label: "expected `?` to occur after `:`".to_owned(),
+ span: pos.to(pos), // zero-width span at the `?`
+ secondary_label: None,
+ suggestion: Suggestion::None,
+ },
+ );
+ }
+ }
+
+ fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) { // reports `{foo.bar}`
+ if let Some(end) = self.consume_pos('.') {
+ let byte_pos = self.to_span_index(end);
+ let start = InnerOffset(byte_pos.0 + 1); // just past the `.`
+ let field = self.argument(start); // parse what follows the `.` as if it were an argument
+ // We can only parse `foo.bar` field access, any deeper nesting,
+ // or another type of expression, like method calls, are not supported
+ if !self.consume('}') {
+ return;
+ }
+ if let ArgumentNamed(_) = arg.position { // only `name.name` is reported
+ if let ArgumentNamed(_) = field.position {
+ self.errors.insert( // inserted at the front of the error list
+ 0,
+ ParseError {
+ description: "field access isn't supported".to_string(),
+ note: None,
+ label: "not supported".to_string(),
+ span: InnerSpan::new(arg.position_span.start, field.position_span.end),
+ secondary_label: None,
+ suggestion: Suggestion::UsePositional,
+ },
+ );
+ }
+ }
+ }
+ }
+}
+
+/// Finds the indices of all characters that have been processed and differ between the actual
+/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
+/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
+// TODO: Can we give an escaped string here? probably yes - and a valid one too
+fn find_width_map_from_snippet(
+ input: &str,
+ snippet: Option,
+ str_style: Option,
+) -> InputStringKind {
+ let snippet = match snippet {
+ Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
+ _ => return InputStringKind::NotALiteral, // no snippet, or it does not look like a string literal
+ };
+
+ if str_style.is_some() {
+ return InputStringKind::Literal { // no mappings are computed when a `str_style` is given
+ width_mappings: Vec::new(),
+ };
+ }
+
+ // Strip quotes.
+ let snippet = &snippet[1..snippet.len() - 1];
+
+ // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine
+ // since we will never need to point our spans there, so we lie about it here by ignoring it.
+ // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
+ // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here they actually match up.
+ // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
+ let input_no_nl = input.trim_end_matches('\n');
+ let Some(unescaped) = unescape_string(snippet) else {
+ return InputStringKind::NotALiteral;
+ };
+
+ let unescaped_no_nl = unescaped.trim_end_matches('\n');
+
+ if unescaped_no_nl != input_no_nl {
+ // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
+ // This can for example happen with proc macros that respan generated literals.
+ return InputStringKind::NotALiteral;
+ }
+
+ let mut s = snippet.char_indices();
+ let mut width_mappings = vec![];
+ while let Some((pos, c)) = s.next() {
+ match (c, s.clone().next()) { // current character plus a one-character lookahead
+ // skip whitespace and empty lines ending in '\\'
+ ('\\', Some((_, '\n'))) => {
+ let _ = s.next();
+ let mut width = 2; // the backslash and the newline
+
+ while let Some((_, c)) = s.clone().next() {
+ if matches!(c, ' ' | '\n' | '\t') {
+ width += 1;
+ let _ = s.next();
+ } else {
+ break;
+ }
+ }
+
+ width_mappings.push(InnerWidthMapping::new(pos, width, 0));
+ }
+ ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
+ width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
+ let _ = s.next();
+ }
+ ('\\', Some((_, 'x'))) => {
+ // consume `\xAB` literal
+ s.nth(2);
+ width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
+ }
+ ('\\', Some((_, 'u'))) => {
+ let mut width = 2; // the backslash and the `u`
+ let _ = s.next();
+
+ if let Some((_, next_c)) = s.next() {
+ if next_c == '{' {
+ // consume up to 6 hexadecimal chars
+ let digits_len = s
+ .clone()
+ .take(6)
+ .take_while(|(_, c)| c.is_digit(16))
+ .count();
+
+ let len_utf8 = s
+ .as_str()
+ .get(..digits_len)
+ .and_then(|digits| u32::from_str_radix(digits, 16).ok())
+ .and_then(char::from_u32)
+ .map_or(1, char::len_utf8); // assume 1 byte when the digits are not a valid char
+
+ // Skip the digits, for chars that encode to more than 1 utf-8 byte
+ // exclude as many digits as it is greater than 1 byte
+ //
+ // So for a 3 byte character, exclude 2 digits
+ let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
+
+ // skip '{' and '}' also
+ width += required_skips + 2;
+
+ s.nth(digits_len); // advance past the digits and one further character
+ } else if next_c.is_digit(16) {
+ width += 1;
+
+ // We suggest adding `{` and `}` when appropriate, accept it here as if
+ // it were correct
+ let mut i = 0; // consume up to 6 hexadecimal chars
+ while let (Some((_, c)), _) = (s.next(), i < 6) { // NOTE(review): `i < 6` is bound to `_` and never tested
+ if c.is_digit(16) {
+ width += 1;
+ } else {
+ break;
+ }
+ i += 1;
+ }
+ }
+ }
+
+ width_mappings.push(InnerWidthMapping::new(pos, width, 1));
+ }
+ _ => {}
+ }
+ }
+
+ InputStringKind::Literal { width_mappings }
+}
+
+// TODO: I guess we can provide an `unescape_string` function to the parser... but how do we do that
+// Store it in the parser struct? we need to make it FFI-aware
+// SO this is not possible because we need `unescape_string` *before* we have a parser
+
+fn unescape_string(string: &str) -> Option { // stub: copies `string` without processing any escapes
+ // let mut buf = string::String::new();
+ // let mut ok = true;
+ // unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
+ // match unescaped_char {
+ // Ok(c) => buf.push(c),
+ // Err(_) => ok = false,
+ // }
+ // });
+ // The commented-out code above is the unescaping this stub stands in for.
+ let buf = string::String::from(string);
+ let ok = true; // always true here, so the function currently never returns `None`
+
+ ok.then_some(buf)
+}
+
+// Assert a reasonable size for `Piece`
+// #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+// rustc_index::static_assert_size!(Piece<'_>, 16);
+
+// #[cfg(test)]
+// mod tests;
diff --git a/libgrust/libformat_parser/src/bin.rs b/libgrust/libformat_parser/src/bin.rs
new file mode 100644
index 000000000000..4b1f903ad5fa
--- /dev/null
+++ b/libgrust/libformat_parser/src/bin.rs
@@ -0,0 +1,7 @@
+use libformat_parser::rust;
+
+fn main() { // debug helper: parses the first command-line argument and dumps the resulting pieces
+ dbg!(rust::collect_pieces(
+ std::env::args().nth(1).unwrap().as_str() // panics if no argument was given
+ ));
+}
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
new file mode 100644
index 000000000000..c164578a1039
--- /dev/null
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -0,0 +1,363 @@
+//! FFI interface for `rustc_format_parser`
+
+// what's the plan? Have a function return something that can be constructed into a vector?
+// or an iterator?
+
+use std::ffi::CStr;
+
+/// Conversion of a Rust value into a representation that can cross the C FFI
+/// boundary, such as turning an `Option<T>` into a nullable raw pointer.
+trait IntoFFI<T> {
+    /// Consumes `self` and returns its FFI-side counterpart.
+    fn into_ffi(self) -> T;
+}
+impl<T> IntoFFI<*const T> for Option<T> {
+    /// Maps `None` to a null pointer and `Some(v)` to an owning heap pointer.
+    ///
+    /// `self` is dropped when this returns, so a pointer into it would dangle;
+    /// the value is boxed instead and leaks unless rebuilt with `Box::from_raw`.
+    fn into_ffi(self) -> *const T {
+        self.map_or(std::ptr::null(), |v| Box::into_raw(Box::new(v)) as *const T)
+    }
+}
+
+// FIXME: Make an ffi module in a separate file
+// FIXME: Remember to leak the boxed type somehow
+// FIXME: How to encode the Option type? As a pointer? Option -> Option<&T> -> *const T could work maybe?
+mod ffi {
+ use super::IntoFFI;
+
+ // Note: copied from rustc_span
+ /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
+ #[derive(Copy, Clone, PartialEq, Eq, Debug)]
+ #[repr(C)] // C-compatible field layout, since values of this type are handed across the FFI
+ pub struct InnerSpan {
+ pub start: usize, // copied from `generic_format_parser::InnerSpan::start`
+ pub end: usize, // copied from `generic_format_parser::InnerSpan::end`
+ }
+
+ /// The location and before/after width of a character whose width has changed from its source code
+ /// representation. No conversion in this module produces this type yet.
+ #[derive(Copy, Clone, PartialEq, Eq)]
+ #[repr(C)]
+ pub struct InnerWidthMapping {
+ /// Index of the character in the source
+ pub position: usize,
+ /// The inner width in characters
+ pub before: usize,
+ /// The transformed width in characters
+ pub after: usize,
+ }
+
+ // TODO: Not needed for now?
+ // /// Whether the input string is a literal. If yes, it contains the inner width mappings.
+ // #[derive(Clone, PartialEq, Eq)]
+ // #[repr(C)]
+ // enum InputStringKind {
+ // NotALiteral,
+ // Literal {
+ // width_mappings: Vec,
+ // },
+ // }
+
+ // TODO: Not needed for now?
+ // /// The type of format string that we are parsing.
+ // #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+ // #[repr(C)]
+ // pub enum ParseMode {
+ // /// A normal format string as per `format_args!`.
+ // Format,
+ // /// An inline assembly template string for `asm!`.
+ // InlineAsm,
+ // }
+
+ #[derive(Copy, Clone)]
+ #[repr(C)]
+ struct InnerOffset(usize); // private and not used by any conversion in this module
+
+ /// A piece is a portion of the format string which represents the next part
+ /// to emit. These are emitted as a stream by the `Parser` class.
+ #[derive(Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub enum Piece<'a> {
+ /// A literal string which should directly be emitted
+ String(&'a str), // NOTE(review): `&str` has no C-defined layout; confirm how the C++ side reads it
+ /// This describes that formatting should process the next argument (as
+ /// specified inside) for emission.
+ NextArgument(*const Argument<'a>), // raw pointer produced by the `From` conversion in this module
+ }
+
+ impl<'a> Drop for Piece<'a> {
+ fn drop(&mut self) { // debugging aid: only logs; the `NextArgument` pointer is not freed here
+ println!("dropping Piece: {:?}", self)
+ }
+ }
+
+ /// Representation of an argument specification (FFI mirror of `generic_format_parser::Argument`).
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub struct Argument<'a> {
+ /// Where to find this argument
+ pub position: Position<'a>,
+ /// The span of the position indicator. Includes any whitespace in implicit
+ /// positions (`{ }`).
+ pub position_span: InnerSpan,
+ /// How to format the argument
+ pub format: FormatSpec<'a>,
+ }
+
+ /// Specification for the formatting of an argument in the format string.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub struct FormatSpec<'a> {
+ /// Optionally specified character to fill alignment with.
+ pub fill: Option,
+ /// Span of the optionally specified fill character.
+ pub fill_span: *const InnerSpan, // null when absent (pointer fields come from `IntoFFI::into_ffi`)
+ /// Optionally specified alignment.
+ pub align: Alignment,
+ /// The `+` or `-` flag.
+ pub sign: *const Sign, // null when no sign flag was given
+ /// The `#` flag.
+ pub alternate: bool,
+ /// The `0` flag.
+ pub zero_pad: bool,
+ /// The `x` or `X` flag. (Only for `Debug`.)
+ pub debug_hex: *const DebugHex, // null when absent
+ /// The integer precision to use.
+ pub precision: Count<'a>,
+ /// The span of the precision formatting flag (for diagnostics).
+ pub precision_span: *const InnerSpan, // null when absent
+ /// The string width requested for the resulting format.
+ pub width: Count<'a>,
+ /// The span of the width formatting flag (for diagnostics).
+ pub width_span: *const InnerSpan, // null when absent
+ /// The descriptor string representing the name of the format desired for
+ /// this argument, this can be empty or any number of characters, although
+ /// it is required to be one word.
+ pub ty: &'a str,
+ /// The span of the descriptor string (for diagnostics).
+ pub ty_span: *const InnerSpan, // null when absent
+ }
+
+ /// Enum describing where an argument for a format can be located.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub enum Position<'a> {
+ /// The argument is implied to be located at an index
+ ArgumentImplicitlyIs(usize),
+ /// The argument is located at a specific index given in the format,
+ ArgumentIs(usize),
+ /// The argument has a name.
+ ArgumentNamed(&'a str), // the name as written in the format string
+ }
+
+ /// Enum of alignments which are supported (mirrors `generic_format_parser::Alignment`).
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub enum Alignment {
+ /// The value will be aligned to the left.
+ AlignLeft,
+ /// The value will be aligned to the right.
+ AlignRight,
+ /// The value will be aligned in the center.
+ AlignCenter,
+ /// The value will take on a default alignment.
+ AlignUnknown,
+ }
+
+ /// Enum for the sign flags (mirrors `generic_format_parser::Sign`).
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub enum Sign {
+ /// The `+` flag.
+ Plus,
+ /// The `-` flag.
+ Minus,
+ }
+
+ /// Enum for the debug hex flags (mirrors `generic_format_parser::DebugHex`).
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub enum DebugHex {
+ /// The `x` flag in `{:x?}`.
+ Lower,
+ /// The `X` flag in `{:X?}`.
+ Upper,
+ }
+
+ /// A count is used for the precision and width parameters of an integer, and
+ /// can reference either an argument or a literal integer.
+ #[derive(Copy, Clone, Debug, PartialEq)]
+ #[repr(C)]
+ pub enum Count<'a> {
+ /// The count is specified explicitly.
+ CountIs(usize),
+ /// The count is specified by the argument with the given name.
+ CountIsName(&'a str, InnerSpan), // the name, and the span it was written at
+ /// The count is specified by the argument at the given index.
+ CountIsParam(usize),
+ /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
+ CountIsStar(usize),
+ /// The count is implied and cannot be explicitly specified.
+ CountImplied,
+ }
+
+    impl<'a> From<generic_format_parser::Piece<'a>> for Piece<'a> {
+        /// Converts a parsed piece into its `#[repr(C)]` counterpart.
+        ///
+        /// The argument of a `NextArgument` piece is converted and moved into its
+        /// own heap allocation; the stored pointer therefore stays valid after
+        /// this function returns.
+        fn from(old: generic_format_parser::Piece<'a>) -> Self {
+            match old {
+                generic_format_parser::Piece::String(x) => Piece::String(x),
+                generic_format_parser::Piece::NextArgument(x) => {
+                    // FIXME: this allocation is never freed. The pointer should be
+                    // turned back into a box (`Box::from_raw`) by a Rust destructor.
+                    let dst = Argument::from(*x);
+                    Piece::NextArgument(Box::into_raw(Box::new(dst)) as *const Argument)
+                }
+            }
+        }
+    }
+
+ impl<'a> From> for Argument<'a> {
+ fn from(old: generic_format_parser::Argument<'a>) -> Self { // field-by-field conversion to the FFI type
+ Argument {
+ position: old.position.into(),
+ position_span: old.position_span.into(),
+ format: old.format.into(),
+ }
+ }
+ }
+
+ impl<'a> From> for Position<'a> {
+ fn from(old: generic_format_parser::Position<'a>) -> Self { // maps each variant to its namesake
+ match old {
+ generic_format_parser::Position::ArgumentImplicitlyIs(x) => {
+ Position::ArgumentImplicitlyIs(x.into())
+ }
+ generic_format_parser::Position::ArgumentIs(x) => Position::ArgumentIs(x.into()),
+ generic_format_parser::Position::ArgumentNamed(x) => {
+ Position::ArgumentNamed(x.into())
+ }
+ }
+ }
+ }
+
+ impl From for InnerSpan {
+ fn from(old: generic_format_parser::InnerSpan) -> Self { // copies both bounds unchanged
+ InnerSpan {
+ start: old.start,
+ end: old.end,
+ }
+ }
+ }
+
+ impl<'a> From> for FormatSpec<'a> {
+ fn from(old: generic_format_parser::FormatSpec<'a>) -> Self { // optional fields become nullable pointers
+ FormatSpec {
+ fill: old.fill,
+ fill_span: old.fill_span.map(Into::into).into_ffi(), // convert the payload, then turn it into a pointer
+ align: old.align.into(),
+ sign: old.sign.map(Into::into).into_ffi(),
+ alternate: old.alternate,
+ zero_pad: old.zero_pad,
+ debug_hex: old.debug_hex.map(Into::into).into_ffi(),
+ precision: old.precision.into(),
+ precision_span: old.precision_span.map(Into::into).into_ffi(),
+ width: old.width.into(),
+ width_span: old.width_span.map(Into::into).into_ffi(),
+ ty: old.ty,
+ ty_span: old.ty_span.map(Into::into).into_ffi(),
+ }
+ }
+ }
+
+ impl From for DebugHex {
+ fn from(old: generic_format_parser::DebugHex) -> Self { // maps each variant to its namesake
+ match old {
+ generic_format_parser::DebugHex::Lower => DebugHex::Lower,
+ generic_format_parser::DebugHex::Upper => DebugHex::Upper,
+ }
+ }
+ }
+
+ impl<'a> From> for Count<'a> {
+ fn from(old: generic_format_parser::Count<'a>) -> Self { // maps each variant to its namesake
+ match old {
+ generic_format_parser::Count::CountIs(x) => Count::CountIs(x),
+ generic_format_parser::Count::CountIsName(x, y) => Count::CountIsName(x, y.into()), // span converted too
+ generic_format_parser::Count::CountIsParam(x) => Count::CountIsParam(x),
+ generic_format_parser::Count::CountIsStar(x) => Count::CountIsStar(x),
+ generic_format_parser::Count::CountImplied => Count::CountImplied,
+ }
+ }
+ }
+
+ impl From for Sign {
+ fn from(old: generic_format_parser::Sign) -> Self { // maps each variant to its namesake
+ match old {
+ generic_format_parser::Sign::Plus => Sign::Plus,
+ generic_format_parser::Sign::Minus => Sign::Minus,
+ }
+ }
+ }
+
+ impl From for Alignment {
+ fn from(old: generic_format_parser::Alignment) -> Self { // maps each variant to its namesake
+ match old {
+ generic_format_parser::Alignment::AlignLeft => Alignment::AlignLeft,
+ generic_format_parser::Alignment::AlignRight => Alignment::AlignRight,
+ generic_format_parser::Alignment::AlignCenter => Alignment::AlignCenter,
+ generic_format_parser::Alignment::AlignUnknown => Alignment::AlignUnknown,
+ }
+ }
+ }
+}
+
+// FIXME: Rename?
+pub mod rust {
+ use generic_format_parser::{ParseMode, Parser, Piece};
+ /// Runs the format-string parser over `input` in `ParseMode::Format` and returns every piece it yields.
+ pub fn collect_pieces(input: &str) -> Vec> {
+ let parser = Parser::new(input, None, None, true, ParseMode::Format); // NOTE(review): meaning of `None, None, true` is not visible here; confirm against `Parser::new`
+ // The parser is consumed as an iterator; its items are gathered into a vector.
+ parser.into_iter().collect()
+ }
+}
+
+// TODO: Should we instead make an FFIVector struct?
+#[repr(C)]
+pub struct PieceSlice {
+ base_ptr: *mut ffi::Piece<'static /* FIXME: That's wrong */>, // buffer of the `Vec` leaked by `collect_pieces`
+ len: usize, // length of that `Vec`
+ cap: usize, // capacity of that `Vec`; `destroy_pieces` passes all three to `Vec::from_raw_parts`
+}
+
+/// Parses the NUL-terminated format string `input` and returns its pieces as a
+/// leaked array that must be handed back to `destroy_pieces` to be freed.
+///
+/// `input` must be a valid NUL-terminated string. Panics if it is not UTF-8.
+#[no_mangle]
+pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
+    // SAFETY: relies on the caller passing a valid, NUL-terminated C string.
+    let str = unsafe { CStr::from_ptr(input) };
+
+    // FIXME: No unwrap
+    let pieces: Vec<ffi::Piece<'_>> = rust::collect_pieces(str.to_str().unwrap())
+        .into_iter()
+        .map(Into::into)
+        .collect();
+    PieceSlice {
+        len: pieces.len(),
+        cap: pieces.capacity(),
+        base_ptr: pieces.leak().as_mut_ptr(),
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) {
+    drop(unsafe { Vec::from_raw_parts(base_ptr, len, cap) }); // rebuild the leaked `Vec` and free it
+}